Update Eigen to commit: d4ae542ed1c6f3eaad29445100052489471e38ea

CHANGELOG
=========
d4ae542ed - Fix nullptr dereference issue in triangular product.
7769eb1b2 - Fix problems with recent changes and TensorFlow on Power
ba1cb6e45 - Fixes #2703 by adding max_digits10 function
9995c3da6 - Fix -Wmaybe-uninitialized in SVD
4e9e493b4 - Fix -Waggressive-loop-optimizations
6e7abeae6 - Fix ARM build warnings
81fe2d424 - Fix more gcc compiler warnings / sort-of bugs
21cd3fe20 - Optimize check_rows_cols_for_overflow
9297aae66 - Fix AVX512 nomalloc issues in trsm.
1a2bfca8f - Fix annoying warnings
63dcb429c - Fix use of arg function in CUDA.
8f927fb52 - Altivec: fix compilation with C++20 and higher
d4b05454a - Fix argument for _mm256_cvtps_ph imm parameter
15ac3765c - Fix ivcSize return type in IndexedViewMethods.h
3791ac8a1 - Fix supportsMMA to obey EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH compilation flag and compiler support.
bc57b926a - Add Quaternion constructor from real scalar and imaginary vector
31cd2ad37 - Ensure EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC is always defined on arm.
7465b7651 - Disable FP16 arithmetic for arm32.
b3267f693 - Remove unused variable in test/svd_common.h.

PiperOrigin-RevId: 552130715
Change-Id: I71c627f279ef0cbc6ce0a231a9fef9acd22e244e
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h
index 5805763..dad2af8 100644
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -357,7 +357,7 @@
       */
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     BlockImpl_dense(XprType& xpr, Index i)
-      : Base(add_to_nullable_pointer(xpr.data(),
+      : Base((BlockRows == 0 || BlockCols == 0) ? nullptr : add_to_nullable_pointer(xpr.data(),
                  i * (    ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor))
                        || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride())),
              BlockRows==1 ? 1 : xpr.rows(),
@@ -373,7 +373,7 @@
       */
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
-      : Base(add_to_nullable_pointer(xpr.data(),
+      : Base((BlockRows == 0 || BlockCols == 0) ? nullptr : add_to_nullable_pointer(xpr.data(),
                  xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol))),
         m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
     {
@@ -386,7 +386,7 @@
     BlockImpl_dense(XprType& xpr,
           Index startRow, Index startCol,
           Index blockRows, Index blockCols)
-      : Base(add_to_nullable_pointer(xpr.data(),
+      : Base((blockRows == 0 || blockCols == 0) ? nullptr : add_to_nullable_pointer(xpr.data(),
                  xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)),
              blockRows, blockCols),
         m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
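
The effect of the Block.h change, as a hedged repro sketch (not part of the patch itself): a fixed-size empty block of an empty matrix no longer performs pointer arithmetic on a null data() pointer.

    #include <Eigen/Dense>
    int main() {
      Eigen::MatrixXd m;             // 0x0, so m.data() == nullptr
      auto b = m.block<0, 0>(0, 0);  // base pointer is now nullptr directly, no UB offset
      return static_cast<int>(b.size());  // 0
    }
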
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index 0c13192..139e2ca 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -72,8 +72,9 @@
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
   EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
 #if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG))
-  typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;
-  EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);
+  EIGEN_CHECK_BINARY_COMPATIBILIY(
+      Eigen::internal::scalar_conj_product_op<Scalar EIGEN_COMMA typename OtherDerived::Scalar>, 
+      Scalar, typename OtherDerived::Scalar);
 #endif
   
   eigen_assert(size() == other.size());
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 3d2f144..ceb7a0a 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -1366,8 +1366,7 @@
 /** \internal \returns the argument of \a a as a complex number */
 template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
-  using Scalar = typename unpacket_traits<Packet>::type;
-  EIGEN_STATIC_ASSERT(NumTraits<Scalar>::IsComplex, THIS METHOD IS FOR COMPLEX TYPES ONLY)
+  EIGEN_STATIC_ASSERT(NumTraits<typename unpacket_traits<Packet>::type>::IsComplex, THIS METHOD IS FOR COMPLEX TYPES ONLY)
   using RealPacket = typename unpacket_traits<Packet>::as_real;
   // a                                              // r     i    r     i    ...
   RealPacket aflip = pcplxflip(a).v;                // i     r    i     r    ...
diff --git a/Eigen/src/Core/IO.h b/Eigen/src/Core/IO.h
index 053905f..6993729 100644
--- a/Eigen/src/Core/IO.h
+++ b/Eigen/src/Core/IO.h
@@ -117,13 +117,13 @@
 
 // NOTE: This helper is kept for backward compatibility with previous code specializing
 //       this internal::significant_decimals_impl structure. In the future we should directly
-//       call digits10() which has been introduced in July 2016 in 3.3.
+//       call max_digits10().
 template<typename Scalar>
 struct significant_decimals_impl
 {
   static inline int run()
   {
-    return NumTraits<Scalar>::digits10();
+    return NumTraits<Scalar>::max_digits10();
   }
 };
 
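
In practice (a sketch, assuming IEEE single precision and default stream formatting): full-precision printing now emits enough digits for an exact round-trip.

    #include <Eigen/Dense>
    #include <iostream>
    int main() {
      Eigen::Matrix<float, 1, 1> v;
      v << 0.1f;  // not exactly representable; needs max_digits10 == 9 digits to round-trip
      std::cout << v.format(Eigen::IOFormat(Eigen::FullPrecision)) << "\n";
    }
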
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 822701b..5507707 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -482,12 +482,8 @@
   EIGEN_DEVICE_FUNC
   static inline RealScalar run(const Scalar& x)
   {
-    #if defined(EIGEN_HIP_DEVICE_COMPILE)
-    // HIP does not seem to have a native device side implementation for the math routine "arg"
+    // There is no official ::arg on device in CUDA/HIP, so we always need to use std::arg.
     using std::arg;
-    #else
-    EIGEN_USING_STD(arg);
-    #endif
     return static_cast<RealScalar>(arg(x));
   }
 };
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h
index ff33aa6..f5275d8 100644
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -16,37 +16,6 @@
 
 namespace internal {
 
-// default implementation of digits10(), based on numeric_limits if specialized,
-// 0 for integer types, and log10(epsilon()) otherwise.
-template< typename T,
-          bool use_numeric_limits = std::numeric_limits<T>::is_specialized,
-          bool is_integer = NumTraits<T>::IsInteger>
-struct default_digits10_impl
-{
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-  static int run() { return std::numeric_limits<T>::digits10; }
-};
-
-template<typename T>
-struct default_digits10_impl<T,false,false> // Floating point
-{
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-  static int run() {
-    using std::log10;
-    using std::ceil;
-    typedef typename NumTraits<T>::Real Real;
-    return int(ceil(-log10(NumTraits<Real>::epsilon())));
-  }
-};
-
-template<typename T>
-struct default_digits10_impl<T,false,true> // Integer
-{
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-  static int run() { return 0; }
-};
-
-
 // default implementation of digits(), based on numeric_limits if specialized,
 // 0 for integer types, and log2(epsilon()) otherwise.
 template< typename T,
@@ -77,6 +46,66 @@
   static int run() { return 0; }
 };
 
+// default implementation of digits10(), based on numeric_limits if specialized,
+// 0 for integer types, and floor((digits()-1)*log10(2)) otherwise.
+template< typename T,
+          bool use_numeric_limits = std::numeric_limits<T>::is_specialized,
+          bool is_integer = NumTraits<T>::IsInteger>
+struct default_digits10_impl
+{
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  static int run() { return std::numeric_limits<T>::digits10; }
+};
+
+template<typename T>
+struct default_digits10_impl<T,false,false> // Floating point
+{
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  static int run() {
+    using std::log10;
+    using std::floor;
+    typedef typename NumTraits<T>::Real Real;
+    return int(floor((internal::default_digits_impl<Real>::run()-1)*log10(2)));
+  }
+};
+
+template<typename T>
+struct default_digits10_impl<T,false,true> // Integer
+{
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  static int run() { return 0; }
+};
+
+// default implementation of max_digits10(), based on numeric_limits if specialized,
+// 0 for integer types, and log10(2) * digits() + 1 otherwise.
+template< typename T,
+          bool use_numeric_limits = std::numeric_limits<T>::is_specialized,
+          bool is_integer = NumTraits<T>::IsInteger>
+struct default_max_digits10_impl
+{
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  static int run() { return std::numeric_limits<T>::max_digits10; }
+};
+
+template<typename T>
+struct default_max_digits10_impl<T,false,false> // Floating point
+{
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  static int run() {
+    using std::log10;
+    using std::ceil;
+    typedef typename NumTraits<T>::Real Real;
+    return int(ceil(internal::default_digits_impl<Real>::run()*log10(2)+1));
+  }
+};
+
+template<typename T>
+struct default_max_digits10_impl<T,false,true> // Integer
+{
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  static int run() { return 0; }
+};
+
 } // end namespace internal
 
 namespace numext {
@@ -143,6 +172,9 @@
   * \li digits10() function returning the number of decimal digits that can be represented without change. This is
   *     the analogue of <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/digits10">std::numeric_limits<T>::digits10</a>
   *     which is used as the default implementation if specialized.
+  * \li max_digits10() function returning the number of decimal digits required to uniquely represent all distinct values of the type. This is
+  *     the analogue of <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/max_digits10">std::numeric_limits<T>::max_digits10</a>
+  *     which is used as the default implementation if specialized.
   * \li min_exponent() and max_exponent() functions returning the highest and lowest possible values, respectively,
   *     such that the radix raised to the power exponent-1 is a normalized floating-point number.  These are equivalent to
   *     <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/min_exponent">std::numeric_limits<T>::min_exponent</a>/
@@ -181,6 +213,12 @@
   }
 
   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  static inline int max_digits10()
+  {
+    return internal::default_max_digits10_impl<T>::run();
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
   static inline int digits()
   {
     return internal::default_digits_impl<T>::run();
@@ -282,6 +320,8 @@
   static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
   static inline int digits10() { return NumTraits<Real>::digits10(); }
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  static inline int max_digits10() { return NumTraits<Real>::max_digits10(); }
 };
 
 template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
@@ -312,6 +352,8 @@
 
   EIGEN_CONSTEXPR
   static inline int digits10() { return NumTraits<Scalar>::digits10(); }
+  EIGEN_CONSTEXPR
+  static inline int max_digits10() { return NumTraits<Scalar>::max_digits10(); }
 };
 
 template<> struct NumTraits<std::string>
@@ -326,8 +368,10 @@
 
   EIGEN_CONSTEXPR
   static inline int digits10() { return 0; }
+  EIGEN_CONSTEXPR
+  static inline int max_digits10() { return 0; }
 
-private:
+ private:
   static inline std::string epsilon();
   static inline std::string dummy_precision();
   static inline std::string lowest();
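
A minimal sketch of the new trait; the asserted values are the usual IEEE expectations, stated as assumptions rather than guarantees of this patch.

    #include <Eigen/Core>
    // digits10: decimal digits representable without change;
    // max_digits10: decimal digits needed to round-trip every value.
    static_assert(Eigen::NumTraits<float>::max_digits10() == 9, "IEEE single (assumed)");
    static_assert(Eigen::NumTraits<double>::max_digits10() == 17, "IEEE double (assumed)");
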
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index fc2e941..ba6e51d 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -28,21 +28,40 @@
 
 namespace internal {
 
-template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
+template <int MaxSizeAtCompileTime, int MaxRowsAtCompileTime, int MaxColsAtCompileTime>
+struct check_rows_cols_for_overflow {
+  EIGEN_STATIC_ASSERT(MaxRowsAtCompileTime * MaxColsAtCompileTime == MaxSizeAtCompileTime,YOU MADE A PROGRAMMING MISTAKE)
   template <typename Index>
   EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index, Index) {}
 };
 
-template<> struct check_rows_cols_for_overflow<Dynamic> {
+template <int MaxRowsAtCompileTime>
+struct check_rows_cols_for_overflow<Dynamic, MaxRowsAtCompileTime, Dynamic> {
+  template <typename Index>
+  EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index, Index cols) {
+    constexpr Index MaxIndex = NumTraits<Index>::highest();
+    bool error = cols > MaxIndex / MaxRowsAtCompileTime;
+    if (error) throw_std_bad_alloc();
+  }
+};
+
+template <int MaxColsAtCompileTime>
+struct check_rows_cols_for_overflow<Dynamic, Dynamic, MaxColsAtCompileTime> {
+  template <typename Index>
+  EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index rows, Index) {
+    constexpr Index MaxIndex = NumTraits<Index>::highest();
+    bool error = rows > MaxIndex / MaxColsAtCompileTime;
+    if (error) throw_std_bad_alloc();
+  }
+};
+
+template <>
+struct check_rows_cols_for_overflow<Dynamic, Dynamic, Dynamic> {
   template <typename Index>
   EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index rows, Index cols) {
-    // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
-    // we assume Index is signed
-    Index max_index = (std::size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed
-    bool error = (rows == 0 || cols == 0) ? false
-               : (rows > max_index / cols);
-    if (error)
-      throw_std_bad_alloc();
+    constexpr Index MaxIndex = NumTraits<Index>::highest();
+    bool error = cols == 0 ? false : (rows > MaxIndex / cols);
+    if (error) throw_std_bad_alloc();
   }
 };
 
@@ -268,7 +287,7 @@
                    && internal::check_implication(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic, rows<=MaxRowsAtCompileTime)
                    && internal::check_implication(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic, cols<=MaxColsAtCompileTime)
                    && rows>=0 && cols>=0 && "Invalid sizes when resizing a matrix or array.");
-      internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(rows, cols);
+      internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime>::run(rows, cols);
       #ifdef EIGEN_INITIALIZE_COEFFS
         Index size = rows*cols;
         bool size_changed = size != this->size();
@@ -340,7 +359,7 @@
     EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
     {
       const OtherDerived& other = _other.derived();
-      internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(other.rows(), other.cols());
+      internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime>::run(other.rows(), other.cols());
       const Index othersize = other.rows()*other.cols();
       if(RowsAtCompileTime == 1)
       {
@@ -775,11 +794,9 @@
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, std::enable_if_t<Base::SizeAtCompileTime!=2,T0>* = 0)
     {
-      const bool t0_is_integer_alike = internal::is_valid_index_type<T0>::value;
-      const bool t1_is_integer_alike = internal::is_valid_index_type<T1>::value;
-      EIGEN_STATIC_ASSERT(t0_is_integer_alike &&
-                          t1_is_integer_alike,
-                          FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
+      EIGEN_STATIC_ASSERT(internal::is_valid_index_type<T0>::value &&
+                          internal::is_valid_index_type<T1>::value,
+                          T0 AND T1 MUST BE INTEGER TYPES)
       resize(rows,cols);
     }
 
@@ -967,7 +984,7 @@
           && (( Derived::IsRowMajor && _this.cols() == cols) ||  // row-major and we change only the number of rows
               (!Derived::IsRowMajor && _this.rows() == rows) ))  // column-major and we change only the number of columns
     {
-      internal::check_rows_cols_for_overflow<Derived::MaxSizeAtCompileTime>::run(rows, cols);
+      internal::check_rows_cols_for_overflow<Derived::MaxSizeAtCompileTime, Derived::MaxRowsAtCompileTime, Derived::MaxColsAtCompileTime>::run(rows, cols);
       _this.derived().m_storage.conservativeResize(rows*cols,rows,cols);
     }
     else
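
What the per-dimension specializations buy, as a hypothetical sketch: with one extent fixed at compile time, the overflow test divides by that compile-time constant instead of the run-time extent.

    #include <Eigen/Dense>
    #include <limits>
    int main() {
      Eigen::Matrix<double, 4, Eigen::Dynamic> m;  // MaxRowsAtCompileTime == 4
      // Requesting cols > highest(Index)/4 would now throw std::bad_alloc:
      // m.resize(4, std::numeric_limits<Eigen::Index>::max());
      m.resize(4, 8);
      return 0;
    }
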
diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h
index e347a0f..32d51d2 100644
--- a/Eigen/src/Core/Ref.h
+++ b/Eigen/src/Core/Ref.h
@@ -334,14 +334,14 @@
     typedef internal::traits<Ref> Traits;
 
     static constexpr bool may_map_m_object_successfully = 
-      (StrideType::InnerStrideAtCompileTime == 0 ||
-       StrideType::InnerStrideAtCompileTime == 1 ||
-       StrideType::InnerStrideAtCompileTime == Dynamic) &&
+      (static_cast<int>(StrideType::InnerStrideAtCompileTime) == 0 ||
+       static_cast<int>(StrideType::InnerStrideAtCompileTime) == 1 ||
+       static_cast<int>(StrideType::InnerStrideAtCompileTime) == Dynamic) &&
       (TPlainObjectType::IsVectorAtCompileTime ||
-       StrideType::OuterStrideAtCompileTime == 0 ||
-       StrideType::OuterStrideAtCompileTime == Dynamic ||
-       StrideType::OuterStrideAtCompileTime == TPlainObjectType::InnerSizeAtCompileTime ||
-       TPlainObjectType::InnerSizeAtCompileTime == Dynamic);
+       static_cast<int>(StrideType::OuterStrideAtCompileTime) == 0 ||
+       static_cast<int>(StrideType::OuterStrideAtCompileTime) == Dynamic ||
+       static_cast<int>(StrideType::OuterStrideAtCompileTime) == static_cast<int>(TPlainObjectType::InnerSizeAtCompileTime) ||
+       static_cast<int>(TPlainObjectType::InnerSizeAtCompileTime) == Dynamic);
   public:
 
     typedef RefBase<Ref> Base;
diff --git a/Eigen/src/Core/VectorBlock.h b/Eigen/src/Core/VectorBlock.h
index ee28da1..2715a1e 100644
--- a/Eigen/src/Core/VectorBlock.h
+++ b/Eigen/src/Core/VectorBlock.h
@@ -69,8 +69,7 @@
   public:
     EIGEN_DENSE_PUBLIC_INTERFACE(VectorBlock)
     EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock)
-
-    using Base::operator=;
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(VectorBlock)
 
     /** Dynamic-size constructor
       */
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 30eca82..9bbbc13 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -1847,7 +1847,7 @@
 
 EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f& a) {
 #ifdef EIGEN_HAS_FP16_C
-  return _mm256_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC);
+  return _mm256_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT);
 #else
   __m128i lo = float2half(_mm256_extractf128_ps(a, 0));
   __m128i hi = float2half(_mm256_extractf128_ps(a, 1));
diff --git a/Eigen/src/Core/arch/AVX512/TrsmKernel.h b/Eigen/src/Core/arch/AVX512/TrsmKernel.h
index 8fbf161..714afac 100644
--- a/Eigen/src/Core/arch/AVX512/TrsmKernel.h
+++ b/Eigen/src/Core/arch/AVX512/TrsmKernel.h
@@ -16,6 +16,13 @@
 #define EIGEN_USE_AVX512_TRSM_KERNELS 1
 #endif
 
+// TRSM kernels currently unconditionally rely on malloc with AVX512.
+// Disable them if malloc is explicitly disabled at compile-time.
+#ifdef EIGEN_NO_MALLOC
+#undef EIGEN_USE_AVX512_TRSM_KERNELS
+#define EIGEN_USE_AVX512_TRSM_KERNELS 0
+#endif
+
 #if EIGEN_USE_AVX512_TRSM_KERNELS
 #if !defined(EIGEN_USE_AVX512_TRSM_R_KERNELS)
 #define EIGEN_USE_AVX512_TRSM_R_KERNELS 1
@@ -860,32 +867,6 @@
   }
 }
 
-#if (EIGEN_USE_AVX512_TRSM_L_KERNELS) && defined(EIGEN_NO_MALLOC)
-/**
- * Reduce blocking sizes so that the size of the temporary workspace needed is less than "limit" bytes,
- *  - kB must be at least psize
- *  - numM must be at least EIGEN_AVX_MAX_NUM_ROW
- */
-template <typename Scalar, bool isBRowMajor>
-constexpr std::pair<int64_t, int64_t> trsmBlocking(const int64_t limit) {
-  constexpr int64_t psize = packet_traits<Scalar>::size;
-  int64_t kB = 15 * psize;
-  int64_t numM = 8 * EIGEN_AVX_MAX_NUM_ROW;
-  // If B is rowmajor, no temp workspace needed, so use default blocking sizes.
-  if (isBRowMajor) return {kB, numM};
-
-  // Very simple heuristic, prefer keeping kB as large as possible to fully use vector registers.
-  for (int64_t k = kB; k > psize; k -= psize) {
-    for (int64_t m = numM; m > EIGEN_AVX_MAX_NUM_ROW; m -= EIGEN_AVX_MAX_NUM_ROW) {
-      if ((((k + psize - 1) / psize + 4) * psize) * m * sizeof(Scalar) < limit) {
-        return {k, m};
-      }
-    }
-  }
-  return {psize, EIGEN_AVX_MAX_NUM_ROW};  // Minimum blocking size required
-}
-#endif  // (EIGEN_USE_AVX512_TRSM_L_KERNELS) && defined(EIGEN_NO_MALLOC)
-
 /**
  * Main triangular solve driver
  *
@@ -930,30 +911,8 @@
    * large enough to allow GEMM updates to have larger "K"s (see below.) No benchmarking has been done so far to
    * determine optimal values for numM.
    */
-#if (EIGEN_USE_AVX512_TRSM_L_KERNELS) && defined(EIGEN_NO_MALLOC)
-  /**
-   * If EIGEN_NO_MALLOC is requested, we try to reduce kB and numM so the maximum temp workspace required is less
-   * than EIGEN_STACK_ALLOCATION_LIMIT. Actual workspace size may be less, depending on the number of vectors to
-   * solve.
-   *  - kB must be at least psize
-   *  - numM must be at least EIGEN_AVX_MAX_NUM_ROW
-   *
-   * If B is row-major, the blocking sizes are not reduced (no temp workspace needed).
-   */
-  constexpr std::pair<int64_t, int64_t> blocking_ = trsmBlocking<Scalar, isBRowMajor>(EIGEN_STACK_ALLOCATION_LIMIT);
-  constexpr int64_t kB = blocking_.first;
-  constexpr int64_t numM = blocking_.second;
-  /**
-   * If the temp workspace size exceeds EIGEN_STACK_ALLOCATION_LIMIT even with the minimum blocking sizes,
-   * we throw an assertion. Use -DEIGEN_USE_AVX512_TRSM_L_KERNELS=0 if necessary
-   */
-  static_assert(!(((((kB + psize - 1) / psize + 4) * psize) * numM * sizeof(Scalar) >= EIGEN_STACK_ALLOCATION_LIMIT) &&
-                  !isBRowMajor),
-                "Temp workspace required is too large.");
-#else
   constexpr int64_t kB = (3 * psize) * 5;  // 5*U3
   constexpr int64_t numM = 8 * EIGEN_AVX_MAX_NUM_ROW;
-#endif
 
   int64_t sizeBTemp = 0;
   Scalar *B_temp = NULL;
@@ -966,13 +925,7 @@
     sizeBTemp = (((std::min(kB, numRHS) + psize - 1) / psize + 4) * psize) * numM;
   }
 
-#if !defined(EIGEN_NO_MALLOC)
   EIGEN_IF_CONSTEXPR(!isBRowMajor) B_temp = (Scalar *)handmade_aligned_malloc(sizeof(Scalar) * sizeBTemp, 64);
-#elif (EIGEN_USE_AVX512_TRSM_L_KERNELS) && defined(EIGEN_NO_MALLOC)
-  // Use alloca if malloc not allowed, requested temp workspace size should be less than EIGEN_STACK_ALLOCATION_LIMIT
-  ei_declare_aligned_stack_constructed_variable(Scalar, B_temp_alloca, sizeBTemp, 0);
-  B_temp = B_temp_alloca;
-#endif
 
   for (int64_t k = 0; k < numRHS; k += kB) {
     int64_t bK = numRHS - k > kB ? kB : numRHS - k;
@@ -1102,43 +1055,55 @@
     }
   }
 
-#if !defined(EIGEN_NO_MALLOC)
   EIGEN_IF_CONSTEXPR(!isBRowMajor) handmade_aligned_free(B_temp);
-#endif
 }
 
 // Template specializations of trsmKernelL/R for float/double and inner strides of 1.
 #if (EIGEN_USE_AVX512_TRSM_KERNELS)
 #if (EIGEN_USE_AVX512_TRSM_R_KERNELS)
-template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride, bool Specialized>
 struct trsmKernelR;
 
 template <typename Index, int Mode, int TriStorageOrder>
-struct trsmKernelR<float, Index, Mode, false, TriStorageOrder, 1> {
+struct trsmKernelR<float, Index, Mode, false, TriStorageOrder, 1, true> {
   static void kernel(Index size, Index otherSize, const float *_tri, Index triStride, float *_other, Index otherIncr,
                      Index otherStride);
 };
 
 template <typename Index, int Mode, int TriStorageOrder>
-struct trsmKernelR<double, Index, Mode, false, TriStorageOrder, 1> {
+struct trsmKernelR<double, Index, Mode, false, TriStorageOrder, 1, true> {
   static void kernel(Index size, Index otherSize, const double *_tri, Index triStride, double *_other, Index otherIncr,
                      Index otherStride);
 };
 
 template <typename Index, int Mode, int TriStorageOrder>
-EIGEN_DONT_INLINE void trsmKernelR<float, Index, Mode, false, TriStorageOrder, 1>::kernel(
+EIGEN_DONT_INLINE void trsmKernelR<float, Index, Mode, false, TriStorageOrder, 1, true>::kernel(
     Index size, Index otherSize, const float *_tri, Index triStride, float *_other, Index otherIncr,
     Index otherStride) {
   EIGEN_UNUSED_VARIABLE(otherIncr);
+#ifdef EIGEN_NO_RUNTIME_MALLOC
+  if (!is_malloc_allowed()) {
+    trsmKernelR<float, Index, Mode, false, TriStorageOrder, 1, /*Specialized=*/false>::kernel(
+          size, otherSize, _tri, triStride, _other, otherIncr, otherStride);
+    return;
+  }
+#endif
   triSolve<float, TriStorageOrder != RowMajor, true, (Mode & Lower) != Lower, (Mode & UnitDiag) != 0>(
       const_cast<float *>(_tri), _other, size, otherSize, triStride, otherStride);
 }
 
 template <typename Index, int Mode, int TriStorageOrder>
-EIGEN_DONT_INLINE void trsmKernelR<double, Index, Mode, false, TriStorageOrder, 1>::kernel(
+EIGEN_DONT_INLINE void trsmKernelR<double, Index, Mode, false, TriStorageOrder, 1, true>::kernel(
     Index size, Index otherSize, const double *_tri, Index triStride, double *_other, Index otherIncr,
     Index otherStride) {
   EIGEN_UNUSED_VARIABLE(otherIncr);
+#ifdef EIGEN_NO_RUNTIME_MALLOC
+  if (!is_malloc_allowed()) {
+    trsmKernelR<double, Index, Mode, false, TriStorageOrder, 1, /*Specialized=*/false>::kernel(
+          size, otherSize, _tri, triStride, _other, otherIncr, otherStride);
+    return;
+  }
+#endif
   triSolve<double, TriStorageOrder != RowMajor, true, (Mode & Lower) != Lower, (Mode & UnitDiag) != 0>(
       const_cast<double *>(_tri), _other, size, otherSize, triStride, otherStride);
 }
@@ -1146,35 +1111,49 @@
 
 // These trsm kernels require temporary memory allocation
 #if (EIGEN_USE_AVX512_TRSM_L_KERNELS)
-template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride, bool Specialized = true>
 struct trsmKernelL;
 
 template <typename Index, int Mode, int TriStorageOrder>
-struct trsmKernelL<float, Index, Mode, false, TriStorageOrder, 1> {
+struct trsmKernelL<float, Index, Mode, false, TriStorageOrder, 1, true> {
   static void kernel(Index size, Index otherSize, const float *_tri, Index triStride, float *_other, Index otherIncr,
                      Index otherStride);
 };
 
 template <typename Index, int Mode, int TriStorageOrder>
-struct trsmKernelL<double, Index, Mode, false, TriStorageOrder, 1> {
+struct trsmKernelL<double, Index, Mode, false, TriStorageOrder, 1, true> {
   static void kernel(Index size, Index otherSize, const double *_tri, Index triStride, double *_other, Index otherIncr,
                      Index otherStride);
 };
 
 template <typename Index, int Mode, int TriStorageOrder>
-EIGEN_DONT_INLINE void trsmKernelL<float, Index, Mode, false, TriStorageOrder, 1>::kernel(
+EIGEN_DONT_INLINE void trsmKernelL<float, Index, Mode, false, TriStorageOrder, 1, true>::kernel(
     Index size, Index otherSize, const float *_tri, Index triStride, float *_other, Index otherIncr,
     Index otherStride) {
   EIGEN_UNUSED_VARIABLE(otherIncr);
+#ifdef EIGEN_NO_RUNTIME_MALLOC
+  if (!is_malloc_allowed()) {
+    trsmKernelL<float, Index, Mode, false, TriStorageOrder, 1, /*Specialized=*/false>::kernel(
+          size, otherSize, _tri, triStride, _other, otherIncr, otherStride);
+    return;
+  }
+#endif
   triSolve<float, TriStorageOrder == RowMajor, false, (Mode & Lower) == Lower, (Mode & UnitDiag) != 0>(
       const_cast<float *>(_tri), _other, size, otherSize, triStride, otherStride);
 }
 
 template <typename Index, int Mode, int TriStorageOrder>
-EIGEN_DONT_INLINE void trsmKernelL<double, Index, Mode, false, TriStorageOrder, 1>::kernel(
+EIGEN_DONT_INLINE void trsmKernelL<double, Index, Mode, false, TriStorageOrder, 1, true>::kernel(
     Index size, Index otherSize, const double *_tri, Index triStride, double *_other, Index otherIncr,
     Index otherStride) {
   EIGEN_UNUSED_VARIABLE(otherIncr);
+#ifdef EIGEN_NO_RUNTIME_MALLOC
+  if (!is_malloc_allowed()) {
+    trsmKernelL<double, Index, Mode, false, TriStorageOrder, 1, /*Specialized=*/false>::kernel(
+          size, otherSize, _tri, triStride, _other, otherIncr, otherStride);
+    return;
+  }
+#endif
   triSolve<double, TriStorageOrder == RowMajor, false, (Mode & Lower) == Lower, (Mode & UnitDiag) != 0>(
       const_cast<double *>(_tri), _other, size, otherSize, triStride, otherStride);
 }
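
User-visible effect, sketched under the assumption that EIGEN_RUNTIME_NO_MALLOC is defined and the AVX512 kernels are enabled: when allocation is disallowed at run time, the specialized kernels now delegate to the generic allocation-free path instead of failing.

    // Build with -DEIGEN_RUNTIME_NO_MALLOC (assumption) and AVX512 enabled.
    #include <Eigen/Dense>
    int main() {
      Eigen::MatrixXd A = Eigen::MatrixXd::Identity(16, 16);
      Eigen::MatrixXd B = Eigen::MatrixXd::Ones(16, 16);
      Eigen::internal::set_is_malloc_allowed(false);
      A.triangularView<Eigen::Lower>().solveInPlace(B);  // falls back to generic trsm
      Eigen::internal::set_is_malloc_allowed(true);
      return 0;
    }
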
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProduct.h b/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
index e86cc5b..1c5c048 100644
--- a/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
+++ b/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
@@ -15,8 +15,6 @@
 #define EIGEN_ALTIVEC_USE_CUSTOM_PACK    1
 #endif
 
-#include "MatrixProductCommon.h"
-
 #if !defined(EIGEN_ALTIVEC_DISABLE_MMA)
 #define EIGEN_ALTIVEC_DISABLE_MMA 0
 #endif
@@ -45,6 +43,8 @@
 
 #endif // EIGEN_ALTIVEC_MMA_SUPPORT
 
+#include "MatrixProductCommon.h"
+
 #if defined(EIGEN_ALTIVEC_MMA_ONLY) || defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH)
   #include "MatrixProductMMA.h"
 #endif
@@ -2713,12 +2713,10 @@
 {
 #if defined(EIGEN_ALTIVEC_MMA_ONLY)
   return true;
-#else
-#if EIGEN_COMP_LLVM
-  return false;  // No dynamic dispatch for LLVM
-#else
+#elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) && defined(__BUILTIN_CPU_SUPPORTS__)
   return __builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma");
-#endif
+#else
+  return false;  // No dynamic dispatch for LLVM or older GCC
 #endif
 }
 
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h b/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h
index cb18311..daed8c1 100644
--- a/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h
+++ b/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h
@@ -5,7 +5,7 @@
 #define EIGEN_POWER_PREFETCH(p)
 #endif
 
-#ifdef _ARCH_PWR9
+#if defined(_ARCH_PWR9) || defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH)
 #define USE_PARTIAL_PACKETS
 #endif
 
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h b/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h
index 011d68e..5094118 100644
--- a/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h
+++ b/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h
@@ -394,10 +394,12 @@
     b0 = vec_mergeh(b0.m_val, b1.m_val);
   }
 
-  LhsMapper lhs2 = lhs.getSubMapper(0, j);
+  using LhsSubMapper = typename LhsMapper::SubMapper;
+
+  LhsSubMapper lhs2 = lhs.getSubMapper(0, j);
   BFLOAT16_UNROLL
   for(Index k = 0; k < num_acc; k += 2) {
-    loadVecLoop<num_acc, LhsMapper, zero>(k, lhs2, a0, b1);
+    loadVecLoop<num_acc, LhsSubMapper, zero>(k, lhs2, a0, b1);
   }
 
   multVec<num_acc>(quad_acc, a0, b0);
@@ -418,12 +420,14 @@
 
     zeroAccumulators<num_acc>(quad_acc);
 
-    LhsMapper lhs2 = lhs.getSubMapper(row, 0);
+    using LhsSubMapper = typename LhsMapper::SubMapper;
+
+    LhsSubMapper lhs2 = lhs.getSubMapper(row, 0);
     for(Index j = 0; j + 2 <= cend; j += 2) {
-      vecColLoop<num_acc, LhsMapper, RhsMapper, false, linear>(j, lhs2, rhs, quad_acc);
+      vecColLoop<num_acc, LhsSubMapper, RhsMapper, false, linear>(j, lhs2, rhs, quad_acc);
     }
     if (cend & 1) {
-      vecColLoop<num_acc, LhsMapper, RhsMapper, true, linear>(cend - 1, lhs2, rhs, quad_acc);
+      vecColLoop<num_acc, LhsSubMapper, RhsMapper, true, linear>(cend - 1, lhs2, rhs, quad_acc);
     }
 
     disassembleAccumulators<num_acc>(quad_acc, acc);
@@ -490,6 +494,33 @@
   }
 }
 
+template<typename RhsMapper, typename LhsMapper, typename = void>
+struct UseMMAStride : std::false_type {
+  static EIGEN_ALWAYS_INLINE void run(Index j2, Index jend, Index rows, LhsMapper& lhs, RhsMapper& rhs, Packet4f pAlpha, float *result)
+  {
+    using RhsSubMapper = typename RhsMapper::SubMapper;
+
+    RhsSubMapper rhs2 = rhs.getSubMapper(j2, 0);
+    calcVecColLoops<LhsMapper, RhsSubMapper, false>(jend - j2, rows, lhs, rhs2, pAlpha, result);
+  }
+};
+
+template<typename RhsMapper, typename LhsMapper>
+struct UseMMAStride<RhsMapper, LhsMapper, std::enable_if_t<std::is_member_function_pointer<
+                           decltype(&RhsMapper::stride)>::value>> : std::true_type {
+  static EIGEN_ALWAYS_INLINE void run(Index j2, Index jend, Index rows, LhsMapper& lhs, RhsMapper& rhs, Packet4f pAlpha, float *result)
+  {
+    using RhsSubMapper = typename RhsMapper::SubMapper;
+
+    RhsSubMapper rhs2 = rhs.getSubMapper(j2, 0);
+    if (rhs.stride() == 1) {
+      calcVecColLoops<LhsMapper, RhsSubMapper, true>(jend - j2, rows, lhs, rhs2, pAlpha, result);
+    } else {
+      calcVecColLoops<LhsMapper, RhsSubMapper, false>(jend - j2, rows, lhs, rhs2, pAlpha, result);
+    }
+  }
+};
+
 template<typename LhsMapper, typename RhsMapper>
 void gemvMMA_bfloat16_col(
   Index rows, Index cols,
@@ -498,8 +529,6 @@
   bfloat16* res, Index resIncr,
   bfloat16 alpha)
 {
-  typedef typename RhsMapper::LinearMapper LinearMapper;
-
   EIGEN_UNUSED_VARIABLE(resIncr);
   eigen_internal_assert(resIncr == 1);
 
@@ -523,14 +552,10 @@
   {
     Index jend = numext::mini(j2 + block_cols, cols);
 
-    LhsMapper lhs2 = lhs.getSubMapper(0, j2);
-    if (rhs.stride() == 1) {
-      LinearMapper rhs3 = rhs2.getLinearMapper(j2, 0);
-      calcVecColLoops<LhsMapper, LinearMapper, true>(jend - j2, rows, lhs2, rhs3, pAlpha, result);
-    } else {
-      RhsMapper rhs3 = rhs2.getSubMapper(j2, 0);
-      calcVecColLoops<LhsMapper, RhsMapper, false>(jend - j2, rows, lhs2, rhs3, pAlpha, result);
-    }
+    using LhsSubMapper = typename LhsMapper::SubMapper;
+
+    LhsSubMapper lhs2 = lhs.getSubMapper(0, j2);
+    UseMMAStride<RhsMapper, LhsSubMapper>::run(j2, jend, rows, lhs2, rhs2, pAlpha, result);
   }
 
   convertArrayPointerF32toBF16(result, rows, res);
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.h b/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.h
index b8e1ab6..9170230 100644
--- a/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.h
+++ b/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.h
@@ -527,7 +527,13 @@
   // linear == false
   static EIGEN_ALWAYS_INLINE Packet8bf run(RhsMapper& rhs, Index j)
   {
-    return pgather<bfloat16, Packet8bf>(&rhs(j + 0, 0), rhs.stride());
+    const Index n = unpacket_traits<Packet8bf>::size;
+    EIGEN_ALIGN16 bfloat16 to[n];
+    LOAD_STORE_UNROLL_16
+    for (Index i = 0; i < n; i++) {
+      to[i] = rhs(j + i, 0);
+    }
+    return pload<Packet8bf>(to);
   }
 };
 
@@ -537,7 +543,7 @@
   // linear == true
   static EIGEN_ALWAYS_INLINE Packet8bf run(RhsMapper& rhs, Index j)
   {
-    return rhs.template loadPacket<Packet8bf>(j + 0);
+    return rhs.template loadPacket<Packet8bf>(j + 0, 0);
   }
 };
 
@@ -558,9 +564,11 @@
     b0[1] = oneConvertBF16Perm(b2.m_val, p16uc_MERGE16_32_V2);
   }
 
-  LhsMapper lhs2 = lhs.getSubMapper(0, j);
+  using LhsSubMapper = typename LhsMapper::SubMapper;
+
+  LhsSubMapper lhs2 = lhs.getSubMapper(0, j);
   for(Index k = 0; k < num_acc; k += 2) {
-    loadVecLoopVSX<num_acc, LhsMapper, zero>(k, lhs2, a0);
+    loadVecLoopVSX<num_acc, LhsSubMapper, zero>(k, lhs2, a0);
   }
 
   multVecVSX<num_acc, zero>(acc, a0, b0);
@@ -589,12 +597,14 @@
 
     zeroAccumulators<num_acc, 2>(acc);
 
-    LhsMapper lhs2 = lhs.getSubMapper(row, 0);
+    using LhsSubMapper = typename LhsMapper::SubMapper;
+
+    LhsSubMapper lhs2 = lhs.getSubMapper(row, 0);
     for(Index j = 0; j + 2 <= cend; j += 2) {
-      vecColLoopVSX<num_acc, LhsMapper, RhsMapper, false, linear>(j, lhs2, rhs, acc);
+      vecColLoopVSX<num_acc, LhsSubMapper, RhsMapper, false, linear>(j, lhs2, rhs, acc);
     }
     if (cend & 1) {
-      vecColLoopVSX<num_acc, LhsMapper, RhsMapper, true, linear>(cend - 1, lhs2, rhs, acc);
+      vecColLoopVSX<num_acc, LhsSubMapper, RhsMapper, true, linear>(cend - 1, lhs2, rhs, acc);
     }
 
     addResultsVSX<num_acc>(acc);
@@ -716,6 +726,33 @@
   convertPointerF32toBF16VSX<1,inc>(i, result, rows, dst, resInc);
 }
 
+template<typename RhsMapper, typename LhsMapper, typename = void>
+struct UseStride : std::false_type {
+  static EIGEN_ALWAYS_INLINE void run(Index j2, Index jend, Index rows, LhsMapper& lhs, RhsMapper& rhs, Packet4f pAlpha, float *result)
+  {
+    using RhsSubMapper = typename RhsMapper::SubMapper;
+
+    RhsSubMapper rhs2 = rhs.getSubMapper(j2, 0);
+    calcVSXVecColLoops<LhsMapper, RhsSubMapper, false>(jend - j2, rows, lhs, rhs2, pAlpha, result);
+  }
+};
+
+template<typename RhsMapper, typename LhsMapper>
+struct UseStride<RhsMapper, LhsMapper, std::enable_if_t<std::is_member_function_pointer<
+                           decltype(&RhsMapper::stride)>::value>> : std::true_type {
+  static EIGEN_ALWAYS_INLINE void run(Index j2, Index jend, Index rows, LhsMapper& lhs, RhsMapper& rhs, Packet4f pAlpha, float *result)
+  {
+    using RhsSubMapper = typename RhsMapper::SubMapper;
+
+    RhsSubMapper rhs2 = rhs.getSubMapper(j2, 0);
+    if (rhs.stride() == 1) {
+      calcVSXVecColLoops<LhsMapper, RhsSubMapper, true>(jend - j2, rows, lhs, rhs2, pAlpha, result);
+    } else {
+      calcVSXVecColLoops<LhsMapper, RhsSubMapper, false>(jend - j2, rows, lhs, rhs2, pAlpha, result);
+    }
+  }
+};
+
 template<typename LhsMapper, typename RhsMapper>
 void gemv_bfloat16_col(
   Index rows, Index cols,
@@ -724,8 +761,6 @@
   bfloat16* res, Index resIncr,
   bfloat16 alpha)
 {
-  typedef typename RhsMapper::LinearMapper LinearMapper;
-
   EIGEN_UNUSED_VARIABLE(resIncr);
   eigen_internal_assert(resIncr == 1);
 
@@ -749,14 +784,10 @@
   {
     Index jend = numext::mini(j2 + block_cols, cols);
 
-    LhsMapper lhs2 = lhs.getSubMapper(0, j2);
-    if (rhs.stride() == 1) {
-      LinearMapper rhs3 = rhs2.getLinearMapper(j2, 0);
-      calcVSXVecColLoops<LhsMapper, LinearMapper, true>(jend - j2, rows, lhs2, rhs3, pAlpha, result);
-    } else {
-      RhsMapper rhs3 = rhs2.getSubMapper(j2, 0);
-      calcVSXVecColLoops<LhsMapper, RhsMapper, false>(jend - j2, rows, lhs2, rhs3, pAlpha, result);
-    }
+    using LhsSubMapper = typename LhsMapper::SubMapper;
+
+    LhsSubMapper lhs2 = lhs.getSubMapper(0, j2);
+    UseStride<RhsMapper, LhsSubMapper>::run(j2, jend, rows, lhs2, rhs2, pAlpha, result);
   }
 
   convertArrayPointerF32toBF16VSX(result, rows, res);
@@ -1410,7 +1441,7 @@
 template<typename PResPacket, typename ResPacket, typename ResScalar, typename Scalar>
 struct alpha_store
 {
-    alpha_store<PResPacket, ResPacket, ResScalar, Scalar>(ResScalar& alpha) {
+    alpha_store(ResScalar& alpha) {
         separate.r = pset1_complex<Scalar, ResScalar, ResPacket, 0x3>(alpha);
         separate.i = pset1_complex<Scalar, ResScalar, ResPacket, 0x0>(alpha);
     }
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index e52e3fb..638dea4 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -4003,6 +4003,8 @@
 
 template<> EIGEN_STRONG_INLINE Packet2d psqrt(const Packet2d& _x){ return vsqrtq_f64(_x); }
 
+#endif // EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
+
 // Do we have an fp16 types and supporting Neon intrinsics?
 #if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
 typedef float16x4_t Packet4hf;
@@ -4651,8 +4653,6 @@
 }
 #endif // end EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
 
-#endif // EIGEN_ARCH_ARM64
-
 } // end namespace internal
 
 } // end namespace Eigen
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index 7307994..e4c6f23 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -361,6 +361,10 @@
          HasQuarter = (int)ResPacketSizeQuarter < (int)ResPacketSizeHalf
   };
 
+  const Index fullColBlockEnd = LhsPacketSize * (cols / LhsPacketSize);
+  const Index halfColBlockEnd = LhsPacketSizeHalf * (cols / LhsPacketSizeHalf);
+  const Index quarterColBlockEnd = LhsPacketSizeQuarter * (cols / LhsPacketSizeQuarter);
+
   Index i=0;
   for(; i<n8; i+=8)
   {
@@ -373,8 +377,7 @@
               c6 = pset1<ResPacket>(ResScalar(0)),
               c7 = pset1<ResPacket>(ResScalar(0));
 
-    Index j=0;
-    for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)
+    for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize)
     {
       RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0);
 
@@ -395,7 +398,8 @@
     ResScalar cc5 = predux(c5);
     ResScalar cc6 = predux(c6);
     ResScalar cc7 = predux(c7);
-    for(; j<cols; ++j)
+
+    for (Index j = fullColBlockEnd; j < cols; ++j)
     {
       RhsScalar b0 = rhs(j,0);
 
@@ -424,8 +428,7 @@
               c2 = pset1<ResPacket>(ResScalar(0)),
               c3 = pset1<ResPacket>(ResScalar(0));
 
-    Index j=0;
-    for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)
+    for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize)
     {
       RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0);
 
@@ -438,7 +441,8 @@
     ResScalar cc1 = predux(c1);
     ResScalar cc2 = predux(c2);
     ResScalar cc3 = predux(c3);
-    for(; j<cols; ++j)
+
+    for(Index j = fullColBlockEnd; j < cols; ++j)
     {
       RhsScalar b0 = rhs(j,0);
 
@@ -457,8 +461,7 @@
     ResPacket c0 = pset1<ResPacket>(ResScalar(0)),
               c1 = pset1<ResPacket>(ResScalar(0));
 
-    Index j=0;
-    for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)
+    for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize)
     {
       RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0);
 
@@ -467,7 +470,8 @@
     }
     ResScalar cc0 = predux(c0);
     ResScalar cc1 = predux(c1);
-    for(; j<cols; ++j)
+
+    for(Index j = fullColBlockEnd; j < cols; ++j)
     {
       RhsScalar b0 = rhs(j,0);
 
@@ -482,15 +486,15 @@
     ResPacket c0 = pset1<ResPacket>(ResScalar(0));
     ResPacketHalf c0_h = pset1<ResPacketHalf>(ResScalar(0));
     ResPacketQuarter c0_q = pset1<ResPacketQuarter>(ResScalar(0));
-    Index j=0;
-    for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)
+
+    for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize)
     {
       RhsPacket b0 = rhs.template load<RhsPacket,Unaligned>(j,0);
       c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i,j),b0,c0);
     }
     ResScalar cc0 = predux(c0);
     if (HasHalf) {
-      for(; j+LhsPacketSizeHalf<=cols; j+=LhsPacketSizeHalf)
+      for (Index j = fullColBlockEnd; j < halfColBlockEnd; j += LhsPacketSizeHalf)
         {
           RhsPacketHalf b0 = rhs.template load<RhsPacketHalf,Unaligned>(j,0);
           c0_h = pcj_half.pmadd(lhs.template load<LhsPacketHalf,LhsAlignment>(i,j),b0,c0_h);
@@ -498,14 +502,14 @@
       cc0 += predux(c0_h);
     }
     if (HasQuarter) {
-      for(; j+LhsPacketSizeQuarter<=cols; j+=LhsPacketSizeQuarter)
+      for (Index j = halfColBlockEnd; j < quarterColBlockEnd; j += LhsPacketSizeQuarter)
         {
           RhsPacketQuarter b0 = rhs.template load<RhsPacketQuarter,Unaligned>(j,0);
           c0_q = pcj_quarter.pmadd(lhs.template load<LhsPacketQuarter,LhsAlignment>(i,j),b0,c0_q);
         }
       cc0 += predux(c0_q);
     }
-    for(; j<cols; ++j)
+    for (Index j = quarterColBlockEnd; j < cols; ++j)
     {
       cc0 += cj.pmul(lhs(i,j), rhs(j,0));
     }
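
The pattern behind the -Waggressive-loop-optimizations fix, in isolation (illustrative code, not Eigen API): computing each blocked end once replaces `j + PacketSize <= cols` conditions whose potential overflow GCC's loop optimizer flags.

    // Illustrative only: hoisted block bound for a strip-mined loop.
    void scale2x(float* x, long n) {
      const long step = 8;                        // stand-in for LhsPacketSize
      const long blockedEnd = step * (n / step);  // end of the last full block
      for (long j = 0; j < blockedEnd; j += step)
        for (long k = 0; k < step; ++k) x[j + k] *= 2.0f;  // stand-in for packet math
      for (long j = blockedEnd; j < n; ++j) x[j] *= 2.0f;  // scalar remainder
    }
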
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h
index 770107a..94eabdc 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -422,6 +422,12 @@
     internal::add_const_on_value_type_t<ActualLhsType> lhs = LhsBlasTraits::extract(a_lhs);
     internal::add_const_on_value_type_t<ActualRhsType> rhs = RhsBlasTraits::extract(a_rhs);
 
+    // Empty product, return early.  Otherwise, we get `nullptr` use errors below when we try to access
+    // coeffRef(0,0).
+    if (a_lhs.size() == 0 || a_rhs.size() == 0) {
+      return;
+    }
+
     LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs);
     RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs);
     Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
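
A hypothetical repro of the crash this guards against: a triangular product with an empty operand reached coeffRef(0,0) on an empty destination.

    #include <Eigen/Dense>
    int main() {
      Eigen::MatrixXd a(0, 0), b(0, 3);
      Eigen::MatrixXd c = a.triangularView<Eigen::Lower>() * b;  // now an early-out; c is 0x3
      return static_cast<int>(c.size());  // 0
    }
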
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h
index b148d9c..22b4a7f 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -17,7 +17,7 @@
 
 namespace internal {
 
-template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride>
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride, bool Specialized>
 struct trsmKernelL {
   // Generic Implementation of triangular solve for triangular matrix on left and multiple rhs.
   // Handles non-packed matrices.
@@ -27,7 +27,7 @@
     Scalar* _other, Index otherIncr, Index otherStride);
 };
 
-template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride>
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride, bool Specialized>
 struct trsmKernelR {
   // Generic Implementation of triangular solve for triangular matrix on right and multiple lhs.
   // Handles non-packed matrices.
@@ -37,8 +37,8 @@
     Scalar* _other, Index otherIncr, Index otherStride);
 };
 
-template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride>
-EIGEN_STRONG_INLINE void trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride>::kernel(
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride, bool Specialized>
+EIGEN_STRONG_INLINE void trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride, Specialized>::kernel(
     Index size, Index otherSize,
     const Scalar* _tri, Index triStride,
     Scalar* _other, Index otherIncr, Index otherStride)
@@ -88,8 +88,8 @@
   }
 
 
-template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
-EIGEN_STRONG_INLINE void trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride>::kernel(
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride, bool Specialized>
+EIGEN_STRONG_INLINE void trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride, Specialized>::kernel(
     Index size, Index otherSize,
     const Scalar* _tri, Index triStride,
     Scalar* _other, Index otherIncr, Index otherStride)
@@ -180,7 +180,7 @@
       // TODO: Investigate better heuristics for cutoffs.
       double L2Cap = 0.5; // 50% of L2 size
       if (size < avx512_trsm_cutoff<Scalar>(l2, cols, L2Cap)) {
-        trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, 1>::kernel(
+        trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, 1, /*Specialized=*/true>::kernel(
           size, cols, _tri, triStride, _other, 1, otherStride);
         return;
       }
@@ -253,7 +253,7 @@
               i  = IsLower ? k2 + k1: k2 - k1 - actualPanelWidth;
             }
 #endif
-            trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride>::kernel(
+            trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride, /*Specialized=*/true>::kernel(
               actualPanelWidth, actual_cols,
               _tri + i + (i)*triStride, triStride,
               _other + i*OtherInnerStride + j2*otherStride, otherIncr, otherStride);
@@ -327,7 +327,7 @@
       manage_caching_sizes(GetAction, &l1, &l2, &l3);
       double L2Cap = 0.5; // 50% of L2 size
       if (size < avx512_trsm_cutoff<Scalar>(l2, rows, L2Cap)) {
-        trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride>::
+        trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride, /*Specialized=*/true>::
           kernel(size, rows, _tri, triStride, _other, 1, otherStride);
         return;
       }
@@ -423,7 +423,7 @@
 
             {
               // unblocked triangular solve
-              trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride>::
+              trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride, /*Specialized=*/true>::
                 kernel(actualPanelWidth, actual_mc,
                             _tri + absolute_j2 + absolute_j2*triStride, triStride,
                             _other + i2*OtherInnerStride + absolute_j2*otherStride, otherIncr, otherStride);
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index 41fbe15..b0217a9 100644
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -182,6 +182,7 @@
 {
 public:
   typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;
+  typedef blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType> SubMapper;
   typedef BlasVectorMapper<Scalar, Index> VectorMapper;
 
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr=1)
@@ -191,9 +192,9 @@
     eigen_assert(incr==1);
   }
 
-  EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
+  EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE SubMapper
   getSubMapper(Index i, Index j) const {
-    return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride);
+    return SubMapper(&operator()(i, j), m_stride);
   }
 
   EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
@@ -316,12 +317,13 @@
 {
 public:
   typedef BlasLinearMapper<Scalar, Index, AlignmentType,Incr> LinearMapper;
+  typedef blas_data_mapper SubMapper;
 
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr) : m_data(data), m_stride(stride), m_incr(incr) {}
 
-  EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE blas_data_mapper
+  EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE SubMapper
   getSubMapper(Index i, Index j) const {
-    return blas_data_mapper(&operator()(i, j), m_stride, m_incr.value());
+    return SubMapper(&operator()(i, j), m_stride, m_incr.value());
   }
 
   EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
@@ -448,10 +450,12 @@
 template<typename Scalar, typename Index, int StorageOrder>
 class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> {
   public:
+  typedef const_blas_data_mapper<Scalar, Index, StorageOrder> SubMapper;
+
   EIGEN_ALWAYS_INLINE const_blas_data_mapper(const Scalar *data, Index stride) : blas_data_mapper<const Scalar, Index, StorageOrder>(data, stride) {}
 
-  EIGEN_ALWAYS_INLINE const_blas_data_mapper<Scalar, Index, StorageOrder> getSubMapper(Index i, Index j) const {
-    return const_blas_data_mapper<Scalar, Index, StorageOrder>(&(this->operator()(i, j)), this->m_stride);
+  EIGEN_ALWAYS_INLINE SubMapper getSubMapper(Index i, Index j) const {
+    return SubMapper(&(this->operator()(i, j)), this->m_stride);
   }
 };
 
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 0b867d5..0835880 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -623,7 +623,9 @@
 /// supports Neon vector intrinsics for fp16.
 #if EIGEN_ARCH_ARM_OR_ARM64
   #ifndef EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
-    #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && !defined(EIGEN_GPU_COMPILE_PHASE)
+    // Clang only supports FP16 on aarch64, and not all intrinsics are available
+    // on A32 anyways even in GCC (e.g. vdiv_f16, vsqrt_f16).
+    #if EIGEN_ARCH_ARM64 && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && !defined(EIGEN_GPU_COMPILE_PHASE)
       #define EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC 1
     #else
       #define EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC 0
@@ -635,7 +637,9 @@
 /// supports Neon scalar intrinsics for fp16.
 #if EIGEN_ARCH_ARM_OR_ARM64
   #ifndef EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC
-    #if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && !defined(EIGEN_GPU_COMPILE_PHASE)
+    // Clang only supports FP16 on aarch64, and not all intrinsics are available
+    // on A32 anyways, even in GCC (e.g. vceqh_f16).
+    #if EIGEN_ARCH_ARM64 && defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && !defined(EIGEN_GPU_COMPILE_PHASE)
       #define EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC 1
     #endif
   #endif
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index a192aac..ba5a563 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -430,10 +430,34 @@
   return (a+b-1) / b;
 }
 
+// Handle integer comparisons of different signedness.
+template <typename X, typename Y, bool XIsInteger = NumTraits<X>::IsInteger, bool XIsSigned = NumTraits<X>::IsSigned,
+          bool YIsInteger = NumTraits<Y>::IsInteger, bool YIsSigned = NumTraits<Y>::IsSigned>
+struct equal_strict_impl {
+  static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool run(const X& x, const Y& y) { return x == y; }
+};
+template <typename X, typename Y>
+struct equal_strict_impl<X, Y, true, false, true, true> {
+  // X is an unsigned integer
+  // Y is a signed integer
+  // if Y is non-negative, it may be represented exactly as its unsigned counterpart.
+  using UnsignedY = typename internal::make_unsigned<Y>::type;
+  static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool run(const X& x, const Y& y) {
+    return y < Y(0) ? false : (x == static_cast<UnsignedY>(y));
+  }
+};
+template <typename X, typename Y>
+struct equal_strict_impl<X, Y, true, true, true, false> {
+  // X is a signed integer
+  // Y is an unsigned integer
+  static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool run(const X& x, const Y& y) {
+    return equal_strict_impl<Y, X>::run(y, x);
+  }
+};
+
 // The aim of the following functions is to bypass -Wfloat-equal warnings
 // when we really want a strict equality comparison on floating points.
-template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
-bool equal_strict(const X& x,const Y& y) { return x == y; }
+template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool equal_strict(const X& x, const Y& y) { return equal_strict_impl<X, Y>::run(x, y); }
 
 #if !defined(EIGEN_GPU_COMPILE_PHASE) || (!defined(EIGEN_CUDA_ARCH) && defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC))
 template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
@@ -458,7 +482,7 @@
 bool is_exactly_one(const X& x) { return equal_strict(x, typename NumTraits<X>::Literal{1}); }
 
 template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
-bool not_equal_strict(const X& x,const Y& y) { return x != y; }
+bool not_equal_strict(const X& x,const Y& y) { return !equal_strict_impl<X, Y>::run(x, y); }
 
 #if !defined(EIGEN_GPU_COMPILE_PHASE) || (!defined(EIGEN_CUDA_ARCH) && defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC))
 template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
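
Behavioral sketch for the internal helper (hypothetical values): an unsigned maximum no longer compares equal to -1 through implicit conversion.

    #include <Eigen/Core>
    #include <cstddef>
    #include <limits>
    int main() {
      std::size_t u = std::numeric_limits<std::size_t>::max();
      std::ptrdiff_t s = -1;
      // Plain `u == s` promotes s to size_t and yields true; equal_strict now returns false.
      return Eigen::internal::equal_strict(u, s) ? 1 : 0;
    }
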
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 49fa37c..d8ac1b3 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -17,12 +17,52 @@
 
 namespace internal {
 
-template<typename IndexDest, typename IndexSrc>
-EIGEN_DEVICE_FUNC
-inline IndexDest convert_index(const IndexSrc& idx) {
-  // for sizeof(IndexDest)>=sizeof(IndexSrc) compilers should be able to optimize this away:
-  eigen_internal_assert(idx <= NumTraits<IndexDest>::highest() && "Index value to big for target type");
-  return IndexDest(idx);
+
+// useful for unsigned / signed integer comparisons when idx is intended to be non-negative
+template <typename IndexType>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename make_unsigned<IndexType>::type returnUnsignedIndexValue(
+    const IndexType& idx) {
+  EIGEN_STATIC_ASSERT((NumTraits<IndexType>::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES)
+  eigen_internal_assert(idx >= 0 && "Index value is negative and target type is unsigned");
+  using UnsignedType = typename make_unsigned<IndexType>::type;
+  return static_cast<UnsignedType>(idx);
+}
+
+template <typename IndexDest, typename IndexSrc, 
+          bool IndexDestIsInteger = NumTraits<IndexDest>::IsInteger,
+          bool IndexDestIsSigned = NumTraits<IndexDest>::IsSigned,
+          bool IndexSrcIsInteger = NumTraits<IndexSrc>::IsInteger,
+          bool IndexSrcIsSigned = NumTraits<IndexSrc>::IsSigned>
+struct convert_index_impl {
+  static inline EIGEN_DEVICE_FUNC IndexDest run(const IndexSrc& idx) {
+    eigen_internal_assert(idx <= NumTraits<IndexDest>::highest() && "Index value is too big for target type");
+    return static_cast<IndexDest>(idx);
+  }
+};
+template <typename IndexDest, typename IndexSrc>
+struct convert_index_impl<IndexDest, IndexSrc, true, true, true, false> {
+  // IndexDest is a signed integer
+  // IndexSrc is an unsigned integer
+  static inline EIGEN_DEVICE_FUNC IndexDest run(const IndexSrc& idx) {
+    eigen_internal_assert(idx <= returnUnsignedIndexValue(NumTraits<IndexDest>::highest()) &&
+                          "Index value is too big for target type");
+    return static_cast<IndexDest>(idx);
+  }
+};
+template <typename IndexDest, typename IndexSrc>
+struct convert_index_impl<IndexDest, IndexSrc, true, false, true, true> {
+  // IndexDest is an unsigned integer
+  // IndexSrc is a signed integer
+  static inline EIGEN_DEVICE_FUNC IndexDest run(const IndexSrc& idx) {
+    eigen_internal_assert(returnUnsignedIndexValue(idx) <= NumTraits<IndexDest>::highest() &&
+                          "Index value is too big for target type");
+    return static_cast<IndexDest>(idx);
+  }
+};
+
+template <typename IndexDest, typename IndexSrc>
+EIGEN_DEVICE_FUNC inline IndexDest convert_index(const IndexSrc& idx) {
+  return convert_index_impl<IndexDest, IndexSrc>::run(idx);
 }
 
 // true if T can be considered as an integral index (i.e., an integral type or enum)
@@ -271,7 +311,9 @@
 }
 
 constexpr inline int size_at_compile_time(int rows, int cols) {
-  return (rows==Dynamic || cols==Dynamic) ? Dynamic : rows * cols;
+  if (rows == 0 || cols == 0) return 0;
+  if (rows == Dynamic || cols == Dynamic) return Dynamic;
+  return rows * cols;
 }
 
 template<typename XprType> struct size_of_xpr_at_compile_time
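
Both XprHelper changes in a minimal sketch (illustrative only; convert_index
and size_at_compile_time are internal:: helpers, not public API):

  #include <Eigen/Core>
  #include <cstdint>
  int main() {
    // convert_index now performs a sign-correct range check (active in
    // internal-debug builds) before narrowing:
    std::int16_t ok = Eigen::internal::convert_index<std::int16_t>(Eigen::Index(1000));
    // convert_index<std::int16_t>(Eigen::Index(70000)) would trip the assert.

    // size_at_compile_time: a zero dimension now wins over Dynamic.
    static_assert(Eigen::internal::size_at_compile_time(0, Eigen::Dynamic) == 0, "");
    static_assert(Eigen::internal::size_at_compile_time(Eigen::Dynamic, 3) == Eigen::Dynamic, "");
    return ok == 1000 ? 0 : 1;
  }
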
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index 0aca4c4..3413a51 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -298,6 +298,15 @@
     */
   EIGEN_DEVICE_FUNC inline Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar& z) : m_coeffs(x, y, z, w){}
 
+  /** Constructs and initializes a quaternion from its real part as a scalar,
+   *  and its imaginary part as a 3-vector [\c x, \c y, \c z]
+   */
+  template <typename Derived>
+  EIGEN_DEVICE_FUNC inline Quaternion(const Scalar& w, const Eigen::MatrixBase<Derived>& vec)
+      : m_coeffs(vec.x(), vec.y(), vec.z(), w) {
+    EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived, 3);
+  }
+
   /** Constructs and initializes a quaternion from the array data */
   EIGEN_DEVICE_FUNC explicit inline Quaternion(const Scalar* data) : m_coeffs(data) {}
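
Example use of the new constructor, a sketch mirroring the check added to
test/geo_quaternion.cpp further below:

  #include <Eigen/Geometry>
  int main() {
    Eigen::Vector3d v(0.1, 0.2, 0.3);
    Eigen::Quaterniond q(1.0, v);  // w = 1, (x, y, z) = v
    return (q.w() == 1.0 && q.vec().isApprox(v)) ? 0 : 1;
  }
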
 
diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h
index a69fdca..bb1d3db 100644
--- a/Eigen/src/SVD/BDCSVD.h
+++ b/Eigen/src/SVD/BDCSVD.h
@@ -107,6 +107,7 @@
 public:
   using Base::rows;
   using Base::cols;
+  using Base::diagSize;
   using Base::computeU;
   using Base::computeV;
 
@@ -270,7 +271,6 @@
   using Base::m_computationOptions;
   using Base::m_computeThinU;
   using Base::m_computeThinV;
-  using Base::m_diagSize;
   using Base::m_info;
   using Base::m_isInitialized;
   using Base::m_matrixU;
@@ -291,7 +291,7 @@
   if (cols < m_algoswap)
     internal::allocate_small_svd<MatrixType, ComputationOptions>::run(smallSvd, rows, cols, computationOptions);
 
-  m_computed = MatrixXr::Zero(m_diagSize + 1, m_diagSize );
+  m_computed = MatrixXr::Zero(diagSize() + 1, diagSize() );
   m_compU = computeV();
   m_compV = computeU();
   m_isTranspose = (cols > rows);
@@ -307,20 +307,20 @@
   m_useQrDecomp = !disableQrDecomp && ((rows / kMinAspectRatio > cols) || (cols / kMinAspectRatio > rows));
   if (m_useQrDecomp) {
     qrDecomp = HouseholderQR<MatrixX>((std::max)(rows, cols), (std::min)(rows, cols));
-    reducedTriangle = MatrixX(m_diagSize, m_diagSize);
+    reducedTriangle = MatrixX(diagSize(), diagSize());
   }
 
   copyWorkspace = MatrixX(m_isTranspose ? cols : rows, m_isTranspose ? rows : cols);
-  bid = internal::UpperBidiagonalization<MatrixX>(m_useQrDecomp ? m_diagSize : copyWorkspace.rows(),
-                                                  m_useQrDecomp ? m_diagSize : copyWorkspace.cols());
+  bid = internal::UpperBidiagonalization<MatrixX>(m_useQrDecomp ? diagSize() : copyWorkspace.rows(),
+                                                  m_useQrDecomp ? diagSize() : copyWorkspace.cols());
 
-  if (m_compU) m_naiveU = MatrixXr::Zero(m_diagSize + 1, m_diagSize + 1 );
-  else         m_naiveU = MatrixXr::Zero(2, m_diagSize + 1 );
+  if (m_compU) m_naiveU = MatrixXr::Zero(diagSize() + 1, diagSize() + 1 );
+  else         m_naiveU = MatrixXr::Zero(2, diagSize() + 1 );
 
-  if (m_compV) m_naiveV = MatrixXr::Zero(m_diagSize, m_diagSize);
+  if (m_compV) m_naiveV = MatrixXr::Zero(diagSize(), diagSize());
 
-  m_workspace.resize((m_diagSize+1)*(m_diagSize+1)*3);
-  m_workspaceI.resize(3*m_diagSize);
+  m_workspace.resize((diagSize()+1)*(diagSize()+1)*3);
+  m_workspaceI.resize(3*diagSize());
 }  // end allocate
 
 template <typename MatrixType, int Options>
@@ -369,7 +369,7 @@
   // bidiagonalize the input matrix directly.
   if (m_useQrDecomp) {
     qrDecomp.compute(copyWorkspace);
-    reducedTriangle = qrDecomp.matrixQR().topRows(m_diagSize);
+    reducedTriangle = qrDecomp.matrixQR().topRows(diagSize());
     reducedTriangle.template triangularView<StrictlyLower>().setZero();
     bid.compute(reducedTriangle);
   } else {
@@ -380,26 +380,26 @@
   m_naiveU.setZero();
   m_naiveV.setZero();
   // FIXME this line involves a temporary matrix
-  m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose();
+  m_computed.topRows(diagSize()) = bid.bidiagonal().toDenseMatrix().transpose();
   m_computed.template bottomRows<1>().setZero();
-  divide(0, m_diagSize - 1, 0, 0, 0);
+  divide(0, diagSize() - 1, 0, 0, 0);
   if (m_info != Success && m_info != NoConvergence) {
     m_isInitialized = true;
     return *this;
   }
 
   //**** step 3 - Copy singular values and vectors
-  for (int i=0; i<m_diagSize; i++)
+  for (int i=0; i<diagSize(); i++)
   {
     RealScalar a = abs(m_computed.coeff(i, i));
     m_singularValues.coeffRef(i) = a * scale;
     if (a<considerZero)
     {
       m_nonzeroSingularValues = i;
-      m_singularValues.tail(m_diagSize - i - 1).setZero();
+      m_singularValues.tail(diagSize() - i - 1).setZero();
       break;
     }
-    else if (i == m_diagSize - 1)
+    else if (i == diagSize() - 1)
     {
       m_nonzeroSingularValues = i + 1;
       break;
@@ -426,20 +426,20 @@
   // Note exchange of U and V: m_matrixU is set from m_naiveV and vice versa
   if (computeU())
   {
-    Index Ucols = m_computeThinU ? m_diagSize : rows();
+    Index Ucols = m_computeThinU ? diagSize() : rows();
     m_matrixU = MatrixX::Identity(rows(), Ucols);
-    m_matrixU.topLeftCorner(m_diagSize, m_diagSize) = naiveV.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);
+    m_matrixU.topLeftCorner(diagSize(), diagSize()) = naiveV.template cast<Scalar>().topLeftCorner(diagSize(), diagSize());
     // FIXME the following conditionals involve temporary buffers
-    if (m_useQrDecomp) m_matrixU.topLeftCorner(householderU.cols(), m_diagSize).applyOnTheLeft(householderU);
+    if (m_useQrDecomp) m_matrixU.topLeftCorner(householderU.cols(), diagSize()).applyOnTheLeft(householderU);
     else m_matrixU.applyOnTheLeft(householderU);
   }
   if (computeV())
   {
-    Index Vcols = m_computeThinV ? m_diagSize : cols();
+    Index Vcols = m_computeThinV ? diagSize() : cols();
     m_matrixV = MatrixX::Identity(cols(), Vcols);
-    m_matrixV.topLeftCorner(m_diagSize, m_diagSize) = naiveU.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);
+    m_matrixV.topLeftCorner(diagSize(), diagSize()) = naiveU.template cast<Scalar>().topLeftCorner(diagSize(), diagSize());
     // FIXME the following conditionals involve temporary buffers
-    if (m_useQrDecomp) m_matrixV.topLeftCorner(householderV.cols(), m_diagSize).applyOnTheLeft(householderV);
+    if (m_useQrDecomp) m_matrixV.topLeftCorner(householderV.cols(), diagSize()).applyOnTheLeft(householderV);
     else m_matrixV.applyOnTheLeft(householderV);
   }
 }
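
All m_diagSize reads above now go through the diagSize() accessor introduced
in SVDBase.h below; it returns min(rows(), cols()), i.e. the number of
singular values. A usage sketch:

  #include <Eigen/SVD>
  int main() {
    Eigen::MatrixXd A = Eigen::MatrixXd::Random(5, 3);
    Eigen::BDCSVD<Eigen::MatrixXd> svd(A);
    return svd.diagSize() == 3 ? 0 : 1;
  }
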
diff --git a/Eigen/src/SVD/BDCSVD_LAPACKE.h b/Eigen/src/SVD/BDCSVD_LAPACKE.h
index d4cc173..9a1e843 100644
--- a/Eigen/src/SVD/BDCSVD_LAPACKE.h
+++ b/Eigen/src/SVD/BDCSVD_LAPACKE.h
@@ -67,8 +67,8 @@
     // prepare arguments to ?gesdd
     const lapack_int matrix_order = lapack_storage_of(matrix);
     const char jobz  = (SVD::m_computeFullU || SVD::m_computeFullV) ? 'A' : (SVD::m_computeThinU || SVD::m_computeThinV) ? 'S' : 'N';
-    const lapack_int u_cols = (jobz == 'A') ? to_lapack(SVD::m_rows) : (jobz == 'S') ? to_lapack(SVD::m_diagSize) : 1;
-    const lapack_int vt_rows = (jobz == 'A') ? to_lapack(SVD::m_cols) : (jobz == 'S') ? to_lapack(SVD::m_diagSize) : 1;
+    const lapack_int u_cols = (jobz == 'A') ? to_lapack(SVD::rows()) : (jobz == 'S') ? to_lapack(SVD::diagSize()) : 1;
+    const lapack_int vt_rows = (jobz == 'A') ? to_lapack(SVD::cols()) : (jobz == 'S') ? to_lapack(SVD::diagSize()) : 1;
     lapack_int ldu, ldvt;
     Scalar *u, *vt, dummy;
     MatrixType localU;
@@ -76,20 +76,20 @@
       ldu  = to_lapack(SVD::m_matrixU.outerStride());
       u    = SVD::m_matrixU.data();
     } else if (SVD::computeV()) {
-      localU.resize(SVD::m_rows, u_cols);
+      localU.resize(SVD::rows(), u_cols);
       ldu  = to_lapack(localU.outerStride());
       u    = localU.data();
     } else { ldu=1; u=&dummy; }
     MatrixType localV;
     if (SVD::computeU() || SVD::computeV()) {
-      localV.resize(vt_rows, SVD::m_cols);
+      localV.resize(vt_rows, SVD::cols());
       ldvt  = to_lapack(localV.outerStride());
       vt   = localV.data();
     } else { ldvt=1; vt=&dummy; }
     MatrixType temp; temp = matrix;
 
     // actual call to ?gesdd
-    lapack_int info = gesdd( matrix_order, jobz, to_lapack(SVD::m_rows), to_lapack(SVD::m_cols),
+    lapack_int info = gesdd( matrix_order, jobz, to_lapack(SVD::rows()), to_lapack(SVD::cols()),
                              to_lapack(temp.data()), to_lapack(temp.outerStride()), (RealScalar*)SVD::m_singularValues.data(),
                              to_lapack(u), ldu, to_lapack(vt), ldvt);
 
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h
index 552dfd8..5fdb3dc 100644
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -519,7 +519,7 @@
   typedef typename Base::Scalar Scalar;
   typedef typename Base::RealScalar RealScalar;
   typedef typename Base::Index Index;
-  enum {
+  enum : int {
     Options = Options_,
     QRPreconditioner = internal::get_qr_preconditioner(Options),
     RowsAtCompileTime = Base::RowsAtCompileTime,
@@ -625,6 +625,7 @@
   using Base::computeV;
   using Base::rows;
   using Base::cols;
+  using Base::diagSize;
   using Base::rank;
 
  private:
@@ -632,13 +633,11 @@
   JacobiSVD& compute_impl(const MatrixType& matrix, unsigned int computationOptions);
 
  protected:
-  using Base::m_cols;
   using Base::m_computationOptions;
   using Base::m_computeFullU;
   using Base::m_computeFullV;
   using Base::m_computeThinU;
   using Base::m_computeThinV;
-  using Base::m_diagSize;
   using Base::m_info;
   using Base::m_isAllocated;
   using Base::m_isInitialized;
@@ -646,7 +645,6 @@
   using Base::m_matrixV;
   using Base::m_nonzeroSingularValues;
   using Base::m_prescribedThreshold;
-  using Base::m_rows;
   using Base::m_singularValues;
   using Base::m_usePrescribedThreshold;
   using Base::ShouldComputeThinU;
@@ -671,18 +669,18 @@
 };
 
 template <typename MatrixType, int Options>
-void JacobiSVD<MatrixType, Options>::allocate(Index rows, Index cols, unsigned int computationOptions) {
-  if (Base::allocate(rows, cols, computationOptions)) return;
+void JacobiSVD<MatrixType, Options>::allocate(Index rows_, Index cols_, unsigned int computationOptions_) {
+  if (Base::allocate(rows_, cols_, computationOptions_)) return;
 
   eigen_assert(!(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) &&
                !(ShouldComputeThinV && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) &&
                "JacobiSVD: can't compute thin U or thin V with the FullPivHouseholderQR preconditioner. "
                "Use the ColPivHouseholderQR preconditioner instead.");
 
-  m_workMatrix.resize(m_diagSize, m_diagSize);
-  if(m_cols>m_rows)   m_qr_precond_morecols.allocate(*this);
-  if(m_rows>m_cols)   m_qr_precond_morerows.allocate(*this);
-  if(m_rows!=m_cols)  m_scaledMatrix.resize(rows,cols);
+  m_workMatrix.resize(diagSize(), diagSize());
+  if(cols()>rows())   m_qr_precond_morecols.allocate(*this);
+  if(rows()>cols())   m_qr_precond_morerows.allocate(*this);
+  if(rows()!=cols())  m_scaledMatrix.resize(rows(),cols());
 }
 
 template <typename MatrixType, int Options>
@@ -711,7 +709,7 @@
   
   /*** step 1. The R-SVD step: we use a QR decomposition to reduce to the case of a square matrix */
 
-  if(m_rows!=m_cols)
+  if(rows() != cols())
   {
     m_scaledMatrix = matrix / scale;
     m_qr_precond_morecols.run(*this, m_scaledMatrix);
@@ -719,11 +717,11 @@
   }
   else
   {
-    m_workMatrix = matrix.block(0,0,m_diagSize,m_diagSize) / scale;
-    if(m_computeFullU) m_matrixU.setIdentity(m_rows,m_rows);
-    if(m_computeThinU) m_matrixU.setIdentity(m_rows,m_diagSize);
-    if(m_computeFullV) m_matrixV.setIdentity(m_cols,m_cols);
-    if(m_computeThinV) m_matrixV.setIdentity(m_cols, m_diagSize);
+    m_workMatrix = matrix.template topLeftCorner<DiagSizeAtCompileTime,DiagSizeAtCompileTime>(diagSize(),diagSize()) / scale;
+    if(m_computeFullU) m_matrixU.setIdentity(rows(),rows());
+    if(m_computeThinU) m_matrixU.setIdentity(rows(),diagSize());
+    if(m_computeFullV) m_matrixV.setIdentity(cols(),cols());
+    if(m_computeThinV) m_matrixV.setIdentity(cols(),diagSize());
   }
 
   /*** step 2. The main Jacobi SVD iteration. ***/
@@ -736,7 +734,7 @@
 
     // do a sweep: for all index pairs (p,q), perform SVD of the corresponding 2x2 sub-matrix
 
-    for(Index p = 1; p < m_diagSize; ++p)
+    for(Index p = 1; p < diagSize(); ++p)
     {
       for(Index q = 0; q < p; ++q)
       {
@@ -771,7 +769,7 @@
 
   /*** step 3. The work matrix is now diagonal, so ensure it's positive so its diagonal entries are the singular values ***/
 
-  for(Index i = 0; i < m_diagSize; ++i)
+  for(Index i = 0; i < diagSize(); ++i)
   {
     // For a complex matrix, some diagonal coefficients might not have been
     // treated by svd_precondition_2x2_block_to_be_real, and the imaginary part
@@ -795,11 +793,11 @@
 
   /*** step 4. Sort singular values in descending order and compute the number of nonzero singular values ***/
 
-  m_nonzeroSingularValues = m_diagSize;
-  for(Index i = 0; i < m_diagSize; i++)
+  m_nonzeroSingularValues = diagSize();
+  for(Index i = 0; i < diagSize(); i++)
   {
     Index pos;
-    RealScalar maxRemainingSingularValue = m_singularValues.tail(m_diagSize-i).maxCoeff(&pos);
+    RealScalar maxRemainingSingularValue = m_singularValues.tail(diagSize()-i).maxCoeff(&pos);
     if(numext::is_exactly_zero(maxRemainingSingularValue))
     {
       m_nonzeroSingularValues = i;
diff --git a/Eigen/src/SVD/JacobiSVD_LAPACKE.h b/Eigen/src/SVD/JacobiSVD_LAPACKE.h
index 93244cd..11247b1 100644
--- a/Eigen/src/SVD/JacobiSVD_LAPACKE.h
+++ b/Eigen/src/SVD/JacobiSVD_LAPACKE.h
@@ -51,7 +51,7 @@
   allocate(matrix.rows(), matrix.cols(), computationOptions); \
 \
   /*const RealScalar precision = RealScalar(2) * NumTraits<Scalar>::epsilon();*/ \
-  m_nonzeroSingularValues = m_diagSize; \
+  m_nonzeroSingularValues = diagSize(); \
 \
   lapack_int lda = internal::convert_index<lapack_int>(matrix.outerStride()), ldu, ldvt; \
   lapack_int matrix_order = LAPACKE_COLROW; \
@@ -64,15 +64,15 @@
     u    = (LAPACKE_TYPE*)m_matrixU.data(); \
   } else { ldu=1; u=&dummy; }\
   MatrixType localV; \
-  lapack_int vt_rows = (m_computeFullV) ? internal::convert_index<lapack_int>(m_cols) : (m_computeThinV) ? internal::convert_index<lapack_int>(m_diagSize) : 1; \
+  lapack_int vt_rows = (m_computeFullV) ? internal::convert_index<lapack_int>(cols()) : (m_computeThinV) ? internal::convert_index<lapack_int>(diagSize()) : 1; \
   if (computeV()) { \
-    localV.resize(vt_rows, m_cols); \
+    localV.resize(vt_rows, cols()); \
     ldvt  = internal::convert_index<lapack_int>(localV.outerStride()); \
     vt   = (LAPACKE_TYPE*)localV.data(); \
   } else { ldvt=1; vt=&dummy; }\
-  Matrix<LAPACKE_RTYPE, Dynamic, Dynamic> superb; superb.resize(m_diagSize, 1); \
+  Matrix<LAPACKE_RTYPE, Dynamic, Dynamic> superb; superb.resize(diagSize(), 1); \
   MatrixType m_temp; m_temp = matrix; \
-  lapack_int info = LAPACKE_##LAPACKE_PREFIX##gesvd( matrix_order, jobu, jobvt, internal::convert_index<lapack_int>(m_rows), internal::convert_index<lapack_int>(m_cols), (LAPACKE_TYPE*)m_temp.data(), lda, (LAPACKE_RTYPE*)m_singularValues.data(), u, ldu, vt, ldvt, superb.data()); \
+  lapack_int info = LAPACKE_##LAPACKE_PREFIX##gesvd( matrix_order, jobu, jobvt, internal::convert_index<lapack_int>(rows()), internal::convert_index<lapack_int>(cols()), (LAPACKE_TYPE*)m_temp.data(), lda, (LAPACKE_RTYPE*)m_singularValues.data(), u, ldu, vt, ldvt, superb.data()); \
   /* Check the result of the LAPACK call */ \
   if (info < 0 || !m_singularValues.allFinite()) { \
     m_info = InvalidInput; \
@@ -82,7 +82,7 @@
     m_info = Success; \
     if (computeV()) m_matrixV = localV.adjoint(); \
   } \
- /* for(int i=0;i<m_diagSize;i++) if (m_singularValues.coeffRef(i) < precision) { m_nonzeroSingularValues--; m_singularValues.coeffRef(i)=RealScalar(0);}*/ \
+ /* for(int i=0;i<diagSize();i++) if (m_singularValues.coeffRef(i) < precision) { m_nonzeroSingularValues--; m_singularValues.coeffRef(i)=RealScalar(0);}*/ \
   m_isInitialized = true; \
   return *this; \
 }
diff --git a/Eigen/src/SVD/SVDBase.h b/Eigen/src/SVD/SVDBase.h
index 10a6d30..cb018cc 100644
--- a/Eigen/src/SVD/SVDBase.h
+++ b/Eigen/src/SVD/SVDBase.h
@@ -283,8 +283,9 @@
   /** \returns true if \a V (full or thin) is asked for in this SVD decomposition */
   inline bool computeV() const { return m_computeFullV || m_computeThinV; }
 
-  inline Index rows() const { return m_rows; }
-  inline Index cols() const { return m_cols; }
+  inline Index rows() const { return m_rows.value(); }
+  inline Index cols() const { return m_cols.value(); }
+  inline Index diagSize() const { return m_diagSize.value(); }
   
   #ifdef EIGEN_PARSED_BY_DOXYGEN
   /** \returns a (least squares) solution of \f$ A x = b \f$ using the current SVD decomposition of A.
@@ -348,7 +349,10 @@
   bool m_computeFullU, m_computeThinU;
   bool m_computeFullV, m_computeThinV;
   unsigned int m_computationOptions;
-  Index m_nonzeroSingularValues, m_rows, m_cols, m_diagSize;
+  Index m_nonzeroSingularValues;
+  internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
+  internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
+  internal::variable_if_dynamic<Index, DiagSizeAtCompileTime> m_diagSize;
   RealScalar m_prescribedThreshold;
 
   /** \brief Default Constructor.
@@ -368,9 +372,11 @@
         m_computeFullV(false),
         m_computeThinV(false),
         m_computationOptions(0),
-        m_rows(-1),
-        m_cols(-1),
-        m_diagSize(0) {}
+        m_nonzeroSingularValues(0),
+        m_rows(RowsAtCompileTime),
+        m_cols(ColsAtCompileTime),
+        m_diagSize(DiagSizeAtCompileTime),
+        m_prescribedThreshold(0) {}
 };
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -409,15 +415,15 @@
   eigen_assert(rows >= 0 && cols >= 0);
 
   if (m_isAllocated &&
-      rows == m_rows &&
-      cols == m_cols &&
+      rows == m_rows.value() &&
+      cols == m_cols.value() &&
       computationOptions == m_computationOptions)
   {
     return true;
   }
 
-  m_rows = rows;
-  m_cols = cols;
+  m_rows.setValue(rows);
+  m_cols.setValue(cols);
   m_info = Success;
   m_isInitialized = false;
   m_isAllocated = true;
@@ -430,12 +436,12 @@
   eigen_assert(!(m_computeFullU && m_computeThinU) && "SVDBase: you can't ask for both full and thin U");
   eigen_assert(!(m_computeFullV && m_computeThinV) && "SVDBase: you can't ask for both full and thin V");
 
-  m_diagSize = (std::min)(m_rows, m_cols);
-  m_singularValues.resize(m_diagSize);
+  m_diagSize.setValue(numext::mini(m_rows.value(), m_cols.value()));
+  m_singularValues.resize(m_diagSize.value());
   if(RowsAtCompileTime==Dynamic)
-    m_matrixU.resize(m_rows, m_computeFullU ? m_rows : m_computeThinU ? m_diagSize : 0);
+    m_matrixU.resize(m_rows.value(), m_computeFullU ? m_rows.value() : m_computeThinU ? m_diagSize.value() : 0);
   if(ColsAtCompileTime==Dynamic)
-    m_matrixV.resize(m_cols, m_computeFullV ? m_cols : m_computeThinV ? m_diagSize : 0);
+    m_matrixV.resize(m_cols.value(), m_computeFullV ? m_cols.value() : m_computeThinV ? m_diagSize.value() : 0);
 
   return false;
 }
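
Keeping the sizes in internal::variable_if_dynamic means a fixed-size
decomposition stores them in the type rather than in the object. A rough,
simplified sketch of the idiom (not Eigen's actual definition):

  // Compile-time case: empty, the value lives in the template parameter.
  template <typename T, int Value>
  struct variable_if_dynamic {
    explicit variable_if_dynamic(T) {}
    static constexpr T value() { return T(Value); }
    void setValue(T) {}  // no-op: the value is fixed at compile time
  };
  // Dynamic (== -1) case: holds an ordinary runtime value.
  template <typename T>
  struct variable_if_dynamic<T, -1> {
    T m_value;
    explicit variable_if_dynamic(T v) : m_value(v) {}
    T value() const { return m_value; }
    void setValue(T v) { m_value = v; }
  };
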
diff --git a/Eigen/src/ThreadPool/EventCount.h b/Eigen/src/ThreadPool/EventCount.h
index 034637c..3b57d27 100644
--- a/Eigen/src/ThreadPool/EventCount.h
+++ b/Eigen/src/ThreadPool/EventCount.h
@@ -230,7 +230,7 @@
   void Unpark(Waiter* w) {
     for (Waiter* next; w; w = next) {
       uint64_t wnext = w->next.load(std::memory_order_relaxed) & kStackMask;
-      next = wnext == kStackMask ? nullptr : &waiters_[wnext];
+      next = wnext == kStackMask ? nullptr : &waiters_[internal::convert_index<size_t>(wnext)];
       unsigned state;
       {
         EIGEN_MUTEX_LOCK lock(w->mu);
diff --git a/Eigen/src/plugins/IndexedViewMethods.h b/Eigen/src/plugins/IndexedViewMethods.h
index 78f12fe..e5432ea 100644
--- a/Eigen/src/plugins/IndexedViewMethods.h
+++ b/Eigen/src/plugins/IndexedViewMethods.h
@@ -37,7 +37,7 @@
 }
 
 template <typename Indices>
-inline IvcColType<Indices> ivcSize(const Indices& indices) const {
+inline IvcType<Indices> ivcSize(const Indices& indices) const {
   return internal::makeIndexedViewCompatible(
       indices, internal::variable_if_dynamic<Index, SizeAtCompileTime>(derived().size()), Specialized);
 }
diff --git a/doc/CustomizingEigen_CustomScalar.dox b/doc/CustomizingEigen_CustomScalar.dox
index 24e5f56..672cfc3 100644
--- a/doc/CustomizingEigen_CustomScalar.dox
+++ b/doc/CustomizingEigen_CustomScalar.dox
@@ -76,6 +76,7 @@
     static inline Real epsilon() { return 0; }
     static inline Real dummy_precision() { return 0; }
     static inline int digits10() { return 0; }
+    static inline int max_digits10() { return 0; }
 
     enum {
       IsInteger = 0,
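
For context, max_digits10 is the number of decimal digits needed to serialize
a value and read it back without loss (digits10 covers the opposite
direction: decimal digits guaranteed to survive a round-trip through the
type). A sketch for a built-in scalar:

  #include <Eigen/Core>
  #include <limits>
  int main() {
    // For IEEE-754 double both are 17; NumTraits forwards to
    // std::numeric_limits for built-in types.
    static_assert(std::numeric_limits<double>::max_digits10 == 17, "");
    return Eigen::NumTraits<double>::max_digits10() == 17 ? 0 : 1;
  }
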
diff --git a/test/array_cwise.cpp b/test/array_cwise.cpp
index 1648c7c..d06fa2c 100644
--- a/test/array_cwise.cpp
+++ b/test/array_cwise.cpp
@@ -87,21 +87,21 @@
 template <typename Scalar, typename Fn, typename RefFn>
 void binary_op_test(std::string name, Fn fun, RefFn ref) {
   const Scalar tol = test_precision<Scalar>();
-  Array<Scalar, Dynamic, Dynamic> x;
-  Array<Scalar, Dynamic, Dynamic> y;
-  special_value_pairs(x, y);
+  Array<Scalar, Dynamic, Dynamic> lhs;
+  Array<Scalar, Dynamic, Dynamic> rhs;
+  special_value_pairs(lhs, rhs);
 
-  Array<Scalar, Dynamic, Dynamic> actual = fun(x, y);
+  Array<Scalar, Dynamic, Dynamic> actual = fun(lhs, rhs);
   bool all_pass = true;
-  for (Index i = 0; i < x.rows(); ++i) {
-    for (Index j = 0; j < x.cols(); ++j) {
-      Scalar e = static_cast<Scalar>(ref(x(i,j), y(i,j)));
+  for (Index i = 0; i < lhs.rows(); ++i) {
+    for (Index j = 0; j < lhs.cols(); ++j) {
+      Scalar e = static_cast<Scalar>(ref(lhs(i,j), rhs(i,j)));
       Scalar a = actual(i, j);
       bool success = (a==e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) || ((numext::isnan)(a) && (numext::isnan)(e));
       if ((a == a) && (e == e)) success &= (bool)numext::signbit(e) == (bool)numext::signbit(a);
       all_pass &= success;
       if (!success) {
-        std::cout << name << "(" << x(i,j) << "," << y(i,j) << ") = " << a << " !=  " << e << std::endl;
+        std::cout << name << "(" << lhs(i,j) << "," << rhs(i,j) << ") = " << a << " !=  " << e << std::endl;
       }
     }
   }
@@ -139,27 +139,27 @@
 void unary_op_test(std::string name, Fn fun, RefFn ref) {
   const Scalar tol = test_precision<Scalar>();
   auto values = special_values<Scalar>();
-  Map<Array<Scalar, Dynamic, 1>> x(values.data(), values.size());
+  Map<Array<Scalar, Dynamic, 1>> valuesMap(values.data(), values.size());
 
-  Array<Scalar, Dynamic, Dynamic> actual = fun(x);
+  Array<Scalar, Dynamic, Dynamic> actual = fun(valuesMap);
   bool all_pass = true;
-  for (Index i = 0; i < x.size(); ++i) {
-    Scalar e = static_cast<Scalar>(ref(x(i)));
+  for (Index i = 0; i < valuesMap.size(); ++i) {
+    Scalar e = static_cast<Scalar>(ref(valuesMap(i)));
     Scalar a = actual(i);
     bool success = (a == e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) ||
                    ((numext::isnan)(a) && (numext::isnan)(e));
     if ((a == a) && (e == e)) success &= (bool)numext::signbit(e) == (bool)numext::signbit(a);
     all_pass &= success;
     if (!success) {
-      std::cout << name << "(" << x(i) << ") = " << a << " !=  " << e << std::endl;
+      std::cout << name << "(" << valuesMap(i) << ") = " << a << " !=  " << e << std::endl;
     }
   }
   VERIFY(all_pass);
 }
 
 #define UNARY_FUNCTOR_TEST_ARGS(fun) #fun, \
-      [](const auto& x) { return (Eigen::fun)(x); },    \
-      [](const auto& x) { return (std::fun)(x); }
+      [](const auto& x_) { return (Eigen::fun)(x_); },    \
+      [](const auto& y_) { return (std::fun)(y_); }
 
 template <typename Scalar>
 void unary_ops_test() {
@@ -267,8 +267,15 @@
             #ifdef EIGEN_COMP_MSVC
             // Work around MSVC return value on underflow.
             // if std::pow returns 0 and Eigen returns a denormalized value, then skip the test
-            int fpclass = std::fpclassify(a);
-            if (e == Base(0) && fpclass == FP_SUBNORMAL) continue;
+            int eigen_fpclass = std::fpclassify(a);
+            if (e == Base(0) && eigen_fpclass == FP_SUBNORMAL) continue;
+            #endif
+
+            #ifdef EIGEN_VECTORIZE_NEON
+            // Work around NEON flush-to-zero mode
+            // if std::pow returns a denormalized value and Eigen returns 0, then skip the test
+            int ref_fpclass = std::fpclassify(e);
+            if (a == Base(0) && ref_fpclass == FP_SUBNORMAL) continue;
             #endif
 
             bool both_nan = (numext::isnan)(a) && (numext::isnan)(e);
@@ -330,10 +337,6 @@
     for (Base a : y) {
       Base e = ref_pow<Base, Exponent>::run(base, exponent);
       bool pass = (a == e);
-      //if (!NumTraits<Base>::IsInteger) {
-      //  pass = pass || (((numext::isfinite)(e) && internal::isApprox(a, e)) ||
-      //                  ((numext::isnan)(a) && (numext::isnan)(e)));
-      //}
       all_pass &= pass;
       if (!pass) {
         std::cout << "pow(" << base << "," << exponent << ")   =   " << a << " !=  " << e << std::endl;
@@ -442,12 +445,6 @@
   signbit_test<double>();
   signbit_test<Eigen::half>();
   signbit_test<Eigen::bfloat16>();
-
-  signbit_test<uint8_t>();
-  signbit_test<uint16_t>();
-  signbit_test<uint32_t>();
-  signbit_test<uint64_t>();
-
   signbit_test<int8_t>();
   signbit_test<int16_t>();
   signbit_test<int32_t>();
@@ -1324,67 +1321,62 @@
     CALL_SUBTEST_2( array_generic(Array22f()) );
     CALL_SUBTEST_3( array_generic(Array44d()) );
     CALL_SUBTEST_4( array_generic(ArrayXXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
-    CALL_SUBTEST_5( array_generic(ArrayXXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
-    CALL_SUBTEST_6( array_generic(ArrayXXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
-    CALL_SUBTEST_6( array_generic(Array<Index,Dynamic,Dynamic>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
-    CALL_SUBTEST_7( signed_shift_test(ArrayXXi(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
-    CALL_SUBTEST_7( signed_shift_test(Array<Index, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
-    CALL_SUBTEST_8( array_generic(Array<uint32_t, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
-    CALL_SUBTEST_8( array_generic(Array<uint64_t, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
+    CALL_SUBTEST_7( array_generic(ArrayXXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
+    CALL_SUBTEST_8( array_generic(ArrayXXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
+    CALL_SUBTEST_7( array_generic(Array<Index,Dynamic,Dynamic>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
+    CALL_SUBTEST_8( signed_shift_test(ArrayXXi(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
+    CALL_SUBTEST_9( signed_shift_test(Array<Index, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
+    CALL_SUBTEST_10( array_generic(Array<uint32_t, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
+    CALL_SUBTEST_11( array_generic(Array<uint64_t, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
   }
   for(int i = 0; i < g_repeat; i++) {
     CALL_SUBTEST_1( comparisons(Array<float, 1, 1>()) );
     CALL_SUBTEST_2( comparisons(Array22f()) );
     CALL_SUBTEST_3( comparisons(Array44d()) );
-    CALL_SUBTEST_5( comparisons(ArrayXXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
-    CALL_SUBTEST_6( comparisons(ArrayXXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
+    CALL_SUBTEST_7( comparisons(ArrayXXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
+    CALL_SUBTEST_8( comparisons(ArrayXXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
   }
   for(int i = 0; i < g_repeat; i++) {
-    CALL_SUBTEST_1( min_max(Array<float, 1, 1>()) );
-    CALL_SUBTEST_2( min_max(Array22f()) );
-    CALL_SUBTEST_3( min_max(Array44d()) );
-    CALL_SUBTEST_5( min_max(ArrayXXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
-    CALL_SUBTEST_6( min_max(ArrayXXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
+    CALL_SUBTEST_6( min_max(Array<float, 1, 1>()) );
+    CALL_SUBTEST_7( min_max(Array22f()) );
+    CALL_SUBTEST_8( min_max(Array44d()) );
+    CALL_SUBTEST_9( min_max(ArrayXXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
+    CALL_SUBTEST_10( min_max(ArrayXXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
   }
   for(int i = 0; i < g_repeat; i++) {
-    CALL_SUBTEST_1( array_real(Array<float, 1, 1>()) );
-    CALL_SUBTEST_2( array_real(Array22f()) );
-    CALL_SUBTEST_3( array_real(Array44d()) );
-    CALL_SUBTEST_5( array_real(ArrayXXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
-    CALL_SUBTEST_7( array_real(Array<Eigen::half, 32, 32>()) );
-    CALL_SUBTEST_8( array_real(Array<Eigen::bfloat16, 32, 32>()) );
+    CALL_SUBTEST_11( array_real(Array<float, 1, 1>()) );
+    CALL_SUBTEST_12( array_real(Array22f()) );
+    CALL_SUBTEST_13( array_real(Array44d()) );
+    CALL_SUBTEST_14( array_real(ArrayXXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
+    CALL_SUBTEST_15( array_real(Array<Eigen::half, 32, 32>()) );
+    CALL_SUBTEST_16( array_real(Array<Eigen::bfloat16, 32, 32>()) );
   }
   for(int i = 0; i < g_repeat; i++) {
-    CALL_SUBTEST_4( array_complex(ArrayXXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
-    CALL_SUBTEST_5( array_complex(ArrayXXcd(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))));
+    CALL_SUBTEST_17( array_complex(ArrayXXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
+    CALL_SUBTEST_18( array_complex(ArrayXXcd(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))));
   }
 
   for(int i = 0; i < g_repeat; i++) {
-    CALL_SUBTEST_5( float_pow_test() );
-    CALL_SUBTEST_6( int_pow_test() );
-    CALL_SUBTEST_7( mixed_pow_test() );
-    CALL_SUBTEST_8( signbit_tests() );
+    CALL_SUBTEST_19( float_pow_test() );
+    CALL_SUBTEST_20( int_pow_test() );
+    CALL_SUBTEST_21( mixed_pow_test() );
+    CALL_SUBTEST_22( signbit_tests() );
   }
   for (int i = 0; i < g_repeat; i++) {
-    CALL_SUBTEST_2( typed_logicals_test(ArrayX<int>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))) );
-    CALL_SUBTEST_2( typed_logicals_test(ArrayX<float>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))) );
-    CALL_SUBTEST_3( typed_logicals_test(ArrayX<double>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
-    CALL_SUBTEST_3( typed_logicals_test(ArrayX<std::complex<float>>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
-    CALL_SUBTEST_3( typed_logicals_test(ArrayX<std::complex<double>>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
+    CALL_SUBTEST_23( typed_logicals_test(ArrayX<int>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))) );
+    CALL_SUBTEST_24( typed_logicals_test(ArrayX<float>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))) );
+    CALL_SUBTEST_25( typed_logicals_test(ArrayX<double>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))) );
+    CALL_SUBTEST_26( typed_logicals_test(ArrayX<std::complex<float>>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
+    CALL_SUBTEST_27( typed_logicals_test(ArrayX<std::complex<double>>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
   }
 
   for (int i = 0; i < g_repeat; i++) {
-    CALL_SUBTEST_1((cast_test<1, 1>()));
-    CALL_SUBTEST_2((cast_test<3, 1>()));
-    CALL_SUBTEST_2((cast_test<3, 3>()));
-    CALL_SUBTEST_3((cast_test<5, 1>()));
-    CALL_SUBTEST_3((cast_test<5, 5>()));
-    CALL_SUBTEST_4((cast_test<9, 1>()));
-    CALL_SUBTEST_4((cast_test<9, 9>()));
-    CALL_SUBTEST_5((cast_test<17, 1>()));
-    CALL_SUBTEST_5((cast_test<17, 17>()));
-    CALL_SUBTEST_6((cast_test<Dynamic, 1>()));
-    CALL_SUBTEST_6((cast_test<Dynamic, Dynamic>()));
+    CALL_SUBTEST_28( (cast_test<1, 1>()) );
+    CALL_SUBTEST_29( (cast_test<3, 1>()) );
+    CALL_SUBTEST_30( (cast_test<5, 1>()) );
+    CALL_SUBTEST_31( (cast_test<9, 1>()) );
+    CALL_SUBTEST_32( (cast_test<17, 1>()) );
+    CALL_SUBTEST_33( (cast_test<Dynamic, 1>()) );
   }
 
   VERIFY((internal::is_same< internal::global_math_functions_filtering_base<int>::type, int >::value));
diff --git a/test/basicstuff.cpp b/test/basicstuff.cpp
index 47dfc04..05cd6ad 100644
--- a/test/basicstuff.cpp
+++ b/test/basicstuff.cpp
@@ -69,8 +69,10 @@
   x = v1(static_cast<unsigned int>(r1));
   x = v1(static_cast<signed long>(r1));
   x = v1(static_cast<unsigned long>(r1));
-  x = v1(static_cast<long long int>(r1));
-  x = v1(static_cast<unsigned long long int>(r1));
+  if(sizeof(Index) >= sizeof(long long int))
+    x = v1(static_cast<long long int>(r1));
+  if(sizeof(Index) >= sizeof(unsigned long long int))
+    x = v1(static_cast<unsigned long long int>(r1));
 
   VERIFY_IS_APPROX(               v1,    v1);
   VERIFY_IS_NOT_APPROX(           v1,    2*v1);
diff --git a/test/bdcsvd.cpp b/test/bdcsvd.cpp
index 4752809..4f0a137 100644
--- a/test/bdcsvd.cpp
+++ b/test/bdcsvd.cpp
@@ -69,8 +69,13 @@
 }
 
 template <typename MatrixType>
-void bdcsvd_all_options(const MatrixType& input = MatrixType()) {
-  svd_option_checks<MatrixType, 0>(input);
+void bdcsvd_thin_options(const MatrixType& input = MatrixType()) {
+  svd_thin_option_checks<MatrixType, 0>(input);
+}
+
+template <typename MatrixType>
+void bdcsvd_full_options(const MatrixType& input = MatrixType()) {
+  svd_option_checks_full_only<MatrixType, 0>(input);
 }
 
 template <typename MatrixType>
@@ -82,13 +87,15 @@
 EIGEN_DECLARE_TEST(bdcsvd)
 {
   CALL_SUBTEST_1((bdcsvd_verify_assert<Matrix3f>()));
-  CALL_SUBTEST_1((bdcsvd_verify_assert<Matrix4d>()));
-  CALL_SUBTEST_2((bdcsvd_verify_assert<Matrix<float, 10, 7>>()));
-  CALL_SUBTEST_2((bdcsvd_verify_assert<Matrix<float, 7, 10>>()));
-  CALL_SUBTEST_3((bdcsvd_verify_assert<Matrix<std::complex<double>, 6, 9>>()));
+  CALL_SUBTEST_2((bdcsvd_verify_assert<Matrix4d>()));
+  CALL_SUBTEST_3((bdcsvd_verify_assert<Matrix<float, 10, 7>>()));
+  CALL_SUBTEST_4((bdcsvd_verify_assert<Matrix<float, 7, 10>>()));
+  CALL_SUBTEST_5((bdcsvd_verify_assert<Matrix<std::complex<double>, 6, 9>>()));
 
-  CALL_SUBTEST_4((svd_all_trivial_2x2(bdcsvd_all_options<Matrix2cd>)));
-  CALL_SUBTEST_5((svd_all_trivial_2x2(bdcsvd_all_options<Matrix2d>)));
+  CALL_SUBTEST_6((svd_all_trivial_2x2(bdcsvd_thin_options<Matrix2cd>)));
+  CALL_SUBTEST_7((svd_all_trivial_2x2(bdcsvd_full_options<Matrix2cd>)));
+  CALL_SUBTEST_8((svd_all_trivial_2x2(bdcsvd_thin_options<Matrix2d>)));
+  CALL_SUBTEST_9((svd_all_trivial_2x2(bdcsvd_full_options<Matrix2d>)));
 
   for (int i = 0; i < g_repeat; i++) {
     int r = internal::random<int>(1, EIGEN_TEST_MAX_SIZE/2),
@@ -97,58 +104,68 @@
     TEST_SET_BUT_UNUSED_VARIABLE(r)
     TEST_SET_BUT_UNUSED_VARIABLE(c)
 
-    CALL_SUBTEST_6((compare_bdc_jacobi<MatrixXf>(MatrixXf(r, c))));
-    CALL_SUBTEST_7((compare_bdc_jacobi<MatrixXd>(MatrixXd(r, c))));
-    CALL_SUBTEST_8((compare_bdc_jacobi<MatrixXcd>(MatrixXcd(r, c))));
+    CALL_SUBTEST_10((compare_bdc_jacobi<MatrixXf>(MatrixXf(r, c))));
+    CALL_SUBTEST_11((compare_bdc_jacobi<MatrixXd>(MatrixXd(r, c))));
+    CALL_SUBTEST_12((compare_bdc_jacobi<MatrixXcd>(MatrixXcd(r, c))));
     // Test on inf/nan matrix
-    CALL_SUBTEST_9((svd_inf_nan<MatrixXf>()));
-    CALL_SUBTEST_10((svd_inf_nan<MatrixXd>()));
+    CALL_SUBTEST_13((svd_inf_nan<MatrixXf>()));
+    CALL_SUBTEST_14((svd_inf_nan<MatrixXd>()));
 
     // Verify some computations using all combinations of the Options template parameter.
-    CALL_SUBTEST_11((bdcsvd_all_options<Matrix3f>()));
-    CALL_SUBTEST_12((bdcsvd_all_options<Matrix<float, 2, 3>>()));
-    CALL_SUBTEST_13((bdcsvd_all_options<MatrixXd>(MatrixXd(20, 17))));
-    CALL_SUBTEST_14((bdcsvd_all_options<MatrixXd>(MatrixXd(17, 20))));
-    CALL_SUBTEST_15((bdcsvd_all_options<Matrix<double, Dynamic, 15>>(Matrix<double, Dynamic, 15>(r, 15))));
-    CALL_SUBTEST_16((bdcsvd_all_options<Matrix<double, 13, Dynamic>>(Matrix<double, 13, Dynamic>(13, c))));
-    CALL_SUBTEST_17((bdcsvd_all_options<MatrixXf>(MatrixXf(r, c))));
-    CALL_SUBTEST_18((bdcsvd_all_options<MatrixXcd>(MatrixXcd(r, c))));
-    CALL_SUBTEST_19((bdcsvd_all_options<MatrixXd>(MatrixXd(r, c))));
-    CALL_SUBTEST_20((bdcsvd_all_options<Matrix<double, Dynamic, Dynamic, RowMajor>>(Matrix<double, Dynamic, Dynamic, RowMajor>(20, 27))));
-    CALL_SUBTEST_21((bdcsvd_all_options<Matrix<double, Dynamic, Dynamic, RowMajor>>(Matrix<double, Dynamic, Dynamic, RowMajor>(27, 20))));
-
-    CALL_SUBTEST_22((
+    CALL_SUBTEST_15((bdcsvd_thin_options<Matrix3f>()));
+    CALL_SUBTEST_16((bdcsvd_full_options<Matrix3f>()));
+    CALL_SUBTEST_17((bdcsvd_thin_options<Matrix<float, 2, 3>>()));
+    CALL_SUBTEST_18((bdcsvd_full_options<Matrix<float, 2, 3>>()));
+    CALL_SUBTEST_19((bdcsvd_thin_options<MatrixXd>(MatrixXd(20, 17))));
+    CALL_SUBTEST_20((bdcsvd_full_options<MatrixXd>(MatrixXd(20, 17))));
+    CALL_SUBTEST_21((bdcsvd_thin_options<MatrixXd>(MatrixXd(17, 20))));
+    CALL_SUBTEST_22((bdcsvd_full_options<MatrixXd>(MatrixXd(17, 20))));
+    CALL_SUBTEST_23((bdcsvd_thin_options<Matrix<double, Dynamic, 15>>(Matrix<double, Dynamic, 15>(r, 15))));
+    CALL_SUBTEST_24((bdcsvd_full_options<Matrix<double, Dynamic, 15>>(Matrix<double, Dynamic, 15>(r, 15))));
+    CALL_SUBTEST_25((bdcsvd_thin_options<Matrix<double, 13, Dynamic>>(Matrix<double, 13, Dynamic>(13, c))));
+    CALL_SUBTEST_26((bdcsvd_full_options<Matrix<double, 13, Dynamic>>(Matrix<double, 13, Dynamic>(13, c))));
+    CALL_SUBTEST_27((bdcsvd_thin_options<MatrixXf>(MatrixXf(r, c))));
+    CALL_SUBTEST_28((bdcsvd_full_options<MatrixXf>(MatrixXf(r, c))));
+    CALL_SUBTEST_29((bdcsvd_thin_options<MatrixXcd>(MatrixXcd(r, c))));
+    CALL_SUBTEST_30((bdcsvd_full_options<MatrixXcd>(MatrixXcd(r, c))));
+    CALL_SUBTEST_31((bdcsvd_thin_options<MatrixXd>(MatrixXd(r, c))));
+    CALL_SUBTEST_32((bdcsvd_full_options<MatrixXd>(MatrixXd(r, c))));
+    CALL_SUBTEST_33((bdcsvd_thin_options<Matrix<double, Dynamic, Dynamic, RowMajor>>(Matrix<double, Dynamic, Dynamic, RowMajor>(20, 27))));
+    CALL_SUBTEST_34((bdcsvd_full_options<Matrix<double, Dynamic, Dynamic, RowMajor>>(Matrix<double, Dynamic, Dynamic, RowMajor>(20, 27))));
+    CALL_SUBTEST_35((bdcsvd_thin_options<Matrix<double, Dynamic, Dynamic, RowMajor>>(Matrix<double, Dynamic, Dynamic, RowMajor>(27, 20))));
+    CALL_SUBTEST_36((bdcsvd_full_options<Matrix<double, Dynamic, Dynamic, RowMajor>>(Matrix<double, Dynamic, Dynamic, RowMajor>(27, 20))));
+    CALL_SUBTEST_37((
         svd_check_max_size_matrix<Matrix<float, Dynamic, Dynamic, ColMajor, 20, 35>, ColPivHouseholderQRPreconditioner>(
             r, c)));
-    CALL_SUBTEST_22(
+    CALL_SUBTEST_38(
         (svd_check_max_size_matrix<Matrix<float, Dynamic, Dynamic, ColMajor, 35, 20>, HouseholderQRPreconditioner>(r,
                                                                                                                    c)));
-    CALL_SUBTEST_22((
+    CALL_SUBTEST_39((
         svd_check_max_size_matrix<Matrix<float, Dynamic, Dynamic, RowMajor, 20, 35>, ColPivHouseholderQRPreconditioner>(
             r, c)));
-    CALL_SUBTEST_22(
+    CALL_SUBTEST_40(
         (svd_check_max_size_matrix<Matrix<float, Dynamic, Dynamic, RowMajor, 35, 20>, HouseholderQRPreconditioner>(r,
                                                                                                                    c)));
   }
 
   // test matrixbase method
-  CALL_SUBTEST_23(( bdcsvd_method<Matrix2cd>() ));
-  CALL_SUBTEST_23(( bdcsvd_method<Matrix3f>() ));
+  CALL_SUBTEST_41(( bdcsvd_method<Matrix2cd>() ));
+  CALL_SUBTEST_42(( bdcsvd_method<Matrix3f>() ));
 
   // Test problem size constructors
-  CALL_SUBTEST_24( BDCSVD<MatrixXf>(10,10) );
+  CALL_SUBTEST_43( BDCSVD<MatrixXf>(10,10) );
 
   // Check that preallocation avoids subsequent mallocs
   // Disabled because not supported by BDCSVD
   // CALL_SUBTEST_9( svd_preallocate<void>() );
 
-  CALL_SUBTEST_25( svd_underoverflow<void>() );
+  CALL_SUBTEST_44( svd_underoverflow<void>() );
 
   // Without total deflation issues.
-  CALL_SUBTEST_26((  compare_bdc_jacobi_instance(true) ));
-  CALL_SUBTEST_26((  compare_bdc_jacobi_instance(false) ));
+  CALL_SUBTEST_45((  compare_bdc_jacobi_instance(true) ));
+  CALL_SUBTEST_46((  compare_bdc_jacobi_instance(false) ));
 
   // With total deflation issues before, when it shouldn't be triggered.
-  CALL_SUBTEST_27((  compare_bdc_jacobi_instance(true, 3) ));
-  CALL_SUBTEST_27((  compare_bdc_jacobi_instance(false, 3) ));
+  CALL_SUBTEST_47((  compare_bdc_jacobi_instance(true, 3) ));
+  CALL_SUBTEST_48((  compare_bdc_jacobi_instance(false, 3) ));
 }
diff --git a/test/block.cpp b/test/block.cpp
index f8583c3..aba0896 100644
--- a/test/block.cpp
+++ b/test/block.cpp
@@ -40,14 +40,14 @@
 std::enable_if_t<((MatrixType::Flags&RowMajorBit)==0),void>
 check_left_top(const MatrixType& m, Index r, Index c,
                Index rows, Index /*unused*/) {
-  VERIFY_IS_EQUAL(m.leftCols(c).coeff(r+c*rows), m(r,c));
+  if(c > 0) VERIFY_IS_EQUAL(m.leftCols(c).coeff(r+c*rows), m(r,c));
 }
 
 template <typename MatrixType>
 std::enable_if_t<((MatrixType::Flags&RowMajorBit)!=0),void>
 check_left_top(const MatrixType& m,  Index r, Index c,
                Index /*unused*/, Index cols) {
-  VERIFY_IS_EQUAL(m.topRows(r).coeff(c+r*cols), m(r,c));
+  if(r > 0) VERIFY_IS_EQUAL(m.topRows(r).coeff(c+r*cols), m(r,c));
 }
 
 template<typename MatrixType> void block(const MatrixType& m)
diff --git a/test/boostmultiprec.cpp b/test/boostmultiprec.cpp
index e2fc9a8..5759888 100644
--- a/test/boostmultiprec.cpp
+++ b/test/boostmultiprec.cpp
@@ -156,6 +156,7 @@
   std::cout << "NumTraits<Real>::lowest()          = " << NumTraits<Real>::lowest() << std::endl;
   std::cout << "NumTraits<Real>::highest()         = " << NumTraits<Real>::highest() << std::endl;
   std::cout << "NumTraits<Real>::digits10()        = " << NumTraits<Real>::digits10() << std::endl;
+  std::cout << "NumTraits<Real>::max_digits10()    = " << NumTraits<Real>::max_digits10() << std::endl;
 
   // check stream output
   {
diff --git a/test/geo_quaternion.cpp b/test/geo_quaternion.cpp
index a821cf2..2eef40b 100644
--- a/test/geo_quaternion.cpp
+++ b/test/geo_quaternion.cpp
@@ -98,6 +98,10 @@
     VERIFY_IS_MUCH_SMALLER_THAN(abs(q1.angularDistance(q2) - refangle), Scalar(1));
   }
 
+  // Action on vector by the q v q* formula
+  VERIFY_IS_APPROX(q1 * v2, (q1 * Quaternionx(Scalar(0), v2) * q1.inverse()).vec());
+  VERIFY_IS_APPROX(q1.inverse() * v2, (q1.inverse() * Quaternionx(Scalar(0), v2) * q1).vec());
+
   // rotation matrix conversion
   VERIFY_IS_APPROX(q1 * v2, q1.toRotationMatrix() * v2);
   VERIFY_IS_APPROX(q1 * q2 * v2,
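
The identity exercised by the new checks: rotating v by q equals taking the
vector part of q * (0, v) * q^{-1}. A standalone sketch using the new
scalar+vector constructor:

  #include <Eigen/Geometry>
  int main() {
    Eigen::Quaterniond q = Eigen::Quaterniond::UnitRandom();
    Eigen::Vector3d v = Eigen::Vector3d::Random();
    Eigen::Vector3d rotated = q * v;
    Eigen::Vector3d formula = (q * Eigen::Quaterniond(0.0, v) * q.inverse()).vec();
    return rotated.isApprox(formula) ? 0 : 1;
  }
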
diff --git a/test/jacobisvd.cpp b/test/jacobisvd.cpp
index 3d6b630..9416b84 100644
--- a/test/jacobisvd.cpp
+++ b/test/jacobisvd.cpp
@@ -35,12 +35,23 @@
 }
 
 template <typename MatrixType>
-void jacobisvd_all_options(const MatrixType& input = MatrixType()) {
+void jacobisvd_thin_options(const MatrixType& input = MatrixType()) {
   MatrixType m(input.rows(), input.cols());
   svd_fill_random(m);
-  svd_option_checks<MatrixType, 0>(m);
-  svd_option_checks<MatrixType, ColPivHouseholderQRPreconditioner>(m);
-  svd_option_checks<MatrixType, HouseholderQRPreconditioner>(m);
+
+  svd_thin_option_checks<MatrixType, 0>(m);
+  svd_thin_option_checks<MatrixType, ColPivHouseholderQRPreconditioner>(m);
+  svd_thin_option_checks<MatrixType, HouseholderQRPreconditioner>(m);
+}
+
+template <typename MatrixType>
+void jacobisvd_full_options(const MatrixType& input = MatrixType()) {
+  MatrixType m(input.rows(), input.cols());
+  svd_fill_random(m);
+
+  svd_option_checks_full_only<MatrixType, 0>(m);
+  svd_option_checks_full_only<MatrixType, ColPivHouseholderQRPreconditioner>(m);
+  svd_option_checks_full_only<MatrixType, HouseholderQRPreconditioner>(m);
   svd_option_checks_full_only<MatrixType, FullPivHouseholderQRPreconditioner>(
       m);  // FullPiv only used when computing full unitaries
 }
@@ -103,18 +114,18 @@
 EIGEN_DECLARE_TEST(jacobisvd)
 {
   CALL_SUBTEST_1((jacobisvd_verify_inputs<Matrix4d>()));
-  CALL_SUBTEST_1((jacobisvd_verify_inputs(Matrix<float, 5, Dynamic>(5, 6))));
-  CALL_SUBTEST_1((jacobisvd_verify_inputs<Matrix<std::complex<double>, 7, 5>>()));
+  CALL_SUBTEST_2((jacobisvd_verify_inputs(Matrix<float, 5, Dynamic>(5, 6))));
+  CALL_SUBTEST_3((jacobisvd_verify_inputs<Matrix<std::complex<double>, 7, 5>>()));
 
-  CALL_SUBTEST_2((jacobisvd_verify_assert<Matrix3f>()));
-  CALL_SUBTEST_2((jacobisvd_verify_assert<Matrix4d>()));
-  CALL_SUBTEST_2((jacobisvd_verify_assert<Matrix<float, 10, 12>>()));
-  CALL_SUBTEST_2((jacobisvd_verify_assert<Matrix<float, 12, 10>>()));
-  CALL_SUBTEST_2((jacobisvd_verify_assert<MatrixXf>(MatrixXf(10, 12))));
-  CALL_SUBTEST_2((jacobisvd_verify_assert<MatrixXcd>(MatrixXcd(7, 5))));
+  CALL_SUBTEST_4((jacobisvd_verify_assert<Matrix3f>()));
+  CALL_SUBTEST_5((jacobisvd_verify_assert<Matrix4d>()));
+  CALL_SUBTEST_6((jacobisvd_verify_assert<Matrix<float, 10, 12>>()));
+  CALL_SUBTEST_7((jacobisvd_verify_assert<Matrix<float, 12, 10>>()));
+  CALL_SUBTEST_8((jacobisvd_verify_assert<MatrixXf>(MatrixXf(10, 12))));
+  CALL_SUBTEST_9((jacobisvd_verify_assert<MatrixXcd>(MatrixXcd(7, 5))));
 
-  CALL_SUBTEST_3(svd_all_trivial_2x2(jacobisvd_all_options<Matrix2cd>));
-  CALL_SUBTEST_4(svd_all_trivial_2x2(jacobisvd_all_options<Matrix2d>));
+  CALL_SUBTEST_10(svd_all_trivial_2x2(jacobisvd_thin_options<Matrix2cd>));
+  CALL_SUBTEST_11(svd_all_trivial_2x2(jacobisvd_thin_options<Matrix2d>));
 
   for (int i = 0; i < g_repeat; i++) {
     int r = internal::random<int>(1, 30),
@@ -123,64 +134,83 @@
     TEST_SET_BUT_UNUSED_VARIABLE(r)
     TEST_SET_BUT_UNUSED_VARIABLE(c)
     
-    CALL_SUBTEST_5((jacobisvd_all_options<Matrix3f>()));
-    CALL_SUBTEST_6((jacobisvd_all_options<Matrix4d>()));
-    CALL_SUBTEST_7((jacobisvd_all_options<Matrix<float, 2, 3>>()));
-    CALL_SUBTEST_8((jacobisvd_all_options<Matrix<double, 4, 7>>()));
-    CALL_SUBTEST_9((jacobisvd_all_options<Matrix<double, 7, 4>>()));
-    CALL_SUBTEST_10((jacobisvd_all_options<Matrix<double, Dynamic, 5>>(Matrix<double, Dynamic, 5>(r, 5))));
-    CALL_SUBTEST_11((jacobisvd_all_options<Matrix<double, 5, Dynamic>>(Matrix<double, 5, Dynamic>(5, c))));
-    CALL_SUBTEST_12((jacobisvd_all_options<MatrixXf>(MatrixXf(r, c))));
-    CALL_SUBTEST_13((jacobisvd_all_options<MatrixXcd>(MatrixXcd(r, c))));
-    CALL_SUBTEST_14((jacobisvd_all_options<MatrixXd>(MatrixXd(r, c))));
-    CALL_SUBTEST_15((jacobisvd_all_options<Matrix<double, 5, 7, RowMajor>>()));
-    CALL_SUBTEST_16((jacobisvd_all_options<Matrix<double, 7, 5, RowMajor>>()));
+    CALL_SUBTEST_12((jacobisvd_thin_options<Matrix3f>()));
+    CALL_SUBTEST_13((jacobisvd_full_options<Matrix3f>()));
+    CALL_SUBTEST_14((jacobisvd_thin_options<Matrix4d>()));
+    CALL_SUBTEST_15((jacobisvd_full_options<Matrix4d>()));
+    CALL_SUBTEST_16((jacobisvd_thin_options<Matrix<float, 2, 3>>()));
+    CALL_SUBTEST_17((jacobisvd_full_options<Matrix<float, 2, 3>>()));
+    CALL_SUBTEST_18((jacobisvd_thin_options<Matrix<double, 4, 7>>()));
+    CALL_SUBTEST_19((jacobisvd_full_options<Matrix<double, 4, 7>>()));
+    CALL_SUBTEST_20((jacobisvd_thin_options<Matrix<double, 7, 4>>()));
+    CALL_SUBTEST_21((jacobisvd_full_options<Matrix<double, 7, 4>>()));
+    CALL_SUBTEST_22((jacobisvd_thin_options<Matrix<double, Dynamic, 5>>(Matrix<double, Dynamic, 5>(r, 5))));
+    CALL_SUBTEST_23((jacobisvd_full_options<Matrix<double, Dynamic, 5>>(Matrix<double, Dynamic, 5>(r, 5))));
+    CALL_SUBTEST_24((jacobisvd_thin_options<Matrix<double, 5, Dynamic>>(Matrix<double, 5, Dynamic>(5, c))));
+    CALL_SUBTEST_25((jacobisvd_full_options<Matrix<double, 5, Dynamic>>(Matrix<double, 5, Dynamic>(5, c))));
+    CALL_SUBTEST_26((jacobisvd_thin_options<MatrixXf>(MatrixXf(r, c))));
+    CALL_SUBTEST_27((jacobisvd_full_options<MatrixXf>(MatrixXf(r, c))));
+    CALL_SUBTEST_28((jacobisvd_thin_options<MatrixXcd>(MatrixXcd(r, c))));
+    CALL_SUBTEST_29((jacobisvd_full_options<MatrixXcd>(MatrixXcd(r, c))));
+    CALL_SUBTEST_30((jacobisvd_thin_options<MatrixXd>(MatrixXd(r, c))));
+    CALL_SUBTEST_31((jacobisvd_full_options<MatrixXd>(MatrixXd(r, c))));
+    CALL_SUBTEST_32((jacobisvd_thin_options<Matrix<double, 5, 7, RowMajor>>()));
+    CALL_SUBTEST_33((jacobisvd_full_options<Matrix<double, 5, 7, RowMajor>>()));
+    CALL_SUBTEST_34((jacobisvd_thin_options<Matrix<double, 7, 5, RowMajor>>()));
+    CALL_SUBTEST_35((jacobisvd_full_options<Matrix<double, 7, 5, RowMajor>>()));
 
     MatrixXcd noQRTest = MatrixXcd(r, r);
     svd_fill_random(noQRTest);
-    CALL_SUBTEST_17((svd_option_checks<MatrixXcd, NoQRPreconditioner>(noQRTest)));
+    CALL_SUBTEST_36((svd_thin_option_checks<MatrixXcd, NoQRPreconditioner>(noQRTest)));
+    CALL_SUBTEST_36((svd_option_checks_full_only<MatrixXcd, NoQRPreconditioner>(noQRTest)));
 
-    CALL_SUBTEST_18((
+    CALL_SUBTEST_37((
         svd_check_max_size_matrix<Matrix<float, Dynamic, Dynamic, ColMajor, 13, 15>, ColPivHouseholderQRPreconditioner>(
             r, c)));
-    CALL_SUBTEST_18(
+    CALL_SUBTEST_38(
         (svd_check_max_size_matrix<Matrix<float, Dynamic, Dynamic, ColMajor, 15, 13>, HouseholderQRPreconditioner>(r,
                                                                                                                    c)));
-    CALL_SUBTEST_18((
+    CALL_SUBTEST_39((
         svd_check_max_size_matrix<Matrix<float, Dynamic, Dynamic, RowMajor, 13, 15>, ColPivHouseholderQRPreconditioner>(
             r, c)));
-    CALL_SUBTEST_18(
+    CALL_SUBTEST_40(
         (svd_check_max_size_matrix<Matrix<float, Dynamic, Dynamic, RowMajor, 15, 13>, HouseholderQRPreconditioner>(r,
                                                                                                                    c)));
 
     // Test on inf/nan matrix
-    CALL_SUBTEST_19((svd_inf_nan<MatrixXf>()));
-    CALL_SUBTEST_19((svd_inf_nan<MatrixXd>()));
+    CALL_SUBTEST_41((svd_inf_nan<MatrixXf>()));
+    CALL_SUBTEST_42((svd_inf_nan<MatrixXd>()));
 
-    CALL_SUBTEST_20((jacobisvd_verify_assert<Matrix<double, 6, 1>>()));
-    CALL_SUBTEST_20((jacobisvd_verify_assert<Matrix<double, 1, 6>>()));
-    CALL_SUBTEST_20((jacobisvd_verify_assert<Matrix<double, Dynamic, 1>>(Matrix<double, Dynamic, 1>(r))));
-    CALL_SUBTEST_20((jacobisvd_verify_assert<Matrix<double, 1, Dynamic>>(Matrix<double, 1, Dynamic>(c))));
+    CALL_SUBTEST_43((jacobisvd_verify_assert<Matrix<double, 6, 1>>()));
+    CALL_SUBTEST_44((jacobisvd_verify_assert<Matrix<double, 1, 6>>()));
+    CALL_SUBTEST_45((jacobisvd_verify_assert<Matrix<double, Dynamic, 1>>(Matrix<double, Dynamic, 1>(r))));
+    CALL_SUBTEST_46((jacobisvd_verify_assert<Matrix<double, 1, Dynamic>>(Matrix<double, 1, Dynamic>(c))));
   }
 
-  CALL_SUBTEST_21((jacobisvd_all_options<MatrixXd>(
+  CALL_SUBTEST_47((jacobisvd_thin_options<MatrixXd>(
       MatrixXd(internal::random<int>(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 2),
                internal::random<int>(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 2)))));
-  CALL_SUBTEST_22((jacobisvd_all_options<MatrixXcd>(
+  CALL_SUBTEST_48((jacobisvd_full_options<MatrixXd>(
+      MatrixXd(internal::random<int>(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 2),
+               internal::random<int>(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 2)))));
+  CALL_SUBTEST_49((jacobisvd_thin_options<MatrixXcd>(
+      MatrixXcd(internal::random<int>(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 3),
+                internal::random<int>(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 3)))));
+  CALL_SUBTEST_50((jacobisvd_full_options<MatrixXcd>(
       MatrixXcd(internal::random<int>(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 3),
                 internal::random<int>(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 3)))));
 
   // test matrixbase method
-  CALL_SUBTEST_23(( jacobisvd_method<Matrix2cd>() ));
-  CALL_SUBTEST_23(( jacobisvd_method<Matrix3f>() ));
+  CALL_SUBTEST_51(( jacobisvd_method<Matrix2cd>() ));
+  CALL_SUBTEST_52(( jacobisvd_method<Matrix3f>() ));
 
   // Test problem size constructors
-  CALL_SUBTEST_24( JacobiSVD<MatrixXf>(10,10) );
+  CALL_SUBTEST_53( JacobiSVD<MatrixXf>(10,10) );
 
   // Check that preallocation avoids subsequent mallocs
-  CALL_SUBTEST_25( svd_preallocate<void>() );
+  CALL_SUBTEST_54( svd_preallocate<void>() );
 
-  CALL_SUBTEST_26( svd_underoverflow<void>() );
+  CALL_SUBTEST_55( svd_underoverflow<void>() );
 
   msvc_workaround();
 }
diff --git a/test/numext.cpp b/test/numext.cpp
index e99eddc..cca0411 100644
--- a/test/numext.cpp
+++ b/test/numext.cpp
@@ -286,9 +286,9 @@
       return all_pass;
     };
 
-    bool all_pass = check_all(non_negative_values, false_mask);
-    all_pass = all_pass && check_all(negative_values, (NumTraits<T>::IsSigned ? true_mask : false_mask));
-    VERIFY(all_pass);
+    bool check_all_pass = check_all(non_negative_values, false_mask);
+    check_all_pass = check_all_pass && check_all(negative_values, (NumTraits<T>::IsSigned ? true_mask : false_mask));
+    VERIFY(check_all_pass);
   }
 };
 template <typename T>
diff --git a/test/random_matrix.cpp b/test/random_matrix.cpp
index 873845f..a914af1 100644
--- a/test/random_matrix.cpp
+++ b/test/random_matrix.cpp
@@ -63,7 +63,7 @@
 {
     RealVectorType svs = setupRangeSvs<RealVectorType, RealScalar>(diag_size, min_svs, max_svs);
 
-    MatrixType M;
+    MatrixType M = MatrixType::Zero(rows, cols);
     generateRandomMatrixSvs(svs, rows, cols, M);
 
     // validate dimensions
diff --git a/test/random_without_cast_overflow.h b/test/random_without_cast_overflow.h
index 7f1ea5f..715bb8e 100644
--- a/test/random_without_cast_overflow.h
+++ b/test/random_without_cast_overflow.h
@@ -23,7 +23,7 @@
 template <typename SrcScalar, typename TgtScalar>
 struct random_without_cast_overflow<
     SrcScalar, TgtScalar,
-    std::enable_if_t<NumTraits<SrcScalar>::IsInteger && NumTraits<TgtScalar>::IsInteger &&
+    std::enable_if_t<NumTraits<SrcScalar>::IsInteger && NumTraits<SrcScalar>::IsSigned && NumTraits<TgtScalar>::IsInteger &&
                    !NumTraits<TgtScalar>::IsSigned &&
                    (std::numeric_limits<SrcScalar>::digits < std::numeric_limits<TgtScalar>::digits ||
                     (std::numeric_limits<SrcScalar>::digits == std::numeric_limits<TgtScalar>::digits &&
@@ -34,12 +34,27 @@
   }
 };
 
+// Unsigned to unsigned integer widening cast.
+template <typename SrcScalar, typename TgtScalar>
+struct random_without_cast_overflow<
+    SrcScalar, TgtScalar,
+    std::enable_if_t<NumTraits<SrcScalar>::IsInteger && !NumTraits<SrcScalar>::IsSigned && NumTraits<TgtScalar>::IsInteger &&
+                   !NumTraits<TgtScalar>::IsSigned &&
+                   (std::numeric_limits<SrcScalar>::digits < std::numeric_limits<TgtScalar>::digits ||
+                    (std::numeric_limits<SrcScalar>::digits == std::numeric_limits<TgtScalar>::digits &&
+                     NumTraits<SrcScalar>::IsSigned))>> {
+  static SrcScalar value() {
+    SrcScalar a = internal::random<SrcScalar>();
+    return a;
+  }
+};
+
-// Integer to unsigned narrowing cast.
+// Unsigned to signed integer narrowing cast.
 template <typename SrcScalar, typename TgtScalar>
 struct random_without_cast_overflow<
     SrcScalar, TgtScalar,
     std::enable_if_t<
-        NumTraits<SrcScalar>::IsInteger && NumTraits<TgtScalar>::IsInteger && !NumTraits<SrcScalar>::IsSigned &&
+        NumTraits<SrcScalar>::IsInteger && NumTraits<TgtScalar>::IsInteger && NumTraits<TgtScalar>::IsSigned && !NumTraits<SrcScalar>::IsSigned &&
         (std::numeric_limits<SrcScalar>::digits > std::numeric_limits<TgtScalar>::digits)>> {
   static SrcScalar value() {
     TgtScalar b = internal::random<TgtScalar>();
@@ -47,6 +62,19 @@
   }
 };
 
+// Unsigned to unsigned integer narrowing cast.
+template <typename SrcScalar, typename TgtScalar>
+struct random_without_cast_overflow<
+    SrcScalar, TgtScalar,
+    std::enable_if_t<
+        NumTraits<SrcScalar>::IsInteger && NumTraits<TgtScalar>::IsInteger && !NumTraits<TgtScalar>::IsSigned && !NumTraits<SrcScalar>::IsSigned &&
+        (std::numeric_limits<SrcScalar>::digits > std::numeric_limits<TgtScalar>::digits)>> {
+  static SrcScalar value() {
+    TgtScalar b = internal::random<TgtScalar>();
+    return static_cast<SrcScalar>(b);
+  }
+};
+
 // Integer to signed narrowing cast.
 template <typename SrcScalar, typename TgtScalar>
 struct random_without_cast_overflow<
@@ -83,7 +111,7 @@
 struct random_without_cast_overflow<
     SrcScalar, TgtScalar,
     std::enable_if_t<
-        !NumTraits<SrcScalar>::IsInteger && !NumTraits<SrcScalar>::IsComplex && NumTraits<TgtScalar>::IsInteger &&
+        !NumTraits<SrcScalar>::IsInteger && !NumTraits<SrcScalar>::IsComplex && NumTraits<TgtScalar>::IsInteger && NumTraits<TgtScalar>::IsSigned &&
         (std::numeric_limits<TgtScalar>::digits > std::numeric_limits<SrcScalar>::digits)>> {
   static SrcScalar value() {
     // NOTE: internal::random<T>() is limited by RAND_MAX, so random<int64_t> is always within that range.
@@ -95,6 +123,23 @@
   }
 };
 
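+// Floating-point to unsigned integer, where the integer has more digits than the float's mantissa.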
+template <typename SrcScalar, typename TgtScalar>
+struct random_without_cast_overflow<
+    SrcScalar, TgtScalar,
+    std::enable_if_t<
+        !NumTraits<SrcScalar>::IsInteger && !NumTraits<SrcScalar>::IsComplex && NumTraits<TgtScalar>::IsInteger && !NumTraits<TgtScalar>::IsSigned &&
+        (std::numeric_limits<TgtScalar>::digits > std::numeric_limits<SrcScalar>::digits)>> {
+  static SrcScalar value() {
+    // NOTE: internal::random<T>() is limited by RAND_MAX, so random<uint64_t> is always within that range.
+    // This prevents us from simply shifting bits, which would result in only zeros.
+    // Instead, keep the least-significant K bits.
+    static const TgtScalar KeepMask = (static_cast<TgtScalar>(1) << std::numeric_limits<SrcScalar>::digits) - 1;
+    const TgtScalar a = internal::random<TgtScalar>();
+    return static_cast<SrcScalar>(a & KeepMask);
+  }
+};
+
 // Integer to floating-point, re-use above logic.
 template <typename SrcScalar, typename TgtScalar>
 struct random_without_cast_overflow<
diff --git a/test/stdvector.cpp b/test/stdvector.cpp
index 2ca584b..1d44e42 100644
--- a/test/stdvector.cpp
+++ b/test/stdvector.cpp
@@ -124,7 +124,7 @@
 {
   typedef Eigen::Vector3f T;
   std::vector<T, Eigen::aligned_allocator<T> > v;
-  v.push_back(T());
+  v.push_back(T(1.0f,2.0f,3.0f));
 }
 
 EIGEN_DECLARE_TEST(stdvector)
diff --git a/test/svd_common.h b/test/svd_common.h
index 6497470..e43adc4 100644
--- a/test/svd_common.h
+++ b/test/svd_common.h
@@ -484,18 +484,15 @@
 }
 
 template <typename MatrixType, int QRPreconditioner = 0>
-void svd_option_checks(const MatrixType& input) {
+void svd_thin_option_checks(const MatrixType& input) {
   MatrixType m(input.rows(), input.cols());
   svd_fill_random(m);
+
   svd_compute_checks<MatrixType, QRPreconditioner>(m);
   svd_compute_checks<MatrixType, QRPreconditioner | ComputeThinU>(m);
   svd_compute_checks<MatrixType, QRPreconditioner | ComputeThinV>(m);
   svd_compute_checks<MatrixType, QRPreconditioner | ComputeThinU | ComputeThinV>(m);
 
-  svd_compute_checks<MatrixType, QRPreconditioner | ComputeFullU>(m);
-  svd_compute_checks<MatrixType, QRPreconditioner | ComputeFullV>(m);
-  svd_compute_checks<MatrixType, QRPreconditioner | ComputeFullU | ComputeFullV>(m);
-
   svd_compute_checks<MatrixType, QRPreconditioner | ComputeThinU | ComputeFullV>(m);
   svd_compute_checks<MatrixType, QRPreconditioner | ComputeFullU | ComputeThinV>(m);
 
diff --git a/test/triangular.cpp b/test/triangular.cpp
index 2259869..0578b08 100644
--- a/test/triangular.cpp
+++ b/test/triangular.cpp
@@ -55,6 +55,8 @@
              r1(rows, cols),
              r2(rows, cols);
   VectorType v2 = VectorType::Random(rows);
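+  // Scratch vector for materializing each product before the approximation check.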
+  VectorType v3 = VectorType::Zero(rows);
 
   MatrixType m1up = m1.template triangularView<Upper>();
   MatrixType m2up = m2.template triangularView<Upper>();
@@ -96,23 +98,31 @@
   Transpose<MatrixType> trm4(m4);
   // test back and forward substitution with a vector as the rhs
   m3 = m1.template triangularView<Upper>();
-  VERIFY(v2.isApprox(m3.adjoint() * (m1.adjoint().template triangularView<Lower>().solve(v2)), largerEps));
+  v3 = m3.adjoint() * (m1.adjoint().template triangularView<Lower>().solve(v2));
+  VERIFY(v2.isApprox(v3, largerEps));
   m3 = m1.template triangularView<Lower>();
-  VERIFY(v2.isApprox(m3.transpose() * (m1.transpose().template triangularView<Upper>().solve(v2)), largerEps));
+  v3 = m3.transpose() * (m1.transpose().template triangularView<Upper>().solve(v2));
+  VERIFY(v2.isApprox(v3, largerEps));
   m3 = m1.template triangularView<Upper>();
-  VERIFY(v2.isApprox(m3 * (m1.template triangularView<Upper>().solve(v2)), largerEps));
+  v3 = m3 * (m1.template triangularView<Upper>().solve(v2));
+  VERIFY(v2.isApprox(v3, largerEps));
   m3 = m1.template triangularView<Lower>();
-  VERIFY(v2.isApprox(m3.conjugate() * (m1.conjugate().template triangularView<Lower>().solve(v2)), largerEps));
+  v3 = m3.conjugate() * (m1.conjugate().template triangularView<Lower>().solve(v2));
+  VERIFY(v2.isApprox(v3, largerEps));
 
   // test back and forward substitution with a matrix as the rhs
   m3 = m1.template triangularView<Upper>();
-  VERIFY(m2.isApprox(m3.adjoint() * (m1.adjoint().template triangularView<Lower>().solve(m2)), largerEps));
+  m4 = m3.adjoint() * (m1.adjoint().template triangularView<Lower>().solve(m2));
+  VERIFY(m2.isApprox(m4, largerEps));
   m3 = m1.template triangularView<Lower>();
-  VERIFY(m2.isApprox(m3.transpose() * (m1.transpose().template triangularView<Upper>().solve(m2)), largerEps));
+  m4 = m3.transpose() * (m1.transpose().template triangularView<Upper>().solve(m2));
+  VERIFY(m2.isApprox(m4, largerEps));
   m3 = m1.template triangularView<Upper>();
-  VERIFY(m2.isApprox(m3 * (m1.template triangularView<Upper>().solve(m2)), largerEps));
+  m4 = m3 * (m1.template triangularView<Upper>().solve(m2));
+  VERIFY(m2.isApprox(m4, largerEps));
   m3 = m1.template triangularView<Lower>();
-  VERIFY(m2.isApprox(m3.conjugate() * (m1.conjugate().template triangularView<Lower>().solve(m2)), largerEps));
+  m4 = m3.conjugate() * (m1.conjugate().template triangularView<Lower>().solve(m2));
+  VERIFY(m2.isApprox(m4, largerEps));
 
   // check M * inv(L) using in place API
   m4 = m3;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
index b830984..f873f2b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
@@ -360,7 +360,7 @@
       int i;
       Index size = Index(1);
       for (i = 0; i < NumIndices; i++) {
-        internal::check_rows_cols_for_overflow<Dynamic>::run(size, dimensions[i]);
+        internal::check_rows_cols_for_overflow<Dynamic, Dynamic, Dynamic>::run(size, dimensions[i]);
         size *= dimensions[i];
       }
       #ifdef EIGEN_INITIALIZE_COEFFS
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
index ee01d99..7e924879 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -998,8 +998,9 @@
   enum {
     Vectorizable = packet_traits<Scalar>::Vectorizable,
     PacketSize = packet_traits<Scalar>::size,
-    HasHalfPacket = unpacket_traits<HalfPacket>::size < PacketSize,
     HalfPacketSize = unpacket_traits<HalfPacket>::size,
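+    // Compare as int: C++20 deprecates comparisons between values of different enumeration types.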
+    HasHalfPacket = static_cast<int>(HalfPacketSize) < static_cast<int>(PacketSize)
   };
 
  public:
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index 55369e1..2fa6777 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -91,8 +91,9 @@
     eigen_assert(rhs_block);
     BlockSizes sz = ComputeLhsRhsBlockSizes(bm, bk, bn);
     char* block_mem = static_cast<char*>(d.allocate(sz.lhs_size + sz.rhs_size));
-    *lhs_block = reinterpret_cast<LhsScalar*>(block_mem);
-    *rhs_block = reinterpret_cast<RhsScalar*>(block_mem + sz.lhs_size);
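+    // The LHS block occupies the first lhs_size bytes of the allocation; the RHS block follows it.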
+    *lhs_block = static_cast<LhsScalar*>(static_cast<void*>(block_mem));
+    *rhs_block = static_cast<RhsScalar*>(static_cast<void*>(block_mem + sz.lhs_size));
     return block_mem;
   }
 
@@ -115,12 +116,12 @@
     for (Index x = 0; x < num_slices; x++) {
       if (num_lhs > 0) lhs_blocks[x].resize(num_lhs);
       for (Index m = 0; m < num_lhs; m++) {
-        lhs_blocks[x][m] = reinterpret_cast<LhsScalar*>(mem);
+        lhs_blocks[x][m] = static_cast<LhsScalar*>(static_cast<void*>(mem));
         mem += sz.lhs_size;
       }
       if (num_rhs > 0) rhs_blocks[x].resize(num_rhs);
       for (Index n = 0; n < num_rhs; n++) {
-        rhs_blocks[x][n] = reinterpret_cast<RhsScalar*>(mem);
+        rhs_blocks[x][n] = static_cast<RhsScalar*>(static_cast<void*>(mem));
         mem += sz.rhs_size;
       }
     }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h
index 8478cc3..7fb54b9 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h
@@ -443,6 +443,15 @@
     return m_base_mapper.template loadPacket<PacketT,Alignment>(i + m_vert_offset, j + m_horiz_offset);
   }
 
+  template <typename PacketT>
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT loadPacketPartial(Index i, Index j, Index, Index = 0) const {
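+    // The two trailing Index arguments are ignored; the partial request is served by a full packet load.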
+    if (UseDirectOffsets) {
+      return m_base_mapper.template loadPacket<PacketT,Alignment>(i, j);
+    }
+    return m_base_mapper.template loadPacket<PacketT,Alignment>(i + m_vert_offset, j + m_horiz_offset);
+  }
+
   template <typename PacketT, int AlignmentType>
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT loadPacket(Index i, Index j) const {
     if (UseDirectOffsets) {
@@ -473,6 +482,8 @@
     return SubMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset);
   }
 
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index stride() const { return m_base_mapper.stride(); }
+
   template <typename PacketT, int AlignmentType>
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const {
     EIGEN_STATIC_ASSERT((internal::is_same<PacketT, PacketT>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
index 6d9e9dc..914140d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
@@ -423,7 +423,8 @@
 template <typename Dims1, typename Dims2, ptrdiff_t n>
 struct sizes_match_below_dim<Dims1, Dims2, n, n> {
   static EIGEN_DEVICE_FUNC  EIGEN_STRONG_INLINE bool run(Dims1& dims1, Dims2& dims2) {
-    return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) &&
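+    // numext::equal_strict makes the exact equality comparison explicit for the dimension index types.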
+    return numext::equal_strict(array_get<n - 1>(dims1), array_get<n - 1>(dims2)) &&
         sizes_match_below_dim<Dims1, Dims2, n-1, n-1>::run(dims1, dims2);
   }
 };
diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT
index 137b962..0b4ce1a 100644
--- a/unsupported/Eigen/FFT
+++ b/unsupported/Eigen/FFT
@@ -181,15 +181,17 @@
     typedef typename impl_type::Scalar Scalar;
     typedef typename impl_type::Complex Complex;
 
-    enum Flag {
-      Default=0, // goof proof
-      Unscaled=1,
-      HalfSpectrum=2,
-      // SomeOtherSpeedOptimization=4
-      Speedy=32767
-    };
+    using Flag = int;
+    static constexpr Flag Default = 0;
+    static constexpr Flag Unscaled = 1;
+    static constexpr Flag HalfSpectrum = 2;
+    static constexpr Flag Speedy = 32767;
 
-    FFT( const impl_type & impl=impl_type() , Flag flags=Default ) :m_impl(impl),m_flag(flags) { }
+    FFT( const impl_type & impl=impl_type() , Flag flags=Default ) :m_impl(impl),m_flag(flags)
+    {
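+        // Only the four predefined flag constants pass this check; OR-ed combinations are rejected.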
+        eigen_assert((flags == Default || flags == Unscaled || flags == HalfSpectrum || flags == Speedy) && "invalid flags argument");
+    }
 
     inline
     bool HasFlag(Flag f) const { return (m_flag & (int)f) == f;}
diff --git a/unsupported/test/EulerAngles.cpp b/unsupported/test/EulerAngles.cpp
index 0955795..8452577 100644
--- a/unsupported/test/EulerAngles.cpp
+++ b/unsupported/test/EulerAngles.cpp
@@ -9,6 +9,8 @@
 
 #include "main.h"
 
+EIGEN_DISABLE_DEPRECATED_WARNING
+
 #include <unsupported/Eigen/EulerAngles>
 
 using namespace Eigen;
diff --git a/unsupported/test/NNLS.cpp b/unsupported/test/NNLS.cpp
index b347c06..58092ef 100644
--- a/unsupported/test/NNLS.cpp
+++ b/unsupported/test/NNLS.cpp
@@ -54,7 +54,8 @@
 }
 
 template <typename MatrixType>
-void test_nnls_random_problem() {
+void test_nnls_random_problem(const MatrixType&) {
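+  // The unnamed argument exists only so MatrixType can be deduced at the call site.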
   //
   // SETUP
   //
@@ -448,12 +449,9 @@
 
   for (int i = 0; i < g_repeat; i++) {
     // Essential NNLS properties, across different types.
-    CALL_SUBTEST_2(test_nnls_random_problem<MatrixXf>());
-    CALL_SUBTEST_3(test_nnls_random_problem<MatrixXd>());
-    {
-      using MatFixed = Matrix<double, 12, 5>;
-      CALL_SUBTEST_4(test_nnls_random_problem<MatFixed>());
-    }
+    CALL_SUBTEST_2(test_nnls_random_problem(MatrixXf()));
+    CALL_SUBTEST_3(test_nnls_random_problem(MatrixXd()));
+    CALL_SUBTEST_4(test_nnls_random_problem(Matrix<double, 12, 5>()));
     CALL_SUBTEST_5(test_nnls_with_half_precision());
 
     // Robustness tests:
diff --git a/unsupported/test/cxx11_tensor_casts.cpp b/unsupported/test/cxx11_tensor_casts.cpp
index 81d81ef..7976dcb 100644
--- a/unsupported/test/cxx11_tensor_casts.cpp
+++ b/unsupported/test/cxx11_tensor_casts.cpp
@@ -30,7 +30,7 @@
 
   for (int i = 0; i < 101; ++i) {
     for (int j = 0; j < 201; ++j) {
-      const ToType ref = static_cast<ToType>(ftensor(i, j));
+      const ToType ref = internal::cast<FromType, ToType>(ftensor(i, j));
       VERIFY_IS_EQUAL(ttensor(i, j), ref);
     }
   }
diff --git a/unsupported/test/cxx11_tensor_reduction.cpp b/unsupported/test/cxx11_tensor_reduction.cpp
index ce4e538..071046a 100644
--- a/unsupported/test/cxx11_tensor_reduction.cpp
+++ b/unsupported/test/cxx11_tensor_reduction.cpp
@@ -485,7 +485,8 @@
     // Test against probabilistic forward error bound. In reality, the error is much smaller
     // when we use tree summation.
     double err = Eigen::numext::abs(static_cast<double>(sum()) - expected_sum);
-    double tol = numext::sqrt(static_cast<double>(num_elements)) * NumTraits<ScalarType>::epsilon() * static_cast<ScalarType>(abs_sum);
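+    // The error bound is evaluated entirely in double precision.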
+    double tol = numext::sqrt(static_cast<double>(num_elements)) * static_cast<double>(NumTraits<ScalarType>::epsilon()) * abs_sum;
     VERIFY_LE(err, tol);
   }
 }
diff --git a/unsupported/test/mpreal_support.cpp b/unsupported/test/mpreal_support.cpp
index 10beb07..7256dc0 100644
--- a/unsupported/test/mpreal_support.cpp
+++ b/unsupported/test/mpreal_support.cpp
@@ -20,6 +20,7 @@
   std::cerr << "highest =         " << NumTraits<mpreal>::highest() << "\n";
   std::cerr << "lowest =          " << NumTraits<mpreal>::lowest() << "\n";
   std::cerr << "digits10 =        " << NumTraits<mpreal>::digits10() << "\n";
+  std::cerr << "max_digits10 =    " << NumTraits<mpreal>::max_digits10() << "\n";
 
   for(int i = 0; i < g_repeat; i++) {
     int s = Eigen::internal::random<int>(1,100);