Update Eigen to commit: 2620cb930b7ad87ed1d77a26319739f4b1c86d33

CHANGELOG
=========
2620cb930 - Update file Geometry_SIMD.h
b2c9ba2be - Fix preprocessor condition on when to use fast float logistic implementation.
283d69294 - Guard AVX2 implementation of psignbit in PacketMath.h
be54cc8de - Fix preverse for PowerPC.
c5b234196 - Fix unused variable warning in TensorIO.h
86aee3d9c - Fix long double random
776d86d8d - AVX: guard Packet4l definition
e63d9f6cc - Fix random again
f75e2297d - AVX2 - double->int64_t casting
13092b5d0 - Fix usages of Eigen::array to be compatible with std::array.
77833f932 - Allow symbols to be used in compile-time expressions.
d26e19714 - Add missing cwiseSquare, tests for cwise matrix ops.
35bf6c8ed - Add SimplicialNonHermitianLLT and SimplicialNonHermitianLDLT
4dccaa587 - Use truncation rather than rounding when casting Packet2d to Packet2l.
7b5d32b7c - Sparse move
c8d368bda - More fixes for 32-bit.
de304ab96 - Fix using ScalarPrinter redefinition for gcc.
c54303848 - Undef macro in TensorContractionGpu.h that causes buildbreakages.
d8aa4d6ba - Fix another instance of Packet2l on win32.
9f77ce4f1 - Add custom formatting of complex numbers for Numpy/Native.
5570a2786 - cross3_product vectorization
0b3df4a6e - Remove "extern C" in CholmodSupport.
a39ade4cc - Protect use of alloca.
b86641a4c - Add support for casting between double and int64_t for SSE and AVX2.
d88393258 - Fix Packet*l for 32-bit builds.
d792f13a6 - Make more Matrix functions constexpr
d3cd31265 - Remove slow index check in Tensor::resize from release mode.
386e2079e - Fix Jacobi module doc.
8b101ade2 - Fix CwiseUnaryView for MSVC.
0951ad2a8 - Don't hide rbegin/rend for GPU.
24f8fdeb4 - Fix CwiseUnaryView const access (Attempt 2).
285da30ec - Fix const input and c++20 compatibility in unary view.
126ba1a16 - Add Packet2l for SSE.
1d4369c2f - Fix CwiseUnaryView.
352ede96e - Fix incomplete cholesky.
f1adb0ccc - Split up cxx11_tensor_gpu to reduce timeouts.
17f3bf898 - Fix pexp test for ARM.
6da34d9d9 - Allow aligned assignment in TRMV.
3e8e63eb4 - Fix packetmath plog test on Windows.
5ffb307af - Fix deprecated anonymous enum-enum conversion warnings
55dd48747 - Revert "fix unaligned access in trmv"
38fcedaf8 - Fix pexp complex test edge-cases.
251ec4208 - Return 0 volume for empty AlignedBox
64edfbed0 - Fix static_assert for c++14.
3f3144f53 - fix unaligned access in trmv
23f6c2685 - Rip out make_coherent, add CoherentPadOp.
edaf9e16b - Fix triangular matrix-vector multiply uninitialized warning.
98620b58c - Eliminate FindCUDA cmake warning.
cc941d69a - Update error about c++14 requirement.
6893287c9 - Add degenerate checks before calling BLAS routines.
fa201f1bb - Fix QR colpivoting warnings and test failure.
b33491070 - delete shadowed typedefs
a962a2759 - Fix MSVC GPU build.
a2f8eba02 - Speed up sparse x dense dot product.
7a88cdd6a - Fix signed integer UB in random.
a6dc930d1 - Speed up SparseQR.
feaafda30 - Change array_size result from enum to constexpr.
8a73c6490 - Remove "using namespace Eigen" from blas/common.h.
6ed4d80cc - Fix crash in IncompleteCholesky when the input has zeros on the diagonal.
3859e8d5b - Add method signDeterminant() to QR and related decompositions.
db6b9db33 - Make header guards in GeneralMatrixMatrix.h and Parallelizer.h consistent:...
b56e30841 - Enable direct access for IndexedView.
90087b990 - Fix use of uninitialized memory in kronecker_product test.
6b365e74d - Fix GPU build for ptanh_float.
b14c5d0fa - Fix real schur and polynomial solver.
8a4118746 - fix exp complex test: use int instead of index
960892ca1 - JacobiSVD: get rid of m_scaledMatrix, m_adjoint, hopefully fix some compiler warnings
18a161bf1 - fix pexp_complex_test
be06c9ad5 - Implement float pexp_complex
4d419e220 - Rename generic_fast_tanh_float to ptanh_float and move it to...

PiperOrigin-RevId: 622268063
Change-Id: Ia444528ed7190f328160d8784de910a3722b5aed
diff --git a/Eigen/CholmodSupport b/Eigen/CholmodSupport
index 2961863..adc5f8d 100644
--- a/Eigen/CholmodSupport
+++ b/Eigen/CholmodSupport
@@ -12,9 +12,7 @@
 
 #include "src/Core/util/DisableStupidWarnings.h"
 
-extern "C" {
 #include <cholmod.h>
-}
 
 /** \ingroup Support_modules
  * \defgroup CholmodSupport_Module CholmodSupport module
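Note on the hunk above: cholmod.h declares its own C linkage and may transitively pull in C++ headers, so wrapping the include in an extra extern "C" can break compilation. A minimal illustration of the failure mode (a sketch, not from the Eigen sources):

    extern "C" {
    #include <cholmod.h>   // may include C++ headers (e.g. <complex>) on some configs
    }
    // possible result: error: templates must have C++ linkage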
diff --git a/Eigen/Core b/Eigen/Core
index f9d9974..ed7d353 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -178,6 +178,7 @@
 
 #include "src/Core/NumTraits.h"
 #include "src/Core/MathFunctions.h"
+#include "src/Core/RandomImpl.h"
 #include "src/Core/GenericPacketMath.h"
 #include "src/Core/MathFunctionsImpl.h"
 #include "src/Core/arch/Default/ConjHelper.h"
diff --git a/Eigen/src/Core/ArithmeticSequence.h b/Eigen/src/Core/ArithmeticSequence.h
index 0f45e89..ae6373d 100644
--- a/Eigen/src/Core/ArithmeticSequence.h
+++ b/Eigen/src/Core/ArithmeticSequence.h
@@ -61,26 +61,28 @@
 template <typename FirstType, typename SizeType, typename IncrType>
 class ArithmeticSequence {
  public:
-  ArithmeticSequence(FirstType first, SizeType size) : m_first(first), m_size(size) {}
-  ArithmeticSequence(FirstType first, SizeType size, IncrType incr) : m_first(first), m_size(size), m_incr(incr) {}
+  constexpr ArithmeticSequence() = default;
+  constexpr ArithmeticSequence(FirstType first, SizeType size) : m_first(first), m_size(size) {}
+  constexpr ArithmeticSequence(FirstType first, SizeType size, IncrType incr)
+      : m_first(first), m_size(size), m_incr(incr) {}
 
   enum {
-    SizeAtCompileTime = internal::get_fixed_value<SizeType>::value,
+    // SizeAtCompileTime = internal::get_fixed_value<SizeType>::value,
     IncrAtCompileTime = internal::get_fixed_value<IncrType, DynamicIndex>::value
   };
 
   /** \returns the size, i.e., number of elements, of the sequence */
-  Index size() const { return m_size; }
+  constexpr Index size() const { return m_size; }
 
   /** \returns the first element \f$ a_0 \f$ in the sequence */
-  Index first() const { return m_first; }
+  constexpr Index first() const { return m_first; }
 
   /** \returns the value \f$ a_i \f$ at index \a i in the sequence. */
-  Index operator[](Index i) const { return m_first + i * m_incr; }
+  constexpr Index operator[](Index i) const { return m_first + i * m_incr; }
 
-  const FirstType& firstObject() const { return m_first; }
-  const SizeType& sizeObject() const { return m_size; }
-  const IncrType& incrObject() const { return m_incr; }
+  constexpr const FirstType& firstObject() const { return m_first; }
+  constexpr const SizeType& sizeObject() const { return m_size; }
+  constexpr const IncrType& incrObject() const { return m_incr; }
 
  protected:
   FirstType m_first;
@@ -88,7 +90,7 @@
   IncrType m_incr;
 
  public:
-  auto reverse() const -> decltype(Eigen::seqN(m_first + (m_size + fix<-1>()) * m_incr, m_size, -m_incr)) {
+  constexpr auto reverse() const -> decltype(Eigen::seqN(m_first + (m_size + fix<-1>()) * m_incr, m_size, -m_incr)) {
     return seqN(m_first + (m_size + fix<-1>()) * m_incr, m_size, -m_incr);
   }
 };
@@ -201,33 +203,6 @@
 
 }  // namespace placeholders
 
-namespace internal {
-
-// Convert a symbolic span into a usable one (i.e., remove last/end "keywords")
-template <typename T>
-struct make_size_type {
-  typedef std::conditional_t<symbolic::is_symbolic<T>::value, Index, T> type;
-};
-
-template <typename FirstType, typename SizeType, typename IncrType, int XprSize>
-struct IndexedViewCompatibleType<ArithmeticSequence<FirstType, SizeType, IncrType>, XprSize> {
-  typedef ArithmeticSequence<Index, typename make_size_type<SizeType>::type, IncrType> type;
-};
-
-template <typename FirstType, typename SizeType, typename IncrType>
-ArithmeticSequence<Index, typename make_size_type<SizeType>::type, IncrType> makeIndexedViewCompatible(
-    const ArithmeticSequence<FirstType, SizeType, IncrType>& ids, Index size, SpecializedType) {
-  return ArithmeticSequence<Index, typename make_size_type<SizeType>::type, IncrType>(
-      eval_expr_given_size(ids.firstObject(), size), eval_expr_given_size(ids.sizeObject(), size), ids.incrObject());
-}
-
-template <typename FirstType, typename SizeType, typename IncrType>
-struct get_compile_time_incr<ArithmeticSequence<FirstType, SizeType, IncrType> > {
-  enum { value = get_fixed_value<IncrType, DynamicIndex>::value };
-};
-
-}  // end namespace internal
-
 /** \namespace Eigen::indexing
   * \ingroup Core_Module
   *
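What the constexpr qualifiers above enable, as a sketch (whether the seq/seqN helpers are themselves usable in constant expressions depends on the rest of this change):

    constexpr Eigen::ArithmeticSequence<Eigen::Index, Eigen::Index, Eigen::Index> s(2, 5, 3);
    static_assert(s.size() == 5, "five elements");
    static_assert(s[2] == 8, "a_2 = 2 + 2*3");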
diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h
index 725b337..49b1410 100644
--- a/Eigen/src/Core/CwiseUnaryView.h
+++ b/Eigen/src/Core/CwiseUnaryView.h
@@ -18,7 +18,9 @@
 namespace internal {
 template <typename ViewOp, typename MatrixType, typename StrideType>
 struct traits<CwiseUnaryView<ViewOp, MatrixType, StrideType> > : traits<MatrixType> {
-  typedef typename result_of<ViewOp(const typename traits<MatrixType>::Scalar&)>::type Scalar;
+  typedef typename result_of<ViewOp(typename traits<MatrixType>::Scalar&)>::type ScalarRef;
+  static_assert(std::is_reference<ScalarRef>::value, "Views must return a reference type.");
+  typedef remove_all_t<ScalarRef> Scalar;
   typedef typename MatrixType::Nested MatrixTypeNested;
   typedef remove_all_t<MatrixTypeNested> MatrixTypeNested_;
   enum {
@@ -44,10 +46,76 @@
                                    : int(StrideType::OuterStrideAtCompileTime)
   };
 };
-}  // namespace internal
 
-template <typename ViewOp, typename MatrixType, typename StrideType, typename StorageKind>
-class CwiseUnaryViewImpl;
+// Generic API dispatcher
+template <typename ViewOp, typename XprType, typename StrideType, typename StorageKind,
+          bool Mutable = !std::is_const<XprType>::value>
+class CwiseUnaryViewImpl : public generic_xpr_base<CwiseUnaryView<ViewOp, XprType, StrideType> >::type {
+ public:
+  typedef typename generic_xpr_base<CwiseUnaryView<ViewOp, XprType, StrideType> >::type Base;
+};
+
+template <typename ViewOp, typename MatrixType, typename StrideType>
+class CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType, Dense, false>
+    : public dense_xpr_base<CwiseUnaryView<ViewOp, MatrixType, StrideType> >::type {
+ public:
+  typedef CwiseUnaryView<ViewOp, MatrixType, StrideType> Derived;
+  typedef typename dense_xpr_base<CwiseUnaryView<ViewOp, MatrixType, StrideType> >::type Base;
+  EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl)
+
+  EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeffRef(0)); }
+
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const {
+    return StrideType::InnerStrideAtCompileTime != 0 ? int(StrideType::InnerStrideAtCompileTime)
+                                                     : derived().nestedExpression().innerStride() *
+                                                           sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const {
+    return StrideType::OuterStrideAtCompileTime != 0 ? int(StrideType::OuterStrideAtCompileTime)
+                                                     : derived().nestedExpression().outerStride() *
+                                                           sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar);
+  }
+
+ protected:
+  EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CwiseUnaryViewImpl)
+
+  // Allow const access to coeffRef for the case of direct access being enabled.
+  EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const {
+    return internal::evaluator<Derived>(derived()).coeffRef(index);
+  }
+
+  EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index row, Index col) const {
+    return internal::evaluator<Derived>(derived()).coeffRef(row, col);
+  }
+};
+
+template <typename ViewOp, typename MatrixType, typename StrideType>
+class CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType, Dense, true>
+    : public CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType, Dense, false> {
+ public:
+  typedef CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType, Dense, false> Base;
+  typedef CwiseUnaryView<ViewOp, MatrixType, StrideType> Derived;
+  EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl)
+
+  using Base::data;
+  EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) {
+    return internal::evaluator<Derived>(derived()).coeffRef(row, col);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
+    return internal::evaluator<Derived>(derived()).coeffRef(index);
+  }
+
+ protected:
+  EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CwiseUnaryViewImpl)
+};
+
+}  // namespace internal
 
 /** \class CwiseUnaryView
  * \ingroup Core_Module
@@ -63,11 +131,11 @@
  * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
  */
 template <typename ViewOp, typename MatrixType, typename StrideType>
-class CwiseUnaryView
-    : public CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType, typename internal::traits<MatrixType>::StorageKind> {
+class CwiseUnaryView : public internal::CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType,
+                                                           typename internal::traits<MatrixType>::StorageKind> {
  public:
-  typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType,
-                                      typename internal::traits<MatrixType>::StorageKind>::Base Base;
+  typedef typename internal::CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType,
+                                                typename internal::traits<MatrixType>::StorageKind>::Base Base;
   EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
   typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
   typedef internal::remove_all_t<MatrixType> NestedExpression;
@@ -94,44 +162,6 @@
   ViewOp m_functor;
 };
 
-// Generic API dispatcher
-template <typename ViewOp, typename XprType, typename StrideType, typename StorageKind>
-class CwiseUnaryViewImpl : public internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType, StrideType> >::type {
- public:
-  typedef typename internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType, StrideType> >::type Base;
-};
-
-template <typename ViewOp, typename MatrixType, typename StrideType>
-class CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType, Dense>
-    : public internal::dense_xpr_base<CwiseUnaryView<ViewOp, MatrixType, StrideType> >::type {
- public:
-  typedef CwiseUnaryView<ViewOp, MatrixType, StrideType> Derived;
-  typedef typename internal::dense_xpr_base<CwiseUnaryView<ViewOp, MatrixType, StrideType> >::type Base;
-
-  EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
-  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl)
-
-  EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); }
-  EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); }
-
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const {
-    return StrideType::InnerStrideAtCompileTime != 0
-               ? int(StrideType::InnerStrideAtCompileTime)
-               : derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) /
-                     sizeof(Scalar);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const {
-    return StrideType::OuterStrideAtCompileTime != 0
-               ? int(StrideType::OuterStrideAtCompileTime)
-               : derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) /
-                     sizeof(Scalar);
-  }
-
- protected:
-  EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CwiseUnaryViewImpl)
-};
-
-}  // end namespace Eigen
+}  // namespace Eigen
 
 #endif  // EIGEN_CWISE_UNARY_VIEW_H
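Sketch of the user-visible behavior of the rewritten view, using .real() on a complex matrix, whose functor yields float& and thus satisfies the new "Views must return a reference type" assertion:

    Eigen::MatrixXcf m = Eigen::MatrixXcf::Zero(2, 2);
    m.real()(0, 0) = 1.0f;        // mutable coeffRef path
    float* p = m.real().data();   // direct access; innerStride() is counted in floats,
                                  // hence the sizeof ratio in the stride computations above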
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index 3ec6852..1220073 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -242,26 +242,18 @@
 
 template <typename Scalar, int Size, int MaxSize>
 struct gemv_static_vector_if<Scalar, Size, MaxSize, true> {
-  enum {
-    ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
-    PacketSize = internal::packet_traits<Scalar>::size
-  };
 #if EIGEN_MAX_STATIC_ALIGN_BYTES != 0
-  internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize), 0,
-                        internal::plain_enum_min(AlignedMax, PacketSize)>
+  internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize), 0, AlignedMax>
       m_data;
   EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
 #else
   // Some architectures cannot align on the stack,
   // => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
   internal::plain_array<
-      Scalar, internal::min_size_prefer_fixed(Size, MaxSize) + (ForceAlignment ? EIGEN_MAX_ALIGN_BYTES : 0), 0>
+      Scalar, internal::min_size_prefer_fixed(Size, MaxSize) + EIGEN_MAX_ALIGN_BYTES, 0>
       m_data;
   EIGEN_STRONG_INLINE Scalar* data() {
-    return ForceAlignment
-               ? reinterpret_cast<Scalar*>((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) +
-                                           EIGEN_MAX_ALIGN_BYTES)
-               : m_data.array;
+    return reinterpret_cast<Scalar*>((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) + EIGEN_MAX_ALIGN_BYTES);
   }
 #endif
 };
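The #else branch keeps the usual over-allocate-and-round-up idiom, now applied unconditionally. In isolation the trick looks like this (a sketch; N stands for the payload size):

    // Reserve EIGEN_MAX_ALIGN_BYTES extra, round the address down to an alignment
    // boundary, then step up one full boundary: the result is aligned and in-bounds.
    char buf[N + EIGEN_MAX_ALIGN_BYTES];
    char* aligned = reinterpret_cast<char*>(
        (reinterpret_cast<std::uintptr_t>(buf) & ~std::uintptr_t(EIGEN_MAX_ALIGN_BYTES - 1)) +
        EIGEN_MAX_ALIGN_BYTES);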
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 4b56f0f..58a197f 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -44,24 +44,30 @@
 
 struct default_packet_traits {
   enum {
+    // Ops that are implemented for most types.
     HasAdd = 1,
     HasSub = 1,
     HasShift = 1,
     HasMul = 1,
     HasNegate = 1,
     HasAbs = 1,
-    HasArg = 0,
     HasAbs2 = 1,
-    HasAbsDiff = 0,
     HasMin = 1,
     HasMax = 1,
     HasConj = 1,
     HasSetLinear = 1,
     HasSign = 1,
+
+    HasArg = 0,
+    HasAbsDiff = 0,
     HasBlend = 0,
     // This flag is used to indicate whether packet comparison is supported.
     // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true.
     HasCmp = 0,
+    HasRound = 0,
+    HasRint = 0,
+    HasFloor = 0,
+    HasCeil = 0,
 
     HasDiv = 0,
     HasReciprocal = 0,
@@ -73,7 +79,6 @@
     HasLog1p = 0,
     HasLog10 = 0,
     HasPow = 0,
-
     HasSin = 0,
     HasCos = 0,
     HasTan = 0,
@@ -96,12 +101,7 @@
     HasIGammaDerA = 0,
     HasGammaSampleDerAlpha = 0,
     HasIGammac = 0,
-    HasBetaInc = 0,
-
-    HasRound = 0,
-    HasRint = 0,
-    HasFloor = 0,
-    HasCeil = 0
+    HasBetaInc = 0
   };
 };
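These defaults are meant to be inherited: each architecture specializes packet_traits, derives from default_packet_traits, and flips on what its ISA supports. Schematically (a sketch modeled on the SSE float traits, not copied from them):

    template <>
    struct packet_traits<float> : default_packet_traits {
      typedef Packet4f type;
      enum {
        Vectorizable = 1,
        size = 4,
        HasCmp = 1,    // overrides the 0 default above
        HasDiv = 1,
        HasFloor = 1
      };
    };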
 
diff --git a/Eigen/src/Core/IndexedView.h b/Eigen/src/Core/IndexedView.h
index 0a02417..454e560 100644
--- a/Eigen/src/Core/IndexedView.h
+++ b/Eigen/src/Core/IndexedView.h
@@ -20,8 +20,8 @@
 template <typename XprType, typename RowIndices, typename ColIndices>
 struct traits<IndexedView<XprType, RowIndices, ColIndices>> : traits<XprType> {
   enum {
-    RowsAtCompileTime = int(array_size<RowIndices>::value),
-    ColsAtCompileTime = int(array_size<ColIndices>::value),
+    RowsAtCompileTime = int(IndexedViewHelper<RowIndices>::SizeAtCompileTime),
+    ColsAtCompileTime = int(IndexedViewHelper<ColIndices>::SizeAtCompileTime),
     MaxRowsAtCompileTime = RowsAtCompileTime,
     MaxColsAtCompileTime = ColsAtCompileTime,
 
@@ -30,8 +30,8 @@
                  : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1) ? 0
                                                                             : XprTypeIsRowMajor,
 
-    RowIncr = int(get_compile_time_incr<RowIndices>::value),
-    ColIncr = int(get_compile_time_incr<ColIndices>::value),
+    RowIncr = int(IndexedViewHelper<RowIndices>::IncrAtCompileTime),
+    ColIncr = int(IndexedViewHelper<ColIndices>::IncrAtCompileTime),
     InnerIncr = IsRowMajor ? ColIncr : RowIncr,
     OuterIncr = IsRowMajor ? RowIncr : ColIncr,
 
@@ -47,24 +47,23 @@
                     is_same<AllRange<InnerSize>, std::conditional_t<XprTypeIsRowMajor, ColIndices, RowIndices>>::value,
 
     InnerStrideAtCompileTime =
-        InnerIncr < 0 || InnerIncr == DynamicIndex || XprInnerStride == Dynamic || InnerIncr == UndefinedIncr
+        InnerIncr < 0 || InnerIncr == DynamicIndex || XprInnerStride == Dynamic || InnerIncr == Undefined
             ? Dynamic
             : XprInnerStride * InnerIncr,
     OuterStrideAtCompileTime =
-        OuterIncr < 0 || OuterIncr == DynamicIndex || XprOuterstride == Dynamic || OuterIncr == UndefinedIncr
+        OuterIncr < 0 || OuterIncr == DynamicIndex || XprOuterstride == Dynamic || OuterIncr == Undefined
             ? Dynamic
             : XprOuterstride * OuterIncr,
 
-    ReturnAsScalar = is_same<RowIndices, SingleRange>::value && is_same<ColIndices, SingleRange>::value,
+    ReturnAsScalar = is_single_range<RowIndices>::value && is_single_range<ColIndices>::value,
     ReturnAsBlock = (!ReturnAsScalar) && IsBlockAlike,
     ReturnAsIndexedView = (!ReturnAsScalar) && (!ReturnAsBlock),
 
     // FIXME we deal with compile-time strides if and only if we have DirectAccessBit flag,
     // but this is too strict regarding negative strides...
-    DirectAccessMask =
-        (int(InnerIncr) != UndefinedIncr && int(OuterIncr) != UndefinedIncr && InnerIncr >= 0 && OuterIncr >= 0)
-            ? DirectAccessBit
-            : 0,
+    DirectAccessMask = (int(InnerIncr) != Undefined && int(OuterIncr) != Undefined && InnerIncr >= 0 && OuterIncr >= 0)
+                           ? DirectAccessBit
+                           : 0,
     FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
     FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
     FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
@@ -75,11 +74,11 @@
   typedef Block<XprType, RowsAtCompileTime, ColsAtCompileTime, IsInnerPannel> BlockType;
 };
 
-}  // namespace internal
-
-template <typename XprType, typename RowIndices, typename ColIndices, typename StorageKind>
+template <typename XprType, typename RowIndices, typename ColIndices, typename StorageKind, bool DirectAccess>
 class IndexedViewImpl;
 
+}  // namespace internal
+
 /** \class IndexedView
  * \ingroup Core_Module
  *
@@ -120,26 +119,43 @@
  */
 template <typename XprType, typename RowIndices, typename ColIndices>
 class IndexedView
-    : public IndexedViewImpl<XprType, RowIndices, ColIndices, typename internal::traits<XprType>::StorageKind> {
+    : public internal::IndexedViewImpl<XprType, RowIndices, ColIndices, typename internal::traits<XprType>::StorageKind,
+                                       (internal::traits<IndexedView<XprType, RowIndices, ColIndices>>::Flags &
+                                        DirectAccessBit) != 0> {
  public:
-  typedef
-      typename IndexedViewImpl<XprType, RowIndices, ColIndices, typename internal::traits<XprType>::StorageKind>::Base
-          Base;
+  typedef typename internal::IndexedViewImpl<
+      XprType, RowIndices, ColIndices, typename internal::traits<XprType>::StorageKind,
+      (internal::traits<IndexedView<XprType, RowIndices, ColIndices>>::Flags & DirectAccessBit) != 0>
+      Base;
   EIGEN_GENERIC_PUBLIC_INTERFACE(IndexedView)
   EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedView)
 
+  template <typename T0, typename T1>
+  IndexedView(XprType& xpr, const T0& rowIndices, const T1& colIndices) : Base(xpr, rowIndices, colIndices) {}
+};
+
+namespace internal {
+
+// Generic API dispatcher
+template <typename XprType, typename RowIndices, typename ColIndices, typename StorageKind, bool DirectAccess>
+class IndexedViewImpl : public internal::generic_xpr_base<IndexedView<XprType, RowIndices, ColIndices>>::type {
+ public:
+  typedef typename internal::generic_xpr_base<IndexedView<XprType, RowIndices, ColIndices>>::type Base;
   typedef typename internal::ref_selector<XprType>::non_const_type MatrixTypeNested;
   typedef internal::remove_all_t<XprType> NestedExpression;
+  typedef typename XprType::Scalar Scalar;
+
+  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedViewImpl)
 
   template <typename T0, typename T1>
-  IndexedView(XprType& xpr, const T0& rowIndices, const T1& colIndices)
+  IndexedViewImpl(XprType& xpr, const T0& rowIndices, const T1& colIndices)
       : m_xpr(xpr), m_rowIndices(rowIndices), m_colIndices(colIndices) {}
 
   /** \returns number of rows */
-  Index rows() const { return internal::index_list_size(m_rowIndices); }
+  Index rows() const { return IndexedViewHelper<RowIndices>::size(m_rowIndices); }
 
   /** \returns number of columns */
-  Index cols() const { return internal::index_list_size(m_colIndices); }
+  Index cols() const { return IndexedViewHelper<ColIndices>::size(m_colIndices); }
 
   /** \returns the nested expression */
   const internal::remove_all_t<XprType>& nestedExpression() const { return m_xpr; }
@@ -153,20 +169,76 @@
   /** \returns a const reference to the object storing/generating the column indices */
   const ColIndices& colIndices() const { return m_colIndices; }
 
+  constexpr Scalar& coeffRef(Index rowId, Index colId) {
+    return nestedExpression().coeffRef(m_rowIndices[rowId], m_colIndices[colId]);
+  }
+
+  constexpr const Scalar& coeffRef(Index rowId, Index colId) const {
+    return nestedExpression().coeffRef(m_rowIndices[rowId], m_colIndices[colId]);
+  }
+
  protected:
   MatrixTypeNested m_xpr;
   RowIndices m_rowIndices;
   ColIndices m_colIndices;
 };
 
-// Generic API dispatcher
 template <typename XprType, typename RowIndices, typename ColIndices, typename StorageKind>
-class IndexedViewImpl : public internal::generic_xpr_base<IndexedView<XprType, RowIndices, ColIndices>>::type {
+class IndexedViewImpl<XprType, RowIndices, ColIndices, StorageKind, true>
+    : public IndexedViewImpl<XprType, RowIndices, ColIndices, StorageKind, false> {
  public:
-  typedef typename internal::generic_xpr_base<IndexedView<XprType, RowIndices, ColIndices>>::type Base;
-};
+  using Base = internal::IndexedViewImpl<XprType, RowIndices, ColIndices,
+                                         typename internal::traits<XprType>::StorageKind, false>;
+  using Derived = IndexedView<XprType, RowIndices, ColIndices>;
 
-namespace internal {
+  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedViewImpl)
+
+  template <typename T0, typename T1>
+  IndexedViewImpl(XprType& xpr, const T0& rowIndices, const T1& colIndices) : Base(xpr, rowIndices, colIndices) {}
+
+  Index rowIncrement() const {
+    if (traits<Derived>::RowIncr != DynamicIndex && traits<Derived>::RowIncr != Undefined) {
+      return traits<Derived>::RowIncr;
+    }
+    return IndexedViewHelper<RowIndices>::incr(this->rowIndices());
+  }
+  Index colIncrement() const {
+    if (traits<Derived>::ColIncr != DynamicIndex && traits<Derived>::ColIncr != Undefined) {
+      return traits<Derived>::ColIncr;
+    }
+    return IndexedViewHelper<ColIndices>::incr(this->colIndices());
+  }
+
+  Index innerIncrement() const { return traits<Derived>::IsRowMajor ? colIncrement() : rowIncrement(); }
+
+  Index outerIncrement() const { return traits<Derived>::IsRowMajor ? rowIncrement() : colIncrement(); }
+
+  std::decay_t<typename XprType::Scalar>* data() {
+    Index row_offset = this->rowIndices()[0] * this->nestedExpression().rowStride();
+    Index col_offset = this->colIndices()[0] * this->nestedExpression().colStride();
+    return this->nestedExpression().data() + row_offset + col_offset;
+  }
+
+  const std::decay_t<typename XprType::Scalar>* data() const {
+    Index row_offset = this->rowIndices()[0] * this->nestedExpression().rowStride();
+    Index col_offset = this->colIndices()[0] * this->nestedExpression().colStride();
+    return this->nestedExpression().data() + row_offset + col_offset;
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const EIGEN_NOEXCEPT {
+    if (traits<Derived>::InnerStrideAtCompileTime != Dynamic) {
+      return traits<Derived>::InnerStrideAtCompileTime;
+    }
+    return innerIncrement() * this->nestedExpression().innerStride();
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const EIGEN_NOEXCEPT {
+    if (traits<Derived>::OuterStrideAtCompileTime != Dynamic) {
+      return traits<Derived>::OuterStrideAtCompileTime;
+    }
+    return outerIncrement() * this->nestedExpression().outerStride();
+  }
+};
 
 template <typename ArgType, typename RowIndices, typename ColIndices>
 struct unary_evaluator<IndexedView<ArgType, RowIndices, ColIndices>, IndexBased>
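What the direct-access specialization buys at the call site, as a sketch (assumes non-negative, equally spaced indices so DirectAccessBit is set, and column-major storage):

    Eigen::MatrixXd A(8, 8);
    auto v = A(Eigen::seqN(0, 4, 2), Eigen::all);  // rows 0,2,4,6
    double* p = v.data();                 // points at A(0,0) inside A's storage
    Eigen::Index is = v.innerStride();    // 2: the row increment
    Eigen::Index os = v.outerStride();    // 8: one full column of A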
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index c92572f..f907d1e 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -164,7 +164,7 @@
   typedef typename NumTraits<Scalar>::Real RealScalar;
   EIGEN_DEVICE_FUNC static inline RealScalar& run(Scalar& x) { return reinterpret_cast<RealScalar*>(&x)[1]; }
   EIGEN_DEVICE_FUNC static inline const RealScalar& run(const Scalar& x) {
-    return reinterpret_cast<RealScalar*>(&x)[1];
+    return reinterpret_cast<const RealScalar*>(&x)[1];
   }
 };
 
@@ -604,7 +604,6 @@
 struct count_bits_impl {
   static_assert(std::is_integral<BitsType>::value && std::is_unsigned<BitsType>::value,
                 "BitsType must be an unsigned integer");
-
   static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
     int n = CHAR_BIT * sizeof(BitsType);
     int shift = n / 2;
@@ -655,9 +654,9 @@
 #if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
 
 template <typename BitsType>
-struct count_bits_impl<BitsType, std::enable_if_t<sizeof(BitsType) <= sizeof(unsigned int)>> {
+struct count_bits_impl<
+    BitsType, std::enable_if_t<std::is_integral<BitsType>::value && sizeof(BitsType) <= sizeof(unsigned int)>> {
   static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
-  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
   static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
     static constexpr int kLeadingBitsOffset = (sizeof(unsigned int) - sizeof(BitsType)) * CHAR_BIT;
     return bits == 0 ? kNumBits : __builtin_clz(static_cast<unsigned int>(bits)) - kLeadingBitsOffset;
@@ -669,10 +668,10 @@
 };
 
 template <typename BitsType>
-struct count_bits_impl<
-    BitsType, std::enable_if_t<sizeof(unsigned int) < sizeof(BitsType) && sizeof(BitsType) <= sizeof(unsigned long)>> {
+struct count_bits_impl<BitsType,
+                       std::enable_if_t<std::is_integral<BitsType>::value && sizeof(unsigned int) < sizeof(BitsType) &&
+                                        sizeof(BitsType) <= sizeof(unsigned long)>> {
   static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
-  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
   static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
     static constexpr int kLeadingBitsOffset = (sizeof(unsigned long) - sizeof(BitsType)) * CHAR_BIT;
     return bits == 0 ? kNumBits : __builtin_clzl(static_cast<unsigned long>(bits)) - kLeadingBitsOffset;
@@ -684,10 +683,10 @@
 };
 
 template <typename BitsType>
-struct count_bits_impl<BitsType, std::enable_if_t<sizeof(unsigned long) < sizeof(BitsType) &&
-                                                  sizeof(BitsType) <= sizeof(unsigned long long)>> {
+struct count_bits_impl<BitsType,
+                       std::enable_if_t<std::is_integral<BitsType>::value && sizeof(unsigned long) < sizeof(BitsType) &&
+                                        sizeof(BitsType) <= sizeof(unsigned long long)>> {
   static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
-  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
   static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
     static constexpr int kLeadingBitsOffset = (sizeof(unsigned long long) - sizeof(BitsType)) * CHAR_BIT;
     return bits == 0 ? kNumBits : __builtin_clzll(static_cast<unsigned long long>(bits)) - kLeadingBitsOffset;
@@ -701,9 +700,9 @@
 #elif EIGEN_COMP_MSVC
 
 template <typename BitsType>
-struct count_bits_impl<BitsType, std::enable_if_t<sizeof(BitsType) <= sizeof(unsigned long)>> {
+struct count_bits_impl<
+    BitsType, std::enable_if_t<std::is_integral<BitsType>::value && sizeof(BitsType) <= sizeof(unsigned long)>> {
   static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
-  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
   static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
     unsigned long out;
     _BitScanReverse(&out, static_cast<unsigned long>(bits));
@@ -720,10 +719,10 @@
 #ifdef _WIN64
 
 template <typename BitsType>
-struct count_bits_impl<
-    BitsType, std::enable_if_t<sizeof(unsigned long) < sizeof(BitsType) && sizeof(BitsType) <= sizeof(__int64)>> {
+struct count_bits_impl<BitsType,
+                       std::enable_if_t<std::is_integral<BitsType>::value && sizeof(unsigned long) < sizeof(BitsType) &&
+                                        sizeof(BitsType) <= sizeof(__int64)>> {
   static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
-  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
   static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
     unsigned long out;
     _BitScanReverse64(&out, static_cast<unsigned __int64>(bits));
@@ -742,186 +741,27 @@
 #endif  // EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
 
 template <typename BitsType>
-int log2_ceil(BitsType x) {
-  int n = CHAR_BIT * sizeof(BitsType) - clz(x);
-  bool powerOfTwo = (x & (x - 1)) == 0;
-  return x == 0 ? 0 : powerOfTwo ? n - 1 : n;
+struct log_2_impl {
+  static constexpr int kTotalBits = sizeof(BitsType) * CHAR_BIT;
+  static EIGEN_DEVICE_FUNC inline int run_ceil(const BitsType& x) {
+    const int n = kTotalBits - clz(x);
+    bool power_of_two = (x & (x - 1)) == 0;
+    return x == 0 ? 0 : power_of_two ? (n - 1) : n;
+  }
+  static EIGEN_DEVICE_FUNC inline int run_floor(const BitsType& x) {
+    const int n = kTotalBits - clz(x);
+    return x == 0 ? 0 : n - 1;
+  }
+};
+
+template <typename BitsType>
+int log2_ceil(const BitsType& x) {
+  return log_2_impl<BitsType>::run_ceil(x);
 }
 
 template <typename BitsType>
-int log2_floor(BitsType x) {
-  int n = CHAR_BIT * sizeof(BitsType) - clz(x);
-  return x == 0 ? 0 : n - 1;
-}
-
-/****************************************************************************
- * Implementation of random                                               *
- ****************************************************************************/
-
-// return a Scalar filled with numRandomBits beginning from the least significant bit
-template <typename Scalar>
-Scalar getRandomBits(int numRandomBits) {
-  using BitsType = typename numext::get_integer_by_size<sizeof(Scalar)>::unsigned_type;
-  enum : int {
-    StdRandBits = meta_floor_log2<(unsigned int)(RAND_MAX) + 1>::value,
-    ScalarBits = sizeof(Scalar) * CHAR_BIT
-  };
-  eigen_assert((numRandomBits >= 0) && (numRandomBits <= ScalarBits));
-  const BitsType mask = BitsType(-1) >> ((ScalarBits - numRandomBits) & (ScalarBits - 1));
-  BitsType randomBits = BitsType(0);
-  for (int shift = 0; shift < numRandomBits; shift += StdRandBits) {
-    int r = std::rand();
-    randomBits |= static_cast<BitsType>(r) << shift;
-  }
-  // clear the excess bits
-  randomBits &= mask;
-  return numext::bit_cast<Scalar, BitsType>(randomBits);
-}
-
-template <typename Scalar, bool IsComplex, bool IsInteger>
-struct random_default_impl {};
-
-template <typename Scalar>
-struct random_impl : random_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
-
-template <typename Scalar>
-struct random_retval {
-  typedef Scalar type;
-};
-
-template <typename Scalar>
-inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y);
-template <typename Scalar>
-inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random();
-
-template <typename Scalar>
-struct random_default_impl<Scalar, false, false> {
-  using BitsType = typename numext::get_integer_by_size<sizeof(Scalar)>::unsigned_type;
-  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y, int numRandomBits) {
-    Scalar half_x = Scalar(0.5) * x;
-    Scalar half_y = Scalar(0.5) * y;
-    Scalar result = (half_x + half_y) + (half_y - half_x) * run(numRandomBits);
-    // result is in the half-open interval [x, y) -- provided that x < y
-    return result;
-  }
-  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) {
-    const int mantissa_bits = NumTraits<Scalar>::digits() - 1;
-    return run(x, y, mantissa_bits);
-  }
-  static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) {
-    const int mantissa_bits = NumTraits<Scalar>::digits() - 1;
-    eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissa_bits);
-    BitsType randomBits = getRandomBits<BitsType>(numRandomBits);
-    // if fewer than MantissaBits is requested, shift them to the left
-    randomBits <<= (mantissa_bits - numRandomBits);
-    // randomBits is in the half-open interval [2,4)
-    randomBits |= numext::bit_cast<BitsType>(Scalar(2));
-    // result is in the half-open interval [-1,1)
-    Scalar result = numext::bit_cast<Scalar>(randomBits) - Scalar(3);
-    return result;
-  }
-  static EIGEN_DEVICE_FUNC inline Scalar run() {
-    const int mantissa_bits = NumTraits<Scalar>::digits() - 1;
-    return run(mantissa_bits);
-  }
-};
-
-// TODO: fix this for PPC
-template <bool Specialize = sizeof(long double) == 2 * sizeof(uint64_t) && !EIGEN_ARCH_PPC>
-struct random_longdouble_impl {
-  enum : int {
-    Size = sizeof(long double),
-    MantissaBits = NumTraits<long double>::digits() - 1,
-    LowBits = MantissaBits > 64 ? 64 : MantissaBits,
-    HighBits = MantissaBits > 64 ? MantissaBits - 64 : 0
-  };
-  static EIGEN_DEVICE_FUNC inline long double run() {
-    EIGEN_USING_STD(memcpy)
-    uint64_t randomBits[2];
-    long double result = 2.0L;
-    memcpy(&randomBits, &result, Size);
-    randomBits[0] |= getRandomBits<uint64_t>(LowBits);
-    randomBits[1] |= getRandomBits<uint64_t>(HighBits);
-    memcpy(&result, &randomBits, Size);
-    result -= 3.0L;
-    return result;
-  }
-};
-template <>
-struct random_longdouble_impl<false> {
-  using Impl = random_impl<double>;
-  static EIGEN_DEVICE_FUNC inline long double run() { return static_cast<long double>(Impl::run()); }
-};
-
-template <>
-struct random_impl<long double> {
-  static EIGEN_DEVICE_FUNC inline long double run(const long double& x, const long double& y) {
-    long double half_x = 0.5L * x;
-    long double half_y = 0.5L * y;
-    long double result = (half_x + half_y) + (half_y - half_x) * run();
-    return result;
-  }
-  static EIGEN_DEVICE_FUNC inline long double run() { return random_longdouble_impl<>::run(); }
-};
-
-template <typename Scalar>
-struct random_default_impl<Scalar, false, true> {
-  using BitsType = typename numext::get_integer_by_size<sizeof(Scalar)>::unsigned_type;
-  enum : int { ScalarBits = sizeof(Scalar) * CHAR_BIT };
-  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) {
-    if (y <= x) return x;
-    const BitsType range = static_cast<BitsType>(y) - static_cast<BitsType>(x) + 1;
-    // handle edge case where [x,y] spans the entire range of Scalar
-    if (range == 0) return getRandomBits<Scalar>(ScalarBits);
-    // calculate the number of random bits needed to fill range
-    const int numRandomBits = log2_ceil(range);
-    BitsType randomBits;
-    do {
-      randomBits = getRandomBits<BitsType>(numRandomBits);
-      // if the random draw is outside [0, range), try again (rejection sampling)
-      // in the worst-case scenario, the probability of rejection is: 1/2 - 1/2^numRandomBits < 50%
-    } while (randomBits >= range);
-    Scalar result = x + static_cast<Scalar>(randomBits);
-    return result;
-  }
-
-  static EIGEN_DEVICE_FUNC inline Scalar run() {
-#ifdef EIGEN_MAKING_DOCS
-    return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
-#else
-    return getRandomBits<Scalar>(ScalarBits);
-#endif
-  }
-};
-
-template <>
-struct random_impl<bool> {
-  static EIGEN_DEVICE_FUNC inline bool run(const bool& x, const bool& y) {
-    if (y <= x) return x;
-    return run();
-  }
-  static EIGEN_DEVICE_FUNC inline bool run() { return getRandomBits<int>(1) ? true : false; }
-};
-
-template <typename Scalar>
-struct random_default_impl<Scalar, true, false> {
-  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) {
-    return Scalar(random(x.real(), y.real()), random(x.imag(), y.imag()));
-  }
-  static EIGEN_DEVICE_FUNC inline Scalar run() {
-    typedef typename NumTraits<Scalar>::Real RealScalar;
-    return Scalar(random<RealScalar>(), random<RealScalar>());
-  }
-};
-
-template <typename Scalar>
-inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y) {
-  return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y);
-}
-
-template <typename Scalar>
-inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() {
-  return EIGEN_MATHFUNC_IMPL(random, Scalar)::run();
+int log2_floor(const BitsType& x) {
+  return log_2_impl<BitsType>::run_floor(x);
 }
 
 // Implementation of is* functions
@@ -980,7 +820,7 @@
 template <typename T>
 EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);
 template <typename T>
-T generic_fast_tanh_float(const T& a_x);
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS T ptanh_float(const T& a_x);
 
 /****************************************************************************
  * Implementation of sign                                                 *
@@ -1535,6 +1375,25 @@
   return exp(x);
 }
 
+// MSVC screws up some edge-cases for std::exp(complex).
+#if EIGEN_COMP_MSVC
+template <typename RealScalar>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex<RealScalar> exp(const std::complex<RealScalar>& x) {
+  EIGEN_USING_STD(exp);
+  // If z is (x,±∞) (for any finite x), the result is (NaN,NaN) and FE_INVALID is raised.
+  // If z is (x,NaN) (for any finite x), the result is (NaN,NaN) and FE_INVALID may be raised.
+  if ((isfinite)(real_ref(x)) && !(isfinite)(imag_ref(x))) {
+    return std::complex<RealScalar>(NumTraits<RealScalar>::quiet_NaN(), NumTraits<RealScalar>::quiet_NaN());
+  }
+  // If z is (+∞,±∞), the result is (±∞,NaN) and FE_INVALID is raised (the sign of the real part is unspecified)
+  // If z is (+∞,NaN), the result is (±∞,NaN) (the sign of the real part is unspecified)
+  if ((real_ref(x) == NumTraits<RealScalar>::infinity() && !(isfinite)(imag_ref(x)))) {
+    return std::complex<RealScalar>(NumTraits<RealScalar>::infinity(), NumTraits<RealScalar>::quiet_NaN());
+  }
+  return exp(x);
+}
+#endif
+
 #if defined(SYCL_DEVICE_ONLY)
 SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(exp, exp)
 #endif
@@ -1798,7 +1657,7 @@
 }
 
 #if (!defined(EIGEN_GPUCC)) && EIGEN_FAST_MATH && !defined(SYCL_DEVICE_ONLY)
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(float x) { return internal::generic_fast_tanh_float(x); }
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(float x) { return internal::ptanh_float(x); }
 #endif
 
 #if defined(SYCL_DEVICE_ONLY)
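The two guarded branches in the MSVC workaround above mirror the C99/C++ special values for cexp, e.g. exp(x + i*inf) = NaN + i*NaN for finite x. A sketch of the effect (assuming the overload is selected on MSVC):

    std::complex<float> z(1.0f, std::numeric_limits<float>::infinity());
    std::complex<float> w = Eigen::numext::exp(z);  // (NaN, NaN), as the spec requires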
diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h
index ed44089..689c6d8 100644
--- a/Eigen/src/Core/MathFunctionsImpl.h
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -146,65 +146,6 @@
   }
 };
 
-/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
-    Doesn't do anything fancy, just a 13/6-degree rational interpolant which
-    is accurate up to a couple of ulps in the (approximate) range [-8, 8],
-    outside of which tanh(x) = +/-1 in single precision. The input is clamped
-    to the range [-c, c]. The value c is chosen as the smallest value where
-    the approximation evaluates to exactly 1. In the range [-0.0004, 0.0004]
-    the approximation tanh(x) ~= x is used for better accuracy as x tends to zero.
-
-    This implementation works on both scalars and packets.
-*/
-template <typename T>
-T generic_fast_tanh_float(const T& a_x) {
-  // Clamp the inputs to the range [-c, c]
-#ifdef EIGEN_VECTORIZE_FMA
-  const T plus_clamp = pset1<T>(7.99881172180175781f);
-  const T minus_clamp = pset1<T>(-7.99881172180175781f);
-#else
-  const T plus_clamp = pset1<T>(7.90531110763549805f);
-  const T minus_clamp = pset1<T>(-7.90531110763549805f);
-#endif
-  const T tiny = pset1<T>(0.0004f);
-  const T x = pmax(pmin(a_x, plus_clamp), minus_clamp);
-  const T tiny_mask = pcmp_lt(pabs(a_x), tiny);
-  // The monomial coefficients of the numerator polynomial (odd).
-  const T alpha_1 = pset1<T>(4.89352455891786e-03f);
-  const T alpha_3 = pset1<T>(6.37261928875436e-04f);
-  const T alpha_5 = pset1<T>(1.48572235717979e-05f);
-  const T alpha_7 = pset1<T>(5.12229709037114e-08f);
-  const T alpha_9 = pset1<T>(-8.60467152213735e-11f);
-  const T alpha_11 = pset1<T>(2.00018790482477e-13f);
-  const T alpha_13 = pset1<T>(-2.76076847742355e-16f);
-
-  // The monomial coefficients of the denominator polynomial (even).
-  const T beta_0 = pset1<T>(4.89352518554385e-03f);
-  const T beta_2 = pset1<T>(2.26843463243900e-03f);
-  const T beta_4 = pset1<T>(1.18534705686654e-04f);
-  const T beta_6 = pset1<T>(1.19825839466702e-06f);
-
-  // Since the polynomials are odd/even, we need x^2.
-  const T x2 = pmul(x, x);
-
-  // Evaluate the numerator polynomial p.
-  T p = pmadd(x2, alpha_13, alpha_11);
-  p = pmadd(x2, p, alpha_9);
-  p = pmadd(x2, p, alpha_7);
-  p = pmadd(x2, p, alpha_5);
-  p = pmadd(x2, p, alpha_3);
-  p = pmadd(x2, p, alpha_1);
-  p = pmul(x, p);
-
-  // Evaluate the denominator polynomial q.
-  T q = pmadd(x2, beta_6, beta_4);
-  q = pmadd(x2, q, beta_2);
-  q = pmadd(x2, q, beta_0);
-
-  // Divide the numerator by the denominator.
-  return pselect(tiny_mask, x, pdiv(p, q));
-}
-
 template <typename RealScalar>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) {
   // IEEE IEC 6059 special cases.
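The removed rational approximant is unchanged in substance; it now lives with the packet-math implementations under the name forward-declared in the MathFunctions.h hunk above. The same template serves scalars and packets (a sketch):

    // For T = float this evaluates the 13/6-degree rational interpolant directly.
    float y = Eigen::internal::ptanh_float(0.5f);   // ~ 0.462117f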
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index ce0e4e6..af6afaf 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -30,7 +30,7 @@
     actual_alignment = ((Options_ & DontAlign) == 0) ? default_alignment : 0,
     required_alignment = unpacket_traits<PacketScalar>::alignment,
     packet_access_bit = (packet_traits<Scalar_>::Vectorizable &&
-                         (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment >= required_alignment)))
+                         (EIGEN_UNALIGNED_VECTORIZE || (int(actual_alignment) >= int(required_alignment))))
                             ? PacketAccessBit
                             : 0
   };
@@ -48,7 +48,7 @@
     Flags = compute_matrix_flags(Options_),
     Options = Options_,
     InnerStrideAtCompileTime = 1,
-    OuterStrideAtCompileTime = (Options & RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
+    OuterStrideAtCompileTime = (int(Options) & int(RowMajor)) ? ColsAtCompileTime : RowsAtCompileTime,
 
     // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
     EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
@@ -207,7 +207,7 @@
    *
    * \callgraph
    */
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other) { return Base::_set(other); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix& operator=(const Matrix& other) { return Base::_set(other); }
 
   /** \internal
    * \brief Copies the value of the expression \a other into \c *this with automatic resizing.
@@ -250,17 +250,18 @@
    *
    * \sa resize(Index,Index)
    */
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix()
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix()
       : Base(){EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED}
 
         // FIXME is it still needed
-        EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Matrix(internal::constructor_without_unaligned_array_assert)
+        EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr explicit Matrix(
+            internal::constructor_without_unaligned_array_assert)
       : Base(internal::constructor_without_unaligned_array_assert()){EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED}
 
-        EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(Matrix && other)
+        EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix(Matrix && other)
             EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
       : Base(std::move(other)) {}
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(Matrix&& other)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix& operator=(Matrix&& other)
       EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value) {
     Base::operator=(std::move(other));
     return *this;
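The int(...) casts added in the traits hunks above address C++20's deprecation of arithmetic between two different (here anonymous) enumeration types; the pattern in isolation:

    enum { A = 1 };
    enum { B = 2 };
    int bad  = A & B;            // deprecated in C++20 (-Wdeprecated-anon-enum-enum-conversion)
    int good = int(A) & int(B);  // fine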
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index f9bf737..5f846a0 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -31,6 +31,7 @@
 
 namespace internal {
 
+#ifndef EIGEN_NO_DEBUG
 template <int MaxSizeAtCompileTime, int MaxRowsAtCompileTime, int MaxColsAtCompileTime>
 struct check_rows_cols_for_overflow {
   EIGEN_STATIC_ASSERT(MaxRowsAtCompileTime* MaxColsAtCompileTime == MaxSizeAtCompileTime,
@@ -44,7 +45,7 @@
   template <typename Index>
   EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index, Index cols) {
     constexpr Index MaxIndex = NumTraits<Index>::highest();
-    bool error = cols > MaxIndex / MaxRowsAtCompileTime;
+    bool error = cols > (MaxIndex / MaxRowsAtCompileTime);
     if (error) throw_std_bad_alloc();
   }
 };
@@ -54,7 +55,7 @@
   template <typename Index>
   EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index rows, Index) {
     constexpr Index MaxIndex = NumTraits<Index>::highest();
-    bool error = rows > MaxIndex / MaxColsAtCompileTime;
+    bool error = rows > (MaxIndex / MaxColsAtCompileTime);
     if (error) throw_std_bad_alloc();
   }
 };
@@ -64,10 +65,11 @@
   template <typename Index>
   EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index rows, Index cols) {
     constexpr Index MaxIndex = NumTraits<Index>::highest();
-    bool error = cols == 0 ? false : (rows > MaxIndex / cols);
+    bool error = cols == 0 ? false : (rows > (MaxIndex / cols));
     if (error) throw_std_bad_alloc();
   }
 };
+#endif
 
 template <typename Derived, typename OtherDerived = Derived,
           bool IsVector = bool(Derived::IsVectorAtCompileTime) && bool(OtherDerived::IsVectorAtCompileTime)>
@@ -297,8 +299,10 @@
                  internal::check_implication(ColsAtCompileTime == Dynamic && MaxColsAtCompileTime != Dynamic,
                                              cols <= MaxColsAtCompileTime) &&
                  rows >= 0 && cols >= 0 && "Invalid sizes when resizing a matrix or array.");
+#ifndef EIGEN_NO_DEBUG
     internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime>::run(rows,
                                                                                                                   cols);
+#endif
 #ifdef EIGEN_INITIALIZE_COEFFS
     Index size = rows * cols;
     bool size_changed = size != this->size();
@@ -367,8 +371,10 @@
   template <typename OtherDerived>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other) {
     const OtherDerived& other = _other.derived();
+#ifndef EIGEN_NO_DEBUG
     internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime>::run(
         other.rows(), other.cols());
+#endif
     const Index othersize = other.rows() * other.cols();
     if (RowsAtCompileTime == 1) {
       eigen_assert(other.rows() == 1 || other.cols() == 1);
@@ -446,7 +452,9 @@
   /** This is a special case of the templated operator=. Its purpose is to
    * prevent a default operator= from hiding the templated operator=.
    */
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other) { return _set(other); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Derived& operator=(const PlainObjectBase& other) {
+    return _set(other);
+  }
 
   /** \sa MatrixBase::lazyAssign() */
   template <typename OtherDerived>
@@ -464,28 +472,29 @@
   // Prevent user from trying to instantiate PlainObjectBase objects
   // by making all its constructor protected. See bug 1074.
  protected:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase() : m_storage() {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase() : m_storage() {
     //       EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
   }
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN
   // FIXME is it still needed ?
   /** \internal */
-  EIGEN_DEVICE_FUNC explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert)
+  EIGEN_DEVICE_FUNC constexpr explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert)
       : m_storage(internal::constructor_without_unaligned_array_assert()) {
     // EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
   }
 #endif
 
-  EIGEN_DEVICE_FUNC PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT : m_storage(std::move(other.m_storage)) {}
+  EIGEN_DEVICE_FUNC constexpr PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT
+      : m_storage(std::move(other.m_storage)) {}
 
-  EIGEN_DEVICE_FUNC PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT {
+  EIGEN_DEVICE_FUNC constexpr PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT {
     m_storage = std::move(other.m_storage);
     return *this;
   }
 
   /** Copy constructor */
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(const PlainObjectBase& other)
       : Base(), m_storage(other.m_storage) {}
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols)
       : m_storage(size, rows, cols) {
@@ -743,7 +752,7 @@
   // aliasing is dealt once in internal::call_assignment
  // so at this stage we have to assume aliasing... and resizing has to be done later.
   template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Derived& _set(const DenseBase<OtherDerived>& other) {
     internal::call_assignment(this->derived(), other.derived());
     return this->derived();
   }
@@ -754,7 +763,7 @@
    * \sa operator=(const MatrixBase<OtherDerived>&), _set()
    */
   template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase<OtherDerived>& other) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Derived& _set_noalias(const DenseBase<OtherDerived>& other) {
     // I don't think we need this resize call since the lazyAssign will anyways resize
     // and lazyAssign will be called by the assign selector.
     //_resize_to_match(other);
@@ -941,8 +950,10 @@
         ((Derived::IsRowMajor && _this.cols() == cols) ||  // row-major and we change only the number of rows
          (!Derived::IsRowMajor && _this.rows() == rows)))  // column-major and we change only the number of columns
     {
+#ifndef EIGEN_NO_DEBUG
       internal::check_rows_cols_for_overflow<Derived::MaxSizeAtCompileTime, Derived::MaxRowsAtCompileTime,
                                              Derived::MaxColsAtCompileTime>::run(rows, cols);
+#endif
       _this.derived().m_storage.conservativeResize(rows * cols, rows, cols);
     } else {
       // The storage order does not allow us to use reallocation.
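Net effect of the new EIGEN_NO_DEBUG guards (Eigen defines EIGEN_NO_DEBUG automatically when NDEBUG is set): the potentially throwing rows*cols overflow check disappears from release builds. In sketch form:

    // Debug build:    if (cols != 0 && rows > NumTraits<Index>::highest() / cols)
    //                   throw_std_bad_alloc();
    // EIGEN_NO_DEBUG: the check (and its throw path) is compiled out of resize().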
diff --git a/Eigen/src/Core/RandomImpl.h b/Eigen/src/Core/RandomImpl.h
new file mode 100644
index 0000000..445376c
--- /dev/null
+++ b/Eigen/src/Core/RandomImpl.h
@@ -0,0 +1,253 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2024 Charles Schlosser <cs.schlosser@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_RANDOM_IMPL_H
+#define EIGEN_RANDOM_IMPL_H
+
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+
+namespace Eigen {
+
+namespace internal {
+
+/****************************************************************************
+ * Implementation of random                                               *
+ ****************************************************************************/
+
+template <typename Scalar, bool IsComplex, bool IsInteger>
+struct random_default_impl {};
+
+template <typename Scalar>
+struct random_impl : random_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
+
+template <typename Scalar>
+struct random_retval {
+  typedef Scalar type;
+};
+
+template <typename Scalar>
+inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y) {
+  return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y);
+}
+
+template <typename Scalar>
+inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() {
+  return EIGEN_MATHFUNC_IMPL(random, Scalar)::run();
+}
+
+// TODO: replace or provide alternatives to this, e.g. std::random_device
+struct eigen_random_device {
+  using ReturnType = int;
+  static constexpr int Entropy = meta_floor_log2<(unsigned int)(RAND_MAX) + 1>::value;
+  static constexpr ReturnType Highest = RAND_MAX;
+  static EIGEN_DEVICE_FUNC inline ReturnType run() { return std::rand(); };
+};
+
+// Fill a built-in unsigned integer with numRandomBits beginning with the least significant bit
+template <typename Scalar>
+struct random_bits_impl {
+  EIGEN_STATIC_ASSERT(std::is_unsigned<Scalar>::value, SCALAR MUST BE A BUILT - IN UNSIGNED INTEGER)
+  using RandomDevice = eigen_random_device;
+  using RandomReturnType = typename RandomDevice::ReturnType;
+  static constexpr int kEntropy = RandomDevice::Entropy;
+  static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT;
+  // return a Scalar filled with numRandomBits beginning from the least significant bit
+  static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) {
+    eigen_assert((numRandomBits >= 0) && (numRandomBits <= kTotalBits));
+    const Scalar mask = Scalar(-1) >> ((kTotalBits - numRandomBits) & (kTotalBits - 1));
+    Scalar randomBits = 0;
+    for (int shift = 0; shift < numRandomBits; shift += kEntropy) {
+      RandomReturnType r = RandomDevice::run();
+      randomBits |= static_cast<Scalar>(r) << shift;
+    }
+    // clear the excess bits
+    randomBits &= mask;
+    return randomBits;
+  }
+};
+
+template <typename BitsType>
+EIGEN_DEVICE_FUNC inline BitsType getRandomBits(int numRandomBits) {
+  return random_bits_impl<BitsType>::run(numRandomBits);
+}
+
+// random implementation for a built-in floating point type
+template <typename Scalar, bool BuiltIn = std::is_floating_point<Scalar>::value>
+struct random_float_impl {
+  using BitsType = typename numext::get_integer_by_size<sizeof(Scalar)>::unsigned_type;
+  static constexpr EIGEN_DEVICE_FUNC inline int mantissaBits() {
+    const int digits = NumTraits<Scalar>::digits();
+    return digits - 1;
+  }
+  static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) {
+    eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissaBits());
+    BitsType randomBits = getRandomBits<BitsType>(numRandomBits);
+    // if fewer than mantissaBits() bits are requested, shift them into the most significant mantissa positions
+    randomBits <<= (mantissaBits() - numRandomBits);
+    // OR in the bit pattern of Scalar(2): the value now lies in the half-open interval [2,4)
+    randomBits |= numext::bit_cast<BitsType>(Scalar(2));
+    // subtracting 3 maps the result to the half-open interval [-1,1)
+    Scalar result = numext::bit_cast<Scalar>(randomBits) - Scalar(3);
+    return result;
+  }
+};
+// random implementation for a custom floating point type
+// uses double as the underlying implementation, with an effective mantissa width equal to the smaller of the
+// target scalar's mantissa width and double's
+template <typename Scalar>
+struct random_float_impl<Scalar, false> {
+  static EIGEN_DEVICE_FUNC inline int mantissaBits() {
+    const int digits = NumTraits<Scalar>::digits();
+    constexpr int kDoubleDigits = NumTraits<double>::digits();
+    return numext::mini(digits, kDoubleDigits) - 1;
+  }
+  static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) {
+    eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissaBits());
+    Scalar result = static_cast<Scalar>(random_float_impl<double>::run(numRandomBits));
+    return result;
+  }
+};
+
+// random implementation for long double
+// this specialization is not compatible with double-double scalars
+template <bool Specialize = (sizeof(long double) == 2 * sizeof(uint64_t)) &&
+                            ((std::numeric_limits<long double>::digits != (2 * std::numeric_limits<double>::digits)))>
+struct random_longdouble_impl {
+  static constexpr int Size = sizeof(long double);
+  static constexpr EIGEN_DEVICE_FUNC inline int mantissaBits() { return NumTraits<long double>::digits() - 1; }
+  static EIGEN_DEVICE_FUNC inline long double run(int numRandomBits) {
+    eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissaBits());
+    EIGEN_USING_STD(memcpy);
+    int numLowBits = numext::mini(numRandomBits, 64);
+    int numHighBits = numext::maxi(numRandomBits - 64, 0);
+    uint64_t randomBits[2];
+    long double result = 2.0L;
+    memcpy(&randomBits, &result, Size);
+    randomBits[0] |= getRandomBits<uint64_t>(numLowBits);
+    randomBits[1] |= getRandomBits<uint64_t>(numHighBits);
+    memcpy(&result, &randomBits, Size);
+    result -= 3.0L;
+    return result;
+  }
+};
+template <>
+struct random_longdouble_impl<false> {
+  static constexpr EIGEN_DEVICE_FUNC inline int mantissaBits() { return NumTraits<double>::digits() - 1; }
+  static EIGEN_DEVICE_FUNC inline long double run(int numRandomBits) {
+    return static_cast<long double>(random_float_impl<double>::run(numRandomBits));
+  }
+};
+template <>
+struct random_float_impl<long double> : random_longdouble_impl<> {};
+
+template <typename Scalar>
+struct random_default_impl<Scalar, false, false> {
+  using Impl = random_float_impl<Scalar>;
+  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y, int numRandomBits) {
+    Scalar half_x = Scalar(0.5) * x;
+    Scalar half_y = Scalar(0.5) * y;
+    Scalar result = (half_x + half_y) + (half_y - half_x) * run(numRandomBits);
+    // result is in the half-open interval [x, y) -- provided that x < y
+    return result;
+  }
+  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) {
+    return run(x, y, Impl::mantissaBits());
+  }
+  static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) { return Impl::run(numRandomBits); }
+  static EIGEN_DEVICE_FUNC inline Scalar run() { return run(Impl::mantissaBits()); }
+};
+
+template <typename Scalar, bool IsSigned = NumTraits<Scalar>::IsSigned, bool BuiltIn = std::is_integral<Scalar>::value>
+struct random_int_impl;
+
+// random implementation for a built-in unsigned integer type
+template <typename Scalar>
+struct random_int_impl<Scalar, false, true> {
+  static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT;
+  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) {
+    if (y <= x) return x;
+    Scalar range = y - x;
+    // handle edge case where [x,y] spans the entire range of Scalar
+    if (range == NumTraits<Scalar>::highest()) return run();
+    Scalar count = range + 1;
+    // calculate the number of random bits needed to fill range
+    int numRandomBits = log2_ceil(count);
+    Scalar randomBits;
+    do {
+      randomBits = getRandomBits<Scalar>(numRandomBits);
+      // if the random draw is outside [0, count), try again (rejection sampling)
+      // in the worst-case scenario, the probability of rejection is: 1/2 - 1/2^numRandomBits < 50%
+    } while (randomBits >= count);
+    Scalar result = x + randomBits;
+    return result;
+  }
+  static EIGEN_DEVICE_FUNC inline Scalar run() { return getRandomBits<Scalar>(kTotalBits); }
+};
+
+// random implementation for a built-in signed integer type
+template <typename Scalar>
+struct random_int_impl<Scalar, true, true> {
+  static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT;
+  using BitsType = typename make_unsigned<Scalar>::type;
+  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) {
+    if (y <= x) return x;
+    // Avoid overflow by representing `range` as an unsigned type
+    BitsType range = static_cast<BitsType>(y) - static_cast<BitsType>(x);
+    BitsType randomBits = random_int_impl<BitsType>::run(0, range);
+    // Avoid overflow in the case where `x` is negative and there is a large range so
+    // `randomBits` would also be negative if cast to `Scalar` first.
+    Scalar result = static_cast<Scalar>(static_cast<BitsType>(x) + randomBits);
+    return result;
+  }
+  static EIGEN_DEVICE_FUNC inline Scalar run() { return static_cast<Scalar>(getRandomBits<BitsType>(kTotalBits)); }
+};
+
+// todo: custom integers
+template <typename Scalar, bool IsSigned>
+struct random_int_impl<Scalar, IsSigned, false> {
+  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar&, const Scalar&) { return run(); }
+  static EIGEN_DEVICE_FUNC inline Scalar run() {
+    eigen_assert(std::false_type::value && "RANDOM FOR CUSTOM INTEGERS NOT YET SUPPORTED");
+    return Scalar(0);
+  }
+};
+
+template <typename Scalar>
+struct random_default_impl<Scalar, false, true> : random_int_impl<Scalar> {};
+
+template <>
+struct random_impl<bool> {
+  static EIGEN_DEVICE_FUNC inline bool run(const bool& x, const bool& y) {
+    if (y <= x) return x;
+    return run();
+  }
+  static EIGEN_DEVICE_FUNC inline bool run() { return getRandomBits<unsigned>(1) ? true : false; }
+};
+
+template <typename Scalar>
+struct random_default_impl<Scalar, true, false> {
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  using Impl = random_impl<RealScalar>;
+  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y, int numRandomBits) {
+    return Scalar(Impl::run(x.real(), y.real(), numRandomBits), Impl::run(x.imag(), y.imag(), numRandomBits));
+  }
+  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) {
+    return Scalar(Impl::run(x.real(), y.real()), Impl::run(x.imag(), y.imag()));
+  }
+  static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) {
+    return Scalar(Impl::run(numRandomBits), Impl::run(numRandomBits));
+  }
+  static EIGEN_DEVICE_FUNC inline Scalar run() { return Scalar(Impl::run(), Impl::run()); }
+};
+
+}  // namespace internal
+}  // namespace Eigen
+
+#endif  // EIGEN_RANDOM_IMPL_H
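
For readers skimming the diff, the two core techniques in RandomImpl.h are easy to miss, so here is a minimal standalone C++ sketch of each (illustrative only; uniform_float and uniform_below are hypothetical helpers, not Eigen API, and std::rand() stands in for the pluggable random device):

#include <cstdint>
#include <cstdlib>
#include <cstring>

// Uniform float in [-1, 1): OR random fraction bits into the bit pattern of
// 2.0f so the value lands in [2, 4), then subtract 3. For brevity this draws
// all 23 bits from one std::rand() call; random_bits_impl above instead loops
// until enough entropy has been gathered.
float uniform_float() {
  uint32_t fraction = static_cast<uint32_t>(std::rand()) & 0x7FFFFFu;  // 23 mantissa bits
  float two = 2.0f;
  uint32_t pattern;
  std::memcpy(&pattern, &two, sizeof(pattern));
  pattern |= fraction;  // bit pattern of a float in [2, 4)
  float result;
  std::memcpy(&result, &pattern, sizeof(result));
  return result - 3.0f;  // [2, 4) -> [-1, 1)
}

// Uniform integer in [0, count) for count >= 1: draw log2_ceil(count) random
// bits and retry while the draw falls outside the range. Each draw succeeds
// with probability > 50%, so fewer than two iterations are expected.
uint32_t uniform_below(uint32_t count) {
  int bits = 0;
  for (uint32_t c = count - 1; c != 0; c >>= 1) ++bits;
  uint32_t mask = (bits == 32) ? ~0u : ((uint32_t(1) << bits) - 1);
  uint32_t draw;
  do {
    draw = static_cast<uint32_t>(std::rand()) & mask;
  } while (draw >= count);
  return draw;
}

The [2, 4) trick sidesteps the rounding bias of dividing by RAND_MAX: every representable value in the interval is equally likely, and because the operands are within a factor of two of each other the final subtraction is exact.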
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index afdb242..2b1683b 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -184,7 +184,8 @@
   enum {
     Mode = Mode_,
     Flags = internal::traits<TriangularView>::Flags,
-    TransposeMode = (Mode & Upper ? Lower : 0) | (Mode & Lower ? Upper : 0) | (Mode & (UnitDiag)) | (Mode & (ZeroDiag)),
+    TransposeMode = (int(Mode) & int(Upper) ? Lower : 0) | (int(Mode) & int(Lower) ? Upper : 0) |
+                    (int(Mode) & int(UnitDiag)) | (int(Mode) & int(ZeroDiag)),
     IsVectorAtCompileTime = false
   };
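
The int casts in TransposeMode address C++20's deprecation of arithmetic between two different enumeration types. A minimal reproduction of the warning (illustrative, not Eigen code):

enum Flag1 { kUpper = 1 };
enum Flag2 { kLower = 2 };
int deprecated_in_cpp20 = kUpper | kLower;  // warns: arithmetic between different enumeration types
int fine = int(kUpper) | int(kLower);       // widened to int first, as in TransposeMode above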
 
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index 6a8bee8..bae5714 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -41,6 +41,7 @@
     HasNegate = 1,
     HasSqrt = 1,
     HasLog = 1,
+    HasExp = 1,
     HasAbs = 0,
     HasAbs2 = 0,
     HasMin = 0,
@@ -443,6 +444,11 @@
   return plog_complex<Packet4cf>(a);
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet4cf pexp<Packet4cf>(const Packet4cf& a) {
+  return pexp_complex<Packet4cf>(a);
+}
+
 }  // end namespace internal
 
 }  // end namespace Eigen
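
The pexp kernels enabled here vectorize the complex exponential exp(x + iy) = e^x (cos y + i sin y). A scalar C++ reference for what each lane computes (illustrative; the packet implementation also treats non-finite inputs specially):

#include <cmath>
#include <complex>

std::complex<float> complex_exp(std::complex<float> z) {
  float r = std::exp(z.real());
  return {r * std::cos(z.imag()), r * std::sin(z.imag())};
}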
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index d752f06..2383e46 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -270,9 +270,7 @@
 template <>
 struct packet_traits<int64_t> : default_packet_traits {
   typedef Packet4l type;
-  // There is no half-size packet for current Packet4l.
-  // TODO: support as SSE path.
-  typedef Packet4l half;
+  typedef Packet2l half;
   enum { Vectorizable = 1, AlignedOnScalar = 1, HasCmp = 1, size = 4 };
 };
 template <>
@@ -332,6 +330,9 @@
 struct unpacket_traits<Packet4d> {
   typedef double type;
   typedef Packet2d half;
+#ifdef EIGEN_VECTORIZE_AVX2
+  typedef Packet4l integer_packet;
+#endif
   enum {
     size = 4,
     alignment = Aligned32,
@@ -368,7 +369,7 @@
 template <>
 struct unpacket_traits<Packet4l> {
   typedef int64_t type;
-  typedef Packet4l half;
+  typedef Packet2l half;
   enum {
     size = 4,
     alignment = Aligned32,
@@ -561,7 +562,7 @@
 }
 template <int N>
 EIGEN_STRONG_INLINE std::enable_if_t<(N == 63), Packet4l> parithmetic_shift_right(Packet4l a) {
-  return _mm256_shuffle_epi32(_mm256_srai_epi32(a, 31), (shuffle_mask<1, 1, 3, 3>::mask));
+  return _mm256_cmpgt_epi64(_mm256_setzero_si256(), a);
 }
 template <int N>
 EIGEN_STRONG_INLINE std::enable_if_t<(N < 0) || (N > 63), Packet4l> parithmetic_shift_right(Packet4l a) {
@@ -623,22 +624,22 @@
 template <>
 EIGEN_DEVICE_FUNC inline void pscatter<int64_t, Packet4l>(int64_t* to, const Packet4l& from, Index stride) {
   __m128i low = _mm256_extractf128_si256(from, 0);
-  to[stride * 0] = _mm_extract_epi64(low, 0);
-  to[stride * 1] = _mm_extract_epi64(low, 1);
+  to[stride * 0] = _mm_extract_epi64_0(low);
+  to[stride * 1] = _mm_extract_epi64_1(low);
 
   __m128i high = _mm256_extractf128_si256(from, 1);
-  to[stride * 2] = _mm_extract_epi64(high, 0);
-  to[stride * 3] = _mm_extract_epi64(high, 1);
+  to[stride * 2] = _mm_extract_epi64_0(high);
+  to[stride * 3] = _mm_extract_epi64_1(high);
 }
 template <>
 EIGEN_DEVICE_FUNC inline void pscatter<uint64_t, Packet4ul>(uint64_t* to, const Packet4ul& from, Index stride) {
   __m128i low = _mm256_extractf128_si256(from, 0);
-  to[stride * 0] = _mm_extract_epi64(low, 0);
-  to[stride * 1] = _mm_extract_epi64(low, 1);
+  to[stride * 0] = _mm_extract_epi64_0(low);
+  to[stride * 1] = _mm_extract_epi64_1(low);
 
   __m128i high = _mm256_extractf128_si256(from, 1);
-  to[stride * 2] = _mm_extract_epi64(high, 0);
-  to[stride * 3] = _mm_extract_epi64(high, 1);
+  to[stride * 2] = _mm_extract_epi64_0(high);
+  to[stride * 3] = _mm_extract_epi64_1(high);
 }
 template <>
 EIGEN_STRONG_INLINE void pstore1<Packet4l>(int64_t* to, const int64_t& a) {
@@ -652,21 +653,21 @@
 }
 template <>
 EIGEN_STRONG_INLINE int64_t pfirst<Packet4l>(const Packet4l& a) {
-  return _mm_cvtsi128_si64(_mm256_castsi256_si128(a));
+  return _mm_extract_epi64_0(_mm256_castsi256_si128(a));
 }
 template <>
 EIGEN_STRONG_INLINE uint64_t pfirst<Packet4ul>(const Packet4ul& a) {
-  return _mm_cvtsi128_si64(_mm256_castsi256_si128(a));
+  return _mm_extract_epi64_0(_mm256_castsi256_si128(a));
 }
 template <>
 EIGEN_STRONG_INLINE int64_t predux<Packet4l>(const Packet4l& a) {
   __m128i r = _mm_add_epi64(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1));
-  return _mm_extract_epi64(r, 0) + _mm_extract_epi64(r, 1);
+  return _mm_extract_epi64_0(r) + _mm_extract_epi64_1(r);
 }
 template <>
 EIGEN_STRONG_INLINE uint64_t predux<Packet4ul>(const Packet4ul& a) {
   __m128i r = _mm_add_epi64(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1));
-  return numext::bit_cast<uint64_t>(_mm_extract_epi64(r, 0) + _mm_extract_epi64(r, 1));
+  return numext::bit_cast<uint64_t>(_mm_extract_epi64_0(r) + _mm_extract_epi64_1(r));
 }
 #define MM256_SHUFFLE_EPI64(A, B, M) _mm256_shuffle_pd(_mm256_castsi256_pd(A), _mm256_castsi256_pd(B), M)
 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4l, 4>& kernel) {
@@ -1803,14 +1804,12 @@
 // pabs should be ok
 template <>
 EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a) {
-  const Packet8f mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
-                                                              0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF));
+  const Packet8f mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF));
   return _mm256_and_ps(a, mask);
 }
 template <>
 EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a) {
-  const Packet4d mask = _mm256_castsi256_pd(_mm256_setr_epi32(0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF,
-                                                              0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF));
+  const Packet4d mask = _mm256_castsi256_pd(_mm256_set1_epi64x(0x7FFFFFFFFFFFFFFF));
   return _mm256_and_pd(a, mask);
 }
 template <>
@@ -1830,28 +1829,32 @@
 
 template <>
 EIGEN_STRONG_INLINE Packet8h psignbit(const Packet8h& a) {
-  return _mm_srai_epi16(a, 15);
+  return _mm_cmpgt_epi16(_mm_setzero_si128(), a);
 }
 template <>
 EIGEN_STRONG_INLINE Packet8bf psignbit(const Packet8bf& a) {
-  return _mm_srai_epi16(a, 15);
+  return _mm_cmpgt_epi16(_mm_setzero_si128(), a);
 }
 template <>
 EIGEN_STRONG_INLINE Packet8f psignbit(const Packet8f& a) {
-  return _mm256_castsi256_ps(parithmetic_shift_right<31>((Packet8i)_mm256_castps_si256(a)));
+#ifdef EIGEN_VECTORIZE_AVX2
+  return _mm256_castsi256_ps(_mm256_cmpgt_epi32(_mm256_setzero_si256(), _mm256_castps_si256(a)));
+#else
+  return _mm256_castsi256_ps(parithmetic_shift_right<31>(Packet8i(_mm256_castps_si256(a))));
+#endif
 }
 template <>
 EIGEN_STRONG_INLINE Packet8ui psignbit(const Packet8ui& a) {
-  return pzero(a);
+  return _mm256_setzero_si256();
 }
 #ifdef EIGEN_VECTORIZE_AVX2
 template <>
 EIGEN_STRONG_INLINE Packet4d psignbit(const Packet4d& a) {
-  return _mm256_castsi256_pd(parithmetic_shift_right<63>((Packet4l)_mm256_castpd_si256(a)));
+  return _mm256_castsi256_pd(_mm256_cmpgt_epi64(_mm256_setzero_si256(), _mm256_castpd_si256(a)));
 }
 template <>
 EIGEN_STRONG_INLINE Packet4ul psignbit(const Packet4ul& a) {
-  return pzero(a);
+  return _mm256_setzero_si256();
 }
 #endif
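
The psignbit rewrites above all exploit one identity: an arithmetic right shift of a signed integer by (width - 1) broadcasts the sign bit, producing the same all-ones or all-zeros mask as a signed compare against zero. The compare form matters for the 64-bit lanes because AVX2 has no 64-bit arithmetic shift intrinsic (_mm256_srai_epi64 requires AVX-512). In scalar form:

// Equivalent ways to build a sign mask from a signed 64-bit value.
int64_t mask_by_shift(int64_t x) { return x >> 63; }             // arithmetic shift (guaranteed since C++20)
int64_t mask_by_compare(int64_t x) { return (0 > x) ? -1 : 0; }  // what _mm256_cmpgt_epi64(0, a) computes per lane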
 
diff --git a/Eigen/src/Core/arch/AVX/TypeCasting.h b/Eigen/src/Core/arch/AVX/TypeCasting.h
index 3688f8d..9dcd6ef 100644
--- a/Eigen/src/Core/arch/AVX/TypeCasting.h
+++ b/Eigen/src/Core/arch/AVX/TypeCasting.h
@@ -47,6 +47,13 @@
 struct type_casting_traits<bfloat16, float> : vectorized_type_casting_traits<bfloat16, float> {};
 template <>
 struct type_casting_traits<float, bfloat16> : vectorized_type_casting_traits<float, bfloat16> {};
+
+#ifdef EIGEN_VECTORIZE_AVX2
+template <>
+struct type_casting_traits<double, int64_t> : vectorized_type_casting_traits<double, int64_t> {};
+template <>
+struct type_casting_traits<int64_t, double> : vectorized_type_casting_traits<int64_t, double> {};
+#endif
 #endif
 
 template <>
@@ -189,6 +196,63 @@
 
 #ifdef EIGEN_VECTORIZE_AVX2
 template <>
+EIGEN_STRONG_INLINE Packet4l pcast<Packet4d, Packet4l>(const Packet4d& a) {
+#if defined(EIGEN_VECTORIZE_AVX512DQ) && defined(EIGEN_VECTORIZE_AVX512VL)
+  return _mm256_cvttpd_epi64(a);
+#else
+
+  // if 'a' exceeds the numerical limits of int64_t, the behavior is undefined
+
+  // e < 0 corresponds to |a| < 1, which truncates to zero. Incidentally, Intel intrinsics with shift arguments
+  // greater than or equal to 64 produce zero, and negative shifts appear to be interpreted as large positive
+  // shifts (two's complement), which also yield zero. Therefore, e does not need to be clamped to [0, 64)
+
+  constexpr int kTotalBits = sizeof(double) * CHAR_BIT, kMantissaBits = std::numeric_limits<double>::digits - 1,
+                kExponentBits = kTotalBits - kMantissaBits - 1, kBias = (1 << (kExponentBits - 1)) - 1;
+
+  const __m256i cst_one = _mm256_set1_epi64x(1);
+  const __m256i cst_total_bits = _mm256_set1_epi64x(kTotalBits);
+  const __m256i cst_bias = _mm256_set1_epi64x(kBias);
+
+  __m256i a_bits = _mm256_castpd_si256(a);
+  // shift left by 1 to clear the sign bit, and shift right by kMantissaBits + 1 to recover biased exponent
+  __m256i biased_e = _mm256_srli_epi64(_mm256_slli_epi64(a_bits, 1), kMantissaBits + 1);
+  __m256i e = _mm256_sub_epi64(biased_e, cst_bias);
+
+  // shift to the left by kExponentBits + 1 to clear the sign and exponent bits
+  __m256i shifted_mantissa = _mm256_slli_epi64(a_bits, kExponentBits + 1);
+  // shift to the right by kTotalBits - e to convert the significand to an integer
+  __m256i result_significand = _mm256_srlv_epi64(shifted_mantissa, _mm256_sub_epi64(cst_total_bits, e));
+
+  // add the implied bit
+  __m256i result_exponent = _mm256_sllv_epi64(cst_one, e);
+  // for e < 0 the shift count is interpreted as a large positive shift (two's complement), which conveniently yields zero
+  __m256i result = _mm256_add_epi64(result_significand, result_exponent);
+  // handle negative arguments
+  __m256i sign_mask = _mm256_cmpgt_epi64(_mm256_setzero_si256(), a_bits);
+  result = _mm256_sub_epi64(_mm256_xor_si256(result, sign_mask), sign_mask);
+  return result;
+#endif
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet4d pcast<Packet4l, Packet4d>(const Packet4l& a) {
+#if defined(EIGEN_VECTORIZE_AVX512DQ) && defined(EIGEN_VECTORIZE_AVX512VL)
+  return _mm256_cvtepi64_pd(a);
+#else
+  EIGEN_ALIGN16 int64_t aux[4];
+  pstore(aux, a);
+  return _mm256_set_pd(static_cast<double>(aux[3]), static_cast<double>(aux[2]), static_cast<double>(aux[1]),
+                       static_cast<double>(aux[0]));
+#endif
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet4d pcast<Packet2l, Packet4d>(const Packet2l& a, const Packet2l& b) {
+  return _mm256_set_m128d((pcast<Packet2l, Packet2d>(b)), (pcast<Packet2l, Packet2d>(a)));
+}
+
+template <>
 EIGEN_STRONG_INLINE Packet4ul preinterpret<Packet4ul, Packet4l>(const Packet4l& a) {
   return Packet4ul(a);
 }
@@ -198,6 +262,21 @@
   return Packet4l(a);
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet4l preinterpret<Packet4l, Packet4d>(const Packet4d& a) {
+  return _mm256_castpd_si256(a);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet4d preinterpret<Packet4d, Packet4l>(const Packet4l& a) {
+  return _mm256_castsi256_pd(a);
+}
+
+// truncation operations
+template <>
+EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet4l>(const Packet4l& a) {
+  return _mm256_castsi256_si128(a);
+}
 #endif
 
 template <>
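
The manual pcast<Packet4d, Packet4l> above is the branch-free vector form of ordinary float-to-integer truncation. For reference, a scalar C++ sketch of the same algorithm (trunc_double_to_i64 is a hypothetical name, not Eigen API); unlike the AVX shift intrinsics, C++ shifts by 64 or more are undefined, so the scalar version needs explicit guards where the vector code relies on out-of-range counts yielding zero:

#include <cstdint>
#include <cstring>

// Truncate a double toward zero, assuming |a| fits in int64_t (otherwise
// undefined, matching the comment in the diff).
int64_t trunc_double_to_i64(double a) {
  constexpr int kMantissaBits = 52;  // binary64 fraction bits
  constexpr int kExponentBits = 11;
  constexpr int64_t kBias = 1023;
  uint64_t bits;
  std::memcpy(&bits, &a, sizeof(bits));
  // Shift out the sign bit, then shift down to recover the biased exponent.
  int64_t e = static_cast<int64_t>((bits << 1) >> (kMantissaBits + 1)) - kBias;
  if (e < 0) return 0;  // |a| < 1 truncates to zero
  // Clear sign and exponent, leaving the fraction left-aligned.
  uint64_t mantissa = bits << (kExponentBits + 1);
  // Convert the fraction to an integer; guard e == 0 (a shift by 64 is UB here).
  uint64_t significand = (e == 0) ? 0 : (mantissa >> (64 - e));
  // Add the implicit leading bit, 2^e.
  uint64_t magnitude = significand + (uint64_t(1) << e);
  // Apply the sign via two's complement.
  return (bits >> 63) ? -static_cast<int64_t>(magnitude) : static_cast<int64_t>(magnitude);
}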
diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h
index c14b4a0..b70c7fe 100644
--- a/Eigen/src/Core/arch/AVX512/Complex.h
+++ b/Eigen/src/Core/arch/AVX512/Complex.h
@@ -40,6 +40,7 @@
     HasNegate = 1,
     HasSqrt = 1,
     HasLog = 1,
+    HasExp = 1,
     HasAbs = 0,
     HasAbs2 = 0,
     HasMin = 0,
@@ -460,6 +461,11 @@
   return plog_complex<Packet8cf>(a);
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet8cf pexp<Packet8cf>(const Packet8cf& a) {
+  return pexp_complex<Packet8cf>(a);
+}
+
 }  // end namespace internal
 }  // end namespace Eigen
 
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index b6d2d98..ed2f189 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -34,6 +34,7 @@
 typedef __m512 Packet16f;
 typedef __m512i Packet16i;
 typedef __m512d Packet8d;
+// TODO(rmlarsen): Add support for Packet8l.
 #ifndef EIGEN_VECTORIZE_AVX512FP16
 typedef eigen_packet_wrapper<__m256i, 1> Packet16h;
 #endif
diff --git a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
index 131e6f1..fc11174 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
@@ -1,870 +1,870 @@
- [870 removed lines omitted: duplicates of the re-added file content below]
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+//
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PACKET_MATH_FP16_AVX512_H
+#define EIGEN_PACKET_MATH_FP16_AVX512_H
+
+// IWYU pragma: private
+#include "../../InternalHeaderCheck.h"
+
+namespace Eigen {
+
+namespace internal {
+
+typedef __m512h Packet32h;
+typedef eigen_packet_wrapper<__m256i, 1> Packet16h;
+typedef eigen_packet_wrapper<__m128i, 2> Packet8h;
+
+template <>
+struct is_arithmetic<Packet8h> {
+  enum { value = true };
+};
+
+template <>
+struct packet_traits<half> : default_packet_traits {
+  typedef Packet32h type;
+  typedef Packet16h half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 32,
+
+    HasCmp = 1,
+    HasAdd = 1,
+    HasSub = 1,
+    HasMul = 1,
+    HasDiv = 1,
+    HasNegate = 1,
+    HasAbs = 1,
+    HasAbs2 = 0,
+    HasMin = 1,
+    HasMax = 1,
+    HasConj = 1,
+    HasSetLinear = 0,
+    HasLog = 1,
+    HasLog1p = 1,
+    HasExp = 1,
+    HasExpm1 = 1,
+    HasSqrt = 1,
+    HasRsqrt = 1,
+    // These should be implemented in the future
+    HasBessel = 0,
+    HasNdtri = 0,
+    HasSin = EIGEN_FAST_MATH,
+    HasCos = EIGEN_FAST_MATH,
+    HasTanh = EIGEN_FAST_MATH,
+    HasErf = 0,  // EIGEN_FAST_MATH,
+    HasBlend = 0,
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1,
+    HasRint = 1
+  };
+};
+
+template <>
+struct unpacket_traits<Packet32h> {
+  typedef Eigen::half type;
+  typedef Packet16h half;
+  enum {
+    size = 32,
+    alignment = Aligned64,
+    vectorizable = true,
+    masked_load_available = false,
+    masked_store_available = false
+  };
+};
+
+template <>
+struct unpacket_traits<Packet16h> {
+  typedef Eigen::half type;
+  typedef Packet8h half;
+  enum {
+    size = 16,
+    alignment = Aligned32,
+    vectorizable = true,
+    masked_load_available = false,
+    masked_store_available = false
+  };
+};
+
+template <>
+struct unpacket_traits<Packet8h> {
+  typedef Eigen::half type;
+  typedef Packet8h half;
+  enum {
+    size = 8,
+    alignment = Aligned16,
+    vectorizable = true,
+    masked_load_available = false,
+    masked_store_available = false
+  };
+};
+
+// Memory functions
+
+// pset1
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pset1<Packet32h>(const Eigen::half& from) {
+  return _mm512_set1_ph(static_cast<_Float16>(from));
+}
+
+// pset1frombits
+template <>
+EIGEN_STRONG_INLINE Packet32h pset1frombits<Packet32h>(unsigned short from) {
+  return _mm512_castsi512_ph(_mm512_set1_epi16(from));
+}
+
+// pfirst
+
+template <>
+EIGEN_STRONG_INLINE Eigen::half pfirst<Packet32h>(const Packet32h& from) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+  return half_impl::raw_uint16_to_half(
+      static_cast<unsigned short>(_mm256_extract_epi16(_mm512_extracti32x8_epi32(_mm512_castph_si512(from), 0), 0)));
+#else
+  Eigen::half dest[32];
+  _mm512_storeu_ph(dest, from);
+  return dest[0];
+#endif
+}
+
+// pload
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pload<Packet32h>(const Eigen::half* from) {
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ph(from);
+}
+
+// ploadu
+
+template <>
+EIGEN_STRONG_INLINE Packet32h ploadu<Packet32h>(const Eigen::half* from) {
+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ph(from);
+}
+
+// pstore
+
+template <>
+EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet32h& from) {
+  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ph(to, from);
+}
+
+// pstoreu
+
+template <>
+EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet32h& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ph(to, from);
+}
+
+// ploaddup
+template <>
+EIGEN_STRONG_INLINE Packet32h ploaddup<Packet32h>(const Eigen::half* from) {
+  __m512h a = _mm512_castph256_ph512(_mm256_loadu_ph(from));
+  return _mm512_permutexvar_ph(_mm512_set_epi16(15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6,
+                                                5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0),
+                               a);
+}
+
+// ploadquad
+template <>
+EIGEN_STRONG_INLINE Packet32h ploadquad<Packet32h>(const Eigen::half* from) {
+  __m512h a = _mm512_castph128_ph512(_mm_loadu_ph(from));
+  return _mm512_permutexvar_ph(
+      _mm512_set_epi16(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0),
+      a);
+}
+
+// pabs
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pabs<Packet32h>(const Packet32h& a) {
+  return _mm512_abs_ph(a);
+}
+
+// psignbit
+
+template <>
+EIGEN_STRONG_INLINE Packet32h psignbit<Packet32h>(const Packet32h& a) {
+  return _mm512_castsi512_ph(_mm512_srai_epi16(_mm512_castph_si512(a), 15));
+}
+
+// pmin
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pmin<Packet32h>(const Packet32h& a, const Packet32h& b) {
+  return _mm512_min_ph(a, b);
+}
+
+// pmax
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pmax<Packet32h>(const Packet32h& a, const Packet32h& b) {
+  return _mm512_max_ph(a, b);
+}
+
+// plset
+template <>
+EIGEN_STRONG_INLINE Packet32h plset<Packet32h>(const half& a) {
+  return _mm512_add_ph(_mm512_set1_ph(a),
+                       _mm512_set_ph(31.0f, 30.0f, 29.0f, 28.0f, 27.0f, 26.0f, 25.0f, 24.0f, 23.0f, 22.0f, 21.0f, 20.0f,
+                                     19.0f, 18.0f, 17.0f, 16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f,
+                                     7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f));
+}
+
+// por
+
+template <>
+EIGEN_STRONG_INLINE Packet32h por(const Packet32h& a, const Packet32h& b) {
+  return _mm512_castsi512_ph(_mm512_or_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
+}
+
+// pxor
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pxor(const Packet32h& a, const Packet32h& b) {
+  return _mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
+}
+
+// pand
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pand(const Packet32h& a, const Packet32h& b) {
+  return _mm512_castsi512_ph(_mm512_and_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
+}
+
+// pandnot
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pandnot(const Packet32h& a, const Packet32h& b) {
+  return _mm512_castsi512_ph(_mm512_andnot_si512(_mm512_castph_si512(b), _mm512_castph_si512(a)));
+}
+
+// pselect
+
+template <>
+EIGEN_DEVICE_FUNC inline Packet32h pselect(const Packet32h& mask, const Packet32h& a, const Packet32h& b) {
+  __mmask32 mask32 = _mm512_cmp_epi16_mask(_mm512_castph_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ);
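+  // mask32 is set where 'mask' is all-zero, so the blend returns b in those lanes and a elsewhere.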
+  return _mm512_mask_blend_ph(mask32, a, b);
+}
+
+// pcmp_eq
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pcmp_eq(const Packet32h& a, const Packet32h& b) {
+  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_EQ_OQ);
+  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));
+}
+
+// pcmp_le
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pcmp_le(const Packet32h& a, const Packet32h& b) {
+  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LE_OQ);
+  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));
+}
+
+// pcmp_lt
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pcmp_lt(const Packet32h& a, const Packet32h& b) {
+  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LT_OQ);
+  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));
+}
+
+// pcmp_lt_or_nan
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pcmp_lt_or_nan(const Packet32h& a, const Packet32h& b) {
+  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_NGE_UQ);
+  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi16(0), mask, 0xffffu));
+}
+
+// padd
+
+template <>
+EIGEN_STRONG_INLINE Packet32h padd<Packet32h>(const Packet32h& a, const Packet32h& b) {
+  return _mm512_add_ph(a, b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) {
+  return _mm256_castph_si256(_mm256_add_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {
+  return _mm_castph_si128(_mm_add_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
+}
+
+// psub
+
+template <>
+EIGEN_STRONG_INLINE Packet32h psub<Packet32h>(const Packet32h& a, const Packet32h& b) {
+  return _mm512_sub_ph(a, b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h psub<Packet16h>(const Packet16h& a, const Packet16h& b) {
+  return _mm256_castph_si256(_mm256_sub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h psub<Packet8h>(const Packet8h& a, const Packet8h& b) {
+  return _mm_castph_si128(_mm_sub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
+}
+
+// pmul
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pmul<Packet32h>(const Packet32h& a, const Packet32h& b) {
+  return _mm512_mul_ph(a, b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) {
+  return _mm256_castph_si256(_mm256_mul_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) {
+  return _mm_castph_si128(_mm_mul_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
+}
+
+// pdiv
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pdiv<Packet32h>(const Packet32h& a, const Packet32h& b) {
+  return _mm512_div_ph(a, b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pdiv<Packet16h>(const Packet16h& a, const Packet16h& b) {
+  return _mm256_castph_si256(_mm256_div_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const Packet8h& b) {
+  return _mm_castph_si128(_mm_div_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
+}
+
+// pround
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pround<Packet32h>(const Packet32h& a) {
+  // Work-around for default std::round rounding mode.
+
+  // Mask for the sign bit
+  const Packet32h signMask = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x8000u));
+  // The largest half-precision float less than 0.5
+  const Packet32h prev0dot5 = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x37FFu));
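+  // Adding the largest half below 0.5 (with the sign of a) and then truncating toward zero
+  // rounds halfway cases away from zero, matching std::round semantics.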
+
+  return _mm512_roundscale_ph(padd(por(pand(a, signMask), prev0dot5), a), _MM_FROUND_TO_ZERO);
+}
+
+// print
+
+template <>
+EIGEN_STRONG_INLINE Packet32h print<Packet32h>(const Packet32h& a) {
+  return _mm512_roundscale_ph(a, _MM_FROUND_CUR_DIRECTION);
+}
+
+// pceil
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pceil<Packet32h>(const Packet32h& a) {
+  return _mm512_roundscale_ph(a, _MM_FROUND_TO_POS_INF);
+}
+
+// pfloor
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pfloor<Packet32h>(const Packet32h& a) {
+  return _mm512_roundscale_ph(a, _MM_FROUND_TO_NEG_INF);
+}
+
+// predux
+template <>
+EIGEN_STRONG_INLINE half predux<Packet32h>(const Packet32h& a) {
+  return (half)_mm512_reduce_add_ph(a);
+}
+
+template <>
+EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& a) {
+  return (half)_mm256_reduce_add_ph(_mm256_castsi256_ph(a));
+}
+
+template <>
+EIGEN_STRONG_INLINE half predux<Packet8h>(const Packet8h& a) {
+  return (half)_mm_reduce_add_ph(_mm_castsi128_ph(a));
+}
+
+// predux_half_dowto4
+template <>
+EIGEN_STRONG_INLINE Packet16h predux_half_dowto4<Packet32h>(const Packet32h& a) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+  __m256i lowHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 0));
+  __m256i highHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 1));
+
+  return Packet16h(padd<Packet16h>(lowHalf, highHalf));
+#else
+  Eigen::half data[32];
+  _mm512_storeu_ph(data, a);
+
+  __m256i lowHalf = _mm256_castph_si256(_mm256_loadu_ph(data));
+  __m256i highHalf = _mm256_castph_si256(_mm256_loadu_ph(data + 16));
+
+  return Packet16h(padd<Packet16h>(lowHalf, highHalf));
+#endif
+}
+
+// predux_max
+
+// predux_min
+
+// predux_mul
+
+#ifdef EIGEN_VECTORIZE_FMA
+
+// pmadd
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
+  return _mm512_fmadd_ph(a, b, c);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
+  return _mm256_castph_si256(_mm256_fmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
+  return _mm_castph_si128(_mm_fmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
+}
+
+// pmsub
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
+  return _mm512_fmsub_ph(a, b, c);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
+  return _mm256_castph_si256(_mm256_fmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
+  return _mm_castph_si128(_mm_fmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
+}
+
+// pnmadd
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pnmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
+  return _mm512_fnmadd_ph(a, b, c);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pnmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
+  return _mm256_castph_si256(_mm256_fnmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pnmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
+  return _mm_castph_si128(_mm_fnmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
+}
+
+// pnmsub
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pnmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
+  return _mm512_fnmsub_ph(a, b, c);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pnmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
+  return _mm256_castph_si256(_mm256_fnmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pnmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
+  return _mm_castph_si128(_mm_fnmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
+}
+
+#endif
+
+// pnegate
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pnegate<Packet32h>(const Packet32h& a) {
+  return _mm512_sub_ph(_mm512_set1_ph(0.0), a);
+}
+
+// pconj
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pconj<Packet32h>(const Packet32h& a) {
+  return a;
+}
+
+// psqrt
+
+template <>
+EIGEN_STRONG_INLINE Packet32h psqrt<Packet32h>(const Packet32h& a) {
+  return _mm512_sqrt_ph(a);
+}
+
+// prsqrt
+
+template <>
+EIGEN_STRONG_INLINE Packet32h prsqrt<Packet32h>(const Packet32h& a) {
+  return _mm512_rsqrt_ph(a);
+}
+
+// preciprocal
+
+template <>
+EIGEN_STRONG_INLINE Packet32h preciprocal<Packet32h>(const Packet32h& a) {
+  return _mm512_rcp_ph(a);
+}
+
+// ptranspose
+
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 32>& a) {
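+  // Transpose in four stages: unpack 16-bit pairs, then 32-bit, then 64-bit,
+  // and finally exchange 128-bit lanes between registers.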
+  __m512i t[32];
+
+  EIGEN_UNROLL_LOOP
+  for (int i = 0; i < 16; i++) {
+    t[2 * i] = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));
+    t[2 * i + 1] =
+        _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));
+  }
+
+  __m512i p[32];
+
+  EIGEN_UNROLL_LOOP
+  for (int i = 0; i < 8; i++) {
+    p[4 * i] = _mm512_unpacklo_epi32(t[4 * i], t[4 * i + 2]);
+    p[4 * i + 1] = _mm512_unpackhi_epi32(t[4 * i], t[4 * i + 2]);
+    p[4 * i + 2] = _mm512_unpacklo_epi32(t[4 * i + 1], t[4 * i + 3]);
+    p[4 * i + 3] = _mm512_unpackhi_epi32(t[4 * i + 1], t[4 * i + 3]);
+  }
+
+  __m512i q[32];
+
+  EIGEN_UNROLL_LOOP
+  for (int i = 0; i < 4; i++) {
+    q[8 * i] = _mm512_unpacklo_epi64(p[8 * i], p[8 * i + 4]);
+    q[8 * i + 1] = _mm512_unpackhi_epi64(p[8 * i], p[8 * i + 4]);
+    q[8 * i + 2] = _mm512_unpacklo_epi64(p[8 * i + 1], p[8 * i + 5]);
+    q[8 * i + 3] = _mm512_unpackhi_epi64(p[8 * i + 1], p[8 * i + 5]);
+    q[8 * i + 4] = _mm512_unpacklo_epi64(p[8 * i + 2], p[8 * i + 6]);
+    q[8 * i + 5] = _mm512_unpackhi_epi64(p[8 * i + 2], p[8 * i + 6]);
+    q[8 * i + 6] = _mm512_unpacklo_epi64(p[8 * i + 3], p[8 * i + 7]);
+    q[8 * i + 7] = _mm512_unpackhi_epi64(p[8 * i + 3], p[8 * i + 7]);
+  }
+
+  __m512i f[32];
+
+#define PACKET32H_TRANSPOSE_HELPER(X, Y)                                                            \
+  do {                                                                                              \
+    f[Y * 8] = _mm512_inserti32x4(f[Y * 8], _mm512_extracti32x4_epi32(q[X * 8], Y), X);             \
+    f[Y * 8 + 1] = _mm512_inserti32x4(f[Y * 8 + 1], _mm512_extracti32x4_epi32(q[X * 8 + 1], Y), X); \
+    f[Y * 8 + 2] = _mm512_inserti32x4(f[Y * 8 + 2], _mm512_extracti32x4_epi32(q[X * 8 + 2], Y), X); \
+    f[Y * 8 + 3] = _mm512_inserti32x4(f[Y * 8 + 3], _mm512_extracti32x4_epi32(q[X * 8 + 3], Y), X); \
+    f[Y * 8 + 4] = _mm512_inserti32x4(f[Y * 8 + 4], _mm512_extracti32x4_epi32(q[X * 8 + 4], Y), X); \
+    f[Y * 8 + 5] = _mm512_inserti32x4(f[Y * 8 + 5], _mm512_extracti32x4_epi32(q[X * 8 + 5], Y), X); \
+    f[Y * 8 + 6] = _mm512_inserti32x4(f[Y * 8 + 6], _mm512_extracti32x4_epi32(q[X * 8 + 6], Y), X); \
+    f[Y * 8 + 7] = _mm512_inserti32x4(f[Y * 8 + 7], _mm512_extracti32x4_epi32(q[X * 8 + 7], Y), X); \
+  } while (false);
+
+  PACKET32H_TRANSPOSE_HELPER(0, 0);
+  PACKET32H_TRANSPOSE_HELPER(1, 1);
+  PACKET32H_TRANSPOSE_HELPER(2, 2);
+  PACKET32H_TRANSPOSE_HELPER(3, 3);
+
+  PACKET32H_TRANSPOSE_HELPER(1, 0);
+  PACKET32H_TRANSPOSE_HELPER(2, 0);
+  PACKET32H_TRANSPOSE_HELPER(3, 0);
+  PACKET32H_TRANSPOSE_HELPER(2, 1);
+  PACKET32H_TRANSPOSE_HELPER(3, 1);
+  PACKET32H_TRANSPOSE_HELPER(3, 2);
+
+  PACKET32H_TRANSPOSE_HELPER(0, 1);
+  PACKET32H_TRANSPOSE_HELPER(0, 2);
+  PACKET32H_TRANSPOSE_HELPER(0, 3);
+  PACKET32H_TRANSPOSE_HELPER(1, 2);
+  PACKET32H_TRANSPOSE_HELPER(1, 3);
+  PACKET32H_TRANSPOSE_HELPER(2, 3);
+
+#undef PACKET32H_TRANSPOSE_HELPER
+
+  EIGEN_UNROLL_LOOP
+  for (int i = 0; i < 32; i++) {
+    a.packet[i] = _mm512_castsi512_ph(f[i]);
+  }
+}
+
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 4>& a) {
+  __m512i p0, p1, p2, p3, t0, t1, t2, t3, a0, a1, a2, a3;
+  t0 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));
+  t1 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));
+  t2 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));
+  t3 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));
+
+  p0 = _mm512_unpacklo_epi32(t0, t2);
+  p1 = _mm512_unpackhi_epi32(t0, t2);
+  p2 = _mm512_unpacklo_epi32(t1, t3);
+  p3 = _mm512_unpackhi_epi32(t1, t3);
+
+  a0 = p0;
+  a1 = p1;
+  a2 = p2;
+  a3 = p3;
+
+  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p1, 0), 1);
+  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p0, 1), 0);
+
+  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p2, 0), 2);
+  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p0, 2), 0);
+
+  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p3, 0), 3);
+  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p0, 3), 0);
+
+  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p2, 1), 2);
+  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p1, 2), 1);
+
+  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p3, 2), 3);
+  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p2, 3), 2);
+
+  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p3, 1), 3);
+  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p1, 3), 1);
+
+  a.packet[0] = _mm512_castsi512_ph(a0);
+  a.packet[1] = _mm512_castsi512_ph(a1);
+  a.packet[2] = _mm512_castsi512_ph(a2);
+  a.packet[3] = _mm512_castsi512_ph(a3);
+}
+
+// preverse
+
+template <>
+EIGEN_STRONG_INLINE Packet32h preverse(const Packet32h& a) {
+  return _mm512_permutexvar_ph(_mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+                                                20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),
+                               a);
+}
+
+// pscatter
+
+template <>
+EIGEN_STRONG_INLINE void pscatter<half, Packet32h>(half* to, const Packet32h& from, Index stride) {
+  EIGEN_ALIGN64 half aux[32];
+  pstore(aux, from);
+
+  EIGEN_UNROLL_LOOP
+  for (int i = 0; i < 32; i++) {
+    to[stride * i] = aux[i];
+  }
+}
+
+// pgather
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pgather<Eigen::half, Packet32h>(const Eigen::half* from, Index stride) {
+  return _mm512_castsi512_ph(_mm512_set_epi16(
+      from[31 * stride].x, from[30 * stride].x, from[29 * stride].x, from[28 * stride].x, from[27 * stride].x,
+      from[26 * stride].x, from[25 * stride].x, from[24 * stride].x, from[23 * stride].x, from[22 * stride].x,
+      from[21 * stride].x, from[20 * stride].x, from[19 * stride].x, from[18 * stride].x, from[17 * stride].x,
+      from[16 * stride].x, from[15 * stride].x, from[14 * stride].x, from[13 * stride].x, from[12 * stride].x,
+      from[11 * stride].x, from[10 * stride].x, from[9 * stride].x, from[8 * stride].x, from[7 * stride].x,
+      from[6 * stride].x, from[5 * stride].x, from[4 * stride].x, from[3 * stride].x, from[2 * stride].x,
+      from[1 * stride].x, from[0 * stride].x));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pcos<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h psin<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h plog<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h plog2<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h plog1p<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h pexp<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h pexpm1<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h ptanh<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h pfrexp<Packet16h>(const Packet16h&, Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h pldexp<Packet16h>(const Packet16h&, const Packet16h&);
+
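+// The AVX512-FP16 packet delegates the transcendental functions below to the existing
+// Packet16h implementations; these helpers split a Packet32h into two Packet16h halves
+// and join them back together.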
+EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h& a, const Packet16h& b) {
+  __m512d result = _mm512_undefined_pd();
+  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(a), 0);
+  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(b), 1);
+  return _mm512_castpd_ph(result);
+}
+
+EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h& x, Packet16h& a, Packet16h& b) {
+  a = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 0));
+  b = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 1));
+}
+
+// psin
+template <>
+EIGEN_STRONG_INLINE Packet32h psin<Packet32h>(const Packet32h& a) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h lowOut = psin(low);
+  Packet16h highOut = psin(high);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+// pcos
+template <>
+EIGEN_STRONG_INLINE Packet32h pcos<Packet32h>(const Packet32h& a) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h lowOut = pcos(low);
+  Packet16h highOut = pcos(high);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+// plog
+template <>
+EIGEN_STRONG_INLINE Packet32h plog<Packet32h>(const Packet32h& a) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h lowOut = plog(low);
+  Packet16h highOut = plog(high);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+// plog2
+template <>
+EIGEN_STRONG_INLINE Packet32h plog2<Packet32h>(const Packet32h& a) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h lowOut = plog2(low);
+  Packet16h highOut = plog2(high);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+// plog1p
+template <>
+EIGEN_STRONG_INLINE Packet32h plog1p<Packet32h>(const Packet32h& a) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h lowOut = plog1p(low);
+  Packet16h highOut = plog1p(high);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+// pexp
+template <>
+EIGEN_STRONG_INLINE Packet32h pexp<Packet32h>(const Packet32h& a) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h lowOut = pexp(low);
+  Packet16h highOut = pexp(high);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+// pexpm1
+template <>
+EIGEN_STRONG_INLINE Packet32h pexpm1<Packet32h>(const Packet32h& a) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h lowOut = pexpm1(low);
+  Packet16h highOut = pexpm1(high);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+// ptanh
+template <>
+EIGEN_STRONG_INLINE Packet32h ptanh<Packet32h>(const Packet32h& a) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h lowOut = ptanh(low);
+  Packet16h highOut = ptanh(high);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+// pfrexp
+template <>
+EIGEN_STRONG_INLINE Packet32h pfrexp<Packet32h>(const Packet32h& a, Packet32h& exponent) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h exp1 = _mm256_undefined_si256();
+  Packet16h exp2 = _mm256_undefined_si256();
+
+  Packet16h lowOut = pfrexp(low, exp1);
+  Packet16h highOut = pfrexp(high, exp2);
+
+  exponent = combine2Packet16h(exp1, exp2);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+// pldexp
+template <>
+EIGEN_STRONG_INLINE Packet32h pldexp<Packet32h>(const Packet32h& a, const Packet32h& exponent) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h exp1;
+  Packet16h exp2;
+  extract2Packet16h(exponent, exp1, exp2);
+
+  Packet16h lowOut = pldexp(low, exp1);
+  Packet16h highOut = pldexp(high, exp2);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+}  // end namespace internal
+}  // end namespace Eigen
+
+#endif  // EIGEN_PACKET_MATH_FP16_AVX512_H
diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h
index e3c4436..0252efa 100644
--- a/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -99,6 +99,7 @@
     HasMax = 0,
     HasSqrt = 1,
     HasLog = 1,
+    HasExp = 1,
 #ifdef EIGEN_VECTORIZE_VSX
     HasBlend = 1,
 #endif
@@ -375,6 +376,11 @@
   return plog_complex<Packet2cf>(a);
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a) {
+  return pexp_complex<Packet2cf>(a);
+}
+
 //---------- double ----------
 #ifdef EIGEN_VECTORIZE_VSX
 struct Packet1cd {
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index a4b134c..6a2f0e6 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -94,9 +94,7 @@
 
 static Packet16uc p16uc_REVERSE32 = {12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3};
 static Packet16uc p16uc_REVERSE16 = {14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1};
-#ifndef _ARCH_PWR9
 static Packet16uc p16uc_REVERSE8 = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
-#endif
 
 #ifdef _BIG_ENDIAN
 static Packet16uc p16uc_DUPLICATE32_HI = {0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7};
@@ -1928,19 +1926,11 @@
 }
 template <>
 EIGEN_STRONG_INLINE Packet16c preverse(const Packet16c& a) {
-#ifdef _ARCH_PWR9
-  return vec_revb(a);
-#else
   return vec_perm(a, a, p16uc_REVERSE8);
-#endif
 }
 template <>
 EIGEN_STRONG_INLINE Packet16uc preverse(const Packet16uc& a) {
-#ifdef _ARCH_PWR9
-  return vec_revb(a);
-#else
   return vec_perm(a, a, p16uc_REVERSE8);
-#endif
 }
 template <>
 EIGEN_STRONG_INLINE Packet8bf preverse(const Packet8bf& a) {
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 118426f..78dbf20 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -555,7 +555,7 @@
   return float(double(int64_t(p)) * pio2_62);
 }
 
-template <bool ComputeSine, typename Packet>
+template <bool ComputeSine, typename Packet, bool ComputeBoth = false>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 #if EIGEN_COMP_GNUC_STRICT
     __attribute__((optimize("-fno-unsafe-math-optimizations")))
@@ -669,10 +669,21 @@
   y2 = pmadd(y2, x, x);
 
   // Select the correct result from the two polynomials.
-  y = ComputeSine ? pselect(poly_mask, y2, y1) : pselect(poly_mask, y1, y2);
-
+  if (ComputeBoth) {
+    Packet peven = peven_mask(x);
+    Packet ysin = pselect(poly_mask, y2, y1);
+    Packet ycos = pselect(poly_mask, y1, y2);
+    Packet sign_bit_sin = pxor(_x, preinterpret<Packet>(plogical_shift_left<30>(y_int)));
+    Packet sign_bit_cos = preinterpret<Packet>(plogical_shift_left<30>(padd(y_int, csti_1)));
+    sign_bit_sin = pand(sign_bit_sin, cst_sign_mask);  // clear all but left most bit
+    sign_bit_cos = pand(sign_bit_cos, cst_sign_mask);  // clear all but left most bit
+    y = pselect(peven, pxor(ysin, sign_bit_sin), pxor(ycos, sign_bit_cos));
+  } else {
+    y = ComputeSine ? pselect(poly_mask, y2, y1) : pselect(poly_mask, y1, y2);
+    y = pxor(y, sign_bit);
+  }
   // Update the sign and filter huge inputs
-  return pxor(y, sign_bit);
+  return y;
 }
 
 template <typename Packet>
@@ -917,6 +928,65 @@
   return pxor(p, x_signmask);
 }
 
+/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
+    Doesn't do anything fancy, just a 13/6-degree rational interpolant which
+    is accurate up to a couple of ulps in the (approximate) range [-8, 8],
+    outside of which tanh(x) = +/-1 in single precision. The input is clamped
+    to the range [-c, c]. The value c is chosen as the smallest value where
+    the approximation evaluates to exactly 1. In the range [-0.0004, 0.0004]
+    the approximation tanh(x) ~= x is used for better accuracy as x tends to zero.
+
+    This implementation works on both scalars and packets.
+*/
+template <typename T>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS T ptanh_float(const T& a_x) {
+  // Clamp the inputs to the range [-c, c]
+#ifdef EIGEN_VECTORIZE_FMA
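+  // The clamp bound differs slightly here, presumably because FMA evaluates the
+  // polynomial with different rounding.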
+  const T plus_clamp = pset1<T>(7.99881172180175781f);
+  const T minus_clamp = pset1<T>(-7.99881172180175781f);
+#else
+  const T plus_clamp = pset1<T>(7.90531110763549805f);
+  const T minus_clamp = pset1<T>(-7.90531110763549805f);
+#endif
+  const T tiny = pset1<T>(0.0004f);
+  const T x = pmax(pmin(a_x, plus_clamp), minus_clamp);
+  const T tiny_mask = pcmp_lt(pabs(a_x), tiny);
+  // The monomial coefficients of the numerator polynomial (odd).
+  const T alpha_1 = pset1<T>(4.89352455891786e-03f);
+  const T alpha_3 = pset1<T>(6.37261928875436e-04f);
+  const T alpha_5 = pset1<T>(1.48572235717979e-05f);
+  const T alpha_7 = pset1<T>(5.12229709037114e-08f);
+  const T alpha_9 = pset1<T>(-8.60467152213735e-11f);
+  const T alpha_11 = pset1<T>(2.00018790482477e-13f);
+  const T alpha_13 = pset1<T>(-2.76076847742355e-16f);
+
+  // The monomial coefficients of the denominator polynomial (even).
+  const T beta_0 = pset1<T>(4.89352518554385e-03f);
+  const T beta_2 = pset1<T>(2.26843463243900e-03f);
+  const T beta_4 = pset1<T>(1.18534705686654e-04f);
+  const T beta_6 = pset1<T>(1.19825839466702e-06f);
+
+  // Since the polynomials are odd/even, we need x^2.
+  const T x2 = pmul(x, x);
+
+  // Evaluate the numerator polynomial p.
+  T p = pmadd(x2, alpha_13, alpha_11);
+  p = pmadd(x2, p, alpha_9);
+  p = pmadd(x2, p, alpha_7);
+  p = pmadd(x2, p, alpha_5);
+  p = pmadd(x2, p, alpha_3);
+  p = pmadd(x2, p, alpha_1);
+  p = pmul(x, p);
+
+  // Evaluate the denominator polynomial q.
+  T q = pmadd(x2, beta_6, beta_4);
+  q = pmadd(x2, q, beta_2);
+  q = pmadd(x2, q, beta_0);
+
+  // Divide the numerator by the denominator.
+  return pselect(tiny_mask, x, pdiv(p, q));
+}
+
 template <typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_float(const Packet& x) {
   typedef typename unpacket_traits<Packet>::type Scalar;
@@ -993,6 +1063,49 @@
 }
 
 template <typename Packet>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp_complex(const Packet& a) {
+  typedef typename unpacket_traits<Packet>::as_real RealPacket;
+  typedef typename unpacket_traits<Packet>::type Scalar;
+  typedef typename Scalar::value_type RealScalar;
+  const RealPacket even_mask = peven_mask(a.v);
+  const RealPacket odd_mask = pcplxflip(Packet(even_mask)).v;
+
+  // Let a = x + iy.
+  // exp(a) = exp(x) * cis(y), plus some special edge-case handling.
+
+  // exp(x):
+  RealPacket x = pand(a.v, even_mask);
+  x = por(x, pcplxflip(Packet(x)).v);
+  RealPacket expx = pexp(x);  // exp(x);
+
+  // cis(y):
+  RealPacket y = pand(odd_mask, a.v);
+  y = por(y, pcplxflip(Packet(y)).v);
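+  // With ComputeBoth=true, psincos_float returns sin(y) in the even lanes and cos(y) in the odd lanes.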
+  RealPacket cisy = psincos_float<false, RealPacket, true>(y);
+  cisy = pcplxflip(Packet(cisy)).v;  // cos(y) + i * sin(y)
+
+  const RealPacket cst_pos_inf = pset1<RealPacket>(NumTraits<RealScalar>::infinity());
+  const RealPacket cst_neg_inf = pset1<RealPacket>(-NumTraits<RealScalar>::infinity());
+
+  // If x is -inf, we know that cossin(y) is bounded,
+  //   so the result is (+/-0, +/-0), where the signs of the real and imaginary
+  //   parts come from the signs of cos(y) and sin(y) respectively.
+  RealPacket cisy_sign = por(pandnot(cisy, pabs(cisy)), pset1<RealPacket>(RealScalar(1)));
+  cisy = pselect(pcmp_eq(x, cst_neg_inf), cisy_sign, cisy);
+
+  // If x is inf, and cos(y) has unknown sign (y is inf or NaN), the result
+  // is (+/-inf, NaN), where the signs are undetermined (take the sign of y).
+  RealPacket y_sign = por(pandnot(y, pabs(y)), pset1<RealPacket>(RealScalar(1)));
+  cisy = pselect(pand(pcmp_eq(x, cst_pos_inf), pisnan(cisy)), pand(y_sign, even_mask), cisy);
+  Packet result = Packet(pmul(expx, cisy));
+
+  // If y is +/- 0, the input is real, so take the real result for consistency.
+  result = pselect(Packet(pcmp_eq(y, pzero(y))), Packet(por(pand(expx, even_mask), pand(y, odd_mask))), result);
+
+  return result;
+}
+
+template <typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt_complex(const Packet& a) {
   typedef typename unpacket_traits<Packet>::type Scalar;
   typedef typename Scalar::value_type RealScalar;
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
index 960bb67..9560de2 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
@@ -98,6 +98,10 @@
 template <typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_double(const Packet& x);
 
+/** \internal \returns tanh(x) for single precision float */
+template <typename Packet>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh_float(const Packet& x);
+
 /** \internal \returns atanh(x) for single precision float */
 template <typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_float(const Packet& x);
@@ -117,6 +121,10 @@
 template <typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog_complex(const Packet& x);
 
+/** \internal \returns exp(x) for complex types */
+template <typename Packet>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp_complex(const Packet& x);
+
 // Macros for instantiating these generic functions for different backends.
 #define EIGEN_PACKET_FUNCTION(METHOD, SCALAR, PACKET)                                             \
   template <>                                                                                     \
@@ -133,6 +141,7 @@
   EIGEN_FLOAT_PACKET_FUNCTION(asin, PACKET)                                                    \
   EIGEN_FLOAT_PACKET_FUNCTION(acos, PACKET)                                                    \
   EIGEN_FLOAT_PACKET_FUNCTION(atan, PACKET)                                                    \
+  EIGEN_FLOAT_PACKET_FUNCTION(tanh, PACKET)                                                    \
   EIGEN_FLOAT_PACKET_FUNCTION(atanh, PACKET)                                                   \
   EIGEN_FLOAT_PACKET_FUNCTION(log, PACKET)                                                     \
   EIGEN_FLOAT_PACKET_FUNCTION(log2, PACKET)                                                    \
@@ -144,10 +153,6 @@
   template <>                                                                                  \
   EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET plog1p<PACKET>(const PACKET& _x) { \
     return internal::generic_plog1p(_x);                                                       \
-  }                                                                                            \
-  template <>                                                                                  \
-  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET ptanh<PACKET>(const PACKET& _x) {  \
-    return internal::generic_fast_tanh_float(_x);                                              \
   }
 
 #define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(PACKET) \
diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index 22c7765..5257c03 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -63,6 +63,7 @@
     HasNegate = 1,
     HasSqrt = 1,
     HasLog = 1,
+    HasExp = 1,
     HasAbs = 0,
     HasAbs2 = 0,
     HasMin = 0,
@@ -447,6 +448,16 @@
   return plog_complex(a);
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet1cf pexp<Packet1cf>(const Packet1cf& a) {
+  return pexp_complex(a);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a) {
+  return pexp_complex(a);
+}
+
 //---------- double ----------
 #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
 
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index 76c3a05..0e70f03 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -43,6 +43,7 @@
     HasNegate = 1,
     HasSqrt = 1,
     HasLog = 1,
+    HasExp = 1,
     HasAbs = 0,
     HasAbs2 = 0,
     HasMin = 0,
@@ -424,6 +425,11 @@
   return plog_complex<Packet2cf>(a);
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a) {
+  return pexp_complex<Packet2cf>(a);
+}
+
 }  // end namespace internal
 }  // end namespace Eigen
 
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index bdbf759..008109a 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -52,6 +52,7 @@
 typedef eigen_packet_wrapper<__m128i, 0> Packet4i;
 typedef eigen_packet_wrapper<__m128i, 1> Packet16b;
 typedef eigen_packet_wrapper<__m128i, 4> Packet4ui;
+typedef eigen_packet_wrapper<__m128i, 5> Packet2l;
 
 template <>
 struct is_arithmetic<__m128> {
@@ -69,6 +70,10 @@
 struct is_arithmetic<Packet4i> {
   enum { value = true };
 };
+template <>
+struct is_arithmetic<Packet2l> {
+  enum { value = true };
+};
 // Note that `Packet4ui` uses the underlying type `__m128i`, which is
 // interpreted as a vector of _signed_ `int32`s, which breaks some arithmetic
 // operations used in `GenericPacketMath.h`.
@@ -140,6 +145,27 @@
 
 #define EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = pset1<Packet4ui>(X)
 
+// Work around lack of extract/cvt for epi64 when compiling for 32-bit.
+#if EIGEN_ARCH_x86_64
+EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_0(const __m128i& a) { return _mm_cvtsi128_si64(a); }
+#ifdef EIGEN_VECTORIZE_SSE4_1
+EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) { return _mm_extract_epi64(a, 1); }
+#else
+EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) {
+  return _mm_cvtsi128_si64(_mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(a), 0x1)));
+}
+#endif
+#else
+// epi64 instructions are not available.  The following seems to generate the same instructions
+// with -O2 in GCC/Clang.
+EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_0(const __m128i& a) {
+  return numext::bit_cast<int64_t>(_mm_cvtsd_f64(_mm_castsi128_pd(a)));
+}
+EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) {
+  return numext::bit_cast<int64_t>(_mm_cvtsd_f64(_mm_shuffle_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(a), 0x1)));
+}
+#endif
+
 // Use the packet_traits defined in AVX/PacketMath.h instead if we're going
 // to leverage AVX instructions.
 #ifndef EIGEN_VECTORIZE_AVX
@@ -213,10 +239,10 @@
   enum {
     Vectorizable = 1,
     AlignedOnScalar = 1,
-    HasCmp = 1,
-    HasDiv = 1,
     size = 4,
 
+    HasCmp = 1,
+    HasDiv = 1,
     HasShift = 1,
     HasBlend = 1
   };
@@ -232,10 +258,22 @@
 
     HasDiv = 0,
     HasNegate = 0,
-    HasSqrt = 0,
     HasCmp = 1,
-    HasMin = 1,
-    HasMax = 1,
+    HasShift = 1,
+    HasBlend = 1
+  };
+};
+template <>
+struct packet_traits<int64_t> : default_packet_traits {
+  typedef Packet2l type;
+  typedef Packet2l half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 2,
+
+    HasDiv = 0,
+    HasCmp = 1,
     HasShift = 1,
     HasBlend = 1
   };
@@ -250,12 +288,8 @@
     AlignedOnScalar = 1,
     size = 16,
 
-    HasAdd = 1,
-    HasSub = 1,
     HasCmp = 1,  // note -- only pcmp_eq is defined
     HasShift = 0,
-    HasMul = 1,
-    HasNegate = 1,
     HasAbs = 0,
     HasAbs2 = 0,
     HasMin = 0,
@@ -283,6 +317,19 @@
 struct unpacket_traits<Packet2d> {
   typedef double type;
   typedef Packet2d half;
+  typedef Packet2l integer_packet;
+  enum {
+    size = 2,
+    alignment = Aligned16,
+    vectorizable = true,
+    masked_load_available = false,
+    masked_store_available = false
+  };
+};
+template <>
+struct unpacket_traits<Packet2l> {
+  typedef int64_t type;
+  typedef Packet2l half;
   enum {
     size = 2,
     alignment = Aligned16,
@@ -348,6 +395,10 @@
   return _mm_set1_pd(from);
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l pset1<Packet2l>(const int64_t& from) {
+  return _mm_set1_epi64x(from);
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
   return _mm_set1_epi32(from);
 }
@@ -374,6 +425,10 @@
   return _mm_castsi128_ps(_mm_set_epi32(0, -1, 0, -1));
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l peven_mask(const Packet2l& /*a*/) {
+  return _mm_set_epi32(0, 0, -1, -1);
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i peven_mask(const Packet4i& /*a*/) {
   return _mm_set_epi32(0, -1, 0, -1);
 }
@@ -395,6 +450,10 @@
   return _mm_setzero_pd();
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l pzero(const Packet2l& /*a*/) {
+  return _mm_setzero_si128();
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i pzero(const Packet4i& /*a*/) {
   return _mm_setzero_si128();
 }
@@ -424,6 +483,10 @@
   return _mm_add_pd(pset1<Packet2d>(a), _mm_set_pd(1, 0));
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l plset<Packet2l>(const int64_t& a) {
+  return _mm_add_epi64(pset1<Packet2l>(a), _mm_set_epi64x(1, 0));
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) {
   return _mm_add_epi32(pset1<Packet4i>(a), _mm_set_epi32(3, 2, 1, 0));
 }
@@ -441,6 +504,10 @@
   return _mm_add_pd(a, b);
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l padd<Packet2l>(const Packet2l& a, const Packet2l& b) {
+  return _mm_add_epi64(a, b);
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) {
   return _mm_add_epi32(a, b);
 }
@@ -474,6 +541,10 @@
   return _mm_sub_pd(a, b);
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l psub<Packet2l>(const Packet2l& a, const Packet2l& b) {
+  return _mm_sub_epi64(a, b);
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) {
   return _mm_sub_epi32(a, b);
 }
@@ -521,8 +592,13 @@
   return _mm_xor_pd(a, mask);
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l pnegate(const Packet2l& a) {
+  return psub(pzero(a), a);
+}
+
+template <>
 EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) {
-  return psub(Packet4i(_mm_setr_epi32(0, 0, 0, 0)), a);
+  return psub(pzero(a), a);
 }
 
 template <>
@@ -539,6 +615,10 @@
   return a;
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l pconj(const Packet2l& a) {
+  return a;
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) {
   return a;
 }
@@ -552,6 +632,21 @@
   return _mm_mul_pd(a, b);
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l pmul<Packet2l>(const Packet2l& a, const Packet2l& b) {
+  // 64-bit mul requires avx512, so do this with 32-bit multiplication
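+  // (a * b) mod 2^64 = ((hi(a)*lo(b) + hi(b)*lo(a)) << 32) + lo(a)*lo(b);
+  // the hi(a)*hi(b) term overflows past bit 63 and is dropped.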
+  __m128i upper32_a = _mm_srli_epi64(a, 32);
+  __m128i upper32_b = _mm_srli_epi64(b, 32);
+
+  // upper * lower
+  __m128i mul1 = _mm_mul_epu32(upper32_a, b);
+  __m128i mul2 = _mm_mul_epu32(upper32_b, a);
+  // lower(a) * lower(b)
+  __m128i mul3 = _mm_mul_epu32(a, b);
+
+  __m128i high = _mm_slli_epi64(_mm_add_epi64(mul1, mul2), 32);
+  return _mm_add_epi64(high, mul3);
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) {
 #ifdef EIGEN_VECTORIZE_SSE4_1
   return _mm_mullo_epi32(a, b);
@@ -602,15 +697,6 @@
 #endif
 }
 
-// for some weird raisons, it has to be overloaded for packet of integers
-template <>
-EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) {
-  return padd(pmul(a, b), c);
-}
-template <>
-EIGEN_STRONG_INLINE Packet4ui pmadd(const Packet4ui& a, const Packet4ui& b, const Packet4ui& c) {
-  return padd(pmul(a, b), c);
-}
 #ifdef EIGEN_VECTORIZE_FMA
 template <>
 EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
@@ -659,27 +745,36 @@
 
 #ifdef EIGEN_VECTORIZE_SSE4_1
 template <>
-EIGEN_DEVICE_FUNC inline Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) {
+EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) {
   return _mm_blendv_ps(b, a, mask);
 }
 
 template <>
-EIGEN_DEVICE_FUNC inline Packet4i pselect(const Packet4i& mask, const Packet4i& a, const Packet4i& b) {
+EIGEN_STRONG_INLINE Packet2l pselect(const Packet2l& mask, const Packet2l& a, const Packet2l& b) {
+  return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(b), _mm_castsi128_pd(a), _mm_castsi128_pd(mask)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet4i pselect(const Packet4i& mask, const Packet4i& a, const Packet4i& b) {
   return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(b), _mm_castsi128_ps(a), _mm_castsi128_ps(mask)));
 }
 
 template <>
-EIGEN_DEVICE_FUNC inline Packet4ui pselect(const Packet4ui& mask, const Packet4ui& a, const Packet4ui& b) {
+EIGEN_STRONG_INLINE Packet4ui pselect(const Packet4ui& mask, const Packet4ui& a, const Packet4ui& b) {
   return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(b), _mm_castsi128_ps(a), _mm_castsi128_ps(mask)));
 }
 
 template <>
-EIGEN_DEVICE_FUNC inline Packet2d pselect(const Packet2d& mask, const Packet2d& a, const Packet2d& b) {
+EIGEN_STRONG_INLINE Packet2d pselect(const Packet2d& mask, const Packet2d& a, const Packet2d& b) {
   return _mm_blendv_pd(b, a, mask);
 }
 #endif
 
 template <>
+EIGEN_STRONG_INLINE Packet2l ptrue<Packet2l>(const Packet2l& a) {
+  return _mm_cmpeq_epi32(a, a);
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i ptrue<Packet4i>(const Packet4i& a) {
   return _mm_cmpeq_epi32(a, a);
 }
@@ -707,6 +802,10 @@
   return _mm_and_pd(a, b);
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l pand<Packet2l>(const Packet2l& a, const Packet2l& b) {
+  return _mm_and_si128(a, b);
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) {
   return _mm_and_si128(a, b);
 }
@@ -728,6 +827,10 @@
   return _mm_or_pd(a, b);
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l por<Packet2l>(const Packet2l& a, const Packet2l& b) {
+  return _mm_or_si128(a, b);
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) {
   return _mm_or_si128(a, b);
 }
@@ -749,6 +852,10 @@
   return _mm_xor_pd(a, b);
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l pxor<Packet2l>(const Packet2l& a, const Packet2l& b) {
+  return _mm_xor_si128(a, b);
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) {
   return _mm_xor_si128(a, b);
 }
@@ -770,6 +877,10 @@
   return _mm_andnot_pd(b, a);
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l pandnot<Packet2l>(const Packet2l& a, const Packet2l& b) {
+  return _mm_andnot_si128(b, a);
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) {
   return _mm_andnot_si128(b, a);
 }
@@ -811,7 +922,6 @@
 EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) {
   return _mm_cmpeq_pd(a, b);
 }
-
 template <>
 EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i& a, const Packet4i& b) {
   return _mm_cmplt_epi32(a, b);
@@ -821,8 +931,35 @@
   return _mm_cmpeq_epi32(a, b);
 }
 template <>
-EIGEN_STRONG_INLINE Packet4ui pcmp_eq(const Packet4ui& a, const Packet4ui& b) {
-  return _mm_cmpeq_epi32(a, b);
+EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) {
+  return por(pcmp_lt(a, b), pcmp_eq(a, b));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2l pcmp_lt(const Packet2l& a, const Packet2l& b) {
+#ifdef EIGEN_VECTORIZE_SSE4_2
+  return _mm_cmpgt_epi64(b, a);
+#else
+  Packet4i eq = pcmp_eq<Packet4i>(Packet4i(a), Packet4i(b));
+  Packet2l hi_eq = Packet2l(_mm_shuffle_epi32(eq, (shuffle_mask<1, 1, 3, 3>::mask)));
+  Packet4i lt = pcmp_lt<Packet4i>(Packet4i(a), Packet4i(b));
+  Packet2l hi_lt = Packet2l(_mm_shuffle_epi32(lt, (shuffle_mask<1, 1, 3, 3>::mask)));
+  Packet2l lo_lt = Packet2l(_mm_shuffle_epi32(lt, (shuffle_mask<0, 0, 2, 2>::mask)));
+  // return hi(a) < hi(b) || (hi(a) == hi(b) && lo(a) < lo(b))
+  return por(hi_lt, pand(hi_eq, lo_lt));
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet2l pcmp_eq(const Packet2l& a, const Packet2l& b) {
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_cmpeq_epi64(a, b);
+#else
+  Packet4i tmp = pcmp_eq<Packet4i>(Packet4i(a), Packet4i(b));
+  return Packet2l(pand<Packet4i>(tmp, _mm_shuffle_epi32(tmp, (shuffle_mask<1, 0, 3, 2>::mask))));
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet2l pcmp_le(const Packet2l& a, const Packet2l& b) {
+  return por(pcmp_lt(a, b), pcmp_eq(a, b));
 }
 template <>
 EIGEN_STRONG_INLINE Packet16b pcmp_eq(const Packet16b& a, const Packet16b& b) {
@@ -831,8 +968,8 @@
   return _mm_and_si128(_mm_cmpeq_epi8(a, b), kBoolMask);
 }
 template <>
-EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) {
-  return por(pcmp_lt(a, b), pcmp_eq(a, b));
+EIGEN_STRONG_INLINE Packet4ui pcmp_eq(const Packet4ui& a, const Packet4ui& b) {
+  return _mm_cmpeq_epi32(a, b);
 }
 
 template <>
@@ -876,6 +1013,11 @@
 #endif
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l pmin<Packet2l>(const Packet2l& a, const Packet2l& b) {
+  Packet2l a_lt_mask = pcmp_lt(a, b);
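+  // Blend with the comparison mask: take a where a < b, otherwise b (SSE has no 64-bit min).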
+  return por(pandnot(b, a_lt_mask), pand(a, a_lt_mask));
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) {
 #ifdef EIGEN_VECTORIZE_SSE4_1
   return _mm_min_epi32(a, b);
@@ -937,6 +1079,11 @@
 #endif
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l pmax<Packet2l>(const Packet2l& a, const Packet2l& b) {
+  Packet2l a_lt_mask = pcmp_lt(a, b);
+  return por(pandnot(a, a_lt_mask), pand(b, a_lt_mask));
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) {
 #ifdef EIGEN_VECTORIZE_SSE4_1
   return _mm_max_epi32(a, b);
@@ -1028,6 +1175,46 @@
   return pminmax_propagate_nan(a, b, pmax<Packet2d>);
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet4f psignbit(const Packet4f& a) {
+  return _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(a), 31));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2d psignbit(const Packet2d& a) {
+  Packet4f tmp = psignbit<Packet4f>(_mm_castpd_ps(a));
+#ifdef EIGEN_VECTORIZE_AVX
+  return _mm_castps_pd(_mm_permute_ps(tmp, (shuffle_mask<1, 1, 3, 3>::mask)));
+#else
+  return _mm_castps_pd(_mm_shuffle_ps(tmp, tmp, (shuffle_mask<1, 1, 3, 3>::mask)));
+#endif  // EIGEN_VECTORIZE_AVX
+}
+template <>
+EIGEN_STRONG_INLINE Packet4i psignbit(const Packet4i& a) {
+  return _mm_srai_epi32(a, 31);
+}
+template <>
+EIGEN_STRONG_INLINE Packet4ui psignbit(const Packet4ui& a) {
+  return pzero(a);
+}
+template <>
+EIGEN_STRONG_INLINE Packet2l psignbit(const Packet2l& a) {
+  Packet4i tmp = psignbit<Packet4i>(Packet4i(a));
+  return Packet2l(_mm_shuffle_epi32(tmp, (shuffle_mask<1, 1, 3, 3>::mask)));
+}
+
+template <int N>
+EIGEN_STRONG_INLINE Packet2l parithmetic_shift_right(const Packet2l& a) {
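+  // SSE has no 64-bit arithmetic shift; emulate it by OR-ing the sign-extension
+  // bits into a logical right shift.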
+  Packet2l signbit = psignbit(a);
+  return por(_mm_slli_epi64(signbit, 64 - N), _mm_srli_epi64(a, N));
+}
+template <int N>
+EIGEN_STRONG_INLINE Packet2l plogical_shift_right(const Packet2l& a) {
+  return _mm_srli_epi64(a, N);
+}
+template <int N>
+EIGEN_STRONG_INLINE Packet2l plogical_shift_left(const Packet2l& a) {
+  return _mm_slli_epi64(a, N);
+}
 template <int N>
 EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i& a) {
   return _mm_srai_epi32(a, N);
@@ -1040,7 +1227,6 @@
 EIGEN_STRONG_INLINE Packet4i plogical_shift_left(const Packet4i& a) {
   return _mm_slli_epi32(a, N);
 }
-
 template <int N>
 EIGEN_STRONG_INLINE Packet4ui parithmetic_shift_right(const Packet4ui& a) {
   return _mm_srli_epi32(a, N);
@@ -1065,12 +1251,17 @@
   return _mm_and_pd(a, mask);
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l pabs(const Packet2l& a) {
+  Packet2l signbit = psignbit(a);
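+  // Two's-complement abs: (a ^ sign) - sign negates the negative lanes and leaves the others unchanged.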
+  return _mm_sub_epi64(_mm_xor_si128(a, signbit), signbit);
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) {
 #ifdef EIGEN_VECTORIZE_SSSE3
   return _mm_abs_epi32(a);
 #else
-  Packet4i aux = _mm_srai_epi32(a, 31);
-  return _mm_sub_epi32(_mm_xor_si128(a, aux), aux);
+  Packet4i signbit = psignbit(a);
+  return _mm_sub_epi32(_mm_xor_si128(a, signbit), signbit);
 #endif
 }
 template <>
@@ -1078,24 +1269,6 @@
   return a;
 }
 
-template <>
-EIGEN_STRONG_INLINE Packet4f psignbit(const Packet4f& a) {
-  return _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(a), 31));
-}
-template <>
-EIGEN_STRONG_INLINE Packet2d psignbit(const Packet2d& a) {
-  Packet4f tmp = psignbit<Packet4f>(_mm_castpd_ps(a));
-#ifdef EIGEN_VECTORIZE_AVX
-  return _mm_castps_pd(_mm_permute_ps(tmp, (shuffle_mask<1, 1, 3, 3>::mask)));
-#else
-  return _mm_castps_pd(_mm_shuffle_ps(tmp, tmp, (shuffle_mask<1, 1, 3, 3>::mask)));
-#endif  // EIGEN_VECTORIZE_AVX
-}
-template <>
-EIGEN_STRONG_INLINE Packet4ui psignbit(const Packet4ui& a) {
-  return pzero(a);
-}
-
 #ifdef EIGEN_VECTORIZE_SSE4_1
 template <>
 EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) {
@@ -1217,6 +1390,10 @@
   EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from);
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l pload<Packet2l>(const int64_t* from) {
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from));
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) {
   EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from));
 }
@@ -1251,6 +1428,11 @@
   return _mm_loadu_pd(from);
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l ploadu<Packet2l>(const int64_t* from) {
+  EIGEN_DEBUG_UNALIGNED_LOAD
+  return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) {
   EIGEN_DEBUG_UNALIGNED_LOAD
   return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
@@ -1299,6 +1481,10 @@
   return pset1<Packet2d>(from[0]);
 }
 template <>
+EIGEN_STRONG_INLINE Packet2l ploaddup<Packet2l>(const int64_t* from) {
+  return pset1<Packet2l>(from[0]);
+}
+template <>
 EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from) {
   Packet4i tmp;
   tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));
@@ -1337,6 +1523,10 @@
   EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from);
 }
 template <>
+EIGEN_STRONG_INLINE void pstore<int64_t>(int64_t* to, const Packet2l& from) {
+  EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from);
+}
+template <>
 EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) {
   EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from);
 }
@@ -1358,6 +1548,10 @@
   EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from);
 }
 template <>
+EIGEN_STRONG_INLINE void pstoreu<int64_t>(int64_t* to, const Packet2l& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from);
+}
+template <>
 EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) {
   EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from);
 }
@@ -1393,25 +1587,142 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride) {
+EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
+  return _mm_shuffle_ps(a, a, 0x1B);
+}
+template <>
+EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) {
+  return _mm_shuffle_pd(a, a, 0x1);
+}
+template <>
+EIGEN_STRONG_INLINE Packet2l preverse(const Packet2l& a) {
+  return _mm_castpd_si128(preverse(_mm_castsi128_pd(a)));
+}
+template <>
+EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
+  return _mm_shuffle_epi32(a, 0x1B);
+}
+template <>
+EIGEN_STRONG_INLINE Packet4ui preverse(const Packet4ui& a) {
+  return _mm_shuffle_epi32(a, 0x1B);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16b preverse(const Packet16b& a) {
+#ifdef EIGEN_VECTORIZE_SSSE3
+  __m128i mask = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+  return _mm_shuffle_epi8(a, mask);
+#else
+  Packet16b tmp = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3));
+  tmp = _mm_shufflehi_epi16(_mm_shufflelo_epi16(tmp, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1));
+  return _mm_or_si128(_mm_slli_epi16(tmp, 8), _mm_srli_epi16(tmp, 8));
+#endif
+}
+
+#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
+// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
+// Direct access of the struct members fixed bug #62.
+template <>
+EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
+  return a.m128_f32[0];
+}
+template <>
+EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
+  return a.m128d_f64[0];
+}
+template <>
+EIGEN_STRONG_INLINE int64_t pfirst<Packet2l>(const Packet2l& a) {
+  int64_t x = _mm_extract_epi64_0(a);
+  return x;
+}
+template <>
+EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) {
+  int x = _mm_cvtsi128_si32(a);
+  return x;
+}
+template <>
+EIGEN_STRONG_INLINE uint32_t pfirst<Packet4ui>(const Packet4ui& a) {
+  uint32_t x = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(a));
+  return x;
+}
+#elif EIGEN_COMP_MSVC_STRICT
+// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
+template <>
+EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
+  float x = _mm_cvtss_f32(a);
+  return x;
+}
+template <>
+EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
+  double x = _mm_cvtsd_f64(a);
+  return x;
+}
+template <>
+EIGEN_STRONG_INLINE int64_t pfirst<Packet2l>(const Packet2l& a) {
+  int64_t x = _mm_extract_epi64_0(a);
+  return x;
+}
+template <>
+EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) {
+  int x = _mm_cvtsi128_si32(a);
+  return x;
+}
+template <>
+EIGEN_STRONG_INLINE uint32_t pfirst<Packet4ui>(const Packet4ui& a) {
+  uint32_t x = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(a));
+  return x;
+}
+#else
+template <>
+EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
+  return _mm_cvtss_f32(a);
+}
+template <>
+EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
+  return _mm_cvtsd_f64(a);
+}
+template <>
+EIGEN_STRONG_INLINE int64_t pfirst<Packet2l>(const Packet2l& a) {
+  return _mm_extract_epi64_0(a);
+}
+template <>
+EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) {
+  return _mm_cvtsi128_si32(a);
+}
+template <>
+EIGEN_STRONG_INLINE uint32_t pfirst<Packet4ui>(const Packet4ui& a) {
+  return numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(a));
+}
+#endif
+template <>
+EIGEN_STRONG_INLINE bool pfirst<Packet16b>(const Packet16b& a) {
+  int x = _mm_cvtsi128_si32(a);
+  return static_cast<bool>(x & 1);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet4f pgather<float, Packet4f>(const float* from, Index stride) {
   return _mm_set_ps(from[3 * stride], from[2 * stride], from[1 * stride], from[0 * stride]);
 }
 template <>
-EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride) {
+EIGEN_STRONG_INLINE Packet2d pgather<double, Packet2d>(const double* from, Index stride) {
   return _mm_set_pd(from[1 * stride], from[0 * stride]);
 }
 template <>
-EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride) {
+EIGEN_STRONG_INLINE Packet2l pgather<int64_t, Packet2l>(const int64_t* from, Index stride) {
+  return _mm_set_epi64x(from[1 * stride], from[0 * stride]);
+}
+template <>
+EIGEN_STRONG_INLINE Packet4i pgather<int, Packet4i>(const int* from, Index stride) {
   return _mm_set_epi32(from[3 * stride], from[2 * stride], from[1 * stride], from[0 * stride]);
 }
 template <>
-EIGEN_DEVICE_FUNC inline Packet4ui pgather<uint32_t, Packet4ui>(const uint32_t* from, Index stride) {
+EIGEN_STRONG_INLINE Packet4ui pgather<uint32_t, Packet4ui>(const uint32_t* from, Index stride) {
   return _mm_set_epi32(numext::bit_cast<int32_t>(from[3 * stride]), numext::bit_cast<int32_t>(from[2 * stride]),
                        numext::bit_cast<int32_t>(from[1 * stride]), numext::bit_cast<int32_t>(from[0 * stride]));
 }
 
 template <>
-EIGEN_DEVICE_FUNC inline Packet16b pgather<bool, Packet16b>(const bool* from, Index stride) {
+EIGEN_STRONG_INLINE Packet16b pgather<bool, Packet16b>(const bool* from, Index stride) {
   return _mm_set_epi8(from[15 * stride], from[14 * stride], from[13 * stride], from[12 * stride], from[11 * stride],
                       from[10 * stride], from[9 * stride], from[8 * stride], from[7 * stride], from[6 * stride],
                       from[5 * stride], from[4 * stride], from[3 * stride], from[2 * stride], from[1 * stride],
@@ -1419,33 +1730,38 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride) {
-  to[stride * 0] = _mm_cvtss_f32(from);
-  to[stride * 1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
-  to[stride * 2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
-  to[stride * 3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
+EIGEN_STRONG_INLINE void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride) {
+  to[stride * 0] = pfirst(from);
+  to[stride * 1] = pfirst(_mm_shuffle_ps(from, from, 1));
+  to[stride * 2] = pfirst(_mm_shuffle_ps(from, from, 2));
+  to[stride * 3] = pfirst(_mm_shuffle_ps(from, from, 3));
 }
 template <>
-EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride) {
-  to[stride * 0] = _mm_cvtsd_f64(from);
-  to[stride * 1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
+EIGEN_STRONG_INLINE void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride) {
+  to[stride * 0] = pfirst(from);
+  to[stride * 1] = pfirst(preverse(from));
 }
 template <>
-EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride) {
+EIGEN_STRONG_INLINE void pscatter<int64_t, Packet2l>(int64_t* to, const Packet2l& from, Index stride) {
+  to[stride * 0] = pfirst(from);
+  to[stride * 1] = pfirst(preverse(from));
+}
+template <>
+EIGEN_STRONG_INLINE void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride) {
   to[stride * 0] = _mm_cvtsi128_si32(from);
   to[stride * 1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
   to[stride * 2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
   to[stride * 3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
 }
 template <>
-EIGEN_DEVICE_FUNC inline void pscatter<uint32_t, Packet4ui>(uint32_t* to, const Packet4ui& from, Index stride) {
+EIGEN_STRONG_INLINE void pscatter<uint32_t, Packet4ui>(uint32_t* to, const Packet4ui& from, Index stride) {
   to[stride * 0] = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(from));
   to[stride * 1] = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1)));
   to[stride * 2] = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2)));
   to[stride * 3] = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3)));
 }
 template <>
-EIGEN_DEVICE_FUNC inline void pscatter<bool, Packet16b>(bool* to, const Packet16b& from, Index stride) {
+EIGEN_STRONG_INLINE void pscatter<bool, Packet16b>(bool* to, const Packet16b& from, Index stride) {
   to[4 * stride * 0] = _mm_cvtsi128_si32(from);
   to[4 * stride * 1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
   to[4 * stride * 2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
@@ -1485,106 +1801,15 @@
   _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0);
 }
 template <>
+EIGEN_STRONG_INLINE void prefetch<int64_t>(const int64_t* addr) {
+  _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0);
+}
+template <>
 EIGEN_STRONG_INLINE void prefetch<uint32_t>(const uint32_t* addr) {
   _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0);
 }
 #endif
 
-#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
-// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
-// Direct of the struct members fixed bug #62.
-template <>
-EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
-  return a.m128_f32[0];
-}
-template <>
-EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
-  return a.m128d_f64[0];
-}
-template <>
-EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) {
-  int x = _mm_cvtsi128_si32(a);
-  return x;
-}
-template <>
-EIGEN_STRONG_INLINE uint32_t pfirst<Packet4ui>(const Packet4ui& a) {
-  uint32_t x = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(a));
-  return x;
-}
-#elif EIGEN_COMP_MSVC_STRICT
-// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
-template <>
-EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
-  float x = _mm_cvtss_f32(a);
-  return x;
-}
-template <>
-EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
-  double x = _mm_cvtsd_f64(a);
-  return x;
-}
-template <>
-EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) {
-  int x = _mm_cvtsi128_si32(a);
-  return x;
-}
-template <>
-EIGEN_STRONG_INLINE uint32_t pfirst<Packet4ui>(const Packet4ui& a) {
-  uint32_t x = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(a));
-  return x;
-}
-#else
-template <>
-EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
-  return _mm_cvtss_f32(a);
-}
-template <>
-EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
-  return _mm_cvtsd_f64(a);
-}
-template <>
-EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) {
-  return _mm_cvtsi128_si32(a);
-}
-template <>
-EIGEN_STRONG_INLINE uint32_t pfirst<Packet4ui>(const Packet4ui& a) {
-  return numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(a));
-}
-#endif
-template <>
-EIGEN_STRONG_INLINE bool pfirst<Packet16b>(const Packet16b& a) {
-  int x = _mm_cvtsi128_si32(a);
-  return static_cast<bool>(x & 1);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
-  return _mm_shuffle_ps(a, a, 0x1B);
-}
-template <>
-EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) {
-  return _mm_shuffle_pd(a, a, 0x1);
-}
-template <>
-EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
-  return _mm_shuffle_epi32(a, 0x1B);
-}
-template <>
-EIGEN_STRONG_INLINE Packet4ui preverse(const Packet4ui& a) {
-  return _mm_shuffle_epi32(a, 0x1B);
-}
-template <>
-EIGEN_STRONG_INLINE Packet16b preverse(const Packet16b& a) {
-#ifdef EIGEN_VECTORIZE_SSSE3
-  __m128i mask = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-  return _mm_shuffle_epi8(a, mask);
-#else
-  Packet16b tmp = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3));
-  tmp = _mm_shufflehi_epi16(_mm_shufflelo_epi16(tmp, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1));
-  return _mm_or_si128(_mm_slli_epi16(tmp, 8), _mm_srli_epi16(tmp, 8));
-#endif
-}
-
 template <>
 EIGEN_STRONG_INLINE Packet4f pfrexp<Packet4f>(const Packet4f& a, Packet4f& exponent) {
   return pfrexp_generic(a, exponent);
@@ -1610,6 +1835,7 @@
 
 // We specialize pldexp here, since the generic implementation uses Packet2l, which is not well
 // supported by SSE, and has more range than is needed for exponents.
+// TODO(rmlarsen): Remove this specialization once Packet2l has support for casting.
 template <>
 EIGEN_STRONG_INLINE Packet2d pldexp<Packet2d>(const Packet2d& a, const Packet2d& exponent) {
   // Clamp exponent to [-2099, 2099]
@@ -1690,6 +1916,11 @@
   // #endif
 }
 
+template <>
+EIGEN_STRONG_INLINE int64_t predux<Packet2l>(const Packet2l& a) {
+  return pfirst<Packet2l>(_mm_add_epi64(a, _mm_unpackhi_epi64(a, a)));
+}
+
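
Illustrative sketch (not part of the patch): _mm_unpackhi_epi64(a, a) broadcasts the high lane, so a single paddq leaves a[0] + a[1] in lane 0 for pfirst to extract. With hypothetical values:

    Eigen::internal::Packet2l p = _mm_set_epi64x(7, 35);                // lanes: {35, 7}
    int64_t s = Eigen::internal::predux<Eigen::internal::Packet2l>(p);  // s == 42
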
 #ifdef EIGEN_VECTORIZE_SSSE3
 template <>
 EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a) {
@@ -1701,7 +1932,6 @@
   Packet4ui tmp0 = _mm_hadd_epi32(a, a);
   return pfirst<Packet4ui>(_mm_hadd_epi32(tmp0, tmp0));
 }
-
 #else
 template <>
 EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a) {
@@ -1734,9 +1964,15 @@
   return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a, a)));
 }
 template <>
+EIGEN_STRONG_INLINE int64_t predux_mul<Packet2l>(const Packet2l& a) {
+  EIGEN_ALIGN16 int64_t aux[2];
+  pstore(aux, a);
+  return aux[0] * aux[1];
+}
+template <>
 EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a) {
   // after some experiments, it seems this is the fastest way to implement it
-  // for GCC (eg., reusing pmul is very slow !)
+  // for GCC (e.g., reusing pmul is very slow!)
   // TODO try to call _mm_mul_epu32 directly
   EIGEN_ALIGN16 int aux[4];
   pstore(aux, a);
@@ -1847,11 +2083,21 @@
 // }
 
 template <>
+EIGEN_STRONG_INLINE bool predux_any(const Packet2d& x) {
+  return _mm_movemask_pd(x) != 0x0;
+}
+
+template <>
 EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x) {
   return _mm_movemask_ps(x) != 0x0;
 }
 
 template <>
+EIGEN_STRONG_INLINE bool predux_any(const Packet2l& x) {
+  return _mm_movemask_pd(_mm_castsi128_pd(x)) != 0x0;
+}
+
+template <>
 EIGEN_STRONG_INLINE bool predux_any(const Packet4i& x) {
   return _mm_movemask_ps(_mm_castsi128_ps(x)) != 0x0;
 }
@@ -1860,17 +2106,23 @@
   return _mm_movemask_ps(_mm_castsi128_ps(x)) != 0x0;
 }
 
-EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f, 4>& kernel) {
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4f, 4>& kernel) {
   _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
 }
 
-EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2d, 2>& kernel) {
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2d, 2>& kernel) {
   __m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
   kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
   kernel.packet[1] = tmp;
 }
 
-EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4i, 4>& kernel) {
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2l, 2>& kernel) {
+  __m128i tmp = _mm_unpackhi_epi64(kernel.packet[0], kernel.packet[1]);
+  kernel.packet[0] = _mm_unpacklo_epi64(kernel.packet[0], kernel.packet[1]);
+  kernel.packet[1] = tmp;
+}
+
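
Illustrative sketch (not part of the patch): for the 2x2 case, one unpacklo/unpackhi pair is a full transpose. With hypothetical lane values:

    Eigen::internal::PacketBlock<Eigen::internal::Packet2l, 2> k;
    k.packet[0] = _mm_set_epi64x(2, 1);  // row 0: {1, 2}
    k.packet[1] = _mm_set_epi64x(4, 3);  // row 1: {3, 4}
    Eigen::internal::ptranspose(k);      // rows become {1, 3} and {2, 4}
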
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4i, 4>& kernel) {
   __m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
   __m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
   __m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
@@ -1881,11 +2133,11 @@
   kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
   kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
 }
-EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4ui, 4>& kernel) {
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4ui, 4>& kernel) {
   ptranspose((PacketBlock<Packet4i, 4>&)kernel);
 }
 
-EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet16b, 4>& kernel) {
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16b, 4>& kernel) {
   __m128i T0 = _mm_unpacklo_epi8(kernel.packet[0], kernel.packet[1]);
   __m128i T1 = _mm_unpackhi_epi8(kernel.packet[0], kernel.packet[1]);
   __m128i T2 = _mm_unpacklo_epi8(kernel.packet[2], kernel.packet[3]);
@@ -1896,7 +2148,7 @@
   kernel.packet[3] = _mm_unpackhi_epi16(T1, T3);
 }
 
-EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet16b, 16>& kernel) {
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16b, 16>& kernel) {
   // If we number the elements in the input thus:
   // kernel.packet[ 0] = {00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 0a, 0b, 0c, 0d, 0e, 0f}
   // kernel.packet[ 1] = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f}
@@ -1983,6 +2235,18 @@
 }
 
 template <>
+EIGEN_STRONG_INLINE Packet2l pblend(const Selector<2>& ifPacket, const Packet2l& thenPacket,
+                                    const Packet2l& elsePacket) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i select = _mm_set_epi64x(ifPacket.select[1], ifPacket.select[0]);
+  __m128i false_mask = pcmp_eq<Packet2l>(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
+#else
+  return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
+#endif
+}
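
Illustrative sketch (not part of the patch): pblend keeps thenPacket lanes where the selector entry is non-zero; false_mask is the inverted condition, which is why it feeds the blend's "else" operand. With assumed selector values:

    Eigen::internal::Selector<2> sel;
    sel.select[0] = true;   // lane 0 -> thenPacket
    sel.select[1] = false;  // lane 1 -> elsePacket
    Eigen::internal::Packet2l r =
        Eigen::internal::pblend(sel, _mm_set1_epi64x(-1), _mm_set1_epi64x(7));  // lanes: {-1, 7}
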
+template <>
 EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket,
                                     const Packet4i& elsePacket) {
   const __m128i zero = _mm_setzero_si128();
@@ -2189,11 +2453,6 @@
     HasMax    = 0,
     HasConj   = 0,
     HasSetLinear = 0,
-    HasSqrt = 0,
-    HasRsqrt = 0,
-    HasExp = 0,
-    HasLog = 0,
-    HasBlend = 0
   };
 };
 
diff --git a/Eigen/src/Core/arch/SSE/TypeCasting.h b/Eigen/src/Core/arch/SSE/TypeCasting.h
index cbc6d47..9a7732a 100644
--- a/Eigen/src/Core/arch/SSE/TypeCasting.h
+++ b/Eigen/src/Core/arch/SSE/TypeCasting.h
@@ -37,6 +37,13 @@
 struct type_casting_traits<double, int> : vectorized_type_casting_traits<double, int> {};
 template <>
 struct type_casting_traits<int, double> : vectorized_type_casting_traits<int, double> {};
+
+#ifndef EIGEN_VECTORIZE_AVX2
+template <>
+struct type_casting_traits<double, int64_t> : vectorized_type_casting_traits<double, int64_t> {};
+template <>
+struct type_casting_traits<int64_t, double> : vectorized_type_casting_traits<int64_t, double> {};
+#endif
 #endif
 
 template <>
@@ -80,6 +87,22 @@
 }
 
 template <>
+EIGEN_STRONG_INLINE Packet2l pcast<Packet2d, Packet2l>(const Packet2d& a) {
+#if EIGEN_ARCH_x86_64
+  return _mm_set_epi64x(_mm_cvttsd_si64(preverse(a)), _mm_cvttsd_si64(a));
+#else
+  return _mm_set_epi64x(static_cast<int64_t>(pfirst(preverse(a))), static_cast<int64_t>(pfirst(a)));
+#endif
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet2d pcast<Packet2l, Packet2d>(const Packet2l& a) {
+  EIGEN_ALIGN16 int64_t aux[2];
+  pstore(aux, a);
+  return _mm_set_pd(static_cast<double>(aux[1]), static_cast<double>(aux[0]));
+}
+
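
Illustrative sketch (not part of the patch): per the changelog entry "Use truncation rather than rounding when casting Packet2d to Packet2l", both paths truncate toward zero, matching scalar static_cast<int64_t>:

    Eigen::internal::Packet2d d = _mm_set_pd(-1.9, 1.9);  // lanes: {1.9, -1.9}
    Eigen::internal::Packet2l l =
        Eigen::internal::pcast<Eigen::internal::Packet2d, Eigen::internal::Packet2l>(d);
    // lanes: {1, -1}
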
+template <>
 EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
   return _mm_cvtepi32_ps(a);
 }
@@ -127,6 +150,15 @@
 }
 
 template <>
+EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2l>(const Packet2l& a) {
+  return _mm_castsi128_pd(a);
+}
+template <>
+EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2d>(const Packet2d& a) {
+  return _mm_castpd_si128(a);
+}
+
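
Illustrative sketch (not part of the patch): unlike pcast, preinterpret is a zero-cost bit reinterpretation, so the same input produces very different lanes:

    Eigen::internal::Packet2d d = _mm_set1_pd(1.0);
    Eigen::internal::Packet2l bits =
        Eigen::internal::preinterpret<Eigen::internal::Packet2l, Eigen::internal::Packet2d>(d);
    // each lane holds 0x3FF0000000000000, the IEEE-754 bit pattern of 1.0, not the integer 1
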
+template <>
 EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet2d>(const Packet2d& a) {
   return _mm_castpd_si128(a);
 }
@@ -140,6 +172,7 @@
 EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4ui>(const Packet4ui& a) {
   return Packet4i(a);
 }
+
 // Disable the following code since it's broken on too many platforms / compilers.
 // #elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)
 #if 0
diff --git a/Eigen/src/Core/arch/SVE/MathFunctions.h b/Eigen/src/Core/arch/SVE/MathFunctions.h
index b095275..8c8ed84 100644
--- a/Eigen/src/Core/arch/SVE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SVE/MathFunctions.h
@@ -39,8 +39,9 @@
 // Hyperbolic Tangent function.
 template <>
 EIGEN_STRONG_INLINE PacketXf ptanh<PacketXf>(const PacketXf& x) {
-  return internal::generic_fast_tanh_float(x);
+  return ptanh_float(x);
 }
+
 }  // end namespace internal
 }  // end namespace Eigen
 
diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h
index e8bd17d..9b89747 100644
--- a/Eigen/src/Core/arch/ZVector/Complex.h
+++ b/Eigen/src/Core/arch/ZVector/Complex.h
@@ -61,6 +61,7 @@
     HasMul = 1,
     HasDiv = 1,
     HasLog = 1,
+    HasExp = 1,
     HasNegate = 1,
     HasAbs = 0,
     HasAbs2 = 0,
@@ -436,6 +437,11 @@
   return plog_complex(a, b);
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
+  return pexp_complex(a, b);
+}
+
 EIGEN_STRONG_INLINE Packet2cf pcplxflip /*<Packet2cf>*/ (const Packet2cf& x) {
   Packet2cf res;
   res.cd[0] = pcplxflip(x.cd[0]);
diff --git a/Eigen/src/Core/arch/ZVector/MathFunctions.h b/Eigen/src/Core/arch/ZVector/MathFunctions.h
index 5c55350..32e0425 100644
--- a/Eigen/src/Core/arch/ZVector/MathFunctions.h
+++ b/Eigen/src/Core/arch/ZVector/MathFunctions.h
@@ -220,7 +220,7 @@
 // Hyperbolic Tangent function.
 template <>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f ptanh<Packet4f>(const Packet4f& x) {
-  return internal::generic_fast_tanh_float(x);
+  return ptanh_float(x);
 }
 
 }  // end namespace internal
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index a3fc44c..2f9b920 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -286,9 +286,10 @@
 template <typename Scalar>
 struct scalar_real_ref_op {
   typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type& operator()(const Scalar& a) const {
-    return numext::real_ref(*const_cast<Scalar*>(&a));
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type& operator()(const Scalar& a) const {
+    return numext::real_ref(a);
   }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type& operator()(Scalar& a) const { return numext::real_ref(a); }
 };
 template <typename Scalar>
 struct functor_traits<scalar_real_ref_op<Scalar>> {
@@ -303,8 +304,9 @@
 template <typename Scalar>
 struct scalar_imag_ref_op {
   typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type& operator()(const Scalar& a) const {
-    return numext::imag_ref(*const_cast<Scalar*>(&a));
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type& operator()(Scalar& a) const { return numext::imag_ref(a); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type& operator()(const Scalar& a) const {
+    return numext::imag_ref(a);
   }
 };
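
Illustrative sketch (not part of the patch): splitting the operator into const and non-const overloads removes the old const_cast, so a const argument now yields a const reference. Assuming std::complex scalars:

    const std::complex<float> z(1.f, 2.f);
    Eigen::internal::scalar_real_ref_op<std::complex<float>> op;
    const float& r = op(z);  // r == 1.f, without casting away constness
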
 template <typename Scalar>
@@ -1124,7 +1126,7 @@
 
 // TODO(rmlarsen): Enable the following on host when integer_packet is defined
 // for the relevant packet types.
-#ifdef EIGEN_GPU_CC
+#ifndef EIGEN_GPUCC
 
 /** \internal
  * \brief Template specialization of the logistic function for float.
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index 55fa5ff..e9d0cae 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -69,7 +69,7 @@
     gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
     gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
 
-#if defined(EIGEN_HAS_OPENMP) || defined(EIGEN_GEMM_THREADPOOL)
+#if !defined(EIGEN_USE_BLAS) && (defined(EIGEN_HAS_OPENMP) || defined(EIGEN_GEMM_THREADPOOL))
     if (info) {
       // this is the parallel version!
       int tid = info->logical_thread_id;
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
index f569907..e138535 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
@@ -84,7 +84,7 @@
                                         const EIGTYPE* /*rhs*/, Index /*rhsStride*/, EIGTYPE* res, Index resStride, \
                                         EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) {           \
       /* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/                                    \
-                                                                                                                    \
+      if (size == 0 || depth == 0) return;                                                                          \
       BlasIndex lda = convert_index<BlasIndex>(lhsStride), ldc = convert_index<BlasIndex>(resStride),               \
                 n = convert_index<BlasIndex>(size), k = convert_index<BlasIndex>(depth);                            \
       char uplo = ((IsLower) ? 'L' : 'U'), trans = ((AStorageOrder == RowMajor) ? 'T' : 'N');                       \
@@ -107,7 +107,7 @@
                                         const EIGTYPE* /*rhs*/, Index /*rhsStride*/, EIGTYPE* res, Index resStride, \
                                         EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) {           \
       typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType;                                          \
-                                                                                                                    \
+      if (size == 0 || depth == 0) return;                                                                          \
       BlasIndex lda = convert_index<BlasIndex>(lhsStride), ldc = convert_index<BlasIndex>(resStride),               \
                 n = convert_index<BlasIndex>(size), k = convert_index<BlasIndex>(depth);                            \
       char uplo = ((IsLower) ? 'L' : 'U'), trans = ((AStorageOrder == RowMajor) ? 'C' : 'N');                       \
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h b/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
index af64fd2..56743da 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
@@ -59,7 +59,7 @@
                     Index rhsStride, EIGTYPE* res, Index resIncr, Index resStride, EIGTYPE alpha,                   \
                     level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/, GemmParallelInfo<Index>* /*info = 0*/) {       \
       using std::conj;                                                                                              \
-                                                                                                                    \
+      if (rows == 0 || cols == 0 || depth == 0) return;                                                             \
       EIGEN_ONLY_USED_FOR_DEBUG(resIncr);                                                                           \
       eigen_assert(resIncr == 1);                                                                                   \
       char transa, transb;                                                                                          \
diff --git a/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h b/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
index 556c6ac..4010a0a 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
@@ -95,6 +95,7 @@
                                                                                                                     \
     static void run(Index rows, Index cols, const EIGTYPE* lhs, Index lhsStride, const EIGTYPE* rhs, Index rhsIncr, \
                     EIGTYPE* res, Index resIncr, EIGTYPE alpha) {                                                   \
+      if (rows == 0 || cols == 0) return;                                                                           \
       BlasIndex m = convert_index<BlasIndex>(rows), n = convert_index<BlasIndex>(cols),                             \
                 lda = convert_index<BlasIndex>(lhsStride), incx = convert_index<BlasIndex>(rhsIncr),                \
                 incy = convert_index<BlasIndex>(resIncr);                                                           \
@@ -111,8 +112,9 @@
         x_tmp = map_x.conjugate();                                                                                  \
         x_ptr = x_tmp.data();                                                                                       \
         incx = 1;                                                                                                   \
-      } else                                                                                                        \
+      } else {                                                                                                      \
         x_ptr = rhs;                                                                                                \
+      }                                                                                                             \
       BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda,               \
                (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy);     \
     }                                                                                                               \
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
index 25daba6..c0dbfd1 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
@@ -49,6 +49,7 @@
     static void run(Index rows, Index cols, const EIGTYPE* _lhs, Index lhsStride, const EIGTYPE* _rhs,           \
                     Index rhsStride, EIGTYPE* res, Index resIncr, Index resStride, EIGTYPE alpha,                \
                     level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) {                                           \
+      if (rows == 0 || cols == 0) return;                                                                        \
       EIGEN_ONLY_USED_FOR_DEBUG(resIncr);                                                                        \
       eigen_assert(resIncr == 1);                                                                                \
       char side = 'L', uplo = 'L';                                                                               \
@@ -91,6 +92,7 @@
     static void run(Index rows, Index cols, const EIGTYPE* _lhs, Index lhsStride, const EIGTYPE* _rhs,             \
                     Index rhsStride, EIGTYPE* res, Index resIncr, Index resStride, EIGTYPE alpha,                  \
                     level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) {                                             \
+      if (rows == 0 || cols == 0) return;                                                                          \
       EIGEN_ONLY_USED_FOR_DEBUG(resIncr);                                                                          \
       eigen_assert(resIncr == 1);                                                                                  \
       char side = 'L', uplo = 'L';                                                                                 \
@@ -164,6 +166,7 @@
     static void run(Index rows, Index cols, const EIGTYPE* _lhs, Index lhsStride, const EIGTYPE* _rhs,           \
                     Index rhsStride, EIGTYPE* res, Index resIncr, Index resStride, EIGTYPE alpha,                \
                     level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) {                                           \
+      if (rows == 0 || cols == 0) return;                                                                        \
       EIGEN_ONLY_USED_FOR_DEBUG(resIncr);                                                                        \
       eigen_assert(resIncr == 1);                                                                                \
       char side = 'R', uplo = 'L';                                                                               \
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h b/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
index c3311da..187c911 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
@@ -78,6 +78,7 @@
                                                                                                                    \
     static void run(Index size, const EIGTYPE* lhs, Index lhsStride, const EIGTYPE* _rhs, EIGTYPE* res,            \
                     EIGTYPE alpha) {                                                                               \
+      if (size == 0) return;                                                                                       \
       enum { IsRowMajor = StorageOrder == RowMajor ? 1 : 0, IsLower = UpLo == Lower ? 1 : 0 };                     \
       BlasIndex n = convert_index<BlasIndex>(size), lda = convert_index<BlasIndex>(lhsStride), incx = 1, incy = 1; \
       EIGTYPE beta(1);                                                                                             \
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h b/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
index 78e48ad..3d612b0 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
@@ -90,6 +90,7 @@
     static void run(Index _rows, Index _cols, Index _depth, const EIGTYPE* _lhs, Index lhsStride, const EIGTYPE* _rhs, \
                     Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha,                                     \
                     level3_blocking<EIGTYPE, EIGTYPE>& blocking) {                                                     \
+      if (_rows == 0 || _cols == 0 || _depth == 0) return;                                                             \
       Index diagSize = (std::min)(_rows, _depth);                                                                      \
       Index rows = IsLower ? _rows : diagSize;                                                                         \
       Index depth = IsLower ? diagSize : _depth;                                                                       \
@@ -211,6 +212,7 @@
     static void run(Index _rows, Index _cols, Index _depth, const EIGTYPE* _lhs, Index lhsStride, const EIGTYPE* _rhs, \
                     Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha,                                     \
                     level3_blocking<EIGTYPE, EIGTYPE>& blocking) {                                                     \
+      if (_rows == 0 || _cols == 0 || _depth == 0) return;                                                             \
       Index diagSize = (std::min)(_cols, _depth);                                                                      \
       Index rows = _rows;                                                                                              \
       Index depth = IsLower ? _depth : diagSize;                                                                       \
diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h
index 413f0ee..05a5827 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -287,21 +287,39 @@
 
     constexpr bool DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime == 1;
 
+    const RhsScalar* actualRhsPtr = actualRhs.data();
+
+    // Potentially create a temporary buffer to copy RHS to contiguous memory.
     gemv_static_vector_if<RhsScalar, ActualRhsTypeCleaned::SizeAtCompileTime,
                           ActualRhsTypeCleaned::MaxSizeAtCompileTime, !DirectlyUseRhs>
-        static_rhs;
-
-    ei_declare_aligned_stack_constructed_variable(
-        RhsScalar, actualRhsPtr, actualRhs.size(),
-        DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
-
+        static_rhs;  // Fixed-sized array.
+    RhsScalar* buffer = nullptr;
     if (!DirectlyUseRhs) {
+      // Maybe use the fixed-sized buffer, otherwise allocate.
+      if (static_rhs.data() != nullptr) {
+        buffer = static_rhs.data();
+      } else {
+        // Allocate either with alloca or malloc.
+        Eigen::internal::check_size_for_overflow<RhsScalar>(actualRhs.size());
+#ifdef EIGEN_ALLOCA
+        buffer = static_cast<RhsScalar*>((sizeof(RhsScalar) * actualRhs.size() <= EIGEN_STACK_ALLOCATION_LIMIT)
+                                             ? EIGEN_ALIGNED_ALLOCA(sizeof(RhsScalar) * actualRhs.size())
+                                             : Eigen::internal::aligned_malloc(sizeof(RhsScalar) * actualRhs.size()));
+#else
+        buffer = static_cast<RhsScalar*>(Eigen::internal::aligned_malloc(sizeof(RhsScalar) * actualRhs.size()));
+#endif
+      }
 #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
       Index size = actualRhs.size();
       EIGEN_DENSE_STORAGE_CTOR_PLUGIN
 #endif
-      Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+      Map<typename ActualRhsTypeCleaned::PlainObject, Eigen::AlignedMax>(buffer, actualRhs.size()) = actualRhs;
+      actualRhsPtr = buffer;
     }
+    // Deallocate only if malloced.
+    Eigen::internal::aligned_stack_memory_handler<RhsScalar> buffer_stack_memory_destructor(
+        buffer, actualRhs.size(),
+        !DirectlyUseRhs && static_rhs.data() == nullptr &&
+            sizeof(RhsScalar) * actualRhs.size() > EIGEN_STACK_ALLOCATION_LIMIT);
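
Illustrative sketch (not part of the patch): this is Eigen's usual stack-or-heap idiom, shown standalone; buffers of at most EIGEN_STACK_ALLOCATION_LIMIT bytes come from alloca and free themselves, larger ones come from aligned_malloc and must be released. Assuming EIGEN_ALLOCA is defined and n is the element count:

    std::size_t bytes = sizeof(RhsScalar) * n;
    RhsScalar* buf = (bytes <= EIGEN_STACK_ALLOCATION_LIMIT)
                         ? static_cast<RhsScalar*>(EIGEN_ALIGNED_ALLOCA(bytes))
                         : static_cast<RhsScalar*>(Eigen::internal::aligned_malloc(bytes));
    // ... use buf ...
    if (bytes > EIGEN_STACK_ALLOCATION_LIMIT) Eigen::internal::aligned_free(buf);
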
 
     internal::triangular_matrix_vector_product<Index, Mode, LhsScalar, LhsBlasTraits::NeedToConjugate, RhsScalar,
                                                RhsBlasTraits::NeedToConjugate, RowMajor>::run(actualLhs.rows(),
diff --git a/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h b/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
index 0c1d56b..1de6880 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
@@ -87,6 +87,7 @@
     };                                                                                                               \
     static void run(Index rows_, Index cols_, const EIGTYPE* lhs_, Index lhsStride, const EIGTYPE* rhs_,             \
                     Index rhsIncr, EIGTYPE* res_, Index resIncr, EIGTYPE alpha) {                                    \
+      if (rows_ == 0 || cols_ == 0) return;                                                                          \
       if (ConjLhs || IsZeroDiag) {                                                                                   \
         triangular_matrix_vector_product<Index, Mode, EIGTYPE, ConjLhs, EIGTYPE, ConjRhs, ColMajor, BuiltIn>::run(   \
             rows_, cols_, lhs_, lhsStride, rhs_, rhsIncr, res_, resIncr, alpha);                                     \
@@ -183,6 +184,7 @@
     };                                                                                                               \
     static void run(Index rows_, Index cols_, const EIGTYPE* lhs_, Index lhsStride, const EIGTYPE* rhs_,             \
                     Index rhsIncr, EIGTYPE* res_, Index resIncr, EIGTYPE alpha) {                                    \
+      if (rows_ == 0 || cols_ == 0) return;                                                                          \
       if (IsZeroDiag) {                                                                                              \
         triangular_matrix_vector_product<Index, Mode, EIGTYPE, ConjLhs, EIGTYPE, ConjRhs, RowMajor, BuiltIn>::run(   \
             rows_, cols_, lhs_, lhsStride, rhs_, rhsIncr, res_, resIncr, alpha);                                     \
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h b/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
index ce8fcb9..9cc15fb 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
@@ -52,6 +52,7 @@
     };                                                                                                              \
     static void run(Index size, Index otherSize, const EIGTYPE* _tri, Index triStride, EIGTYPE* _other,             \
                     Index otherIncr, Index otherStride, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) {          \
+      if (size == 0 || otherSize == 0) return;                                                                      \
       EIGEN_ONLY_USED_FOR_DEBUG(otherIncr);                                                                         \
       eigen_assert(otherIncr == 1);                                                                                 \
       BlasIndex m = convert_index<BlasIndex>(size), n = convert_index<BlasIndex>(otherSize), lda, ldb;              \
@@ -110,6 +111,7 @@
     };                                                                                                              \
     static void run(Index size, Index otherSize, const EIGTYPE* _tri, Index triStride, EIGTYPE* _other,             \
                     Index otherIncr, Index otherStride, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) {          \
+      if (size == 0 || otherSize == 0) return;                                                                      \
       EIGEN_ONLY_USED_FOR_DEBUG(otherIncr);                                                                         \
       eigen_assert(otherIncr == 1);                                                                                 \
       BlasIndex m = convert_index<BlasIndex>(otherSize), n = convert_index<BlasIndex>(size), lda, ldb;              \
diff --git a/Eigen/src/Core/util/ConfigureVectorization.h b/Eigen/src/Core/util/ConfigureVectorization.h
index e692438..1c72173 100644
--- a/Eigen/src/Core/util/ConfigureVectorization.h
+++ b/Eigen/src/Core/util/ConfigureVectorization.h
@@ -266,6 +266,9 @@
 #ifdef __AVX512BF16__
 #define EIGEN_VECTORIZE_AVX512BF16
 #endif
+#ifdef __AVX512VL__
+#define EIGEN_VECTORIZE_AVX512VL
+#endif
 #ifdef __AVX512FP16__
 #ifdef __AVX512VL__
 #define EIGEN_VECTORIZE_AVX512FP16
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index 8b06c67..9f4a2d8 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -29,9 +29,9 @@
  */
 const int DynamicIndex = 0xffffff;
 
-/** This value means that the increment to go from one value to another in a sequence is not constant for each step.
+/** This value means that the requested value is not defined.
  */
-const int UndefinedIncr = 0xfffffe;
+const int Undefined = 0xfffffe;
 
 /** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm<int>().
  * The value Infinity there means the L-infinity norm.
diff --git a/Eigen/src/Core/util/EmulateArray.h b/Eigen/src/Core/util/EmulateArray.h
index 2b11552..f2fd10b 100644
--- a/Eigen/src/Core/util/EmulateArray.h
+++ b/Eigen/src/Core/util/EmulateArray.h
@@ -27,16 +27,14 @@
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE iterator end() { return values + n; }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const_iterator end() const { return values + n; }
 
-#if !defined(EIGEN_GPUCC)
   typedef std::reverse_iterator<iterator> reverse_iterator;
   typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE reverse_iterator rbegin() { return reverse_iterator(end()); }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
+  EIGEN_STRONG_INLINE reverse_iterator rbegin() { return reverse_iterator(end()); }
+  EIGEN_STRONG_INLINE const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE reverse_iterator rend() { return reverse_iterator(begin()); }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
-#endif
+  EIGEN_STRONG_INLINE reverse_iterator rend() { return reverse_iterator(begin()); }
+  EIGEN_STRONG_INLINE const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& operator[](size_t index) {
     eigen_internal_assert(index < size());
@@ -204,19 +202,19 @@
 
 template <class T, std::size_t N>
 struct array_size<array<T, N> > {
-  enum { value = N };
+  static constexpr Index value = N;
 };
 template <class T, std::size_t N>
 struct array_size<array<T, N>&> {
-  enum { value = N };
+  static constexpr Index value = N;
 };
 template <class T, std::size_t N>
 struct array_size<const array<T, N> > {
-  enum { value = N };
+  static constexpr Index value = N;
 };
 template <class T, std::size_t N>
 struct array_size<const array<T, N>&> {
-  enum { value = N };
+  static constexpr Index value = N;
 };
 
 }  // end namespace internal
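
Illustrative sketch (not part of the patch): making value a constexpr Index (changelog: "Change array_size result from enum to constexpr") lets it participate directly in compile-time expressions:

    static_assert(Eigen::internal::array_size<Eigen::array<int, 4>>::value == 4,
                  "size known at compile time");
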
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index c312939..2f2ba9b 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -91,6 +91,8 @@
 class IndexedView;
 template <typename XprType, int Rows = Dynamic, int Cols = Dynamic, int Order = 0>
 class Reshaped;
+template <typename FirstType, typename SizeType, typename IncrType>
+class ArithmeticSequence;
 
 template <typename MatrixType, int Size = Dynamic>
 class VectorBlock;
diff --git a/Eigen/src/Core/util/IndexedViewHelper.h b/Eigen/src/Core/util/IndexedViewHelper.h
index 3b45108..c187002 100644
--- a/Eigen/src/Core/util/IndexedViewHelper.h
+++ b/Eigen/src/Core/util/IndexedViewHelper.h
@@ -17,6 +17,9 @@
 
 namespace internal {
 struct symbolic_last_tag {};
+
+struct all_t {};
+
 }  // namespace internal
 
 namespace placeholders {
@@ -42,126 +45,7 @@
  *
  * \sa end
  */
-static const last_t last;
-
-}  // namespace placeholders
-
-namespace internal {
-
-// Replace symbolic last/end "keywords" by their true runtime value
-inline Index eval_expr_given_size(Index x, Index /* size */) { return x; }
-
-template <int N>
-FixedInt<N> eval_expr_given_size(FixedInt<N> x, Index /*size*/) {
-  return x;
-}
-
-template <typename Derived>
-Index eval_expr_given_size(const symbolic::BaseExpr<Derived>& x, Index size) {
-  return x.derived().eval(Eigen::placeholders::last = size - 1);
-}
-
-// Extract increment/step at compile time
-template <typename T, typename EnableIf = void>
-struct get_compile_time_incr {
-  enum { value = UndefinedIncr };
-};
-
-// Analogue of std::get<0>(x), but tailored for our needs.
-template <typename T>
-EIGEN_CONSTEXPR Index first(const T& x) EIGEN_NOEXCEPT {
-  return x.first();
-}
-
-// IndexedViewCompatibleType/makeIndexedViewCompatible turn an arbitrary object of type T into something usable by
-// MatrixSlice The generic implementation is a no-op
-template <typename T, int XprSize, typename EnableIf = void>
-struct IndexedViewCompatibleType {
-  typedef T type;
-};
-
-template <typename T, typename Q>
-const T& makeIndexedViewCompatible(const T& x, Index /*size*/, Q) {
-  return x;
-}
-
-//--------------------------------------------------------------------------------
-// Handling of a single Index
-//--------------------------------------------------------------------------------
-
-struct SingleRange {
-  enum { SizeAtCompileTime = 1 };
-  SingleRange(Index val) : m_value(val) {}
-  Index operator[](Index) const { return m_value; }
-  static EIGEN_CONSTEXPR Index size() EIGEN_NOEXCEPT { return 1; }
-  Index first() const EIGEN_NOEXCEPT { return m_value; }
-  Index m_value;
-};
-
-template <>
-struct get_compile_time_incr<SingleRange> {
-  enum { value = 1 };  // 1 or 0 ??
-};
-
-// Turn a single index into something that looks like an array (i.e., that exposes a .size(), and operator[](int)
-// methods)
-template <typename T, int XprSize>
-struct IndexedViewCompatibleType<T, XprSize, std::enable_if_t<internal::is_integral<T>::value>> {
-  // Here we could simply use Array, but maybe it's less work for the compiler to use
-  // a simpler wrapper as SingleRange
-  // typedef Eigen::Array<Index,1,1> type;
-  typedef SingleRange type;
-};
-
-template <typename T, int XprSize>
-struct IndexedViewCompatibleType<T, XprSize, std::enable_if_t<symbolic::is_symbolic<T>::value>> {
-  typedef SingleRange type;
-};
-
-template <typename T>
-std::enable_if_t<symbolic::is_symbolic<T>::value, SingleRange> makeIndexedViewCompatible(const T& id, Index size,
-                                                                                         SpecializedType) {
-  return eval_expr_given_size(id, size);
-}
-
-//--------------------------------------------------------------------------------
-// Handling of all
-//--------------------------------------------------------------------------------
-
-struct all_t {
-  all_t() {}
-};
-
-// Convert a symbolic 'all' into a usable range type
-template <int XprSize>
-struct AllRange {
-  enum { SizeAtCompileTime = XprSize };
-  AllRange(Index size = XprSize) : m_size(size) {}
-  EIGEN_CONSTEXPR Index operator[](Index i) const EIGEN_NOEXCEPT { return i; }
-  EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_size.value(); }
-  EIGEN_CONSTEXPR Index first() const EIGEN_NOEXCEPT { return 0; }
-  variable_if_dynamic<Index, XprSize> m_size;
-};
-
-template <int XprSize>
-struct IndexedViewCompatibleType<all_t, XprSize> {
-  typedef AllRange<XprSize> type;
-};
-
-template <typename XprSizeType>
-inline AllRange<get_fixed_value<XprSizeType>::value> makeIndexedViewCompatible(all_t, XprSizeType size,
-                                                                               SpecializedType) {
-  return AllRange<get_fixed_value<XprSizeType>::value>(size);
-}
-
-template <int Size>
-struct get_compile_time_incr<AllRange<Size>> {
-  enum { value = 1 };
-};
-
-}  // end namespace internal
-
-namespace placeholders {
+static constexpr const last_t last;
 
 typedef symbolic::AddExpr<symbolic::SymbolExpr<internal::symbolic_last_tag>,
                           symbolic::ValueExpr<Eigen::internal::FixedInt<1>>>
@@ -181,28 +65,251 @@
  * \sa last
  */
 #ifdef EIGEN_PARSED_BY_DOXYGEN
-static const auto lastp1 = last + fix<1>;
+static constexpr auto lastp1 = last + fix<1>;
 #else
 // Using a FixedExpr<1> expression is important here to make sure the compiler
 // can fully optimize the computation starting indices with zero overhead.
-static const lastp1_t lastp1(last + fix<1>());
+static constexpr lastp1_t lastp1(last + fix<1>());
 #endif
 
 /** \var end
  * \ingroup Core_Module
  * \sa lastp1
  */
-static const lastp1_t end = lastp1;
+static constexpr lastp1_t end = lastp1;
 
 /** \var all
  * \ingroup Core_Module
  * Can be used as a parameter to DenseBase::operator()(const RowIndices&, const ColIndices&) to index all rows or
  * columns
  */
-static const Eigen::internal::all_t all;
+static constexpr Eigen::internal::all_t all;
 
 }  // namespace placeholders
 
+namespace internal {
+
+// Evaluate a symbolic expression or constant given the "size" of an object, allowing
+// any symbols like `last` to be evaluated.  The default here assumes a dynamic constant.
+template <typename Expr, int SizeAtCompileTime, typename EnableIf = void>
+struct SymbolicExpressionEvaluator {
+  static constexpr Index ValueAtCompileTime = Undefined;
+  static Index eval(const Expr& expr, Index /*size*/) { return static_cast<Index>(expr); }
+};
+
+// Symbolic expression with size known at compile-time.
+template <typename Expr, int SizeAtCompileTime>
+struct SymbolicExpressionEvaluator<Expr, SizeAtCompileTime, std::enable_if_t<symbolic::is_symbolic<Expr>::value>> {
+  static constexpr Index ValueAtCompileTime =
+      Expr::Derived::eval_at_compile_time(Eigen::placeholders::last = fix<SizeAtCompileTime - 1>);
+  static Index eval(const Expr& expr, Index /*size*/) {
+    return expr.eval(Eigen::placeholders::last = fix<SizeAtCompileTime - 1>);
+  }
+};
+
+// Symbolic expression with dynamic size.
+template <typename Expr>
+struct SymbolicExpressionEvaluator<Expr, Dynamic, std::enable_if_t<symbolic::is_symbolic<Expr>::value>> {
+  static constexpr Index ValueAtCompileTime = Undefined;
+  static Index eval(const Expr& expr, Index size) { return expr.eval(Eigen::placeholders::last = size - 1); }
+};
+
+// Fixed int.
+template <int N, int SizeAtCompileTime>
+struct SymbolicExpressionEvaluator<FixedInt<N>, SizeAtCompileTime, void> {
+  static constexpr Index ValueAtCompileTime = static_cast<Index>(N);
+  static Index eval(const FixedInt<N>& /*expr*/, Index /*size*/) { return ValueAtCompileTime; }
+};
+
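
Illustrative sketch (not part of the patch): the specializations resolve `last` against a size known either at compile time or only at runtime. The dynamic-size path, with hypothetical values:

    auto expr = Eigen::placeholders::last - 1;  // symbolic index
    Eigen::Index i =
        Eigen::internal::SymbolicExpressionEvaluator<decltype(expr), Eigen::Dynamic>::eval(
            expr, /*size=*/10);
    // i == 8, since `last` binds to size - 1 == 9
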
+//--------------------------------------------------------------------------------
+// Handling of generic indices (e.g. array)
+//--------------------------------------------------------------------------------
+
+// Potentially wrap indices in a type that is better-suited for IndexedView evaluation.
+template <typename Indices, int NestedSizeAtCompileTime, typename EnableIf = void>
+struct IndexedViewHelperIndicesWrapper {
+  using type = Indices;
+  static const type& CreateIndexSequence(const Indices& indices, Index /*nested_size*/) { return indices; }
+};
+
+// Extract compile-time and runtime first, size, increments.
+template <typename Indices, typename EnableIf = void>
+struct IndexedViewHelper {
+  static constexpr Index FirstAtCompileTime = Undefined;
+  static constexpr Index SizeAtCompileTime = array_size<Indices>::value;
+  static constexpr Index IncrAtCompileTime = Undefined;
+
+  static constexpr Index first(const Indices& indices) { return static_cast<Index>(indices[0]); }
+  static constexpr Index size(const Indices& indices) { return index_list_size(indices); }
+  static constexpr Index incr(const Indices& /*indices*/) { return Undefined; }
+};
+
+//--------------------------------------------------------------------------------
+// Handling of ArithmeticSequence
+//--------------------------------------------------------------------------------
+
+template <Index FirstAtCompileTime_, Index SizeAtCompileTime_, Index IncrAtCompileTime_>
+class ArithmeticSequenceRange {
+ public:
+  static constexpr Index FirstAtCompileTime = FirstAtCompileTime_;
+  static constexpr Index SizeAtCompileTime = SizeAtCompileTime_;
+  static constexpr Index IncrAtCompileTime = IncrAtCompileTime_;
+
+  constexpr ArithmeticSequenceRange(Index first, Index size, Index incr) : first_{first}, size_{size}, incr_{incr} {}
+  constexpr Index operator[](Index i) const { return first() + i * incr(); }
+  constexpr Index first() const noexcept { return first_.value(); }
+  constexpr Index size() const noexcept { return size_.value(); }
+  constexpr Index incr() const noexcept { return incr_.value(); }
+
+ private:
+  variable_if_dynamicindex<Index, int(FirstAtCompileTime)> first_;
+  variable_if_dynamic<Index, int(SizeAtCompileTime)> size_;
+  variable_if_dynamicindex<Index, int(IncrAtCompileTime)> incr_;
+};
+
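
Illustrative sketch (not part of the patch): ArithmeticSequenceRange is the evaluated form of seq(first, size, incr), mapping entry i to first + i * incr and storing only the fields that are not fixed at compile time. With fully dynamic fields:

    Eigen::internal::ArithmeticSequenceRange<Eigen::DynamicIndex, Eigen::Dynamic,
                                             Eigen::DynamicIndex>
        r(/*first=*/2, /*size=*/5, /*incr=*/3);
    // r[0] == 2, r[4] == 14, r.size() == 5
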
+template <typename FirstType, typename SizeType, typename IncrType, int NestedSizeAtCompileTime>
+struct IndexedViewHelperIndicesWrapper<ArithmeticSequence<FirstType, SizeType, IncrType>, NestedSizeAtCompileTime,
+                                       void> {
+  static constexpr Index EvalFirstAtCompileTime =
+      SymbolicExpressionEvaluator<FirstType, NestedSizeAtCompileTime>::ValueAtCompileTime;
+  static constexpr Index EvalSizeAtCompileTime =
+      SymbolicExpressionEvaluator<SizeType, NestedSizeAtCompileTime>::ValueAtCompileTime;
+  static constexpr Index EvalIncrAtCompileTime =
+      SymbolicExpressionEvaluator<IncrType, NestedSizeAtCompileTime>::ValueAtCompileTime;
+
+  static constexpr Index FirstAtCompileTime =
+      (int(EvalFirstAtCompileTime) == Undefined) ? Index(DynamicIndex) : EvalFirstAtCompileTime;
+  static constexpr Index SizeAtCompileTime =
+      (int(EvalSizeAtCompileTime) == Undefined) ? Index(Dynamic) : EvalSizeAtCompileTime;
+  static constexpr Index IncrAtCompileTime =
+      (int(EvalIncrAtCompileTime) == Undefined) ? Index(DynamicIndex) : EvalIncrAtCompileTime;
+
+  using Indices = ArithmeticSequence<FirstType, SizeType, IncrType>;
+  using type = ArithmeticSequenceRange<FirstAtCompileTime, SizeAtCompileTime, IncrAtCompileTime>;
+
+  static type CreateIndexSequence(const Indices& indices, Index nested_size) {
+    Index first =
+        SymbolicExpressionEvaluator<FirstType, NestedSizeAtCompileTime>::eval(indices.firstObject(), nested_size);
+    Index size =
+        SymbolicExpressionEvaluator<SizeType, NestedSizeAtCompileTime>::eval(indices.sizeObject(), nested_size);
+    Index incr =
+        SymbolicExpressionEvaluator<IncrType, NestedSizeAtCompileTime>::eval(indices.incrObject(), nested_size);
+    return type(first, size, incr);
+  }
+};
+
+template <Index FirstAtCompileTime_, Index SizeAtCompileTime_, Index IncrAtCompileTime_>
+struct IndexedViewHelper<ArithmeticSequenceRange<FirstAtCompileTime_, SizeAtCompileTime_, IncrAtCompileTime_>, void> {
+ public:
+  using Indices = ArithmeticSequenceRange<FirstAtCompileTime_, SizeAtCompileTime_, IncrAtCompileTime_>;
+  static constexpr Index FirstAtCompileTime = Indices::FirstAtCompileTime;
+  static constexpr Index SizeAtCompileTime = Indices::SizeAtCompileTime;
+  static constexpr Index IncrAtCompileTime = Indices::IncrAtCompileTime;
+  static Index first(const Indices& indices) { return indices.first(); }
+  static Index size(const Indices& indices) { return indices.size(); }
+  static Index incr(const Indices& indices) { return indices.incr(); }
+};
+
+//--------------------------------------------------------------------------------
+// Handling of a single index.
+//--------------------------------------------------------------------------------
+
+template <Index ValueAtCompileTime>
+class SingleRange {
+ public:
+  static constexpr Index FirstAtCompileTime = ValueAtCompileTime;
+  static constexpr Index SizeAtCompileTime = Index(1);
+  static constexpr Index IncrAtCompileTime = Index(1);  // Needs to be 1 to be treated as block-like.
+
+  constexpr SingleRange(Index v) noexcept : value_(v) {}
+  constexpr Index operator[](Index) const noexcept { return first(); }
+  constexpr Index first() const noexcept { return value_.value(); }
+  constexpr Index size() const noexcept { return SizeAtCompileTime; }
+  constexpr Index incr() const noexcept { return IncrAtCompileTime; }
+
+ private:
+  variable_if_dynamicindex<Index, int(ValueAtCompileTime)> value_;
+};
+
+template <typename T>
+struct is_single_range : public std::false_type {};
+
+template <Index ValueAtCompileTime>
+struct is_single_range<SingleRange<ValueAtCompileTime>> : public std::true_type {};
+
+template <typename SingleIndex, int NestedSizeAtCompileTime>
+struct IndexedViewHelperIndicesWrapper<
+    SingleIndex, NestedSizeAtCompileTime,
+    std::enable_if_t<std::is_integral<SingleIndex>::value || symbolic::is_symbolic<SingleIndex>::value>> {
+  static constexpr Index EvalValueAtCompileTime =
+      SymbolicExpressionEvaluator<SingleIndex, NestedSizeAtCompileTime>::ValueAtCompileTime;
+  static constexpr Index ValueAtCompileTime =
+      (int(EvalValueAtCompileTime) == Undefined) ? Index(DynamicIndex) : EvalValueAtCompileTime;
+  using type = SingleRange<ValueAtCompileTime>;
+  static type CreateIndexSequence(const SingleIndex& index, Index nested_size) {
+    return type(SymbolicExpressionEvaluator<SingleIndex, NestedSizeAtCompileTime>::eval(index, nested_size));
+  }
+};
+
+template <int N, int NestedSizeAtCompileTime>
+struct IndexedViewHelperIndicesWrapper<FixedInt<N>, NestedSizeAtCompileTime, void> {
+  using type = SingleRange<Index(N)>;
+  static type CreateIndexSequence(const FixedInt<N>& /*index*/, Index /*nested_size*/) { return type(Index(N)); }
+};
+
+template <Index ValueAtCompileTime>
+struct IndexedViewHelper<SingleRange<ValueAtCompileTime>, void> {
+  using Indices = SingleRange<ValueAtCompileTime>;
+  static constexpr Index FirstAtCompileTime = Indices::FirstAtCompileTime;
+  static constexpr Index SizeAtCompileTime = Indices::SizeAtCompileTime;
+  static constexpr Index IncrAtCompileTime = Indices::IncrAtCompileTime;
+
+  static constexpr Index first(const Indices& indices) { return indices.first(); }
+  static constexpr Index size(const Indices& /*indices*/) { return SizeAtCompileTime; }
+  static constexpr Index incr(const Indices& /*indices*/) { return IncrAtCompileTime; }
+};
+
+//--------------------------------------------------------------------------------
+// Handling of all
+//--------------------------------------------------------------------------------
+
+// Convert a symbolic 'all' into a usable range type
+template <Index SizeAtCompileTime_>
+class AllRange {
+ public:
+  static constexpr Index FirstAtCompileTime = Index(0);
+  static constexpr Index SizeAtCompileTime = SizeAtCompileTime_;
+  static constexpr Index IncrAtCompileTime = Index(1);
+  constexpr AllRange(Index size) : size_(size) {}
+  constexpr Index operator[](Index i) const noexcept { return i; }
+  constexpr Index first() const noexcept { return FirstAtCompileTime; }
+  constexpr Index size() const noexcept { return size_.value(); }
+  constexpr Index incr() const noexcept { return IncrAtCompileTime; }
+
+ private:
+  variable_if_dynamic<Index, int(SizeAtCompileTime)> size_;
+};
+
+template <int NestedSizeAtCompileTime>
+struct IndexedViewHelperIndicesWrapper<all_t, NestedSizeAtCompileTime, void> {
+  using type = AllRange<Index(NestedSizeAtCompileTime)>;
+  static type CreateIndexSequence(const all_t& /*indices*/, Index nested_size) { return type(nested_size); }
+};
+
+template <Index SizeAtCompileTime_>
+struct IndexedViewHelper<AllRange<SizeAtCompileTime_>, void> {
+  using Indices = AllRange<SizeAtCompileTime_>;
+  static constexpr Index FirstAtCompileTime = Indices::FirstAtCompileTime;
+  static constexpr Index SizeAtCompileTime = Indices::SizeAtCompileTime;
+  static constexpr Index IncrAtCompileTime = Indices::IncrAtCompileTime;
+
+  static Index first(const Indices& indices) { return indices.first(); }
+  static Index size(const Indices& indices) { return indices.size(); }
+  static Index incr(const Indices& indices) { return indices.incr(); }
+};
+
+}  // end namespace internal
+
 }  // end namespace Eigen
 
 #endif  // EIGEN_INDEXED_VIEW_HELPER_H
diff --git a/Eigen/src/Core/util/IntegralConstant.h b/Eigen/src/Core/util/IntegralConstant.h
index 279d553..2eb5fd9 100644
--- a/Eigen/src/Core/util/IntegralConstant.h
+++ b/Eigen/src/Core/util/IntegralConstant.h
@@ -54,65 +54,60 @@
 template <int N>
 class FixedInt {
  public:
-  static const int value = N;
-  EIGEN_CONSTEXPR operator int() const { return value; }
+  static constexpr int value = N;
+  constexpr operator int() const { return N; }
 
-  EIGEN_CONSTEXPR
-  FixedInt() = default;
+  constexpr FixedInt() = default;
+  constexpr FixedInt(std::integral_constant<int, N>) {}
 
-  EIGEN_CONSTEXPR
-  FixedInt(std::integral_constant<int, N>) {}
-
-  EIGEN_CONSTEXPR
-  FixedInt(VariableAndFixedInt<N> other) {
+  constexpr FixedInt(VariableAndFixedInt<N> other) {
 #ifndef EIGEN_INTERNAL_DEBUGGING
     EIGEN_UNUSED_VARIABLE(other);
 #endif
     eigen_internal_assert(int(other) == N);
   }
 
-  EIGEN_CONSTEXPR
-  FixedInt<-N> operator-() const { return FixedInt<-N>(); }
+  constexpr FixedInt<-N> operator-() const { return FixedInt<-N>(); }
 
   template <int M>
-  EIGEN_CONSTEXPR FixedInt<N + M> operator+(FixedInt<M>) const {
+  constexpr FixedInt<N + M> operator+(FixedInt<M>) const {
     return FixedInt<N + M>();
   }
 
   template <int M>
-  EIGEN_CONSTEXPR FixedInt<N - M> operator-(FixedInt<M>) const {
+  constexpr FixedInt<N - M> operator-(FixedInt<M>) const {
     return FixedInt<N - M>();
   }
 
   template <int M>
-  EIGEN_CONSTEXPR FixedInt<N * M> operator*(FixedInt<M>) const {
+  constexpr FixedInt<N * M> operator*(FixedInt<M>) const {
     return FixedInt<N * M>();
   }
 
   template <int M>
-  EIGEN_CONSTEXPR FixedInt<N / M> operator/(FixedInt<M>) const {
+  constexpr FixedInt<N / M> operator/(FixedInt<M>) const {
     return FixedInt<N / M>();
   }
 
   template <int M>
-  EIGEN_CONSTEXPR FixedInt<N % M> operator%(FixedInt<M>) const {
+  constexpr FixedInt<N % M> operator%(FixedInt<M>) const {
     return FixedInt<N % M>();
   }
 
   template <int M>
-  EIGEN_CONSTEXPR FixedInt<N | M> operator|(FixedInt<M>) const {
+  constexpr FixedInt<N | M> operator|(FixedInt<M>) const {
     return FixedInt<N | M>();
   }
 
   template <int M>
-  EIGEN_CONSTEXPR FixedInt<N & M> operator&(FixedInt<M>) const {
+  constexpr FixedInt<N & M> operator&(FixedInt<M>) const {
     return FixedInt<N & M>();
   }
 
   // Needed in C++14 to allow fix<N>():
-  EIGEN_CONSTEXPR FixedInt operator()() const { return *this; }
+  constexpr FixedInt operator()() const { return *this; }
 
-  VariableAndFixedInt<N> operator()(int val) const { return VariableAndFixedInt<N>(val); }
+  constexpr VariableAndFixedInt<N> operator()(int val) const { return VariableAndFixedInt<N>(val); }
 };
 
 /** \internal
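
With FixedInt fully constexpr, arithmetic on Eigen::fix<N> can now feed static_asserts and other
compile-time contexts directly. A minimal sketch (the slicing call at the end only illustrates that
fix<N> still composes with the public seqN/all API):

    #include <Eigen/Dense>

    int main() {
      // Operator arithmetic on fix<N> now yields a constexpr FixedInt.
      constexpr auto seven = Eigen::fix<3> + Eigen::fix<4>;
      static_assert(decltype(seven)::value == 7, "computed at compile time");

      // fix<N> continues to mix with runtime indexing, e.g. in slicing:
      Eigen::MatrixXd m = Eigen::MatrixXd::Random(10, 10);
      auto block = m(Eigen::seqN(Eigen::fix<0>, Eigen::fix<4>), Eigen::all);
      return block.rows() == 4 ? 0 : 1;
    }
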
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 030d99f..0236b51 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -710,7 +710,7 @@
     (EIGEN_COMP_ICC && EIGEN_COMP_ICC < 1500) || (EIGEN_COMP_NVCC && EIGEN_COMP_NVCC < 80000) ||       \
     (EIGEN_COMP_CLANG_STRICT && EIGEN_COMP_CLANG < 390) ||                                             \
     (EIGEN_COMP_CLANGAPPLE && EIGEN_COMP_CLANGAPPLE < 9000000) || (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC < 510)
-#error This compiler appears to be too old to be supported by Eigen
+#error Eigen requires at least c++14 support.
 #endif
 
 // Does the compiler support C99?
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 859d2f1..d2336ce 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -303,30 +303,30 @@
  */
 template <typename T, typename EnableIf = void>
 struct array_size {
-  enum { value = Dynamic };
+  static constexpr Index value = Dynamic;
 };
 
 template <typename T>
 struct array_size<T, std::enable_if_t<((T::SizeAtCompileTime & 0) == 0)>> {
-  enum { value = T::SizeAtCompileTime };
+  static constexpr Index value = T::SizeAtCompileTime;
 };
 
 template <typename T, int N>
 struct array_size<const T (&)[N]> {
-  enum { value = N };
+  static constexpr Index value = N;
 };
 template <typename T, int N>
 struct array_size<T (&)[N]> {
-  enum { value = N };
+  static constexpr Index value = N;
 };
 
 template <typename T, std::size_t N>
 struct array_size<const std::array<T, N>> {
-  enum { value = N };
+  static constexpr Index value = N;
 };
 template <typename T, std::size_t N>
 struct array_size<std::array<T, N>> {
-  enum { value = N };
+  static constexpr Index value = N;
 };
 
 /** \internal
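
Switching array_size from an enum to a static constexpr Index removes implicit int conversions in
index arithmetic. Note array_size is internal API; this sketch merely illustrates the new result type:

    #include <Eigen/Core>
    #include <array>
    #include <type_traits>

    // The value is now a genuine Eigen::Index rather than an unscoped enum.
    static_assert(Eigen::internal::array_size<std::array<int, 3>>::value == 3, "");
    static_assert(std::is_same<decltype(Eigen::internal::array_size<std::array<int, 3>>::value),
                               const Eigen::Index>::value, "");

    int main() { return 0; }
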
diff --git a/Eigen/src/Core/util/SymbolicIndex.h b/Eigen/src/Core/util/SymbolicIndex.h
index 136942c..befb485 100644
--- a/Eigen/src/Core/util/SymbolicIndex.h
+++ b/Eigen/src/Core/util/SymbolicIndex.h
@@ -44,6 +44,8 @@
 
 template <typename Tag>
 class Symbol;
+template <typename Tag, typename Type>
+class SymbolValue;
 template <typename Arg0>
 class NegateExpr;
 template <typename Arg1, typename Arg2>
@@ -52,136 +54,123 @@
 class ProductExpr;
 template <typename Arg1, typename Arg2>
 class QuotientExpr;
-
-// A simple wrapper around an integral value to provide the eval method.
-// We could also use a free-function symbolic_eval...
 template <typename IndexType = Index>
-class ValueExpr {
- public:
-  ValueExpr(IndexType val) : m_value(val) {}
-  template <typename T>
-  IndexType eval_impl(const T&) const {
-    return m_value;
-  }
-
- protected:
-  IndexType m_value;
-};
-
-// Specialization for compile-time value,
-// It is similar to ValueExpr(N) but this version helps the compiler to generate better code.
-template <int N>
-class ValueExpr<internal::FixedInt<N> > {
- public:
-  ValueExpr() {}
-  template <typename T>
-  EIGEN_CONSTEXPR Index eval_impl(const T&) const {
-    return N;
-  }
-};
+class ValueExpr;
 
 /** \class BaseExpr
  * \ingroup Core_Module
  * Common base class of any symbolic expressions
  */
-template <typename Derived>
+template <typename Derived_>
 class BaseExpr {
  public:
-  const Derived& derived() const { return *static_cast<const Derived*>(this); }
+  using Derived = Derived_;
+  constexpr const Derived& derived() const { return *static_cast<const Derived*>(this); }
 
   /** Evaluate the expression given the \a values of the symbols.
    *
-   * \param values defines the values of the symbols, it can either be a SymbolValue or a std::tuple of SymbolValue
-   *               as constructed by SymbolExpr::operator= operator.
+   * \param values defines the values of the symbols, as constructed by SymbolExpr::operator= operator.
    *
    */
-  template <typename T>
-  Index eval(const T& values) const {
-    return derived().eval_impl(values);
+  template <typename... Tags, typename... Types>
+  constexpr Index eval(const SymbolValue<Tags, Types>&... values) const {
+    return derived().eval_impl(values...);
   }
 
-  template <typename... Types>
-  Index eval(Types&&... values) const {
-    return derived().eval_impl(std::make_tuple(values...));
+  /** Evaluate the expression at compile time given the \a values of the symbols.
+   *
+   * If a value is not known at compile-time, returns Eigen::Undefined.
+   *
+   */
+  template <typename... Tags, typename... Types>
+  static constexpr Index eval_at_compile_time(const SymbolValue<Tags, Types>&...) {
+    return Derived::eval_at_compile_time_impl(SymbolValue<Tags, Types>{}...);
   }
 
-  NegateExpr<Derived> operator-() const { return NegateExpr<Derived>(derived()); }
+  constexpr NegateExpr<Derived> operator-() const { return NegateExpr<Derived>(derived()); }
 
-  AddExpr<Derived, ValueExpr<> > operator+(Index b) const { return AddExpr<Derived, ValueExpr<> >(derived(), b); }
-  AddExpr<Derived, ValueExpr<> > operator-(Index a) const { return AddExpr<Derived, ValueExpr<> >(derived(), -a); }
-  ProductExpr<Derived, ValueExpr<> > operator*(Index a) const {
+  constexpr AddExpr<Derived, ValueExpr<>> operator+(Index b) const {
+    return AddExpr<Derived, ValueExpr<>>(derived(), b);
+  }
+  constexpr AddExpr<Derived, ValueExpr<>> operator-(Index a) const {
+    return AddExpr<Derived, ValueExpr<>>(derived(), -a);
+  }
+  constexpr ProductExpr<Derived, ValueExpr<>> operator*(Index a) const {
     return ProductExpr<Derived, ValueExpr<> >(derived(), a);
   }
-  QuotientExpr<Derived, ValueExpr<> > operator/(Index a) const {
+  constexpr QuotientExpr<Derived, ValueExpr<>> operator/(Index a) const {
     return QuotientExpr<Derived, ValueExpr<> >(derived(), a);
   }
 
-  friend AddExpr<Derived, ValueExpr<> > operator+(Index a, const BaseExpr& b) {
+  friend constexpr AddExpr<Derived, ValueExpr<>> operator+(Index a, const BaseExpr& b) {
     return AddExpr<Derived, ValueExpr<> >(b.derived(), a);
   }
-  friend AddExpr<NegateExpr<Derived>, ValueExpr<> > operator-(Index a, const BaseExpr& b) {
+  friend constexpr AddExpr<NegateExpr<Derived>, ValueExpr<>> operator-(Index a, const BaseExpr& b) {
     return AddExpr<NegateExpr<Derived>, ValueExpr<> >(-b.derived(), a);
   }
-  friend ProductExpr<ValueExpr<>, Derived> operator*(Index a, const BaseExpr& b) {
+  friend constexpr ProductExpr<ValueExpr<>, Derived> operator*(Index a, const BaseExpr& b) {
     return ProductExpr<ValueExpr<>, Derived>(a, b.derived());
   }
-  friend QuotientExpr<ValueExpr<>, Derived> operator/(Index a, const BaseExpr& b) {
+  friend constexpr QuotientExpr<ValueExpr<>, Derived> operator/(Index a, const BaseExpr& b) {
     return QuotientExpr<ValueExpr<>, Derived>(a, b.derived());
   }
 
   template <int N>
-  AddExpr<Derived, ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N>) const {
+  constexpr AddExpr<Derived, ValueExpr<internal::FixedInt<N>>> operator+(internal::FixedInt<N>) const {
     return AddExpr<Derived, ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >());
   }
   template <int N>
-  AddExpr<Derived, ValueExpr<internal::FixedInt<-N> > > operator-(internal::FixedInt<N>) const {
+  constexpr AddExpr<Derived, ValueExpr<internal::FixedInt<-N>>> operator-(internal::FixedInt<N>) const {
     return AddExpr<Derived, ValueExpr<internal::FixedInt<-N> > >(derived(), ValueExpr<internal::FixedInt<-N> >());
   }
   template <int N>
-  ProductExpr<Derived, ValueExpr<internal::FixedInt<N> > > operator*(internal::FixedInt<N>) const {
+  constexpr ProductExpr<Derived, ValueExpr<internal::FixedInt<N>>> operator*(internal::FixedInt<N>) const {
     return ProductExpr<Derived, ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >());
   }
   template <int N>
-  QuotientExpr<Derived, ValueExpr<internal::FixedInt<N> > > operator/(internal::FixedInt<N>) const {
+  constexpr QuotientExpr<Derived, ValueExpr<internal::FixedInt<N>>> operator/(internal::FixedInt<N>) const {
     return QuotientExpr<Derived, ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >());
   }
 
   template <int N>
-  friend AddExpr<Derived, ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N>, const BaseExpr& b) {
+  friend constexpr AddExpr<Derived, ValueExpr<internal::FixedInt<N>>> operator+(internal::FixedInt<N>,
+                                                                                const BaseExpr& b) {
     return AddExpr<Derived, ValueExpr<internal::FixedInt<N> > >(b.derived(), ValueExpr<internal::FixedInt<N> >());
   }
   template <int N>
-  friend AddExpr<NegateExpr<Derived>, ValueExpr<internal::FixedInt<N> > > operator-(internal::FixedInt<N>,
-                                                                                    const BaseExpr& b) {
+  friend constexpr AddExpr<NegateExpr<Derived>, ValueExpr<internal::FixedInt<N>>> operator-(internal::FixedInt<N>,
+                                                                                            const BaseExpr& b) {
     return AddExpr<NegateExpr<Derived>, ValueExpr<internal::FixedInt<N> > >(-b.derived(),
                                                                             ValueExpr<internal::FixedInt<N> >());
   }
   template <int N>
-  friend ProductExpr<ValueExpr<internal::FixedInt<N> >, Derived> operator*(internal::FixedInt<N>, const BaseExpr& b) {
+  friend constexpr ProductExpr<ValueExpr<internal::FixedInt<N>>, Derived> operator*(internal::FixedInt<N>,
+                                                                                    const BaseExpr& b) {
     return ProductExpr<ValueExpr<internal::FixedInt<N> >, Derived>(ValueExpr<internal::FixedInt<N> >(), b.derived());
   }
   template <int N>
-  friend QuotientExpr<ValueExpr<internal::FixedInt<N> >, Derived> operator/(internal::FixedInt<N>, const BaseExpr& b) {
+  friend constexpr QuotientExpr<ValueExpr<internal::FixedInt<N>>, Derived> operator/(internal::FixedInt<N>,
+                                                                                     const BaseExpr& b) {
     return QuotientExpr<ValueExpr<internal::FixedInt<N> >, Derived>(ValueExpr<internal::FixedInt<N> >(), b.derived());
   }
 
   template <typename OtherDerived>
-  AddExpr<Derived, OtherDerived> operator+(const BaseExpr<OtherDerived>& b) const {
+  constexpr AddExpr<Derived, OtherDerived> operator+(const BaseExpr<OtherDerived>& b) const {
     return AddExpr<Derived, OtherDerived>(derived(), b.derived());
   }
 
   template <typename OtherDerived>
-  AddExpr<Derived, NegateExpr<OtherDerived> > operator-(const BaseExpr<OtherDerived>& b) const {
+  constexpr AddExpr<Derived, NegateExpr<OtherDerived>> operator-(const BaseExpr<OtherDerived>& b) const {
     return AddExpr<Derived, NegateExpr<OtherDerived> >(derived(), -b.derived());
   }
 
   template <typename OtherDerived>
-  ProductExpr<Derived, OtherDerived> operator*(const BaseExpr<OtherDerived>& b) const {
+  constexpr ProductExpr<Derived, OtherDerived> operator*(const BaseExpr<OtherDerived>& b) const {
     return ProductExpr<Derived, OtherDerived>(derived(), b.derived());
   }
 
   template <typename OtherDerived>
-  QuotientExpr<Derived, OtherDerived> operator/(const BaseExpr<OtherDerived>& b) const {
+  constexpr QuotientExpr<Derived, OtherDerived> operator/(const BaseExpr<OtherDerived>& b) const {
     return QuotientExpr<Derived, OtherDerived>(derived(), b.derived());
   }
 };
@@ -193,21 +182,137 @@
   enum { value = internal::is_convertible<T, BaseExpr<T> >::value };
 };
 
+// A simple wrapper around an integral value to provide the eval method.
+// We could also use a free-function symbolic_eval...
+template <typename IndexType>
+class ValueExpr : public BaseExpr<ValueExpr<IndexType>> {
+ public:
+  constexpr ValueExpr() = default;
+  constexpr ValueExpr(IndexType val) : value_(val) {}
+  template <typename... Tags, typename... Types>
+  constexpr IndexType eval_impl(const SymbolValue<Tags, Types>&...) const {
+    return value_;
+  }
+  template <typename... Tags, typename... Types>
+  static constexpr IndexType eval_at_compile_time_impl(const SymbolValue<Tags, Types>&...) {
+    return IndexType(Undefined);
+  }
+
+ protected:
+  IndexType value_;
+};
+
+// Specialization for a compile-time value.
+// It is similar to ValueExpr(N), but this version helps the compiler generate better code.
+template <int N>
+class ValueExpr<internal::FixedInt<N>> : public BaseExpr<ValueExpr<internal::FixedInt<N>>> {
+ public:
+  constexpr ValueExpr() = default;
+  constexpr ValueExpr(internal::FixedInt<N>) {}
+  template <typename... Tags, typename... Types>
+  constexpr Index eval_impl(const SymbolValue<Tags, Types>&...) const {
+    return Index(N);
+  }
+  template <typename... Tags, typename... Types>
+  static constexpr Index eval_at_compile_time_impl(const SymbolValue<Tags, Types>&...) {
+    return Index(N);
+  }
+};
+
 /** Represents the actual value of a symbol identified by its tag
  *
  * It is the return type of SymbolExpr::operator=, and most of the time this is the only way it is used.
  */
+template <typename Tag, typename Type>
+class SymbolValue : public BaseExpr<SymbolValue<Tag, Type>> {};
+
 template <typename Tag>
-class SymbolValue {
+class SymbolValue<Tag, Index> : public BaseExpr<SymbolValue<Tag, Index>> {
  public:
+  constexpr SymbolValue() = default;
+
   /** Default constructor from the value \a val */
-  SymbolValue(Index val) : m_value(val) {}
+  constexpr SymbolValue(Index val) : value_(val) {}
 
   /** \returns the stored value of the symbol */
-  Index value() const { return m_value; }
+  constexpr Index value() const { return value_; }
+
+  /** \returns the stored value of the symbol at compile time, or Undefined if not known. */
+  static constexpr Index value_at_compile_time() { return Index(Undefined); }
+
+  template <typename... Tags, typename... Types>
+  constexpr Index eval_impl(const SymbolValue<Tags, Types>&...) const {
+    return value();
+  }
+
+  template <typename... Tags, typename... Types>
+  static constexpr Index eval_at_compile_time_impl(const SymbolValue<Tags, Types>&...) {
+    return value_at_compile_time();
+  }
 
  protected:
-  Index m_value;
+  Index value_;
+};
+
+template <typename Tag, int N>
+class SymbolValue<Tag, internal::FixedInt<N>> : public BaseExpr<SymbolValue<Tag, internal::FixedInt<N>>> {
+ public:
+  constexpr SymbolValue() = default;
+
+  /** Constructor from a compile-time value */
+  constexpr SymbolValue(internal::FixedInt<N>) {}
+
+  /** \returns the stored value of the symbol */
+  constexpr Index value() const { return static_cast<Index>(N); }
+
+  /** \returns the stored value of the symbol at compile time, or Undefined if not known. */
+  static constexpr Index value_at_compile_time() { return static_cast<Index>(N); }
+
+  template <typename... Tags, typename... Types>
+  constexpr Index eval_impl(const SymbolValue<Tags, Types>&...) const {
+    return value();
+  }
+
+  template <typename... Tags, typename... Types>
+  static constexpr Index eval_at_compile_time_impl(const SymbolValue<Tags, Types>&...) {
+    return value_at_compile_time();
+  }
+};
+
+// Find and return a symbol value based on the tag.
+template <typename Tag, typename... Types>
+struct EvalSymbolValueHelper;
+
+// Empty base case, symbol not found.
+template <typename Tag>
+struct EvalSymbolValueHelper<Tag> {
+  static constexpr Index eval_impl() {
+    eigen_assert(false && "Symbol not found.");
+    return Index(Undefined);
+  }
+  static constexpr Index eval_at_compile_time_impl() { return Index(Undefined); }
+};
+
+// We found a symbol value matching the provided Tag!
+template <typename Tag, typename Type, typename... OtherTypes>
+struct EvalSymbolValueHelper<Tag, SymbolValue<Tag, Type>, OtherTypes...> {
+  static constexpr Index eval_impl(const SymbolValue<Tag, Type>& symbol, const OtherTypes&...) {
+    return symbol.value();
+  }
+  static constexpr Index eval_at_compile_time_impl(const SymbolValue<Tag, Type>& symbol, const OtherTypes&...) {
+    return symbol.value_at_compile_time();
+  }
+};
+
+// First value does not match the tag; search recursively in the remaining values.
+template <typename Tag, typename T1, typename... OtherTypes>
+struct EvalSymbolValueHelper<Tag, T1, OtherTypes...> {
+  static constexpr Index eval_impl(const T1&, const OtherTypes&... values) {
+    return EvalSymbolValueHelper<Tag, OtherTypes...>::eval_impl(values...);
+  }
+  static constexpr Index eval_at_compile_time_impl(const T1&, const OtherTypes&...) {
+    return EvalSymbolValueHelper<Tag, OtherTypes...>::eval_at_compile_time_impl(OtherTypes{}...);
+  }
 };
 
 /** Expression of a symbol uniquely identified by the template parameter type \c tag */
@@ -217,32 +322,47 @@
   /** Alias to the template parameter \c tag */
   typedef tag Tag;
 
-  SymbolExpr() {}
+  constexpr SymbolExpr() = default;
 
   /** Associate the value \a val to the given symbol \c *this, uniquely identified by its \c Tag.
    *
    * The returned object should be passed to ExprBase::eval() to evaluate a given expression with this specified
    * runtime-time value.
    */
-  SymbolValue<Tag> operator=(Index val) const { return SymbolValue<Tag>(val); }
+  constexpr SymbolValue<Tag, Index> operator=(Index val) const { return SymbolValue<Tag, Index>(val); }
 
-  Index eval_impl(const SymbolValue<Tag>& values) const { return values.value(); }
+  template <int N>
+  constexpr SymbolValue<Tag, internal::FixedInt<N>> operator=(internal::FixedInt<N>) const {
+    return SymbolValue<Tag, internal::FixedInt<N>>{internal::FixedInt<N>{}};
+  }
 
-  // C++14 versions suitable for multiple symbols
-  template <typename... Types>
-  Index eval_impl(const std::tuple<Types...>& values) const {
-    return std::get<SymbolValue<Tag> >(values).value();
+  template <typename... Tags, typename... Types>
+  constexpr Index eval_impl(const SymbolValue<Tags, Types>&... values) const {
+    return EvalSymbolValueHelper<Tag, SymbolValue<Tags, Types>...>::eval_impl(values...);
+  }
+
+  template <typename... Tags, typename... Types>
+  static constexpr Index eval_at_compile_time_impl(const SymbolValue<Tags, Types>&...) {
+    return EvalSymbolValueHelper<Tag, SymbolValue<Tags, Types>...>::eval_at_compile_time_impl(
+        SymbolValue<Tags, Types>{}...);
   }
 };
 
 template <typename Arg0>
 class NegateExpr : public BaseExpr<NegateExpr<Arg0> > {
  public:
-  NegateExpr(const Arg0& arg0) : m_arg0(arg0) {}
+  constexpr NegateExpr() = default;
+  constexpr NegateExpr(const Arg0& arg0) : m_arg0(arg0) {}
 
-  template <typename T>
-  Index eval_impl(const T& values) const {
-    return -m_arg0.eval_impl(values);
+  template <typename... Tags, typename... Types>
+  constexpr Index eval_impl(const SymbolValue<Tags, Types>&... values) const {
+    return -m_arg0.eval_impl(values...);
+  }
+
+  template <typename... Tags, typename... Types>
+  static constexpr Index eval_at_compile_time_impl(const SymbolValue<Tags, Types>&...) {
+    constexpr Index v = Arg0::eval_at_compile_time_impl(SymbolValue<Tags, Types>{}...);
+    return (v == Undefined) ? Undefined : -v;
   }
 
  protected:
@@ -252,11 +372,19 @@
 template <typename Arg0, typename Arg1>
 class AddExpr : public BaseExpr<AddExpr<Arg0, Arg1> > {
  public:
-  AddExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
+  constexpr AddExpr() = default;
+  constexpr AddExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
 
-  template <typename T>
-  Index eval_impl(const T& values) const {
-    return m_arg0.eval_impl(values) + m_arg1.eval_impl(values);
+  template <typename... Tags, typename... Types>
+  constexpr Index eval_impl(const SymbolValue<Tags, Types>&... values) const {
+    return m_arg0.eval_impl(values...) + m_arg1.eval_impl(values...);
+  }
+
+  template <typename... Tags, typename... Types>
+  static constexpr Index eval_at_compile_time_impl(const SymbolValue<Tags, Types>&...) {
+    constexpr Index v0 = Arg0::eval_at_compile_time_impl(SymbolValue<Tags, Types>{}...);
+    constexpr Index v1 = Arg1::eval_at_compile_time_impl(SymbolValue<Tags, Types>{}...);
+    return (v0 == Undefined || v1 == Undefined) ? Undefined : v0 + v1;
   }
 
  protected:
@@ -267,11 +395,19 @@
 template <typename Arg0, typename Arg1>
 class ProductExpr : public BaseExpr<ProductExpr<Arg0, Arg1> > {
  public:
-  ProductExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
+  constexpr ProductExpr() = default;
+  constexpr ProductExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
 
-  template <typename T>
-  Index eval_impl(const T& values) const {
-    return m_arg0.eval_impl(values) * m_arg1.eval_impl(values);
+  template <typename... Tags, typename... Types>
+  constexpr Index eval_impl(const SymbolValue<Tags, Types>&... values) const {
+    return m_arg0.eval_impl(values...) * m_arg1.eval_impl(values...);
+  }
+
+  template <typename... Tags, typename... Types>
+  static constexpr Index eval_at_compile_time_impl(const SymbolValue<Tags, Types>&...) {
+    constexpr Index v0 = Arg0::eval_at_compile_time_impl(SymbolValue<Tags, Types>{}...);
+    constexpr Index v1 = Arg1::eval_at_compile_time_impl(SymbolValue<Tags, Types>{}...);
+    return (v0 == Undefined || v1 == Undefined) ? Undefined : v0 * v1;
   }
 
  protected:
@@ -282,11 +418,19 @@
 template <typename Arg0, typename Arg1>
 class QuotientExpr : public BaseExpr<QuotientExpr<Arg0, Arg1> > {
  public:
-  QuotientExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
+  constexpr QuotientExpr() = default;
+  constexpr QuotientExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
 
-  template <typename T>
-  Index eval_impl(const T& values) const {
-    return m_arg0.eval_impl(values) / m_arg1.eval_impl(values);
+  template <typename... Tags, typename... Types>
+  constexpr Index eval_impl(const SymbolValue<Tags, Types>&... values) const {
+    return m_arg0.eval_impl(values...) / m_arg1.eval_impl(values...);
+  }
+
+  template <typename... Tags, typename... Types>
+  static constexpr Index eval_at_compile_time_impl(const SymbolValue<Tags, Types>&...) {
+    constexpr Index v0 = Arg0::eval_at_compile_time_impl(SymbolValue<Tags, Types>{}...);
+    constexpr Index v1 = Arg1::eval_at_compile_time_impl(SymbolValue<Tags, Types>{}...);
+    return (v0 == Undefined || v1 == Undefined) ? Undefined : v0 / v1;
   }
 
  protected:
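
Taken together, the SymbolValue and eval_at_compile_time machinery lets a single expression be
evaluated either at runtime or at compile time, depending on how its symbols are bound. A hedged
sketch (x_tag is a made-up tag type; SymbolExpr and fix<N> are the public building blocks above):

    #include <Eigen/Core>

    struct x_tag {};  // hypothetical tag identifying the symbol

    int main() {
      constexpr Eigen::symbolic::SymbolExpr<x_tag> x{};
      auto expr = x * Eigen::fix<2> + Eigen::fix<1>;

      // Runtime: bind the symbol to an Index value.
      Eigen::Index r = expr.eval(x = 3);  // 7

      // Compile time: binding to fix<N> keeps the value constexpr; binding
      // to a runtime Index would yield Eigen::Undefined instead.
      constexpr Eigen::Index c = decltype(expr)::eval_at_compile_time(x = Eigen::fix<3>);
      static_assert(c == 7, "evaluated at compile time");
      return static_cast<int>(r - c);  // 0
    }
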
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 5b7bdc0..555faa1 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -484,7 +484,7 @@
                                       //      solution could be to count the number of temps?
     NAsInteger = n == Dynamic ? HugeCost : n,
     CostEval = (NAsInteger + 1) * ScalarReadCost + CoeffReadCost,
-    CostNoEval = NAsInteger * CoeffReadCost,
+    CostNoEval = int(NAsInteger) * int(CoeffReadCost),
     Evaluate = (int(evaluator<T>::Flags) & EvalBeforeNestingBit) || (int(CostEval) < int(CostNoEval))
   };
 
diff --git a/Eigen/src/Eigenvalues/RealSchur.h b/Eigen/src/Eigenvalues/RealSchur.h
index 970500c..5cef658 100644
--- a/Eigen/src/Eigenvalues/RealSchur.h
+++ b/Eigen/src/Eigenvalues/RealSchur.h
@@ -408,28 +408,29 @@
   shiftInfo.coeffRef(1) = m_matT.coeff(iu - 1, iu - 1);
   shiftInfo.coeffRef(2) = m_matT.coeff(iu, iu - 1) * m_matT.coeff(iu - 1, iu);
 
-  // Wilkinson's original ad hoc shift
-  if (iter == 10) {
-    exshift += shiftInfo.coeff(0);
-    for (Index i = 0; i <= iu; ++i) m_matT.coeffRef(i, i) -= shiftInfo.coeff(0);
-    Scalar s = abs(m_matT.coeff(iu, iu - 1)) + abs(m_matT.coeff(iu - 1, iu - 2));
-    shiftInfo.coeffRef(0) = Scalar(0.75) * s;
-    shiftInfo.coeffRef(1) = Scalar(0.75) * s;
-    shiftInfo.coeffRef(2) = Scalar(-0.4375) * s * s;
-  }
-
-  // MATLAB's new ad hoc shift
-  if (iter == 30) {
-    Scalar s = (shiftInfo.coeff(1) - shiftInfo.coeff(0)) / Scalar(2.0);
-    s = s * s + shiftInfo.coeff(2);
-    if (s > Scalar(0)) {
-      s = sqrt(s);
-      if (shiftInfo.coeff(1) < shiftInfo.coeff(0)) s = -s;
-      s = s + (shiftInfo.coeff(1) - shiftInfo.coeff(0)) / Scalar(2.0);
-      s = shiftInfo.coeff(0) - shiftInfo.coeff(2) / s;
-      exshift += s;
-      for (Index i = 0; i <= iu; ++i) m_matT.coeffRef(i, i) -= s;
-      shiftInfo.setConstant(Scalar(0.964));
+  // Alternate exceptional shifting strategy every 16 iterations.
+  if (iter % 16 == 0) {
+    // Wilkinson's original ad hoc shift
+    if (iter % 32 != 0) {
+      exshift += shiftInfo.coeff(0);
+      for (Index i = 0; i <= iu; ++i) m_matT.coeffRef(i, i) -= shiftInfo.coeff(0);
+      Scalar s = abs(m_matT.coeff(iu, iu - 1)) + abs(m_matT.coeff(iu - 1, iu - 2));
+      shiftInfo.coeffRef(0) = Scalar(0.75) * s;
+      shiftInfo.coeffRef(1) = Scalar(0.75) * s;
+      shiftInfo.coeffRef(2) = Scalar(-0.4375) * s * s;
+    } else {
+      // MATLAB's new ad hoc shift
+      Scalar s = (shiftInfo.coeff(1) - shiftInfo.coeff(0)) / Scalar(2.0);
+      s = s * s + shiftInfo.coeff(2);
+      if (s > Scalar(0)) {
+        s = sqrt(s);
+        if (shiftInfo.coeff(1) < shiftInfo.coeff(0)) s = -s;
+        s = s + (shiftInfo.coeff(1) - shiftInfo.coeff(0)) / Scalar(2.0);
+        s = shiftInfo.coeff(0) - shiftInfo.coeff(2) / s;
+        exshift += s;
+        for (Index i = 0; i <= iu; ++i) m_matT.coeffRef(i, i) -= s;
+        shiftInfo.setConstant(Scalar(0.964));
+      }
     }
   }
 }
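
The fixed iteration-10/iteration-30 triggers are replaced by a repeating 16-iteration cadence, so
matrices needing many QR sweeps keep receiving exceptional shifts. A tiny standalone sketch of the
resulting schedule (plain arithmetic mirroring the predicates above, no Eigen API):

    #include <cstdio>

    int main() {
      // Every 16th iteration applies an exceptional shift, alternating
      // Wilkinson's and MATLAB's ad hoc strategies.
      for (int iter = 1; iter <= 96; ++iter) {
        if (iter % 16 != 0) continue;
        const bool wilkinson = (iter % 32 != 0);
        std::printf("iter %2d: %s shift\n", iter, wilkinson ? "Wilkinson" : "MATLAB");
      }
      // Prints Wilkinson at 16, 48, 80 and MATLAB at 32, 64, 96.
      return 0;
    }
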
diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h
index a4e76d9..e97a8f2 100644
--- a/Eigen/src/Geometry/AlignedBox.h
+++ b/Eigen/src/Geometry/AlignedBox.h
@@ -173,7 +173,7 @@
   }
 
   /** \returns the volume of the bounding box */
-  EIGEN_DEVICE_FUNC inline Scalar volume() const { return sizes().prod(); }
+  EIGEN_DEVICE_FUNC inline Scalar volume() const { return isEmpty() ? Scalar(0) : sizes().prod(); }
 
   /** \returns an expression for the bounding box diagonal vector
    * if the length of the diagonal is needed: diagonal().norm()
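
A small sketch of the guard's effect: an empty box previously reported sizes().prod() over an
ill-defined (possibly negative) span, whereas it now reports exactly zero.

    #include <Eigen/Geometry>
    #include <cassert>

    int main() {
      Eigen::AlignedBox3f box;  // default-constructed boxes are empty (min > max)
      assert(box.isEmpty());
      assert(box.volume() == 0.0f);  // now well-defined for empty boxes

      box.extend(Eigen::Vector3f(0, 0, 0));
      box.extend(Eigen::Vector3f(1, 2, 3));
      assert(box.volume() == 6.0f);  // 1 * 2 * 3
      return 0;
    }
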
diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h
index 34399a7..a8e0502 100644
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -98,7 +98,7 @@
 namespace internal {
 
 template <int Arch, typename VectorLhs, typename VectorRhs, typename Scalar = typename VectorLhs::Scalar,
-          bool Vectorizable = bool((VectorLhs::Flags & VectorRhs::Flags) & PacketAccessBit)>
+          bool Vectorizable = bool((evaluator<VectorLhs>::Flags & evaluator<VectorRhs>::Flags) & PacketAccessBit)>
 struct cross3_impl {
   EIGEN_DEVICE_FUNC static inline typename internal::plain_matrix_type<VectorLhs>::type run(const VectorLhs& lhs,
                                                                                             const VectorRhs& rhs) {
diff --git a/Eigen/src/Geometry/arch/Geometry_SIMD.h b/Eigen/src/Geometry/arch/Geometry_SIMD.h
index ce3cfea..e8b210e 100644
--- a/Eigen/src/Geometry/arch/Geometry_SIMD.h
+++ b/Eigen/src/Geometry/arch/Geometry_SIMD.h
@@ -62,16 +62,19 @@
 
 template <typename VectorLhs, typename VectorRhs>
 struct cross3_impl<Architecture::Target, VectorLhs, VectorRhs, float, true> {
-  enum { ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment };
-  static inline typename plain_matrix_type<VectorLhs>::type run(const VectorLhs& lhs, const VectorRhs& rhs) {
+  using DstPlainType = typename plain_matrix_type<VectorLhs>::type;
+  static constexpr int DstAlignment = evaluator<DstPlainType>::Alignment;
+  static constexpr int LhsAlignment = evaluator<VectorLhs>::Alignment;
+  static constexpr int RhsAlignment = evaluator<VectorRhs>::Alignment;
+  static inline DstPlainType run(const VectorLhs& lhs, const VectorRhs& rhs) {
     evaluator<VectorLhs> lhs_eval(lhs);
     evaluator<VectorRhs> rhs_eval(rhs);
-    Packet4f a = lhs_eval.template packet<traits<VectorLhs>::Alignment, Packet4f>(0);
-    Packet4f b = rhs_eval.template packet<traits<VectorRhs>::Alignment, Packet4f>(0);
+    Packet4f a = lhs_eval.template packet<LhsAlignment, Packet4f>(0);
+    Packet4f b = rhs_eval.template packet<RhsAlignment, Packet4f>(0);
     Packet4f mul1 = pmul(vec4f_swizzle1(a, 1, 2, 0, 3), vec4f_swizzle1(b, 2, 0, 1, 3));
     Packet4f mul2 = pmul(vec4f_swizzle1(a, 2, 0, 1, 3), vec4f_swizzle1(b, 1, 2, 0, 3));
-    typename plain_matrix_type<VectorLhs>::type res;
-    pstoret<float, Packet4f, ResAlignment>(&res.x(), psub(mul1, mul2));
+    DstPlainType res;
+    pstoret<float, Packet4f, DstAlignment>(&res.x(), psub(mul1, mul2));
     return res;
   }
 };
diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h b/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h
index 14ae6ea..a97b905 100644
--- a/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h
+++ b/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h
@@ -32,17 +32,19 @@
  *
  * \implsparsesolverconcept
  *
- * It performs the following incomplete factorization: \f$ S P A P' S \approx L L' \f$
- * where L is a lower triangular factor, S is a diagonal scaling matrix, and P is a
- * fill-in reducing permutation as computed by the ordering method.
+ * It performs the following incomplete factorization: \f$ S P A P' S + \sigma I \approx L L' \f$
+ * where L is a lower triangular factor, S is a diagonal scaling matrix, P is a
+ * fill-in reducing permutation as computed by the ordering method, and \f$ \sigma \f$ is a shift
+ * for ensuring the decomposed matrix is positive definite.
  *
  * \b Shifting \b strategy: Let \f$ B = S P A P' S \f$  be the scaled matrix on which the factorization is carried out,
  * and \f$ \beta \f$ be the minimum value of the diagonal. If \f$ \beta > 0 \f$ then, the factorization is directly
- * performed on the matrix B. Otherwise, the factorization is performed on the shifted matrix \f$ B + (\sigma+|\beta| I
- * \f$ where \f$ \sigma \f$ is the initial shift value as returned and set by setInitialShift() method. The default
- * value is \f$ \sigma = 10^{-3} \f$. If the factorization fails, then the shift in doubled until it succeed or a
- * maximum of ten attempts. If it still fails, as returned by the info() method, then you can either increase the
- * initial shift, or better use another preconditioning technique.
+ * performed on the matrix B, and \f$ \sigma = 0 \f$. Otherwise, the factorization is performed on the shifted matrix
+ * \f$ B + \sigma I \f$ for a shifting factor \f$ \sigma \f$. We start with \f$ \sigma = \sigma_0 - \beta \f$, where
+ * \f$ \sigma_0 \f$ is the initial shift value as returned and set by the setInitialShift() method. The default value
+ * is \f$ \sigma_0 = 10^{-3} \f$. If the factorization fails, the shift is doubled until it succeeds or a maximum of
+ * ten attempts is reached. If it still fails, as reported by the info() method, you can either increase the initial
+ * shift or, better, use another preconditioning technique.
  *
  */
 template <typename Scalar, int UpLo_ = Lower, typename OrderingType_ = AMDOrdering<int> >
@@ -176,6 +178,9 @@
     return m_perm;
   }
 
+  /** \returns the final shift parameter from the computation */
+  RealScalar shift() const { return m_shift; }
+
  protected:
   FactorType m_L;             // The lower part stored in CSC
   VectorRx m_scale;           // The vector for scaling the matrix
@@ -184,6 +189,7 @@
   bool m_factorizationIsOk;
   ComputationInfo m_info;
   PermutationType m_perm;
+  RealScalar m_shift;  // The final shift parameter.
 
  private:
   inline void updateList(Ref<const VectorIx> colPtr, Ref<VectorIx> rowIdx, Ref<VectorSx> vals, const Index& col,
@@ -214,6 +220,20 @@
     m_L.template selfadjointView<Lower>() = mat.template selfadjointView<UpLo_>();
   }
 
+  // The algorithm will insert increasingly large shifts on the diagonal until
+  // factorization succeeds. Therefore we have to make sure that there is
+  // space in the data structure to store such values, even if the original
+  // matrix has a zero on the diagonal.
+  bool modified = false;
+  for (Index i = 0; i < mat.cols(); ++i) {
+    bool inserted = false;
+    m_L.findOrInsertCoeff(i, i, &inserted);
+    if (inserted) {
+      modified = true;
+    }
+  }
+  if (modified) m_L.makeCompressed();
+
   Index n = m_L.cols();
   Index nnz = m_L.nonZeros();
   Map<VectorSx> vals(m_L.valuePtr(), nnz);           // values
@@ -257,8 +277,8 @@
 
   FactorType L_save = m_L;
 
-  RealScalar shift = 0;
-  if (mindiag <= RealScalar(0.)) shift = m_initialShift - mindiag;
+  m_shift = RealScalar(0);
+  if (mindiag <= RealScalar(0.)) m_shift = m_initialShift - mindiag;
 
   m_info = NumericalIssue;
 
@@ -266,7 +286,7 @@
   int iter = 0;
   do {
     // Apply the shift to the diagonal elements of the matrix
-    for (Index j = 0; j < n; j++) vals[colPtr[j]] += shift;
+    for (Index j = 0; j < n; j++) vals[colPtr[j]] += m_shift;
 
     // jki version of the Cholesky factorization
     Index j = 0;
@@ -310,7 +330,7 @@
         if (++iter >= 10) return;
 
         // increase shift
-        shift = numext::maxi(m_initialShift, RealScalar(2) * shift);
+        m_shift = numext::maxi(m_initialShift, RealScalar(2) * m_shift);
         // restore m_L, col_pattern, and listCol
         vals = Map<const VectorSx>(L_save.valuePtr(), nnz);
         rowIdx = Map<const VectorIx>(L_save.innerIndexPtr(), nnz);
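
A usage sketch tying the pieces together: the findOrInsertCoeff pass above keeps the factorization
from crashing on zero diagonals, and the final shift is now observable via shift(). The 1-D
Laplacian below is only a stand-in for a real application matrix:

    #include <Eigen/IterativeLinearSolvers>
    #include <Eigen/SparseCore>
    #include <iostream>
    #include <vector>

    int main() {
      const int n = 100;
      std::vector<Eigen::Triplet<double>> t;
      for (int i = 0; i < n; ++i) {
        t.emplace_back(i, i, 2.0);
        if (i + 1 < n) t.emplace_back(i + 1, i, -1.0);  // lower triangle only
      }
      Eigen::SparseMatrix<double> A(n, n);
      A.setFromTriplets(t.begin(), t.end());

      Eigen::ConjugateGradient<Eigen::SparseMatrix<double>, Eigen::Lower,
                               Eigen::IncompleteCholesky<double>> cg;
      cg.preconditioner().setInitialShift(1e-3);  // sigma_0 in the doc above
      cg.compute(A);
      Eigen::VectorXd x = cg.solve(Eigen::VectorXd::Ones(n));
      std::cout << "CG iterations: " << cg.iterations() << ", |x| = " << x.norm()
                << ", final IC shift: " << cg.preconditioner().shift() << "\n";
      return cg.info() == Eigen::Success ? 0 : 1;
    }
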
diff --git a/Eigen/src/Jacobi/Jacobi.h b/Eigen/src/Jacobi/Jacobi.h
index f53b8ec..2686a52 100644
--- a/Eigen/src/Jacobi/Jacobi.h
+++ b/Eigen/src/Jacobi/Jacobi.h
@@ -265,7 +265,7 @@
   internal::apply_rotation_in_the_plane(x, y, j);
 }
 
-/** \ingroup Jacobi_Module
+/** \jacobi_module
  * Applies the rotation in the plane \a j to the columns \a p and \a q of \c *this, i.e., it computes B = B * J
  * with \f$ B = \left ( \begin{array}{cc} \text{*this.col}(p) & \text{*this.col}(q) \end{array} \right ) \f$.
  *
diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h
index f1de6fd..092c29d 100644
--- a/Eigen/src/QR/ColPivHouseholderQR.h
+++ b/Eigen/src/QR/ColPivHouseholderQR.h
@@ -238,6 +238,20 @@
    */
   typename MatrixType::RealScalar logAbsDeterminant() const;
 
+  /** \returns the sign of the determinant of the matrix of which
+   * *this is the QR decomposition. It has only linear complexity
+   * (that is, O(n) where n is the dimension of the square matrix)
+   * as the QR decomposition has already been computed.
+   *
+   * \note This is only for square matrices.
+   *
+   * \note This method is useful to work around the risk of overflow/underflow that's inherent
+   * to determinant computation.
+   *
+   * \sa determinant(), absDeterminant(), logAbsDeterminant(), MatrixBase::determinant()
+   */
+  typename MatrixType::Scalar signDeterminant() const;
+
   /** \returns the rank of the matrix of which *this is the QR decomposition.
    *
    * \note This method has to determine which pivots should be considered nonzero.
@@ -428,7 +442,7 @@
   eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
   Scalar detQ;
   internal::householder_determinant<HCoeffsType, Scalar, NumTraits<Scalar>::IsComplex>::run(m_hCoeffs, detQ);
-  return m_qr.diagonal().prod() * detQ * Scalar(m_det_p);
+  return isInjective() ? (detQ * Scalar(m_det_p)) * m_qr.diagonal().prod() : Scalar(0);
 }
 
 template <typename MatrixType, typename PermutationIndex>
@@ -436,14 +450,23 @@
   using std::abs;
   eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
   eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
-  return abs(m_qr.diagonal().prod());
+  return isInjective() ? abs(m_qr.diagonal().prod()) : RealScalar(0);
 }
 
 template <typename MatrixType, typename PermutationIndex>
 typename MatrixType::RealScalar ColPivHouseholderQR<MatrixType, PermutationIndex>::logAbsDeterminant() const {
   eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
   eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
-  return m_qr.diagonal().cwiseAbs().array().log().sum();
+  return isInjective() ? m_qr.diagonal().cwiseAbs().array().log().sum() : -NumTraits<RealScalar>::infinity();
+}
+
+template <typename MatrixType, typename PermutationIndex>
+typename MatrixType::Scalar ColPivHouseholderQR<MatrixType, PermutationIndex>::signDeterminant() const {
+  eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
+  eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
+  Scalar detQ;
+  internal::householder_determinant<HCoeffsType, Scalar, NumTraits<Scalar>::IsComplex>::run(m_hCoeffs, detQ);
+  return isInjective() ? (detQ * Scalar(m_det_p)) * m_qr.diagonal().array().sign().prod() : Scalar(0);
 }
 
 /** Performs the QR factorization of the given matrix \a matrix. The result of
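
The new accessor pairs naturally with logAbsDeterminant(): keeping the sign and the log-magnitude
separately recovers the determinant without forming the overflow-prone product. A brief sketch:

    #include <Eigen/QR>
    #include <iostream>

    int main() {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(200, 200);
      Eigen::ColPivHouseholderQR<Eigen::MatrixXd> qr(A);
      double s = qr.signDeterminant();    // -1, 0, or +1 (0 when rank-deficient)
      double l = qr.logAbsDeterminant();  // -infinity when rank-deficient
      // Mathematically det(A) = s * exp(l); keeping (s, l) avoids overflow.
      std::cout << "sign = " << s << ", log|det| = " << l << "\n";
      return 0;
    }
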
diff --git a/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/Eigen/src/QR/CompleteOrthogonalDecomposition.h
index 8566e96..960ccb1 100644
--- a/Eigen/src/QR/CompleteOrthogonalDecomposition.h
+++ b/Eigen/src/QR/CompleteOrthogonalDecomposition.h
@@ -228,6 +228,21 @@
    */
   typename MatrixType::RealScalar logAbsDeterminant() const;
 
+  /** \returns the sign of the determinant of the
+   * matrix of which *this is the complete orthogonal decomposition. It has
+   * only linear complexity (that is, O(n) where n is the dimension of the
+   * square matrix) as the complete orthogonal decomposition has already been
+   * computed.
+   *
+   * \note This is only for square matrices.
+   *
+   * \note This method is useful to work around the risk of overflow/underflow
+   * that's inherent to determinant computation.
+   *
+   * \sa determinant(), absDeterminant(), logAbsDeterminant(), MatrixBase::determinant()
+   */
+  typename MatrixType::Scalar signDeterminant() const;
+
   /** \returns the rank of the matrix of which *this is the complete orthogonal
    * decomposition.
    *
@@ -424,6 +439,11 @@
   return m_cpqr.logAbsDeterminant();
 }
 
+template <typename MatrixType, typename PermutationIndex>
+typename MatrixType::Scalar CompleteOrthogonalDecomposition<MatrixType, PermutationIndex>::signDeterminant() const {
+  return m_cpqr.signDeterminant();
+}
+
 /** Performs the complete orthogonal decomposition of the given matrix \a
  * matrix. The result of the factorization is stored into \c *this, and a
  * reference to \c *this is returned.
diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h
index d93a5d1..cae9ae4 100644
--- a/Eigen/src/QR/FullPivHouseholderQR.h
+++ b/Eigen/src/QR/FullPivHouseholderQR.h
@@ -248,6 +248,20 @@
    */
   typename MatrixType::RealScalar logAbsDeterminant() const;
 
+  /** \returns the sign of the determinant of the matrix of which
+   * *this is the QR decomposition. It has only linear complexity
+   * (that is, O(n) where n is the dimension of the square matrix)
+   * as the QR decomposition has already been computed.
+   *
+   * \note This is only for square matrices.
+   *
+   * \note This method is useful to work around the risk of overflow/underflow that's inherent
+   * to determinant computation.
+   *
+   * \sa determinant(), absDeterminant(), logAbsDeterminant(), MatrixBase::determinant()
+   */
+  typename MatrixType::Scalar signDeterminant() const;
+
   /** \returns the rank of the matrix of which *this is the QR decomposition.
    *
    * \note This method has to determine which pivots should be considered nonzero.
@@ -425,7 +439,7 @@
   eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
   Scalar detQ;
   internal::householder_determinant<HCoeffsType, Scalar, NumTraits<Scalar>::IsComplex>::run(m_hCoeffs, detQ);
-  return m_qr.diagonal().prod() * detQ * Scalar(m_det_p);
+  return isInjective() ? (detQ * Scalar(m_det_p)) * m_qr.diagonal().prod() : Scalar(0);
 }
 
 template <typename MatrixType, typename PermutationIndex>
@@ -433,14 +447,23 @@
   using std::abs;
   eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
   eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
-  return abs(m_qr.diagonal().prod());
+  return isInjective() ? abs(m_qr.diagonal().prod()) : RealScalar(0);
 }
 
 template <typename MatrixType, typename PermutationIndex>
 typename MatrixType::RealScalar FullPivHouseholderQR<MatrixType, PermutationIndex>::logAbsDeterminant() const {
   eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
   eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
-  return m_qr.diagonal().cwiseAbs().array().log().sum();
+  return isInjective() ? m_qr.diagonal().cwiseAbs().array().log().sum() : -NumTraits<RealScalar>::infinity();
+}
+
+template <typename MatrixType, typename PermutationIndex>
+typename MatrixType::Scalar FullPivHouseholderQR<MatrixType, PermutationIndex>::signDeterminant() const {
+  eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
+  eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
+  Scalar detQ;
+  internal::householder_determinant<HCoeffsType, Scalar, NumTraits<Scalar>::IsComplex>::run(m_hCoeffs, detQ);
+  return isInjective() ? (detQ * Scalar(m_det_p)) * m_qr.diagonal().array().sign().prod() : Scalar(0);
 }
 
 /** Performs the QR factorization of the given matrix \a matrix. The result of
diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h
index 9e73672..e297372 100644
--- a/Eigen/src/QR/HouseholderQR.h
+++ b/Eigen/src/QR/HouseholderQR.h
@@ -187,6 +187,8 @@
    * \warning a determinant can be very big or small, so for matrices
    * of large enough dimension, there is a risk of overflow/underflow.
    * One way to work around that is to use logAbsDeterminant() instead.
+   * Also, do not rely on the determinant being exactly zero for testing
+   * singularity or rank-deficiency.
    *
    * \sa absDeterminant(), logAbsDeterminant(), MatrixBase::determinant()
    */
@@ -202,6 +204,8 @@
    * \warning a determinant can be very big or small, so for matrices
    * of large enough dimension, there is a risk of overflow/underflow.
    * One way to work around that is to use logAbsDeterminant() instead.
+   * Also, do not rely on the determinant being exactly zero for testing
+   * singularity or rank-deficiency.
    *
    * \sa determinant(), logAbsDeterminant(), MatrixBase::determinant()
    */
@@ -217,10 +221,30 @@
    * \note This method is useful to work around the risk of overflow/underflow that's inherent
    * to determinant computation.
    *
+   * \warning Do not rely on the determinant being exactly zero for testing
+   * singularity or rank-deficiency.
+   *
    * \sa determinant(), absDeterminant(), MatrixBase::determinant()
    */
   typename MatrixType::RealScalar logAbsDeterminant() const;
 
+  /** \returns the sign of the determinant of the matrix of which
+   * *this is the QR decomposition. It has only linear complexity
+   * (that is, O(n) where n is the dimension of the square matrix)
+   * as the QR decomposition has already been computed.
+   *
+   * \note This is only for square matrices.
+   *
+   * \note This method is useful to work around the risk of overflow/underflow that's inherent
+   * to determinant computation.
+   *
+   * \warning Do not rely on the determinant being exactly zero for testing
+   * singularity or rank-deficiency.
+   *
+   * \sa determinant(), absDeterminant(), MatrixBase::determinant()
+   */
+  typename MatrixType::Scalar signDeterminant() const;
+
   inline Index rows() const { return m_qr.rows(); }
   inline Index cols() const { return m_qr.cols(); }
 
@@ -306,6 +330,15 @@
   return m_qr.diagonal().cwiseAbs().array().log().sum();
 }
 
+template <typename MatrixType>
+typename MatrixType::Scalar HouseholderQR<MatrixType>::signDeterminant() const {
+  eigen_assert(m_isInitialized && "HouseholderQR is not initialized.");
+  eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
+  Scalar detQ;
+  internal::householder_determinant<HCoeffsType, Scalar, NumTraits<Scalar>::IsComplex>::run(m_hCoeffs, detQ);
+  return detQ * m_qr.diagonal().array().sign().prod();
+}
+
 namespace internal {
 
 /** \internal */
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h
index cb41123..086d750 100644
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -52,7 +52,10 @@
 class qr_preconditioner_impl<MatrixType, Options, QRPreconditioner, Case, false> {
  public:
   void allocate(const JacobiSVD<MatrixType, Options>&) {}
-  bool run(JacobiSVD<MatrixType, Options>&, const MatrixType&) { return false; }
+  template <typename Xpr>
+  bool run(JacobiSVD<MatrixType, Options>&, const Xpr&) {
+    return false;
+  }
 };
 
 /*** preconditioner using FullPivHouseholderQR ***/
@@ -75,8 +78,8 @@
     }
     if (svd.m_computeFullU) m_workspace.resize(svd.rows());
   }
-
-  bool run(SVDType& svd, const MatrixType& matrix) {
+  template <typename Xpr>
+  bool run(SVDType& svd, const Xpr& matrix) {
     if (matrix.rows() > matrix.cols()) {
       m_qr.compute(matrix);
       svd.m_workMatrix = m_qr.matrixQR().block(0, 0, matrix.cols(), matrix.cols()).template triangularView<Upper>();
@@ -117,14 +120,12 @@
       internal::destroy_at(&m_qr);
       internal::construct_at(&m_qr, svd.cols(), svd.rows());
     }
-    m_adjoint.resize(svd.cols(), svd.rows());
     if (svd.m_computeFullV) m_workspace.resize(svd.cols());
   }
-
-  bool run(SVDType& svd, const MatrixType& matrix) {
+  template <typename Xpr>
+  bool run(SVDType& svd, const Xpr& matrix) {
     if (matrix.cols() > matrix.rows()) {
-      m_adjoint = matrix.adjoint();
-      m_qr.compute(m_adjoint);
+      m_qr.compute(matrix.adjoint());
       svd.m_workMatrix =
           m_qr.matrixQR().block(0, 0, matrix.rows(), matrix.rows()).template triangularView<Upper>().adjoint();
       if (svd.m_computeFullV) m_qr.matrixQ().evalTo(svd.m_matrixV, m_workspace);
@@ -137,7 +138,6 @@
  private:
   typedef FullPivHouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
   QRType m_qr;
-  TransposeTypeWithSameStorageOrder m_adjoint;
   typename plain_row_type<MatrixType>::type m_workspace;
 };
 
@@ -167,8 +167,8 @@
     else if (svd.m_computeThinU)
       m_workspace.resize(svd.cols());
   }
-
-  bool run(SVDType& svd, const MatrixType& matrix) {
+  template <typename Xpr>
+  bool run(SVDType& svd, const Xpr& matrix) {
     if (matrix.rows() > matrix.cols()) {
       m_qr.compute(matrix);
       svd.m_workMatrix = m_qr.matrixQR().block(0, 0, matrix.cols(), matrix.cols()).template triangularView<Upper>();
@@ -222,13 +222,11 @@
       m_workspace.resize(svd.cols());
     else if (svd.m_computeThinV)
       m_workspace.resize(svd.rows());
-    m_adjoint.resize(svd.cols(), svd.rows());
   }
-
-  bool run(SVDType& svd, const MatrixType& matrix) {
+  template <typename Xpr>
+  bool run(SVDType& svd, const Xpr& matrix) {
     if (matrix.cols() > matrix.rows()) {
-      m_adjoint = matrix.adjoint();
-      m_qr.compute(m_adjoint);
+      m_qr.compute(matrix.adjoint());
 
       svd.m_workMatrix =
           m_qr.matrixQR().block(0, 0, matrix.rows(), matrix.rows()).template triangularView<Upper>().adjoint();
@@ -247,7 +245,6 @@
  private:
   typedef ColPivHouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
   QRType m_qr;
-  TransposeTypeWithSameStorageOrder m_adjoint;
   WorkspaceType m_workspace;
 };
 
@@ -276,8 +273,8 @@
     else if (svd.m_computeThinU)
       m_workspace.resize(svd.cols());
   }
-
-  bool run(SVDType& svd, const MatrixType& matrix) {
+  template <typename Xpr>
+  bool run(SVDType& svd, const Xpr& matrix) {
     if (matrix.rows() > matrix.cols()) {
       m_qr.compute(matrix);
       svd.m_workMatrix = m_qr.matrixQR().block(0, 0, matrix.cols(), matrix.cols()).template triangularView<Upper>();
@@ -330,13 +327,12 @@
       m_workspace.resize(svd.cols());
     else if (svd.m_computeThinV)
       m_workspace.resize(svd.rows());
-    m_adjoint.resize(svd.cols(), svd.rows());
   }
 
-  bool run(SVDType& svd, const MatrixType& matrix) {
+  template <typename Xpr>
+  bool run(SVDType& svd, const Xpr& matrix) {
     if (matrix.cols() > matrix.rows()) {
-      m_adjoint = matrix.adjoint();
-      m_qr.compute(m_adjoint);
+      m_qr.compute(matrix.adjoint());
 
       svd.m_workMatrix =
           m_qr.matrixQR().block(0, 0, matrix.rows(), matrix.rows()).template triangularView<Upper>().adjoint();
@@ -355,7 +351,6 @@
  private:
   typedef HouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
   QRType m_qr;
-  TransposeTypeWithSameStorageOrder m_adjoint;
   WorkspaceType m_workspace;
 };
 
@@ -509,7 +504,6 @@
   typedef MatrixType_ MatrixType;
   typedef typename Base::Scalar Scalar;
   typedef typename Base::RealScalar RealScalar;
-  typedef typename Base::Index Index;
   enum : int {
     Options = Options_,
     QRPreconditioner = internal::get_qr_preconditioner(Options),
@@ -618,7 +612,18 @@
   using Base::rows;
 
  private:
-  void allocate(Index rows, Index cols, unsigned int computationOptions);
+  void allocate(Index rows_, Index cols_, unsigned int computationOptions) {
+    if (Base::allocate(rows_, cols_, computationOptions)) return;
+    eigen_assert(!(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) &&
+                 !(ShouldComputeThinV && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) &&
+                 "JacobiSVD: can't compute thin U or thin V with the FullPivHouseholderQR preconditioner. "
+                 "Use the ColPivHouseholderQR preconditioner instead.");
+
+    m_workMatrix.resize(diagSize(), diagSize());
+    if (cols() > rows()) m_qr_precond_morecols.allocate(*this);
+    if (rows() > cols()) m_qr_precond_morerows.allocate(*this);
+  }
+
   JacobiSVD& compute_impl(const MatrixType& matrix, unsigned int computationOptions);
 
  protected:
@@ -654,25 +659,9 @@
   internal::qr_preconditioner_impl<MatrixType, Options, QRPreconditioner, internal::PreconditionIfMoreRowsThanCols>
       m_qr_precond_morerows;
   WorkMatrixType m_workMatrix;
-  MatrixType m_scaledMatrix;
 };
 
 template <typename MatrixType, int Options>
-void JacobiSVD<MatrixType, Options>::allocate(Index rows_, Index cols_, unsigned int computationOptions_) {
-  if (Base::allocate(rows_, cols_, computationOptions_)) return;
-
-  eigen_assert(!(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) &&
-               !(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) &&
-               "JacobiSVD: can't compute thin U or thin V with the FullPivHouseholderQR preconditioner. "
-               "Use the ColPivHouseholderQR preconditioner instead.");
-
-  m_workMatrix.resize(diagSize(), diagSize());
-  if (cols() > rows()) m_qr_precond_morecols.allocate(*this);
-  if (rows() > cols()) m_qr_precond_morerows.allocate(*this);
-  if (rows() != cols()) m_scaledMatrix.resize(rows(), cols());
-}
-
-template <typename MatrixType, int Options>
 JacobiSVD<MatrixType, Options>& JacobiSVD<MatrixType, Options>::compute_impl(const MatrixType& matrix,
                                                                              unsigned int computationOptions) {
   using std::abs;
@@ -699,9 +688,8 @@
   /*** step 1. The R-SVD step: we use a QR decomposition to reduce to the case of a square matrix */
 
   if (rows() != cols()) {
-    m_scaledMatrix = matrix / scale;
-    m_qr_precond_morecols.run(*this, m_scaledMatrix);
-    m_qr_precond_morerows.run(*this, m_scaledMatrix);
+    m_qr_precond_morecols.run(*this, matrix / scale);
+    m_qr_precond_morerows.run(*this, matrix / scale);
   } else {
     m_workMatrix =
         matrix.template topLeftCorner<DiagSizeAtCompileTime, DiagSizeAtCompileTime>(diagSize(), diagSize()) / scale;
diff --git a/Eigen/src/SVD/SVDBase.h b/Eigen/src/SVD/SVDBase.h
index 5f04647..d1ad63d 100644
--- a/Eigen/src/SVD/SVDBase.h
+++ b/Eigen/src/SVD/SVDBase.h
@@ -125,7 +125,6 @@
   typedef typename MatrixType::Scalar Scalar;
   typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
   typedef typename Eigen::internal::traits<SVDBase>::StorageIndex StorageIndex;
-  typedef Eigen::Index Index;  ///< \deprecated since Eigen 3.3
 
   static constexpr bool ShouldComputeFullU = internal::traits<Derived>::ShouldComputeFullU;
   static constexpr bool ShouldComputeThinU = internal::traits<Derived>::ShouldComputeThinU;
@@ -355,11 +354,11 @@
         m_isInitialized(false),
         m_isAllocated(false),
         m_usePrescribedThreshold(false),
-        m_computeFullU(false),
-        m_computeThinU(false),
-        m_computeFullV(false),
-        m_computeThinV(false),
-        m_computationOptions(0),
+        m_computeFullU(ShouldComputeFullU),
+        m_computeThinU(ShouldComputeThinU),
+        m_computeFullV(ShouldComputeFullV),
+        m_computeThinV(ShouldComputeThinV),
+        m_computationOptions(internal::traits<Derived>::Options),
         m_nonzeroSingularValues(0),
         m_rows(RowsAtCompileTime),
         m_cols(ColsAtCompileTime),
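
Seeding the flags from the traits means a default-constructed solver already reflects its
compile-time Options before any factorization has run. A sketch:

    #include <Eigen/SVD>
    #include <iostream>

    int main() {
      Eigen::JacobiSVD<Eigen::MatrixXf,
                       Eigen::ComputeThinU | Eigen::ComputeThinV> svd;
      // computeU()/computeV() are already true, pre-populated from Options.
      std::cout << std::boolalpha << svd.computeU() << ' ' << svd.computeV() << '\n';

      Eigen::MatrixXf A = Eigen::MatrixXf::Random(6, 4);
      svd.compute(A);
      std::cout << "singular values: " << svd.singularValues().transpose() << '\n';
      return 0;
    }
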
diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky.h b/Eigen/src/SparseCholesky/SimplicialCholesky.h
index 423287b..f3ce975 100644
--- a/Eigen/src/SparseCholesky/SimplicialCholesky.h
+++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h
@@ -58,6 +58,7 @@
   enum { UpLo = internal::traits<Derived>::UpLo };
   typedef typename MatrixType::Scalar Scalar;
   typedef typename MatrixType::RealScalar RealScalar;
+  typedef typename internal::traits<Derived>::DiagonalScalar DiagonalScalar;
   typedef typename MatrixType::StorageIndex StorageIndex;
   typedef SparseMatrix<Scalar, ColMajor, StorageIndex> CholMatrixType;
   typedef CholMatrixType const* ConstCholMatrixPtr;
@@ -114,7 +115,7 @@
    *
    * \returns a reference to \c *this.
    */
-  Derived& setShift(const RealScalar& offset, const RealScalar& scale = 1) {
+  Derived& setShift(const DiagonalScalar& offset, const DiagonalScalar& scale = 1) {
     m_shiftOffset = offset;
     m_shiftScale = scale;
     return derived();
@@ -178,18 +179,18 @@
 
  protected:
   /** Computes the sparse Cholesky decomposition of \a matrix */
-  template <bool DoLDLT>
+  template <bool DoLDLT, bool NonHermitian>
   void compute(const MatrixType& matrix) {
     eigen_assert(matrix.rows() == matrix.cols());
     Index size = matrix.cols();
     CholMatrixType tmp(size, size);
     ConstCholMatrixPtr pmat;
-    ordering(matrix, pmat, tmp);
+    ordering<NonHermitian>(matrix, pmat, tmp);
     analyzePattern_preordered(*pmat, DoLDLT);
-    factorize_preordered<DoLDLT>(*pmat);
+    factorize_preordered<DoLDLT, NonHermitian>(*pmat);
   }
 
-  template <bool DoLDLT>
+  template <bool DoLDLT, bool NonHermitian>
   void factorize(const MatrixType& a) {
     eigen_assert(a.rows() == a.cols());
     Index size = a.cols();
@@ -200,28 +201,33 @@
       // If there is no ordering, try to directly use the input matrix without any copy
       internal::simplicial_cholesky_grab_input<CholMatrixType, MatrixType>::run(a, pmat, tmp);
     } else {
-      tmp.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);
+      internal::permute_symm_to_symm<UpLo, Upper, NonHermitian>(a, tmp, m_P.indices().data());
       pmat = &tmp;
     }
 
-    factorize_preordered<DoLDLT>(*pmat);
+    factorize_preordered<DoLDLT, NonHermitian>(*pmat);
   }
 
-  template <bool DoLDLT>
+  template <bool DoLDLT, bool NonHermitian>
   void factorize_preordered(const CholMatrixType& a);
 
-  void analyzePattern(const MatrixType& a, bool doLDLT) {
+  template <bool DoLDLT, bool NonHermitian>
+  void analyzePattern(const MatrixType& a) {
     eigen_assert(a.rows() == a.cols());
     Index size = a.cols();
     CholMatrixType tmp(size, size);
     ConstCholMatrixPtr pmat;
-    ordering(a, pmat, tmp);
-    analyzePattern_preordered(*pmat, doLDLT);
+    ordering<NonHermitian>(a, pmat, tmp);
+    analyzePattern_preordered(*pmat, DoLDLT);
   }
   void analyzePattern_preordered(const CholMatrixType& a, bool doLDLT);
 
+  template <bool NonHermitian>
   void ordering(const MatrixType& a, ConstCholMatrixPtr& pmat, CholMatrixType& ap);
 
+  inline DiagonalScalar getDiag(Scalar x) { return internal::traits<Derived>::getDiag(x); }
+  inline Scalar getSymm(Scalar x) { return internal::traits<Derived>::getSymm(x); }
+
   /** keeps off-diagonal entries; drops diagonal entries */
   struct keep_diag {
     inline bool operator()(const Index& row, const Index& col, const Scalar&) const { return row != col; }
@@ -238,8 +244,8 @@
   PermutationMatrix<Dynamic, Dynamic, StorageIndex> m_P;     // the permutation
   PermutationMatrix<Dynamic, Dynamic, StorageIndex> m_Pinv;  // the inverse permutation
 
-  RealScalar m_shiftOffset;
-  RealScalar m_shiftScale;
+  DiagonalScalar m_shiftOffset;
+  DiagonalScalar m_shiftScale;
 };
 
 template <typename MatrixType_, int UpLo_ = Lower,
@@ -250,6 +256,12 @@
 class SimplicialLDLT;
 template <typename MatrixType_, int UpLo_ = Lower,
           typename Ordering_ = AMDOrdering<typename MatrixType_::StorageIndex> >
+class SimplicialNonHermitianLLT;
+template <typename MatrixType_, int UpLo_ = Lower,
+          typename Ordering_ = AMDOrdering<typename MatrixType_::StorageIndex> >
+class SimplicialNonHermitianLDLT;
+template <typename MatrixType_, int UpLo_ = Lower,
+          typename Ordering_ = AMDOrdering<typename MatrixType_::StorageIndex> >
 class SimplicialCholesky;
 
 namespace internal {
@@ -260,12 +272,15 @@
   typedef Ordering_ OrderingType;
   enum { UpLo = UpLo_ };
   typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar DiagonalScalar;
   typedef typename MatrixType::StorageIndex StorageIndex;
   typedef SparseMatrix<Scalar, ColMajor, StorageIndex> CholMatrixType;
   typedef TriangularView<const CholMatrixType, Eigen::Lower> MatrixL;
   typedef TriangularView<const typename CholMatrixType::AdjointReturnType, Eigen::Upper> MatrixU;
   static inline MatrixL getL(const CholMatrixType& m) { return MatrixL(m); }
   static inline MatrixU getU(const CholMatrixType& m) { return MatrixU(m.adjoint()); }
+  static inline DiagonalScalar getDiag(Scalar x) { return numext::real(x); }
+  static inline Scalar getSymm(Scalar x) { return numext::conj(x); }
 };
 
 template <typename MatrixType_, int UpLo_, typename Ordering_>
@@ -274,12 +289,49 @@
   typedef Ordering_ OrderingType;
   enum { UpLo = UpLo_ };
   typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar DiagonalScalar;
   typedef typename MatrixType::StorageIndex StorageIndex;
   typedef SparseMatrix<Scalar, ColMajor, StorageIndex> CholMatrixType;
   typedef TriangularView<const CholMatrixType, Eigen::UnitLower> MatrixL;
   typedef TriangularView<const typename CholMatrixType::AdjointReturnType, Eigen::UnitUpper> MatrixU;
   static inline MatrixL getL(const CholMatrixType& m) { return MatrixL(m); }
   static inline MatrixU getU(const CholMatrixType& m) { return MatrixU(m.adjoint()); }
+  static inline DiagonalScalar getDiag(Scalar x) { return numext::real(x); }
+  static inline Scalar getSymm(Scalar x) { return numext::conj(x); }
+};
+
+template <typename MatrixType_, int UpLo_, typename Ordering_>
+struct traits<SimplicialNonHermitianLLT<MatrixType_, UpLo_, Ordering_> > {
+  typedef MatrixType_ MatrixType;
+  typedef Ordering_ OrderingType;
+  enum { UpLo = UpLo_ };
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::Scalar DiagonalScalar;
+  typedef typename MatrixType::StorageIndex StorageIndex;
+  typedef SparseMatrix<Scalar, ColMajor, StorageIndex> CholMatrixType;
+  typedef TriangularView<const CholMatrixType, Eigen::Lower> MatrixL;
+  typedef TriangularView<const typename CholMatrixType::ConstTransposeReturnType, Eigen::Upper> MatrixU;
+  static inline MatrixL getL(const CholMatrixType& m) { return MatrixL(m); }
+  static inline MatrixU getU(const CholMatrixType& m) { return MatrixU(m.transpose()); }
+  static inline DiagonalScalar getDiag(Scalar x) { return x; }
+  static inline Scalar getSymm(Scalar x) { return x; }
+};
+
+template <typename MatrixType_, int UpLo_, typename Ordering_>
+struct traits<SimplicialNonHermitianLDLT<MatrixType_, UpLo_, Ordering_> > {
+  typedef MatrixType_ MatrixType;
+  typedef Ordering_ OrderingType;
+  enum { UpLo = UpLo_ };
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::Scalar DiagonalScalar;
+  typedef typename MatrixType::StorageIndex StorageIndex;
+  typedef SparseMatrix<Scalar, ColMajor, StorageIndex> CholMatrixType;
+  typedef TriangularView<const CholMatrixType, Eigen::UnitLower> MatrixL;
+  typedef TriangularView<const typename CholMatrixType::ConstTransposeReturnType, Eigen::UnitUpper> MatrixU;
+  static inline MatrixL getL(const CholMatrixType& m) { return MatrixL(m); }
+  static inline MatrixU getU(const CholMatrixType& m) { return MatrixU(m.transpose()); }
+  static inline DiagonalScalar getDiag(Scalar x) { return x; }
+  static inline Scalar getSymm(Scalar x) { return x; }
 };
 
 template <typename MatrixType_, int UpLo_, typename Ordering_>
@@ -287,6 +339,10 @@
   typedef MatrixType_ MatrixType;
   typedef Ordering_ OrderingType;
   enum { UpLo = UpLo_ };
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar DiagonalScalar;
+  static inline DiagonalScalar getDiag(Scalar x) { return numext::real(x); }
+  static inline Scalar getSymm(Scalar x) { return numext::conj(x); }
 };
 
 }  // namespace internal
@@ -346,7 +402,7 @@
 
   /** Computes the sparse Cholesky decomposition of \a matrix */
   SimplicialLLT& compute(const MatrixType& matrix) {
-    Base::template compute<false>(matrix);
+    Base::template compute<false, false>(matrix);
     return *this;
   }
 
@@ -356,7 +412,7 @@
    *
    * \sa factorize()
    */
-  void analyzePattern(const MatrixType& a) { Base::analyzePattern(a, false); }
+  void analyzePattern(const MatrixType& a) { Base::template analyzePattern<false, false>(a); }
 
   /** Performs a numeric decomposition of \a matrix
    *
@@ -364,7 +420,7 @@
    *
    * \sa analyzePattern()
    */
-  void factorize(const MatrixType& a) { Base::template factorize<false>(a); }
+  void factorize(const MatrixType& a) { Base::template factorize<false, false>(a); }
 
   /** \returns the determinant of the underlying matrix from the current factorization */
   Scalar determinant() const {
@@ -434,7 +490,7 @@
 
   /** Computes the sparse Cholesky decomposition of \a matrix */
   SimplicialLDLT& compute(const MatrixType& matrix) {
-    Base::template compute<true>(matrix);
+    Base::template compute<true, false>(matrix);
     return *this;
   }
 
@@ -444,7 +500,7 @@
    *
    * \sa factorize()
    */
-  void analyzePattern(const MatrixType& a) { Base::analyzePattern(a, true); }
+  void analyzePattern(const MatrixType& a) { Base::template analyzePattern<true, false>(a); }
 
   /** Performs a numeric decomposition of \a matrix
    *
@@ -452,7 +508,177 @@
    *
    * \sa analyzePattern()
    */
-  void factorize(const MatrixType& a) { Base::template factorize<true>(a); }
+  void factorize(const MatrixType& a) { Base::template factorize<true, false>(a); }
+
+  /** \returns the determinant of the underlying matrix from the current factorization */
+  Scalar determinant() const { return Base::m_diag.prod(); }
+};
+
+/** \ingroup SparseCholesky_Module
+ * \class SimplicialNonHermitianLLT
+ * \brief A direct sparse LLT Cholesky factorization for symmetric non-Hermitian matrices.
+ *
+ * This class provides an LL^T Cholesky factorization of sparse matrices that are
+ * symmetric but not Hermitian. For real matrices, this is equivalent to the regular LLT factorization.
+ * The factorization allows for solving A.X = B where X and B can be either dense or sparse.
+ *
+ * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization
+ * such that the factorized matrix is P A P^-1.
+ *
+ * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
+ * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower
+ *               or Upper. Default is Lower.
+ * \tparam Ordering_ The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<>
+ *
+ * \implsparsesolverconcept
+ *
+ * \sa class SimplicialNonHermitianLDLT, SimplicialLLT, class AMDOrdering, class NaturalOrdering
+ */
+template <typename MatrixType_, int UpLo_, typename Ordering_>
+class SimplicialNonHermitianLLT
+    : public SimplicialCholeskyBase<SimplicialNonHermitianLLT<MatrixType_, UpLo_, Ordering_> > {
+ public:
+  typedef MatrixType_ MatrixType;
+  enum { UpLo = UpLo_ };
+  typedef SimplicialCholeskyBase<SimplicialNonHermitianLLT> Base;
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar RealScalar;
+  typedef typename MatrixType::StorageIndex StorageIndex;
+  typedef SparseMatrix<Scalar, ColMajor, StorageIndex> CholMatrixType;
+  typedef Matrix<Scalar, Dynamic, 1> VectorType;
+  typedef internal::traits<SimplicialNonHermitianLLT> Traits;
+  typedef typename Traits::MatrixL MatrixL;
+  typedef typename Traits::MatrixU MatrixU;
+
+ public:
+  /** Default constructor */
+  SimplicialNonHermitianLLT() : Base() {}
+
+  /** Constructs and performs the LLT factorization of \a matrix */
+  explicit SimplicialNonHermitianLLT(const MatrixType& matrix) : Base(matrix) {}
+
+  /** \returns an expression of the factor L */
+  inline const MatrixL matrixL() const {
+    eigen_assert(Base::m_factorizationIsOk && "Simplicial LLT not factorized");
+    return Traits::getL(Base::m_matrix);
+  }
+
+  /** \returns an expression of the factor U (= L^T) */
+  inline const MatrixU matrixU() const {
+    eigen_assert(Base::m_factorizationIsOk && "Simplicial LLT not factorized");
+    return Traits::getU(Base::m_matrix);
+  }
+
+  /** Computes the sparse Cholesky decomposition of \a matrix */
+  SimplicialNonHermitianLLT& compute(const MatrixType& matrix) {
+    Base::template compute<false, true>(matrix);
+    return *this;
+  }
+
+  /** Performs a symbolic decomposition on the sparsity pattern of \a matrix.
+   *
+   * This function is particularly useful when solving several problems having the same structure.
+   *
+   * \sa factorize()
+   */
+  void analyzePattern(const MatrixType& a) { Base::template analyzePattern<false, true>(a); }
+
+  /** Performs a numeric decomposition of \a matrix
+   *
+   * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed.
+   *
+   * \sa analyzePattern()
+   */
+  void factorize(const MatrixType& a) { Base::template factorize<false, true>(a); }
+
+  /** \returns the determinant of the underlying matrix from the current factorization */
+  Scalar determinant() const {
+    Scalar detL = Base::m_matrix.diagonal().prod();
+    return detL * detL;
+  }
+};
+
+/** \ingroup SparseCholesky_Module
+ * \class SimplicialNonHermitianLDLT
+ * \brief A direct sparse LDLT Cholesky factorization without square root, for symmetric non-Hermitian matrices.
+ *
+ * This class provides an LDL^T Cholesky factorization without square root of sparse matrices that are
+ * symmetric but not Hermitian. For real matrices, this is equivalent to the regular LDLT factorization.
+ * The factorization allows for solving A.X = B where X and B can be either dense or sparse.
+ *
+ * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization
+ * such that the factorized matrix is P A P^-1.
+ *
+ * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
+ * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower
+ *               or Upper. Default is Lower.
+ * \tparam Ordering_ The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<>
+ *
+ * \implsparsesolverconcept
+ *
+ * \sa class SimplicialNonHermitianLLT, SimplicialLDLT, class AMDOrdering, class NaturalOrdering
+ */
+template <typename MatrixType_, int UpLo_, typename Ordering_>
+class SimplicialNonHermitianLDLT
+    : public SimplicialCholeskyBase<SimplicialNonHermitianLDLT<MatrixType_, UpLo_, Ordering_> > {
+ public:
+  typedef MatrixType_ MatrixType;
+  enum { UpLo = UpLo_ };
+  typedef SimplicialCholeskyBase<SimplicialNonHermitianLDLT> Base;
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar RealScalar;
+  typedef typename MatrixType::StorageIndex StorageIndex;
+  typedef SparseMatrix<Scalar, ColMajor, StorageIndex> CholMatrixType;
+  typedef Matrix<Scalar, Dynamic, 1> VectorType;
+  typedef internal::traits<SimplicialNonHermitianLDLT> Traits;
+  typedef typename Traits::MatrixL MatrixL;
+  typedef typename Traits::MatrixU MatrixU;
+
+ public:
+  /** Default constructor */
+  SimplicialNonHermitianLDLT() : Base() {}
+
+  /** Constructs and performs the LDLT factorization of \a matrix */
+  explicit SimplicialNonHermitianLDLT(const MatrixType& matrix) : Base(matrix) {}
+
+  /** \returns a vector expression of the diagonal D */
+  inline const VectorType vectorD() const {
+    eigen_assert(Base::m_factorizationIsOk && "Simplicial LDLT not factorized");
+    return Base::m_diag;
+  }
+  /** \returns an expression of the factor L */
+  inline const MatrixL matrixL() const {
+    eigen_assert(Base::m_factorizationIsOk && "Simplicial LDLT not factorized");
+    return Traits::getL(Base::m_matrix);
+  }
+
+  /** \returns an expression of the factor U (= L^T) */
+  inline const MatrixU matrixU() const {
+    eigen_assert(Base::m_factorizationIsOk && "Simplicial LDLT not factorized");
+    return Traits::getU(Base::m_matrix);
+  }
+
+  /** Computes the sparse Cholesky decomposition of \a matrix */
+  SimplicialNonHermitianLDLT& compute(const MatrixType& matrix) {
+    Base::template compute<true, true>(matrix);
+    return *this;
+  }
+
+  /** Performs a symbolic decomposition on the sparsity pattern of \a matrix.
+   *
+   * This function is particularly useful when solving several problems having the same structure.
+   *
+   * \sa factorize()
+   */
+  void analyzePattern(const MatrixType& a) { Base::template analyzePattern<true, true>(a); }
+
+  /** Performs a numeric decomposition of \a matrix
+   *
+   * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed.
+   *
+   * \sa analyzePattern()
+   */
+  void factorize(const MatrixType& a) { Base::template factorize<true, true>(a); }
 
   /** \returns the determinant of the underlying matrix from the current factorization */
   Scalar determinant() const { return Base::m_diag.prod(); }
@@ -475,7 +701,6 @@
   typedef typename MatrixType::StorageIndex StorageIndex;
   typedef SparseMatrix<Scalar, ColMajor, StorageIndex> CholMatrixType;
   typedef Matrix<Scalar, Dynamic, 1> VectorType;
-  typedef internal::traits<SimplicialCholesky> Traits;
   typedef internal::traits<SimplicialLDLT<MatrixType, UpLo> > LDLTTraits;
   typedef internal::traits<SimplicialLLT<MatrixType, UpLo> > LLTTraits;
 
@@ -511,9 +736,9 @@
   /** Computes the sparse Cholesky decomposition of \a matrix */
   SimplicialCholesky& compute(const MatrixType& matrix) {
     if (m_LDLT)
-      Base::template compute<true>(matrix);
+      Base::template compute<true, false>(matrix);
     else
-      Base::template compute<false>(matrix);
+      Base::template compute<false, false>(matrix);
     return *this;
   }
 
@@ -523,7 +748,12 @@
    *
    * \sa factorize()
    */
-  void analyzePattern(const MatrixType& a) { Base::analyzePattern(a, m_LDLT); }
+  void analyzePattern(const MatrixType& a) {
+    if (m_LDLT)
+      Base::template analyzePattern<true, false>(a);
+    else
+      Base::template analyzePattern<false, false>(a);
+  }
 
   /** Performs a numeric decomposition of \a matrix
    *
@@ -533,9 +763,9 @@
    */
   void factorize(const MatrixType& a) {
     if (m_LDLT)
-      Base::template factorize<true>(a);
+      Base::template factorize<true, false>(a);
     else
-      Base::template factorize<false>(a);
+      Base::template factorize<false, false>(a);
   }
 
   /** \internal */
@@ -594,6 +824,7 @@
 };
 
 template <typename Derived>
+template <bool NonHermitian>
 void SimplicialCholeskyBase<Derived>::ordering(const MatrixType& a, ConstCholMatrixPtr& pmat, CholMatrixType& ap) {
   eigen_assert(a.rows() == a.cols());
   const Index size = a.rows();
@@ -602,7 +833,7 @@
   if (!internal::is_same<OrderingType, NaturalOrdering<Index> >::value) {
     {
       CholMatrixType C;
-      C = a.template selfadjointView<UpLo>();
+      internal::permute_symm_to_fullsymm<UpLo, NonHermitian>(a, C, NULL);
 
       OrderingType ordering;
       ordering(C, m_Pinv);
@@ -614,14 +845,14 @@
       m_P.resize(0);
 
     ap.resize(size, size);
-    ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);
+    internal::permute_symm_to_symm<UpLo, Upper, NonHermitian>(a, ap, m_P.indices().data());
   } else {
     m_Pinv.resize(0);
     m_P.resize(0);
     if (int(UpLo) == int(Lower) || MatrixType::IsRowMajor) {
      // we have to transpose the lower part to the upper one
       ap.resize(size, size);
-      ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>();
+      internal::permute_symm_to_symm<UpLo, Upper, NonHermitian>(a, ap, NULL);
     } else
       internal::simplicial_cholesky_grab_input<CholMatrixType, MatrixType>::run(a, pmat, ap);
   }
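Usage sketch for the new non-Hermitian solvers declared above; the matrix contents are placeholders, the API calls come straight from this diff:

    #include <Eigen/SparseCholesky>
    #include <complex>

    typedef std::complex<double> Cplx;

    void solveComplexSymmetric(const Eigen::SparseMatrix<Cplx>& A, const Eigen::VectorXcd& b) {
      // A must be symmetric (A == A.transpose()), but need not be Hermitian.
      Eigen::SimplicialNonHermitianLDLT<Eigen::SparseMatrix<Cplx> > ldlt(A);
      if (ldlt.info() == Eigen::Success) {
        Eigen::VectorXcd x = ldlt.solve(b);
      }
    }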
diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h b/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h
index abfbbe6..0b13c56 100644
--- a/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h
+++ b/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h
@@ -67,7 +67,7 @@
 }
 
 template <typename Derived>
-template <bool DoLDLT>
+template <bool DoLDLT, bool NonHermitian>
 void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType& ap) {
   using std::sqrt;
 
@@ -97,7 +97,7 @@
     for (typename CholMatrixType::InnerIterator it(ap, k); it; ++it) {
       StorageIndex i = it.index();
       if (i <= k) {
-        y[i] += numext::conj(it.value()); /* scatter A(i,k) into Y (sum duplicates) */
+        y[i] += getSymm(it.value()); /* scatter A(i,k) into Y (sum duplicates) */
         Index len;
         for (len = 0; tags[i] != k; i = m_parent[i]) {
           pattern[len++] = i; /* L(k,i) is nonzero */
@@ -109,8 +109,8 @@
 
     /* compute numerical values kth row of L (a sparse triangular solve) */
 
-    RealScalar d =
-        numext::real(y[k]) * m_shiftScale + m_shiftOffset;  // get D(k,k), apply the shift function, and clear Y(k)
+    DiagonalScalar d =
+        getDiag(y[k]) * m_shiftScale + m_shiftOffset;  // get D(k,k), apply the shift function, and clear Y(k)
     y[k] = Scalar(0);
     for (; top < size; ++top) {
       Index i = pattern[top]; /* pattern[top:n-1] is pattern of L(:,k) */
@@ -120,14 +120,14 @@
       /* the nonzero entry L(k,i) */
       Scalar l_ki;
       if (DoLDLT)
-        l_ki = yi / numext::real(m_diag[i]);
+        l_ki = yi / getDiag(m_diag[i]);
       else
         yi = l_ki = yi / Lx[Lp[i]];
 
       Index p2 = Lp[i] + m_nonZerosPerCol[i];
       Index p;
-      for (p = Lp[i] + (DoLDLT ? 0 : 1); p < p2; ++p) y[Li[p]] -= numext::conj(Lx[p]) * yi;
-      d -= numext::real(l_ki * numext::conj(yi));
+      for (p = Lp[i] + (DoLDLT ? 0 : 1); p < p2; ++p) y[Li[p]] -= getSymm(Lx[p]) * yi;
+      d -= getDiag(l_ki * getSymm(yi));
       Li[p] = k; /* store L(k,i) in column form of L */
       Lx[p] = l_ki;
       ++m_nonZerosPerCol[i]; /* increment count of nonzeros in col i */
@@ -141,7 +141,7 @@
     } else {
       Index p = Lp[k] + m_nonZerosPerCol[k]++;
       Li[p] = k; /* store L(k,k) = sqrt (d) in column k */
-      if (d <= RealScalar(0)) {
+      if (NonHermitian ? d == RealScalar(0) : numext::real(d) <= RealScalar(0)) {
         ok = false; /* failure, matrix is not positive definite */
         break;
       }
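The kernel now routes conjugation and diagonal extraction through the solver traits instead of hard-coding numext::conj/numext::real. An illustrative stand-alone sketch (not Eigen's actual internals) of what the two paths compute for a complex scalar:

    #include <complex>

    // Hermitian solvers:     getSymm(x) == conj(x), getDiag(x) == real(x)
    // Non-Hermitian solvers: getSymm(x) == x,       getDiag(x) == x
    template <bool NonHermitian>
    std::complex<double> getSymm(std::complex<double> x) {
      return NonHermitian ? x : std::conj(x);
    }

This is also why the breakdown check changes above: the non-Hermitian factorization only fails when the (possibly complex) pivot is exactly zero, while the Hermitian one requires a positive real pivot.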
diff --git a/Eigen/src/SparseCore/SparseDot.h b/Eigen/src/SparseCore/SparseDot.h
index aa876ec..f040915 100644
--- a/Eigen/src/SparseCore/SparseDot.h
+++ b/Eigen/src/SparseCore/SparseDot.h
@@ -17,7 +17,8 @@
 
 template <typename Derived>
 template <typename OtherDerived>
-typename internal::traits<Derived>::Scalar SparseMatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const {
+inline typename internal::traits<Derived>::Scalar SparseMatrixBase<Derived>::dot(
+    const MatrixBase<OtherDerived>& other) const {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
   EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived, OtherDerived)
@@ -30,17 +31,23 @@
 
   internal::evaluator<Derived> thisEval(derived());
   typename internal::evaluator<Derived>::InnerIterator i(thisEval, 0);
-  Scalar res(0);
-  while (i) {
-    res += numext::conj(i.value()) * other.coeff(i.index());
+  // Two accumulators break the dependency chain on a single accumulator
+  // and allow more instruction-level parallelism in the following loop.
+  Scalar res1(0);
+  Scalar res2(0);
+  for (; i; ++i) {
+    res1 += numext::conj(i.value()) * other.coeff(i.index());
     ++i;
+    if (i) {
+      res2 += numext::conj(i.value()) * other.coeff(i.index());
+    }
   }
-  return res;
+  return res1 + res2;
 }
 
 template <typename Derived>
 template <typename OtherDerived>
-typename internal::traits<Derived>::Scalar SparseMatrixBase<Derived>::dot(
+inline typename internal::traits<Derived>::Scalar SparseMatrixBase<Derived>::dot(
     const SparseMatrixBase<OtherDerived>& other) const {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h
index 81b0a11..849970a 100644
--- a/Eigen/src/SparseCore/SparseMatrix.h
+++ b/Eigen/src/SparseCore/SparseMatrix.h
@@ -217,15 +217,18 @@
     return m_data.atInRange(m_outerIndex[outer], end, inner);
   }
 
-  /** \returns a non-const reference to the value of the matrix at position \a i, \a j
+  /** \returns a non-const reference to the value of the matrix at position \a i, \a j.
    *
    * If the element does not exist then it is inserted via the insert(Index,Index) function
    * which itself turns the matrix into a non compressed form if that was not the case.
+   * In that case, the output parameter `inserted` is set to true.
+   *
+   * Otherwise, if the element already exists, `inserted` is set to false.
    *
    * This is a O(log(nnz_j)) operation (binary search) plus the cost of insert(Index,Index)
    * function if the element does not already exist.
    */
-  inline Scalar& coeffRef(Index row, Index col) {
+  inline Scalar& findOrInsertCoeff(Index row, Index col, bool* inserted) {
     eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
     const Index outer = IsRowMajor ? row : col;
     const Index inner = IsRowMajor ? col : row;
@@ -240,17 +243,37 @@
         m_innerNonZeros[outer]++;
         m_data.index(end) = StorageIndex(inner);
         m_data.value(end) = Scalar(0);
+        if (inserted != nullptr) {
+          *inserted = true;
+        }
         return m_data.value(end);
       }
     }
-    if ((dst < end) && (m_data.index(dst) == inner))
+    if ((dst < end) && (m_data.index(dst) == inner)) {
      // this coefficient exists, return a reference to it
+      if (inserted != nullptr) {
+        *inserted = false;
+      }
       return m_data.value(dst);
-    else
+    } else {
+      if (inserted != nullptr) {
+        *inserted = true;
+      }
       // insertion will require reconfiguring the buffer
       return insertAtByOuterInner(outer, inner, dst);
+    }
   }
 
+  /** \returns a non-const reference to the value of the matrix at position \a i, \a j
+   *
+   * If the element does not exist then it is inserted via the insert(Index,Index) function
+   * which itself turns the matrix into a non compressed form if that was not the case.
+   *
+   * This is a O(log(nnz_j)) operation (binary search) plus the cost of insert(Index,Index)
+   * function if the element does not already exist.
+   */
+  inline Scalar& coeffRef(Index row, Index col) { return findOrInsertCoeff(row, col, nullptr); }
+
   /** \returns a reference to a novel non zero coefficient with coordinates \a row x \a col.
    * The non zero coefficient must \b not already exist.
    *
@@ -765,8 +788,11 @@
     Base::operator=(other);
   }
 
-  inline SparseMatrix(SparseMatrix&& other)
-      : Base(), m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0) {
+  /** Move constructor */
+  inline SparseMatrix(SparseMatrix&& other) : SparseMatrix() { this->swap(other); }
+
+  template <typename OtherDerived>
+  inline SparseMatrix(SparseCompressedBase<OtherDerived>&& other) : SparseMatrix() {
     *this = other.derived().markAsRValue();
   }
 
@@ -834,7 +860,10 @@
     return *this;
   }
 
-  inline SparseMatrix& operator=(SparseMatrix&& other) { return *this = other.derived().markAsRValue(); }
+  inline SparseMatrix& operator=(SparseMatrix&& other) {
+    this->swap(other);
+    return *this;
+  }
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN
   template <typename OtherDerived>
@@ -849,6 +878,12 @@
   template <typename OtherDerived>
   EIGEN_DONT_INLINE SparseMatrix& operator=(const SparseMatrixBase<OtherDerived>& other);
 
+  template <typename OtherDerived>
+  inline SparseMatrix& operator=(SparseCompressedBase<OtherDerived>&& other) {
+    *this = other.derived().markAsRValue();
+    return *this;
+  }
+
 #ifndef EIGEN_NO_IO
   friend std::ostream& operator<<(std::ostream& s, const SparseMatrix& m) {
     EIGEN_DBG_SPARSE(
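Sketch of the new findOrInsertCoeff API (indices and values are arbitrary):

    Eigen::SparseMatrix<double> A(10, 10);
    bool inserted = false;
    // First access creates the entry, so `inserted` is set to true...
    A.findOrInsertCoeff(3, 4, &inserted) = 2.5;
    // ...a second access finds the existing entry: `inserted` becomes false.
    double& v = A.findOrInsertCoeff(3, 4, &inserted);
    v += 1.0;
    // Passing nullptr skips the report; this is what coeffRef() now does.
    A.findOrInsertCoeff(3, 4, nullptr) = 3.0;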
diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h
index 129899c..3402bae 100644
--- a/Eigen/src/SparseCore/SparseSelfAdjointView.h
+++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h
@@ -34,13 +34,13 @@
 template <typename MatrixType, unsigned int Mode>
 struct traits<SparseSelfAdjointView<MatrixType, Mode> > : traits<MatrixType> {};
 
-template <int SrcMode, int DstMode, typename MatrixType, int DestOrder>
+template <int SrcMode, int DstMode, bool NonHermitian, typename MatrixType, int DestOrder>
 void permute_symm_to_symm(
     const MatrixType& mat,
     SparseMatrix<typename MatrixType::Scalar, DestOrder, typename MatrixType::StorageIndex>& _dest,
     const typename MatrixType::StorageIndex* perm = 0);
 
-template <int Mode, typename MatrixType, int DestOrder>
+template <int Mode, bool NonHermitian, typename MatrixType, int DestOrder>
 void permute_symm_to_fullsymm(
     const MatrixType& mat,
     SparseMatrix<typename MatrixType::Scalar, DestOrder, typename MatrixType::StorageIndex>& _dest,
@@ -53,7 +53,7 @@
  public:
   enum {
     Mode = Mode_,
-    TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0),
+    TransposeMode = ((int(Mode) & int(Upper)) ? Lower : 0) | ((int(Mode) & int(Lower)) ? Upper : 0),
     RowsAtCompileTime = internal::traits<SparseSelfAdjointView>::RowsAtCompileTime,
     ColsAtCompileTime = internal::traits<SparseSelfAdjointView>::ColsAtCompileTime
   };
@@ -234,7 +234,7 @@
   template <typename DestScalar, int StorageOrder>
   static void run(SparseMatrix<DestScalar, StorageOrder, StorageIndex>& dst, const SrcXprType& src,
                   const AssignOpType& /*func*/) {
-    internal::permute_symm_to_fullsymm<SrcXprType::Mode>(src.matrix(), dst);
+    internal::permute_symm_to_fullsymm<SrcXprType::Mode, false>(src.matrix(), dst);
   }
 
   // FIXME: the handling of += and -= in sparse matrices should be cleanup so that next two overloads could be reduced
@@ -405,7 +405,7 @@
  ***************************************************************************/
 namespace internal {
 
-template <int Mode, typename MatrixType, int DestOrder>
+template <int Mode, bool NonHermitian, typename MatrixType, int DestOrder>
 void permute_symm_to_fullsymm(
     const MatrixType& mat,
     SparseMatrix<typename MatrixType::Scalar, DestOrder, typename MatrixType::StorageIndex>& _dest,
@@ -476,13 +476,13 @@
         dest.valuePtr()[k] = it.value();
         k = count[ip]++;
         dest.innerIndexPtr()[k] = jp;
-        dest.valuePtr()[k] = numext::conj(it.value());
+        dest.valuePtr()[k] = (NonHermitian ? it.value() : numext::conj(it.value()));
       }
     }
   }
 }
 
-template <int SrcMode_, int DstMode_, typename MatrixType, int DstOrder>
+template <int SrcMode_, int DstMode_, bool NonHermitian, typename MatrixType, int DstOrder>
 void permute_symm_to_symm(const MatrixType& mat,
                           SparseMatrix<typename MatrixType::Scalar, DstOrder, typename MatrixType::StorageIndex>& _dest,
                           const typename MatrixType::StorageIndex* perm) {
@@ -534,7 +534,7 @@
 
       if (!StorageOrderMatch) std::swap(ip, jp);
       if (((int(DstMode) == int(Lower) && ip < jp) || (int(DstMode) == int(Upper) && ip > jp)))
-        dest.valuePtr()[k] = numext::conj(it.value());
+        dest.valuePtr()[k] = (NonHermitian ? it.value() : numext::conj(it.value()));
       else
         dest.valuePtr()[k] = it.value();
     }
@@ -595,14 +595,14 @@
                   const internal::assign_op<Scalar, typename MatrixType::Scalar>&) {
     // internal::permute_symm_to_fullsymm<Mode>(m_matrix,_dest,m_perm.indices().data());
     SparseMatrix<Scalar, (Options & RowMajor) == RowMajor ? ColMajor : RowMajor, DstIndex> tmp;
-    internal::permute_symm_to_fullsymm<Mode>(src.matrix(), tmp, src.perm().indices().data());
+    internal::permute_symm_to_fullsymm<Mode, false>(src.matrix(), tmp, src.perm().indices().data());
     dst = tmp;
   }
 
   template <typename DestType, unsigned int DestMode>
   static void run(SparseSelfAdjointView<DestType, DestMode>& dst, const SrcXprType& src,
                   const internal::assign_op<Scalar, typename MatrixType::Scalar>&) {
-    internal::permute_symm_to_symm<Mode, DestMode>(src.matrix(), dst.matrix(), src.perm().indices().data());
+    internal::permute_symm_to_symm<Mode, DestMode, false>(src.matrix(), dst.matrix(), src.perm().indices().data());
   }
 };
 
diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h
index 0733718..fac162e 100644
--- a/Eigen/src/SparseCore/SparseVector.h
+++ b/Eigen/src/SparseCore/SparseVector.h
@@ -304,6 +304,24 @@
     return *this;
   }
 
+  inline SparseVector(SparseVector&& other) : SparseVector() { this->swap(other); }
+
+  template <typename OtherDerived>
+  inline SparseVector(SparseCompressedBase<OtherDerived>&& other) : SparseVector() {
+    *this = other.derived().markAsRValue();
+  }
+
+  inline SparseVector& operator=(SparseVector&& other) {
+    this->swap(other);
+    return *this;
+  }
+
+  template <typename OtherDerived>
+  inline SparseVector& operator=(SparseCompressedBase<OtherDerived>&& other) {
+    *this = other.derived().markAsRValue();
+    return *this;
+  }
+
 #ifndef EIGEN_PARSED_BY_DOXYGEN
   template <typename Lhs, typename Rhs>
   inline SparseVector& operator=(const SparseSparseProduct<Lhs, Rhs>& product) {
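Together with the matching SparseMatrix changes above, moves are now O(1) buffer swaps instead of deep copies. A small sketch:

    #include <Eigen/SparseCore>
    #include <utility>

    Eigen::SparseVector<double> makeVector() {
      Eigen::SparseVector<double> a(1000000);
      a.insert(42) = 1.0;
      return a;  // moved out; no copy of the internal buffers
    }

    void demo() {
      Eigen::SparseVector<double> b = makeVector();
      Eigen::SparseVector<double> c = std::move(b);  // O(1): swaps storage with b
    }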
diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h
index 3e3352f..acb0c5f 100644
--- a/Eigen/src/SparseQR/SparseQR.h
+++ b/Eigen/src/SparseQR/SparseQR.h
@@ -481,9 +481,7 @@
       tdot *= m_hcoeffs(curIdx);
 
       // Then update tval = tval - q * tau
-      // FIXME: tval -= tdot * m_Q.col(curIdx) should amount to the same (need to check/add support for efficient "dense
-      // ?= sparse")
-      for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq) tval(itq.row()) -= itq.value() * tdot;
+      tval -= tdot * m_Q.col(curIdx);
 
       // Detect fill-in for the current column of Q
       if (m_etree(Ridx(i)) == nonzeroCol) {
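The hand-rolled inner-iterator loop is replaced by a dense -= sparse expression, which Eigen evaluates efficiently (resolving the old FIXME). An isolated sketch of the pattern, with placeholder values:

    Eigen::VectorXd tval = Eigen::VectorXd::Ones(5);
    Eigen::SparseMatrix<double> Q(5, 2);
    Q.insert(1, 0) = 3.0;
    Q.makeCompressed();
    double tdot = 2.0;
    tval -= tdot * Q.col(0);  // dense -= (scalar * sparse column)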
diff --git a/Eigen/src/plugins/CommonCwiseUnaryOps.inc b/Eigen/src/plugins/CommonCwiseUnaryOps.inc
index f20f2f8..64f3648 100644
--- a/Eigen/src/plugins/CommonCwiseUnaryOps.inc
+++ b/Eigen/src/plugins/CommonCwiseUnaryOps.inc
@@ -118,7 +118,7 @@
   return CwiseUnaryOp<CustomUnaryOp, const Derived>(derived(), func);
 }
 
-/// \returns an expression of a custom coefficient-wise unary operator \a func of *this
+/// \returns a const expression of a custom coefficient-wise unary operator \a func of *this
 ///
 /// The template parameter \a CustomUnaryOp is the type of the functor
 /// of the custom unary operator.
@@ -137,6 +137,21 @@
   return CwiseUnaryView<CustomViewOp, const Derived>(derived(), func);
 }
 
+/// \returns a non-const expression of a custom coefficient-wise unary view \a func of *this
+///
+/// The template parameter \a CustomViewOp is the type of the functor
+/// of the custom unary operator.
+///
+EIGEN_DOC_UNARY_ADDONS(unaryViewExpr, unary function)
+///
+/// \sa unaryExpr, binaryExpr, class CwiseUnaryOp
+///
+template <typename CustomViewOp>
+EIGEN_DEVICE_FUNC inline CwiseUnaryView<CustomViewOp, Derived> unaryViewExpr(
+    const CustomViewOp& func = CustomViewOp()) {
+  return CwiseUnaryView<CustomViewOp, Derived>(derived(), func);
+}
+
 /// \returns a non const expression of the real part of \c *this.
 ///
 EIGEN_DOC_UNARY_ADDONS(real, real part function)
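A hedged sketch of the new non-const overload; real_ref_op here is a hypothetical functor (not an Eigen name), modeled on the internal functor behind the writable .real() view:

    #include <Eigen/Core>
    #include <complex>

    // Hypothetical functor exposing each coefficient's real part by reference.
    struct real_ref_op {
      typedef double result_type;
      double& operator()(const std::complex<double>& c) const {
        return Eigen::numext::real_ref(const_cast<std::complex<double>&>(c));
      }
    };

    void demo() {
      Eigen::Matrix2cd m = Eigen::Matrix2cd::Zero();
      // The non-const overload yields a writable view: this sets the real parts.
      m.unaryViewExpr(real_ref_op()) = Eigen::Matrix2d::Ones();
    }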
diff --git a/Eigen/src/plugins/IndexedViewMethods.inc b/Eigen/src/plugins/IndexedViewMethods.inc
index 26e7b5f..c3df429 100644
--- a/Eigen/src/plugins/IndexedViewMethods.inc
+++ b/Eigen/src/plugins/IndexedViewMethods.inc
@@ -9,51 +9,47 @@
 
 #if !defined(EIGEN_PARSED_BY_DOXYGEN)
 
-protected:
+public:
 // define some aliases to ease readability
 
 template <typename Indices>
-using IvcRowType = typename internal::IndexedViewCompatibleType<Indices, RowsAtCompileTime>::type;
+using IvcRowType = typename internal::IndexedViewHelperIndicesWrapper<Indices, RowsAtCompileTime>::type;
 
 template <typename Indices>
-using IvcColType = typename internal::IndexedViewCompatibleType<Indices, ColsAtCompileTime>::type;
+using IvcColType = typename internal::IndexedViewHelperIndicesWrapper<Indices, ColsAtCompileTime>::type;
 
 template <typename Indices>
-using IvcType = typename internal::IndexedViewCompatibleType<Indices, SizeAtCompileTime>::type;
-
-typedef typename internal::IndexedViewCompatibleType<Index, 1>::type IvcIndex;
+using IvcSizeType = typename internal::IndexedViewHelperIndicesWrapper<Indices, SizeAtCompileTime>::type;
 
 template <typename Indices>
 inline IvcRowType<Indices> ivcRow(const Indices& indices) const {
-  return internal::makeIndexedViewCompatible(
-      indices, internal::variable_if_dynamic<Index, RowsAtCompileTime>(derived().rows()), Specialized);
+  return internal::IndexedViewHelperIndicesWrapper<Indices, RowsAtCompileTime>::CreateIndexSequence(indices,
+                                                                                                    derived().rows());
 }
 
 template <typename Indices>
 inline IvcColType<Indices> ivcCol(const Indices& indices) const {
-  return internal::makeIndexedViewCompatible(
-      indices, internal::variable_if_dynamic<Index, ColsAtCompileTime>(derived().cols()), Specialized);
+  return internal::IndexedViewHelperIndicesWrapper<Indices, ColsAtCompileTime>::CreateIndexSequence(indices,
+                                                                                                    derived().cols());
 }
 
 template <typename Indices>
-inline IvcType<Indices> ivcSize(const Indices& indices) const {
-  return internal::makeIndexedViewCompatible(
-      indices, internal::variable_if_dynamic<Index, SizeAtCompileTime>(derived().size()), Specialized);
+inline IvcSizeType<Indices> ivcSize(const Indices& indices) const {
+  return internal::IndexedViewHelperIndicesWrapper<Indices, SizeAtCompileTime>::CreateIndexSequence(indices,
+                                                                                                    derived().size());
 }
 
 // this helper class assumes internal::valid_indexed_view_overload<RowIndices, ColIndices>::value == true
-template <typename RowIndices, typename ColIndices,
-          bool UseSymbolic =
-              internal::traits<IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>>::ReturnAsScalar,
-          bool UseBlock =
-              internal::traits<IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>>::ReturnAsBlock,
-          bool UseGeneric = internal::traits<
-              IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>>::ReturnAsIndexedView>
+template <typename RowIndices, typename ColIndices, typename EnableIf = void>
 struct IndexedViewSelector;
 
 // Generic
 template <typename RowIndices, typename ColIndices>
-struct IndexedViewSelector<RowIndices, ColIndices, false, false, true> {
+struct IndexedViewSelector<
+    RowIndices, ColIndices,
+    std::enable_if_t<
+        internal::traits<IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>>::ReturnAsIndexedView>> {
   using ReturnType = IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>;
   using ConstReturnType = IndexedView<const Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>;
 
@@ -68,60 +64,73 @@
 
 // Block
 template <typename RowIndices, typename ColIndices>
-struct IndexedViewSelector<RowIndices, ColIndices, false, true, false> {
-  using IndexedViewType = IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>;
-  using ConstIndexedViewType = IndexedView<const Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>;
+struct IndexedViewSelector<RowIndices, ColIndices,
+                           std::enable_if_t<internal::traits<
+                               IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>>::ReturnAsBlock>> {
+  using ActualRowIndices = IvcRowType<RowIndices>;
+  using ActualColIndices = IvcColType<ColIndices>;
+  using IndexedViewType = IndexedView<Derived, ActualRowIndices, ActualColIndices>;
+  using ConstIndexedViewType = IndexedView<const Derived, ActualRowIndices, ActualColIndices>;
   using ReturnType = typename internal::traits<IndexedViewType>::BlockType;
   using ConstReturnType = typename internal::traits<ConstIndexedViewType>::BlockType;
+  using RowHelper = internal::IndexedViewHelper<ActualRowIndices>;
+  using ColHelper = internal::IndexedViewHelper<ActualColIndices>;
 
   static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) {
-    IvcRowType<RowIndices> actualRowIndices = derived.ivcRow(rowIndices);
-    IvcColType<ColIndices> actualColIndices = derived.ivcCol(colIndices);
-    return ReturnType(derived, internal::first(actualRowIndices), internal::first(actualColIndices),
-                      internal::index_list_size(actualRowIndices), internal::index_list_size(actualColIndices));
+    auto actualRowIndices = derived.ivcRow(rowIndices);
+    auto actualColIndices = derived.ivcCol(colIndices);
+    return ReturnType(derived, RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices),
+                      RowHelper::size(actualRowIndices), ColHelper::size(actualColIndices));
   }
   static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices,
                                     const ColIndices& colIndices) {
-    IvcRowType<RowIndices> actualRowIndices = derived.ivcRow(rowIndices);
-    IvcColType<ColIndices> actualColIndices = derived.ivcCol(colIndices);
-    return ConstReturnType(derived, internal::first(actualRowIndices), internal::first(actualColIndices),
-                           internal::index_list_size(actualRowIndices), internal::index_list_size(actualColIndices));
+    auto actualRowIndices = derived.ivcRow(rowIndices);
+    auto actualColIndices = derived.ivcCol(colIndices);
+    return ConstReturnType(derived, RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices),
+                           RowHelper::size(actualRowIndices), ColHelper::size(actualColIndices));
   }
 };
 
-// Symbolic
+// Scalar
 template <typename RowIndices, typename ColIndices>
-struct IndexedViewSelector<RowIndices, ColIndices, true, false, false> {
+struct IndexedViewSelector<RowIndices, ColIndices,
+                           std::enable_if_t<internal::traits<
+                               IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>>::ReturnAsScalar>> {
   using ReturnType = typename DenseBase<Derived>::Scalar&;
   using ConstReturnType = typename DenseBase<Derived>::CoeffReturnType;
-
+  using ActualRowIndices = IvcRowType<RowIndices>;
+  using ActualColIndices = IvcColType<ColIndices>;
+  using RowHelper = internal::IndexedViewHelper<ActualRowIndices>;
+  using ColHelper = internal::IndexedViewHelper<ActualColIndices>;
   static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) {
-    return derived(internal::eval_expr_given_size(rowIndices, derived.rows()),
-                   internal::eval_expr_given_size(colIndices, derived.cols()));
+    auto actualRowIndices = derived.ivcRow(rowIndices);
+    auto actualColIndices = derived.ivcCol(colIndices);
+    return derived(RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices));
   }
   static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices,
                                     const ColIndices& colIndices) {
-    return derived(internal::eval_expr_given_size(rowIndices, derived.rows()),
-                   internal::eval_expr_given_size(colIndices, derived.cols()));
+    auto actualRowIndices = derived.ivcRow(rowIndices);
+    auto actualColIndices = derived.ivcCol(colIndices);
+    return derived(RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices));
   }
 };
 
 // this helper class assumes internal::is_valid_index_type<Indices>::value == false
-template <typename Indices, bool UseSymbolic = symbolic::is_symbolic<Indices>::value,
-          bool UseBlock = !UseSymbolic && internal::get_compile_time_incr<IvcType<Indices>>::value == 1,
-          bool UseGeneric = !UseSymbolic && !UseBlock>
+template <typename Indices, typename EnableIf = void>
 struct VectorIndexedViewSelector;
 
 // Generic
 template <typename Indices>
-struct VectorIndexedViewSelector<Indices, false, false, true> {
+struct VectorIndexedViewSelector<
+    Indices, std::enable_if_t<!internal::is_single_range<IvcSizeType<Indices>>::value &&
+                              internal::IndexedViewHelper<IvcSizeType<Indices>>::IncrAtCompileTime != 1>> {
   static constexpr bool IsRowMajor = DenseBase<Derived>::IsRowMajor;
+  using ZeroIndex = internal::SingleRange<Index(0)>;
+  using RowMajorReturnType = IndexedView<Derived, ZeroIndex, IvcSizeType<Indices>>;
+  using ConstRowMajorReturnType = IndexedView<const Derived, ZeroIndex, IvcSizeType<Indices>>;
 
-  using RowMajorReturnType = IndexedView<Derived, IvcIndex, IvcType<Indices>>;
-  using ConstRowMajorReturnType = IndexedView<const Derived, IvcIndex, IvcType<Indices>>;
-
-  using ColMajorReturnType = IndexedView<Derived, IvcType<Indices>, IvcIndex>;
-  using ConstColMajorReturnType = IndexedView<const Derived, IvcType<Indices>, IvcIndex>;
+  using ColMajorReturnType = IndexedView<Derived, IvcSizeType<Indices>, ZeroIndex>;
+  using ConstColMajorReturnType = IndexedView<const Derived, IvcSizeType<Indices>, ZeroIndex>;
 
   using ReturnType = typename internal::conditional<IsRowMajor, RowMajorReturnType, ColMajorReturnType>::type;
   using ConstReturnType =
@@ -129,49 +138,53 @@
 
   template <bool UseRowMajor = IsRowMajor, std::enable_if_t<UseRowMajor, bool> = true>
   static inline RowMajorReturnType run(Derived& derived, const Indices& indices) {
-    return RowMajorReturnType(derived, IvcIndex(0), derived.ivcCol(indices));
+    return RowMajorReturnType(derived, ZeroIndex(0), derived.ivcCol(indices));
   }
   template <bool UseRowMajor = IsRowMajor, std::enable_if_t<UseRowMajor, bool> = true>
   static inline ConstRowMajorReturnType run(const Derived& derived, const Indices& indices) {
-    return ConstRowMajorReturnType(derived, IvcIndex(0), derived.ivcCol(indices));
+    return ConstRowMajorReturnType(derived, ZeroIndex(0), derived.ivcCol(indices));
   }
   template <bool UseRowMajor = IsRowMajor, std::enable_if_t<!UseRowMajor, bool> = true>
   static inline ColMajorReturnType run(Derived& derived, const Indices& indices) {
-    return ColMajorReturnType(derived, derived.ivcRow(indices), IvcIndex(0));
+    return ColMajorReturnType(derived, derived.ivcRow(indices), ZeroIndex(0));
   }
   template <bool UseRowMajor = IsRowMajor, std::enable_if_t<!UseRowMajor, bool> = true>
   static inline ConstColMajorReturnType run(const Derived& derived, const Indices& indices) {
-    return ConstColMajorReturnType(derived, derived.ivcRow(indices), IvcIndex(0));
+    return ConstColMajorReturnType(derived, derived.ivcRow(indices), ZeroIndex(0));
   }
 };
 
 // Block
 template <typename Indices>
-struct VectorIndexedViewSelector<Indices, false, true, false> {
-  using ReturnType = VectorBlock<Derived, internal::array_size<Indices>::value>;
-  using ConstReturnType = VectorBlock<const Derived, internal::array_size<Indices>::value>;
-
+struct VectorIndexedViewSelector<
+    Indices, std::enable_if_t<!internal::is_single_range<IvcSizeType<Indices>>::value &&
+                              internal::IndexedViewHelper<IvcSizeType<Indices>>::IncrAtCompileTime == 1>> {
+  using Helper = internal::IndexedViewHelper<IvcSizeType<Indices>>;
+  using ReturnType = VectorBlock<Derived, Helper::SizeAtCompileTime>;
+  using ConstReturnType = VectorBlock<const Derived, Helper::SizeAtCompileTime>;
   static inline ReturnType run(Derived& derived, const Indices& indices) {
-    IvcType<Indices> actualIndices = derived.ivcSize(indices);
-    return ReturnType(derived, internal::first(actualIndices), internal::index_list_size(actualIndices));
+    auto actualIndices = derived.ivcSize(indices);
+    return ReturnType(derived, Helper::first(actualIndices), Helper::size(actualIndices));
   }
   static inline ConstReturnType run(const Derived& derived, const Indices& indices) {
-    IvcType<Indices> actualIndices = derived.ivcSize(indices);
-    return ConstReturnType(derived, internal::first(actualIndices), internal::index_list_size(actualIndices));
+    auto actualIndices = derived.ivcSize(indices);
+    return ConstReturnType(derived, Helper::first(actualIndices), Helper::size(actualIndices));
   }
 };
 
 // Symbolic
 template <typename Indices>
-struct VectorIndexedViewSelector<Indices, true, false, false> {
+struct VectorIndexedViewSelector<Indices, std::enable_if_t<internal::is_single_range<IvcSizeType<Indices>>::value>> {
   using ReturnType = typename DenseBase<Derived>::Scalar&;
   using ConstReturnType = typename DenseBase<Derived>::CoeffReturnType;
-
-  static inline ReturnType run(Derived& derived, const Indices& id) {
-    return derived(internal::eval_expr_given_size(id, derived.size()));
+  using Helper = internal::IndexedViewHelper<IvcSizeType<Indices>>;
+  static inline ReturnType run(Derived& derived, const Indices& indices) {
+    auto actualIndices = derived.ivcSize(indices);
+    return derived(Helper::first(actualIndices));
   }
-  static inline ConstReturnType run(const Derived& derived, const Indices& id) {
-    return derived(internal::eval_expr_given_size(id, derived.size()));
+  static inline ConstReturnType run(const Derived& derived, const Indices& indices) {
+    auto actualIndices = derived.ivcSize(indices);
+    return derived(Helper::first(actualIndices));
   }
 };
 
diff --git a/Eigen/src/plugins/MatrixCwiseUnaryOps.inc b/Eigen/src/plugins/MatrixCwiseUnaryOps.inc
index b23f4a5..325b0fb 100644
--- a/Eigen/src/plugins/MatrixCwiseUnaryOps.inc
+++ b/Eigen/src/plugins/MatrixCwiseUnaryOps.inc
@@ -17,6 +17,7 @@
 typedef CwiseUnaryOp<internal::scalar_carg_op<Scalar>, const Derived> CwiseCArgReturnType;
 typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> CwiseSqrtReturnType;
 typedef CwiseUnaryOp<internal::scalar_cbrt_op<Scalar>, const Derived> CwiseCbrtReturnType;
+typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> CwiseSquareReturnType;
 typedef CwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> CwiseSignReturnType;
 typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> CwiseInverseReturnType;
 
@@ -66,7 +67,15 @@
 ///
 /// \sa cwiseSqrt(), cwiseSquare(), cwisePow()
 ///
-EIGEN_DEVICE_FUNC inline const CwiseCbrtReturnType cwiseCbrt() const { return CwiseSCbrtReturnType(derived()); }
+EIGEN_DEVICE_FUNC inline const CwiseCbrtReturnType cwiseCbrt() const { return CwiseCbrtReturnType(derived()); }
+
+/// \returns an expression of the coefficient-wise square of *this.
+///
+EIGEN_DOC_UNARY_ADDONS(cwiseSquare, square)
+///
+/// \sa cwisePow(), cwiseSqrt(), cwiseCbrt()
+///
+EIGEN_DEVICE_FUNC inline const CwiseSquareReturnType cwiseSquare() const { return CwiseSquareReturnType(derived()); }
 
 /// \returns an expression of the coefficient-wise signum of *this.
 ///
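Usage of the newly added cwiseSquare():

    Eigen::Matrix2d m;
    m << 1, -2,
         3, -4;
    Eigen::Matrix2d sq = m.cwiseSquare();  // 1, 4, 9, 16 (same as m.cwiseProduct(m))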
diff --git a/blas/BandTriangularSolver.h b/blas/BandTriangularSolver.h
index 014af24..f9bfdc1 100644
--- a/blas/BandTriangularSolver.h
+++ b/blas/BandTriangularSolver.h
@@ -10,6 +10,7 @@
 #ifndef EIGEN_BAND_TRIANGULARSOLVER_H
 #define EIGEN_BAND_TRIANGULARSOLVER_H
 
+namespace Eigen {
 namespace internal {
 
 /* \internal
@@ -77,6 +78,7 @@
   }
 };
 
-}  // end namespace internal
+}  // namespace internal
+}  // namespace Eigen
 
 #endif  // EIGEN_BAND_TRIANGULARSOLVER_H
diff --git a/blas/GeneralRank1Update.h b/blas/GeneralRank1Update.h
index dd363e5..e6c3cab 100644
--- a/blas/GeneralRank1Update.h
+++ b/blas/GeneralRank1Update.h
@@ -10,6 +10,7 @@
 #ifndef EIGEN_GENERAL_RANK1UPDATE_H
 #define EIGEN_GENERAL_RANK1UPDATE_H
 
+namespace Eigen {
 namespace internal {
 
 /* Optimized matrix += alpha * uv' */
@@ -35,6 +36,7 @@
   }
 };
 
-}  // end namespace internal
+}  // namespace internal
+}  // namespace Eigen
 
 #endif  // EIGEN_GENERAL_RANK1UPDATE_H
diff --git a/blas/PackedSelfadjointProduct.h b/blas/PackedSelfadjointProduct.h
index 655da51..5109960 100644
--- a/blas/PackedSelfadjointProduct.h
+++ b/blas/PackedSelfadjointProduct.h
@@ -10,6 +10,7 @@
 #ifndef EIGEN_SELFADJOINT_PACKED_PRODUCT_H
 #define EIGEN_SELFADJOINT_PACKED_PRODUCT_H
 
+namespace Eigen {
 namespace internal {
 
 /* Optimized matrix += alpha * uv'
@@ -45,6 +46,7 @@
   }
 };
 
-}  // end namespace internal
+}  // namespace internal
+}  // namespace Eigen
 
 #endif  // EIGEN_SELFADJOINT_PACKED_PRODUCT_H
diff --git a/blas/PackedTriangularMatrixVector.h b/blas/PackedTriangularMatrixVector.h
index bb830cb..4e8e085 100644
--- a/blas/PackedTriangularMatrixVector.h
+++ b/blas/PackedTriangularMatrixVector.h
@@ -10,6 +10,7 @@
 #ifndef EIGEN_PACKED_TRIANGULAR_MATRIX_VECTOR_H
 #define EIGEN_PACKED_TRIANGULAR_MATRIX_VECTOR_H
 
+namespace Eigen {
 namespace internal {
 
 template <typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,
@@ -75,6 +76,7 @@
   };
 };
 
-}  // end namespace internal
+}  // namespace internal
+}  // namespace Eigen
 
 #endif  // EIGEN_PACKED_TRIANGULAR_MATRIX_VECTOR_H
diff --git a/blas/PackedTriangularSolverVector.h b/blas/PackedTriangularSolverVector.h
index 6a1a8c1..92964fb 100644
--- a/blas/PackedTriangularSolverVector.h
+++ b/blas/PackedTriangularSolverVector.h
@@ -10,6 +10,7 @@
 #ifndef EIGEN_PACKED_TRIANGULAR_SOLVER_VECTOR_H
 #define EIGEN_PACKED_TRIANGULAR_SOLVER_VECTOR_H
 
+namespace Eigen {
 namespace internal {
 
 template <typename LhsScalar, typename RhsScalar, typename Index, int Side, int Mode, bool Conjugate, int StorageOrder>
@@ -69,6 +70,7 @@
   }
 };
 
-}  // end namespace internal
+}  // namespace internal
+}  // namespace Eigen
 
 #endif  // EIGEN_PACKED_TRIANGULAR_SOLVER_VECTOR_H
diff --git a/blas/Rank2Update.h b/blas/Rank2Update.h
index e5046cf..9cb96ee 100644
--- a/blas/Rank2Update.h
+++ b/blas/Rank2Update.h
@@ -10,6 +10,7 @@
 #ifndef EIGEN_RANK2UPDATE_H
 #define EIGEN_RANK2UPDATE_H
 
+namespace Eigen {
 namespace internal {
 
 /* Optimized selfadjoint matrix += alpha * uv' + conj(alpha)*vu'
@@ -49,6 +50,7 @@
   }
 };
 
-}  // end namespace internal
+}  // namespace internal
+}  // namespace Eigen
 
 #endif  // EIGEN_RANK2UPDATE_H
diff --git a/blas/common.h b/blas/common.h
index 2456273..9e46b6e 100644
--- a/blas/common.h
+++ b/blas/common.h
@@ -29,6 +29,13 @@
 
 #include "blas.h"
 
+#include "BandTriangularSolver.h"
+#include "GeneralRank1Update.h"
+#include "PackedSelfadjointProduct.h"
+#include "PackedTriangularMatrixVector.h"
+#include "PackedTriangularSolverVector.h"
+#include "Rank2Update.h"
+
 #define NOTR 0
 #define TR 1
 #define ADJ 2
@@ -59,58 +66,58 @@
 
 inline bool check_uplo(const char* uplo) { return UPLO(*uplo) != 0xff; }
 
-namespace Eigen {
-#include "BandTriangularSolver.h"
-#include "GeneralRank1Update.h"
-#include "PackedSelfadjointProduct.h"
-#include "PackedTriangularMatrixVector.h"
-#include "PackedTriangularSolverVector.h"
-#include "Rank2Update.h"
-}  // namespace Eigen
-
-using namespace Eigen;
-
 typedef SCALAR Scalar;
-typedef NumTraits<Scalar>::Real RealScalar;
+typedef Eigen::NumTraits<Scalar>::Real RealScalar;
 typedef std::complex<RealScalar> Complex;
 
 enum { IsComplex = Eigen::NumTraits<SCALAR>::IsComplex, Conj = IsComplex };
 
-typedef Matrix<Scalar, Dynamic, Dynamic, ColMajor> PlainMatrixType;
-typedef Map<Matrix<Scalar, Dynamic, Dynamic, ColMajor>, 0, OuterStride<> > MatrixType;
-typedef Map<const Matrix<Scalar, Dynamic, Dynamic, ColMajor>, 0, OuterStride<> > ConstMatrixType;
-typedef Map<Matrix<Scalar, Dynamic, 1>, 0, InnerStride<Dynamic> > StridedVectorType;
-typedef Map<Matrix<Scalar, Dynamic, 1> > CompactVectorType;
+typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor> PlainMatrixType;
+typedef Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>, 0, Eigen::OuterStride<> >
+    MatrixType;
+typedef Eigen::Map<const Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>, 0,
+                   Eigen::OuterStride<> >
+    ConstMatrixType;
+typedef Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, 1>, 0, Eigen::InnerStride<Eigen::Dynamic> > StridedVectorType;
+typedef Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, 1> > CompactVectorType;
 
 template <typename T>
-Map<Matrix<T, Dynamic, Dynamic, ColMajor>, 0, OuterStride<> > matrix(T* data, int rows, int cols, int stride) {
-  return Map<Matrix<T, Dynamic, Dynamic, ColMajor>, 0, OuterStride<> >(data, rows, cols, OuterStride<>(stride));
+Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>, 0, Eigen::OuterStride<> > matrix(
+    T* data, int rows, int cols, int stride) {
+  return Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>, 0, Eigen::OuterStride<> >(
+      data, rows, cols, Eigen::OuterStride<>(stride));
 }
 
 template <typename T>
-Map<const Matrix<T, Dynamic, Dynamic, ColMajor>, 0, OuterStride<> > matrix(const T* data, int rows, int cols,
-                                                                           int stride) {
-  return Map<const Matrix<T, Dynamic, Dynamic, ColMajor>, 0, OuterStride<> >(data, rows, cols, OuterStride<>(stride));
+Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>, 0, Eigen::OuterStride<> > matrix(
+    const T* data, int rows, int cols, int stride) {
+  return Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>, 0, Eigen::OuterStride<> >(
+      data, rows, cols, Eigen::OuterStride<>(stride));
 }
 
 template <typename T>
-Map<Matrix<T, Dynamic, 1>, 0, InnerStride<Dynamic> > make_vector(T* data, int size, int incr) {
-  return Map<Matrix<T, Dynamic, 1>, 0, InnerStride<Dynamic> >(data, size, InnerStride<Dynamic>(incr));
+Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 1>, 0, Eigen::InnerStride<Eigen::Dynamic> > make_vector(T* data, int size,
+                                                                                                    int incr) {
+  return Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 1>, 0, Eigen::InnerStride<Eigen::Dynamic> >(
+      data, size, Eigen::InnerStride<Eigen::Dynamic>(incr));
 }
 
 template <typename T>
-Map<const Matrix<T, Dynamic, 1>, 0, InnerStride<Dynamic> > make_vector(const T* data, int size, int incr) {
-  return Map<const Matrix<T, Dynamic, 1>, 0, InnerStride<Dynamic> >(data, size, InnerStride<Dynamic>(incr));
+Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>, 0, Eigen::InnerStride<Eigen::Dynamic> > make_vector(const T* data,
+                                                                                                          int size,
+                                                                                                          int incr) {
+  return Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>, 0, Eigen::InnerStride<Eigen::Dynamic> >(
+      data, size, Eigen::InnerStride<Eigen::Dynamic>(incr));
 }
 
 template <typename T>
-Map<Matrix<T, Dynamic, 1> > make_vector(T* data, int size) {
-  return Map<Matrix<T, Dynamic, 1> >(data, size);
+Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 1> > make_vector(T* data, int size) {
+  return Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 1> >(data, size);
 }
 
 template <typename T>
-Map<const Matrix<T, Dynamic, 1> > make_vector(const T* data, int size) {
-  return Map<const Matrix<T, Dynamic, 1> >(data, size);
+Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1> > make_vector(const T* data, int size) {
+  return Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1> >(data, size);
 }
 
 template <typename T>
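With "using namespace Eigen;" removed from common.h, every Eigen name in the BLAS layer has to be spelled with full qualification, which is what the long mechanical rewrites above are doing. A small sketch of the effect (ExampleVectorType is a hypothetical alias, not part of the patch):

    #include <Eigen/Core>

    // Unqualified spellings such as Map<Matrix<double, Dynamic, 1> > no
    // longer resolve at global scope in these translation units:
    typedef Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, 1> > ExampleVectorType;
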
diff --git a/blas/double.cpp b/blas/double.cpp
index 4298665..28a2563 100644
--- a/blas/double.cpp
+++ b/blas/double.cpp
@@ -19,7 +19,7 @@
 #include "level2_real_impl.h"
 #include "level3_impl.h"
 
-double EIGEN_BLAS_FUNC_NAME(sdot)(int* n, float* x, int* incx, float* y, int* incy) {
+extern "C" double EIGEN_BLAS_FUNC_NAME(sdot)(int* n, float* x, int* incx, float* y, int* incy) {
   if (*n <= 0) return 0;
 
   if (*incx == 1 && *incy == 1)
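The sdot wrapper returns double (the f2c convention for the single-precision dot product) and sits outside the usual EIGEN_BLAS_FUNC definitions, so it needs its own extern "C" to get C linkage. A sketch of why that matters, assuming the common trailing-underscore Fortran decoration (the real exported name comes from EIGEN_BLAS_FUNC_NAME and is platform-dependent):

    // C linkage suppresses C++ name mangling, so the exported symbol matches
    // what Fortran callers link against:
    extern "C" double sdot_(int* n, float* x, int* incx, float* y, int* incy);
    // Without extern "C", the compiler would emit a compiler-specific mangled
    // symbol and the BLAS entry point would be unresolved at link time.
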
diff --git a/blas/level1_cplx_impl.h b/blas/level1_cplx_impl.h
index be88b92..3181a50 100644
--- a/blas/level1_cplx_impl.h
+++ b/blas/level1_cplx_impl.h
@@ -11,7 +11,7 @@
 
 struct scalar_norm1_op {
   typedef RealScalar result_type;
-  inline RealScalar operator()(const Scalar &a) const { return numext::norm1(a); }
+  inline RealScalar operator()(const Scalar &a) const { return Eigen::numext::norm1(a); }
 };
 namespace Eigen {
 namespace internal {
@@ -40,7 +40,7 @@
   if (*n <= 0) return 0;
   Scalar *x = reinterpret_cast<Scalar *>(px);
 
-  DenseIndex ret;
+  Eigen::DenseIndex ret;
   if (*incx == 1)
     make_vector(x, *n).unaryExpr<scalar_norm1_op>().maxCoeff(&ret);
   else
@@ -52,7 +52,7 @@
   if (*n <= 0) return 0;
   Scalar *x = reinterpret_cast<Scalar *>(px);
 
-  DenseIndex ret;
+  Eigen::DenseIndex ret;
   if (*incx == 1)
     make_vector(x, *n).unaryExpr<scalar_norm1_op>().minCoeff(&ret);
   else
@@ -132,16 +132,16 @@
   StridedVectorType vx(make_vector(x, *n, std::abs(*incx)));
   StridedVectorType vy(make_vector(y, *n, std::abs(*incy)));
 
-  Reverse<StridedVectorType> rvx(vx);
-  Reverse<StridedVectorType> rvy(vy);
+  Eigen::Reverse<StridedVectorType> rvx(vx);
+  Eigen::Reverse<StridedVectorType> rvy(vy);
 
   // TODO implement mixed real-scalar rotations
   if (*incx < 0 && *incy > 0)
-    internal::apply_rotation_in_the_plane(rvx, vy, JacobiRotation<Scalar>(c, s));
+    Eigen::internal::apply_rotation_in_the_plane(rvx, vy, Eigen::JacobiRotation<Scalar>(c, s));
   else if (*incx > 0 && *incy < 0)
-    internal::apply_rotation_in_the_plane(vx, rvy, JacobiRotation<Scalar>(c, s));
+    Eigen::internal::apply_rotation_in_the_plane(vx, rvy, Eigen::JacobiRotation<Scalar>(c, s));
   else
-    internal::apply_rotation_in_the_plane(vx, vy, JacobiRotation<Scalar>(c, s));
+    Eigen::internal::apply_rotation_in_the_plane(vx, vy, Eigen::JacobiRotation<Scalar>(c, s));
 }
 
 EIGEN_BLAS_FUNC(EIGEN_CAT(REAL_SCALAR_SUFFIX, scal))(int *n, RealScalar *palpha, RealScalar *px, int *incx) {
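The rot hunks above (and the matching ones in level1_real_impl.h below) show the pattern used for negative increments: a negative incx or incy means the vector is traversed from its last element, so the code wraps the strided Map in Eigen::Reverse and passes that view to apply_rotation_in_the_plane. A self-contained sketch of the view (data values are made up):

    #include <Eigen/Core>

    typedef Eigen::Map<Eigen::VectorXd, 0, Eigen::InnerStride<Eigen::Dynamic> > StridedVec;

    double data[3] = {1.0, 2.0, 3.0};
    StridedVec vx(data, 3, Eigen::InnerStride<Eigen::Dynamic>(1));
    Eigen::Reverse<StridedVec> rvx(vx);  // rvx reads 3.0, 2.0, 1.0
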
diff --git a/blas/level1_impl.h b/blas/level1_impl.h
index 2422d10..a65af92 100644
--- a/blas/level1_impl.h
+++ b/blas/level1_impl.h
@@ -88,10 +88,10 @@
     a = b;
   } else {
     scale = abs(a) + abs(b);
-    norm = scale * sqrt((numext::abs2(a / scale)) + (numext::abs2(b / scale)));
+    norm = scale * sqrt((Eigen::numext::abs2(a / scale)) + (Eigen::numext::abs2(b / scale)));
     alpha = a / abs(a);
     *c = abs(a) / norm;
-    *s = alpha * numext::conj(b) / norm;
+    *s = alpha * Eigen::numext::conj(b) / norm;
     a = alpha * norm;
   }
 #endif
diff --git a/blas/level1_real_impl.h b/blas/level1_real_impl.h
index cd9c189..202f432 100644
--- a/blas/level1_real_impl.h
+++ b/blas/level1_real_impl.h
@@ -28,7 +28,7 @@
   if (*n <= 0) return 0;
   Scalar *x = reinterpret_cast<Scalar *>(px);
 
-  DenseIndex ret;
+  Eigen::DenseIndex ret;
   if (*incx == 1)
     make_vector(x, *n).cwiseAbs().maxCoeff(&ret);
   else
@@ -40,7 +40,7 @@
   if (*n <= 0) return 0;
   Scalar *x = reinterpret_cast<Scalar *>(px);
 
-  DenseIndex ret;
+  Eigen::DenseIndex ret;
   if (*incx == 1)
     make_vector(x, *n).cwiseAbs().minCoeff(&ret);
   else
@@ -97,15 +97,15 @@
   StridedVectorType vx(make_vector(x, *n, std::abs(*incx)));
   StridedVectorType vy(make_vector(y, *n, std::abs(*incy)));
 
-  Reverse<StridedVectorType> rvx(vx);
-  Reverse<StridedVectorType> rvy(vy);
+  Eigen::Reverse<StridedVectorType> rvx(vx);
+  Eigen::Reverse<StridedVectorType> rvy(vy);
 
   if (*incx < 0 && *incy > 0)
-    internal::apply_rotation_in_the_plane(rvx, vy, JacobiRotation<Scalar>(c, s));
+    Eigen::internal::apply_rotation_in_the_plane(rvx, vy, Eigen::JacobiRotation<Scalar>(c, s));
   else if (*incx > 0 && *incy < 0)
-    internal::apply_rotation_in_the_plane(vx, rvy, JacobiRotation<Scalar>(c, s));
+    Eigen::internal::apply_rotation_in_the_plane(vx, rvy, Eigen::JacobiRotation<Scalar>(c, s));
   else
-    internal::apply_rotation_in_the_plane(vx, vy, JacobiRotation<Scalar>(c, s));
+    Eigen::internal::apply_rotation_in_the_plane(vx, vy, Eigen::JacobiRotation<Scalar>(c, s));
 }
 
 /*
diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h
index f04dda1..d1ce492 100644
--- a/blas/level2_cplx_impl.h
+++ b/blas/level2_cplx_impl.h
@@ -22,9 +22,11 @@
   typedef void (*functype)(int, const Scalar *, int, const Scalar *, Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
-      (internal::selfadjoint_matrix_vector_product<Scalar, int, ColMajor, Upper, false, false>::run),
+      (Eigen::internal::selfadjoint_matrix_vector_product<Scalar, int, Eigen::ColMajor, Eigen::Upper, false,
+                                                          false>::run),
       // array index: LO
-      (internal::selfadjoint_matrix_vector_product<Scalar, int, ColMajor, Lower, false, false>::run),
+      (Eigen::internal::selfadjoint_matrix_vector_product<Scalar, int, Eigen::ColMajor, Eigen::Lower, false,
+                                                          false>::run),
   };
 
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
@@ -107,9 +109,9 @@
   typedef void (*functype)(int, Scalar *, const Scalar *, RealScalar);
   static const functype func[2] = {
       // array index: UP
-      (internal::selfadjoint_packed_rank1_update<Scalar, int, ColMajor, Upper, false, Conj>::run),
+      (Eigen::internal::selfadjoint_packed_rank1_update<Scalar, int, Eigen::ColMajor, Eigen::Upper, false, Conj>::run),
       // array index: LO
-      (internal::selfadjoint_packed_rank1_update<Scalar, int, ColMajor, Lower, false, Conj>::run),
+      (Eigen::internal::selfadjoint_packed_rank1_update<Scalar, int, Eigen::ColMajor, Eigen::Lower, false, Conj>::run),
   };
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
@@ -149,9 +151,9 @@
   typedef void (*functype)(int, Scalar *, const Scalar *, const Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
-      (internal::packed_rank2_update_selector<Scalar, int, Upper>::run),
+      (Eigen::internal::packed_rank2_update_selector<Scalar, int, Eigen::Upper>::run),
       // array index: LO
-      (internal::packed_rank2_update_selector<Scalar, int, Lower>::run),
+      (Eigen::internal::packed_rank2_update_selector<Scalar, int, Eigen::Lower>::run),
   };
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
@@ -195,9 +197,9 @@
   typedef void (*functype)(int, Scalar *, int, const Scalar *, const Scalar *, const Scalar &);
   static const functype func[2] = {
       // array index: UP
-      (selfadjoint_rank1_update<Scalar, int, ColMajor, Upper, false, Conj>::run),
+      (Eigen::selfadjoint_rank1_update<Scalar, int, Eigen::ColMajor, Eigen::Upper, false, Conj>::run),
       // array index: LO
-      (selfadjoint_rank1_update<Scalar, int, ColMajor, Lower, false, Conj>::run),
+      (Eigen::selfadjoint_rank1_update<Scalar, int, Eigen::ColMajor, Eigen::Lower, false, Conj>::run),
   };
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
@@ -242,9 +244,9 @@
   typedef void (*functype)(int, Scalar *, int, const Scalar *, const Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
-      (internal::rank2_update_selector<Scalar, int, Upper>::run),
+      (Eigen::internal::rank2_update_selector<Scalar, int, Eigen::Upper>::run),
       // array index: LO
-      (internal::rank2_update_selector<Scalar, int, Lower>::run),
+      (Eigen::internal::rank2_update_selector<Scalar, int, Eigen::Lower>::run),
   };
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
@@ -313,7 +315,8 @@
   Scalar *x_cpy = get_compact_vector(x, *m, *incx);
   Scalar *y_cpy = get_compact_vector(y, *n, *incy);
 
-  internal::general_rank1_update<Scalar, int, ColMajor, false, false>::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha);
+  Eigen::internal::general_rank1_update<Scalar, int, Eigen::ColMajor, false, false>::run(*m, *n, a, *lda, x_cpy, y_cpy,
+                                                                                         alpha);
 
   if (x_cpy != x) delete[] x_cpy;
   if (y_cpy != y) delete[] y_cpy;
@@ -351,7 +354,8 @@
   Scalar *x_cpy = get_compact_vector(x, *m, *incx);
   Scalar *y_cpy = get_compact_vector(y, *n, *incy);
 
-  internal::general_rank1_update<Scalar, int, ColMajor, false, Conj>::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha);
+  Eigen::internal::general_rank1_update<Scalar, int, Eigen::ColMajor, false, Conj>::run(*m, *n, a, *lda, x_cpy, y_cpy,
+                                                                                        alpha);
 
   if (x_cpy != x) delete[] x_cpy;
   if (y_cpy != y) delete[] y_cpy;
diff --git a/blas/level2_impl.h b/blas/level2_impl.h
index 5721ee6..ca9f48f 100644
--- a/blas/level2_impl.h
+++ b/blas/level2_impl.h
@@ -13,12 +13,13 @@
 struct general_matrix_vector_product_wrapper {
   static void run(Index rows, Index cols, const Scalar *lhs, Index lhsStride, const Scalar *rhs, Index rhsIncr,
                   Scalar *res, Index resIncr, Scalar alpha) {
-    typedef internal::const_blas_data_mapper<Scalar, Index, StorageOrder> LhsMapper;
-    typedef internal::const_blas_data_mapper<Scalar, Index, RowMajor> RhsMapper;
+    typedef Eigen::internal::const_blas_data_mapper<Scalar, Index, StorageOrder> LhsMapper;
+    typedef Eigen::internal::const_blas_data_mapper<Scalar, Index, Eigen::RowMajor> RhsMapper;
 
-    internal::general_matrix_vector_product<Index, Scalar, LhsMapper, StorageOrder, ConjugateLhs, Scalar, RhsMapper,
-                                            ConjugateRhs>::run(rows, cols, LhsMapper(lhs, lhsStride),
-                                                               RhsMapper(rhs, rhsIncr), res, resIncr, alpha);
+    Eigen::internal::general_matrix_vector_product<Index, Scalar, LhsMapper, StorageOrder, ConjugateLhs, Scalar,
+                                                   RhsMapper, ConjugateRhs>::run(rows, cols, LhsMapper(lhs, lhsStride),
+                                                                                 RhsMapper(rhs, rhsIncr), res, resIncr,
+                                                                                 alpha);
   }
 };
 
@@ -26,12 +27,13 @@
 (const char *opa, const int *m, const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda,
  const RealScalar *pb, const int *incb, const RealScalar *pbeta, RealScalar *pc, const int *incc) {
   typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar);
-  static const functype func[4] = {// array index: NOTR
-                                   (general_matrix_vector_product_wrapper<int, Scalar, ColMajor, false, false>::run),
-                                   // array index: TR
-                                   (general_matrix_vector_product_wrapper<int, Scalar, RowMajor, false, false>::run),
-                                   // array index: ADJ
-                                   (general_matrix_vector_product_wrapper<int, Scalar, RowMajor, Conj, false>::run), 0};
+  static const functype func[4] = {
+      // array index: NOTR
+      (general_matrix_vector_product_wrapper<int, Scalar, Eigen::ColMajor, false, false>::run),
+      // array index: TR
+      (general_matrix_vector_product_wrapper<int, Scalar, Eigen::RowMajor, false, false>::run),
+      // array index: ADJ
+      (general_matrix_vector_product_wrapper<int, Scalar, Eigen::RowMajor, Conj, false>::run), 0};
 
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
   const Scalar *b = reinterpret_cast<const Scalar *>(pb);
@@ -84,31 +86,43 @@
 (const char *uplo, const char *opa, const char *diag, const int *n, const RealScalar *pa, const int *lda,
  RealScalar *pb, const int *incb) {
   typedef void (*functype)(int, const Scalar *, int, Scalar *);
+  using Eigen::ColMajor;
+  using Eigen::Lower;
+  using Eigen::OnTheLeft;
+  using Eigen::RowMajor;
+  using Eigen::UnitDiag;
+  using Eigen::Upper;
   static const functype func[16] = {
       // array index: NOTR  | (UP << 2) | (NUNIT << 3)
-      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, false, ColMajor>::run),
+      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, false, ColMajor>::run),
       // array index: TR    | (UP << 2) | (NUNIT << 3)
-      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, false, RowMajor>::run),
+      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, false, RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (NUNIT << 3)
-      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, Conj, RowMajor>::run), 0,
+      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, Conj, RowMajor>::run), 0,
       // array index: NOTR  | (LO << 2) | (NUNIT << 3)
-      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, false, ColMajor>::run),
+      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, false, ColMajor>::run),
       // array index: TR    | (LO << 2) | (NUNIT << 3)
-      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, false, RowMajor>::run),
+      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, false, RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (NUNIT << 3)
-      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, Conj, RowMajor>::run), 0,
+      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, Conj, RowMajor>::run), 0,
       // array index: NOTR  | (UP << 2) | (UNIT  << 3)
-      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, false, ColMajor>::run),
+      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, false,
+                                                ColMajor>::run),
       // array index: TR    | (UP << 2) | (UNIT  << 3)
-      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, false, RowMajor>::run),
+      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, false,
+                                                RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (UNIT  << 3)
-      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, Conj, RowMajor>::run), 0,
+      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, Conj, RowMajor>::run),
+      0,
       // array index: NOTR  | (LO << 2) | (UNIT  << 3)
-      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, false, ColMajor>::run),
+      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, false,
+                                                ColMajor>::run),
       // array index: TR    | (LO << 2) | (UNIT  << 3)
-      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, false, RowMajor>::run),
+      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, false,
+                                                RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (UNIT  << 3)
-      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, Conj, RowMajor>::run), 0};
+      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, Conj, RowMajor>::run),
+      0};
 
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
   Scalar *b = reinterpret_cast<Scalar *>(pb);
@@ -140,32 +154,46 @@
 (const char *uplo, const char *opa, const char *diag, const int *n, const RealScalar *pa, const int *lda,
  RealScalar *pb, const int *incb) {
   typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, const Scalar &);
+  using Eigen::ColMajor;
+  using Eigen::Lower;
+  using Eigen::OnTheLeft;
+  using Eigen::RowMajor;
+  using Eigen::UnitDiag;
+  using Eigen::Upper;
   static const functype func[16] = {
       // array index: NOTR  | (UP << 2) | (NUNIT << 3)
-      (internal::triangular_matrix_vector_product<int, Upper | 0, Scalar, false, Scalar, false, ColMajor>::run),
+      (Eigen::internal::triangular_matrix_vector_product<int, Upper | 0, Scalar, false, Scalar, false, ColMajor>::run),
       // array index: TR    | (UP << 2) | (NUNIT << 3)
-      (internal::triangular_matrix_vector_product<int, Lower | 0, Scalar, false, Scalar, false, RowMajor>::run),
+      (Eigen::internal::triangular_matrix_vector_product<int, Lower | 0, Scalar, false, Scalar, false, RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (NUNIT << 3)
-      (internal::triangular_matrix_vector_product<int, Lower | 0, Scalar, Conj, Scalar, false, RowMajor>::run), 0,
+      (Eigen::internal::triangular_matrix_vector_product<int, Lower | 0, Scalar, Conj, Scalar, false, RowMajor>::run),
+      0,
       // array index: NOTR  | (LO << 2) | (NUNIT << 3)
-      (internal::triangular_matrix_vector_product<int, Lower | 0, Scalar, false, Scalar, false, ColMajor>::run),
+      (Eigen::internal::triangular_matrix_vector_product<int, Lower | 0, Scalar, false, Scalar, false, ColMajor>::run),
       // array index: TR    | (LO << 2) | (NUNIT << 3)
-      (internal::triangular_matrix_vector_product<int, Upper | 0, Scalar, false, Scalar, false, RowMajor>::run),
+      (Eigen::internal::triangular_matrix_vector_product<int, Upper | 0, Scalar, false, Scalar, false, RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (NUNIT << 3)
-      (internal::triangular_matrix_vector_product<int, Upper | 0, Scalar, Conj, Scalar, false, RowMajor>::run), 0,
+      (Eigen::internal::triangular_matrix_vector_product<int, Upper | 0, Scalar, Conj, Scalar, false, RowMajor>::run),
+      0,
       // array index: NOTR  | (UP << 2) | (UNIT  << 3)
-      (internal::triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, false, Scalar, false, ColMajor>::run),
+      (Eigen::internal::triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, false, Scalar, false,
+                                                         ColMajor>::run),
       // array index: TR    | (UP << 2) | (UNIT  << 3)
-      (internal::triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, false, Scalar, false, RowMajor>::run),
+      (Eigen::internal::triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, false, Scalar, false,
+                                                         RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (UNIT  << 3)
-      (internal::triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, Conj, Scalar, false, RowMajor>::run),
+      (Eigen::internal::triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, Conj, Scalar, false,
+                                                         RowMajor>::run),
       0,
       // array index: NOTR  | (LO << 2) | (UNIT  << 3)
-      (internal::triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, false, Scalar, false, ColMajor>::run),
+      (Eigen::internal::triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, false, Scalar, false,
+                                                         ColMajor>::run),
       // array index: TR    | (LO << 2) | (UNIT  << 3)
-      (internal::triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, false, Scalar, false, RowMajor>::run),
+      (Eigen::internal::triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, false, Scalar, false,
+                                                         RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (UNIT  << 3)
-      (internal::triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, Conj, Scalar, false, RowMajor>::run),
+      (Eigen::internal::triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, Conj, Scalar, false,
+                                                         RowMajor>::run),
       0};
 
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
@@ -189,7 +217,7 @@
   if (*n == 0) return;
 
   Scalar *actual_b = get_compact_vector(b, *n, *incb);
-  Matrix<Scalar, Dynamic, 1> res(*n);
+  Eigen::Matrix<Scalar, Eigen::Dynamic, 1> res(*n);
   res.setZero();
 
   int code = OP(*opa) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3);
@@ -345,34 +373,40 @@
 EIGEN_BLAS_FUNC(tbsv)
 (char *uplo, char *op, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx) {
   typedef void (*functype)(int, int, const Scalar *, int, Scalar *);
+  using Eigen::ColMajor;
+  using Eigen::Lower;
+  using Eigen::OnTheLeft;
+  using Eigen::RowMajor;
+  using Eigen::UnitDiag;
+  using Eigen::Upper;
   static const functype func[16] = {
       // array index: NOTR  | (UP << 2) | (NUNIT << 3)
-      (internal::band_solve_triangular_selector<int, Upper | 0, Scalar, false, Scalar, ColMajor>::run),
+      (Eigen::internal::band_solve_triangular_selector<int, Upper | 0, Scalar, false, Scalar, ColMajor>::run),
       // array index: TR    | (UP << 2) | (NUNIT << 3)
-      (internal::band_solve_triangular_selector<int, Lower | 0, Scalar, false, Scalar, RowMajor>::run),
+      (Eigen::internal::band_solve_triangular_selector<int, Lower | 0, Scalar, false, Scalar, RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (NUNIT << 3)
-      (internal::band_solve_triangular_selector<int, Lower | 0, Scalar, Conj, Scalar, RowMajor>::run),
+      (Eigen::internal::band_solve_triangular_selector<int, Lower | 0, Scalar, Conj, Scalar, RowMajor>::run),
       0,
       // array index: NOTR  | (LO << 2) | (NUNIT << 3)
-      (internal::band_solve_triangular_selector<int, Lower | 0, Scalar, false, Scalar, ColMajor>::run),
+      (Eigen::internal::band_solve_triangular_selector<int, Lower | 0, Scalar, false, Scalar, ColMajor>::run),
       // array index: TR    | (LO << 2) | (NUNIT << 3)
-      (internal::band_solve_triangular_selector<int, Upper | 0, Scalar, false, Scalar, RowMajor>::run),
+      (Eigen::internal::band_solve_triangular_selector<int, Upper | 0, Scalar, false, Scalar, RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (NUNIT << 3)
-      (internal::band_solve_triangular_selector<int, Upper | 0, Scalar, Conj, Scalar, RowMajor>::run),
+      (Eigen::internal::band_solve_triangular_selector<int, Upper | 0, Scalar, Conj, Scalar, RowMajor>::run),
       0,
       // array index: NOTR  | (UP << 2) | (UNIT  << 3)
-      (internal::band_solve_triangular_selector<int, Upper | UnitDiag, Scalar, false, Scalar, ColMajor>::run),
+      (Eigen::internal::band_solve_triangular_selector<int, Upper | UnitDiag, Scalar, false, Scalar, ColMajor>::run),
       // array index: TR    | (UP << 2) | (UNIT  << 3)
-      (internal::band_solve_triangular_selector<int, Lower | UnitDiag, Scalar, false, Scalar, RowMajor>::run),
+      (Eigen::internal::band_solve_triangular_selector<int, Lower | UnitDiag, Scalar, false, Scalar, RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (UNIT  << 3)
-      (internal::band_solve_triangular_selector<int, Lower | UnitDiag, Scalar, Conj, Scalar, RowMajor>::run),
+      (Eigen::internal::band_solve_triangular_selector<int, Lower | UnitDiag, Scalar, Conj, Scalar, RowMajor>::run),
       0,
       // array index: NOTR  | (LO << 2) | (UNIT  << 3)
-      (internal::band_solve_triangular_selector<int, Lower | UnitDiag, Scalar, false, Scalar, ColMajor>::run),
+      (Eigen::internal::band_solve_triangular_selector<int, Lower | UnitDiag, Scalar, false, Scalar, ColMajor>::run),
       // array index: TR    | (LO << 2) | (UNIT  << 3)
-      (internal::band_solve_triangular_selector<int, Upper | UnitDiag, Scalar, false, Scalar, RowMajor>::run),
+      (Eigen::internal::band_solve_triangular_selector<int, Upper | UnitDiag, Scalar, false, Scalar, RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (UNIT  << 3)
-      (internal::band_solve_triangular_selector<int, Upper | UnitDiag, Scalar, Conj, Scalar, RowMajor>::run),
+      (Eigen::internal::band_solve_triangular_selector<int, Upper | UnitDiag, Scalar, Conj, Scalar, RowMajor>::run),
       0,
   };
 
@@ -420,40 +454,52 @@
  */
 EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) {
   typedef void (*functype)(int, const Scalar *, const Scalar *, Scalar *, Scalar);
+  using Eigen::ColMajor;
+  using Eigen::Lower;
+  using Eigen::OnTheLeft;
+  using Eigen::RowMajor;
+  using Eigen::UnitDiag;
+  using Eigen::Upper;
   static const functype func[16] = {
       // array index: NOTR  | (UP << 2) | (NUNIT << 3)
-      (internal::packed_triangular_matrix_vector_product<int, Upper | 0, Scalar, false, Scalar, false, ColMajor>::run),
+      (Eigen::internal::packed_triangular_matrix_vector_product<int, Upper | 0, Scalar, false, Scalar, false,
+                                                                ColMajor>::run),
       // array index: TR    | (UP << 2) | (NUNIT << 3)
-      (internal::packed_triangular_matrix_vector_product<int, Lower | 0, Scalar, false, Scalar, false, RowMajor>::run),
+      (Eigen::internal::packed_triangular_matrix_vector_product<int, Lower | 0, Scalar, false, Scalar, false,
+                                                                RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (NUNIT << 3)
-      (internal::packed_triangular_matrix_vector_product<int, Lower | 0, Scalar, Conj, Scalar, false, RowMajor>::run),
+      (Eigen::internal::packed_triangular_matrix_vector_product<int, Lower | 0, Scalar, Conj, Scalar, false,
+                                                                RowMajor>::run),
       0,
       // array index: NOTR  | (LO << 2) | (NUNIT << 3)
-      (internal::packed_triangular_matrix_vector_product<int, Lower | 0, Scalar, false, Scalar, false, ColMajor>::run),
+      (Eigen::internal::packed_triangular_matrix_vector_product<int, Lower | 0, Scalar, false, Scalar, false,
+                                                                ColMajor>::run),
       // array index: TR    | (LO << 2) | (NUNIT << 3)
-      (internal::packed_triangular_matrix_vector_product<int, Upper | 0, Scalar, false, Scalar, false, RowMajor>::run),
+      (Eigen::internal::packed_triangular_matrix_vector_product<int, Upper | 0, Scalar, false, Scalar, false,
+                                                                RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (NUNIT << 3)
-      (internal::packed_triangular_matrix_vector_product<int, Upper | 0, Scalar, Conj, Scalar, false, RowMajor>::run),
+      (Eigen::internal::packed_triangular_matrix_vector_product<int, Upper | 0, Scalar, Conj, Scalar, false,
+                                                                RowMajor>::run),
       0,
       // array index: NOTR  | (UP << 2) | (UNIT  << 3)
-      (internal::packed_triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, false, Scalar, false,
-                                                         ColMajor>::run),
+      (Eigen::internal::packed_triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, false, Scalar, false,
+                                                                ColMajor>::run),
       // array index: TR    | (UP << 2) | (UNIT  << 3)
-      (internal::packed_triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, false, Scalar, false,
-                                                         RowMajor>::run),
+      (Eigen::internal::packed_triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, false, Scalar, false,
+                                                                RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (UNIT  << 3)
-      (internal::packed_triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, Conj, Scalar, false,
-                                                         RowMajor>::run),
+      (Eigen::internal::packed_triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, Conj, Scalar, false,
+                                                                RowMajor>::run),
       0,
       // array index: NOTR  | (LO << 2) | (UNIT  << 3)
-      (internal::packed_triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, false, Scalar, false,
-                                                         ColMajor>::run),
+      (Eigen::internal::packed_triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, false, Scalar, false,
+                                                                ColMajor>::run),
       // array index: TR    | (LO << 2) | (UNIT  << 3)
-      (internal::packed_triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, false, Scalar, false,
-                                                         RowMajor>::run),
+      (Eigen::internal::packed_triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, false, Scalar, false,
+                                                                RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (UNIT  << 3)
-      (internal::packed_triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, Conj, Scalar, false,
-                                                         RowMajor>::run),
+      (Eigen::internal::packed_triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, Conj, Scalar, false,
+                                                                RowMajor>::run),
       0};
 
   Scalar *ap = reinterpret_cast<Scalar *>(pap);
@@ -475,7 +521,7 @@
   if (*n == 0) return;
 
   Scalar *actual_x = get_compact_vector(x, *n, *incx);
-  Matrix<Scalar, Dynamic, 1> res(*n);
+  Eigen::Matrix<Scalar, Eigen::Dynamic, 1> res(*n);
   res.setZero();
 
   int code = OP(*opa) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3);
@@ -499,36 +545,50 @@
  */
 EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) {
   typedef void (*functype)(int, const Scalar *, Scalar *);
+  using Eigen::ColMajor;
+  using Eigen::Lower;
+  using Eigen::OnTheLeft;
+  using Eigen::RowMajor;
+  using Eigen::UnitDiag;
+  using Eigen::Upper;
   static const functype func[16] = {
       // array index: NOTR  | (UP << 2) | (NUNIT << 3)
-      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, false, ColMajor>::run),
+      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, false,
+                                                       ColMajor>::run),
       // array index: TR    | (UP << 2) | (NUNIT << 3)
-      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, false, RowMajor>::run),
+      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, false,
+                                                       RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (NUNIT << 3)
-      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, Conj, RowMajor>::run), 0,
+      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, Conj, RowMajor>::run),
+      0,
       // array index: NOTR  | (LO << 2) | (NUNIT << 3)
-      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, false, ColMajor>::run),
+      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, false,
+                                                       ColMajor>::run),
       // array index: TR    | (LO << 2) | (NUNIT << 3)
-      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, false, RowMajor>::run),
+      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, false,
+                                                       RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (NUNIT << 3)
-      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, Conj, RowMajor>::run), 0,
+      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, Conj, RowMajor>::run),
+      0,
       // array index: NOTR  | (UP << 2) | (UNIT  << 3)
-      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, false,
-                                                ColMajor>::run),
+      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, false,
+                                                       ColMajor>::run),
       // array index: TR    | (UP << 2) | (UNIT  << 3)
-      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, false,
-                                                RowMajor>::run),
+      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, false,
+                                                       RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (UNIT  << 3)
-      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, Conj, RowMajor>::run),
+      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, Conj,
+                                                       RowMajor>::run),
       0,
       // array index: NOTR  | (LO << 2) | (UNIT  << 3)
-      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, false,
-                                                ColMajor>::run),
+      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, false,
+                                                       ColMajor>::run),
       // array index: TR    | (LO << 2) | (UNIT  << 3)
-      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, false,
-                                                RowMajor>::run),
+      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, false,
+                                                       RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (UNIT  << 3)
-      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, Conj, RowMajor>::run),
+      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, Conj,
+                                                       RowMajor>::run),
       0};
 
   Scalar *ap = reinterpret_cast<Scalar *>(pap);
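Every 16-entry dispatch table in this file (trmv, trsv, tbsv, tpmv, tpsv) is indexed by the same packed code built from the option characters, with NOTR=0/TR=1/ADJ=2 as defined in common.h and UP/LO, NUNIT/UNIT encoded as 0/1 (as the table ordering implies). A worked example:

    // code = OP(*opa) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3)
    // e.g. opa = 'C' (ADJ = 2), uplo = 'L' (LO = 1), diag = 'U' (UNIT = 1):
    //   code = 2 | (1 << 2) | (1 << 3) = 14
    // which selects the "ADJ | (LO << 2) | (UNIT << 3)" entry. The zero
    // entries at indices 3, 7, 11 and 15 are padding: OP() never yields 3
    // for a valid op character, and invalid arguments are rejected before
    // the table is consulted.
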
diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h
index 5653767..415944c 100644
--- a/blas/level2_real_impl.h
+++ b/blas/level2_real_impl.h
@@ -14,11 +14,14 @@
 (const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda, const RealScalar *px,
  const int *incx, const RealScalar *pbeta, RealScalar *py, const int *incy) {
   typedef void (*functype)(int, const Scalar *, int, const Scalar *, Scalar *, Scalar);
+  using Eigen::ColMajor;
+  using Eigen::Lower;
+  using Eigen::Upper;
   static const functype func[2] = {
       // array index: UP
-      (internal::selfadjoint_matrix_vector_product<Scalar, int, ColMajor, Upper, false, false>::run),
+      (Eigen::internal::selfadjoint_matrix_vector_product<Scalar, int, ColMajor, Upper, false, false>::run),
       // array index: LO
-      (internal::selfadjoint_matrix_vector_product<Scalar, int, ColMajor, Lower, false, false>::run),
+      (Eigen::internal::selfadjoint_matrix_vector_product<Scalar, int, ColMajor, Lower, false, false>::run),
   };
 
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
@@ -67,11 +70,14 @@
 (const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, RealScalar *pc,
  const int *ldc) {
   typedef void (*functype)(int, Scalar *, int, const Scalar *, const Scalar *, const Scalar &);
+  using Eigen::ColMajor;
+  using Eigen::Lower;
+  using Eigen::Upper;
   static const functype func[2] = {
       // array index: UP
-      (selfadjoint_rank1_update<Scalar, int, ColMajor, Upper, false, Conj>::run),
+      (Eigen::selfadjoint_rank1_update<Scalar, int, ColMajor, Upper, false, Conj>::run),
       // array index: LO
-      (selfadjoint_rank1_update<Scalar, int, ColMajor, Lower, false, Conj>::run),
+      (Eigen::selfadjoint_rank1_update<Scalar, int, ColMajor, Lower, false, Conj>::run),
   };
 
   const Scalar *x = reinterpret_cast<const Scalar *>(px);
@@ -109,9 +115,9 @@
   typedef void (*functype)(int, Scalar *, int, const Scalar *, const Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
-      (internal::rank2_update_selector<Scalar, int, Upper>::run),
+      (Eigen::internal::rank2_update_selector<Scalar, int, Eigen::Upper>::run),
       // array index: LO
-      (internal::rank2_update_selector<Scalar, int, Lower>::run),
+      (Eigen::internal::rank2_update_selector<Scalar, int, Eigen::Lower>::run),
   };
 
   const Scalar *x = reinterpret_cast<const Scalar *>(px);
@@ -190,9 +196,9 @@
   typedef void (*functype)(int, Scalar *, const Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
-      (internal::selfadjoint_packed_rank1_update<Scalar, int, ColMajor, Upper, false, false>::run),
+      (Eigen::internal::selfadjoint_packed_rank1_update<Scalar, int, Eigen::ColMajor, Eigen::Upper, false, false>::run),
       // array index: LO
-      (internal::selfadjoint_packed_rank1_update<Scalar, int, ColMajor, Lower, false, false>::run),
+      (Eigen::internal::selfadjoint_packed_rank1_update<Scalar, int, Eigen::ColMajor, Eigen::Lower, false, false>::run),
   };
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
@@ -232,9 +238,9 @@
   typedef void (*functype)(int, Scalar *, const Scalar *, const Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
-      (internal::packed_rank2_update_selector<Scalar, int, Upper>::run),
+      (Eigen::internal::packed_rank2_update_selector<Scalar, int, Eigen::Upper>::run),
       // array index: LO
-      (internal::packed_rank2_update_selector<Scalar, int, Lower>::run),
+      (Eigen::internal::packed_rank2_update_selector<Scalar, int, Eigen::Lower>::run),
   };
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
@@ -299,7 +305,8 @@
   Scalar *x_cpy = get_compact_vector(x, *m, *incx);
   Scalar *y_cpy = get_compact_vector(y, *n, *incy);
 
-  internal::general_rank1_update<Scalar, int, ColMajor, false, false>::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha);
+  Eigen::internal::general_rank1_update<Scalar, int, Eigen::ColMajor, false, false>::run(*m, *n, a, *lda, x_cpy, y_cpy,
+                                                                                         alpha);
 
   if (x_cpy != x) delete[] x_cpy;
   if (y_cpy != y) delete[] y_cpy;
diff --git a/blas/level3_impl.h b/blas/level3_impl.h
index a6ddf26..66a7d46 100644
--- a/blas/level3_impl.h
+++ b/blas/level3_impl.h
@@ -15,39 +15,43 @@
  const int *ldc) {
   //   std::cerr << "in gemm " << *opa << " " << *opb << " " << *m << " " << *n << " " << *k << " " << *lda << " " <<
   //   *ldb << " " << *ldc << " " << *palpha << " " << *pbeta << "\n";
+  using Eigen::ColMajor;
+  using Eigen::DenseIndex;
+  using Eigen::Dynamic;
+  using Eigen::RowMajor;
   typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex,
                            Scalar *, DenseIndex, DenseIndex, Scalar, Eigen::internal::level3_blocking<Scalar, Scalar> &,
                            Eigen::internal::GemmParallelInfo<DenseIndex> *);
   static const functype func[12] = {
       // array index: NOTR  | (NOTR << 2)
-      (internal::general_matrix_matrix_product<DenseIndex, Scalar, ColMajor, false, Scalar, ColMajor, false, ColMajor,
-                                               1>::run),
+      (Eigen::internal::general_matrix_matrix_product<DenseIndex, Scalar, ColMajor, false, Scalar, ColMajor, false,
+                                                      ColMajor, 1>::run),
       // array index: TR    | (NOTR << 2)
-      (internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, false, Scalar, ColMajor, false, ColMajor,
-                                               1>::run),
+      (Eigen::internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, false, Scalar, ColMajor, false,
+                                                      ColMajor, 1>::run),
       // array index: ADJ   | (NOTR << 2)
-      (internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor, false, ColMajor,
-                                               1>::run),
+      (Eigen::internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor, false,
+                                                      ColMajor, 1>::run),
       0,
       // array index: NOTR  | (TR   << 2)
-      (internal::general_matrix_matrix_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor, false, ColMajor,
-                                               1>::run),
+      (Eigen::internal::general_matrix_matrix_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor, false,
+                                                      ColMajor, 1>::run),
       // array index: TR    | (TR   << 2)
-      (internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, false, Scalar, RowMajor, false, ColMajor,
-                                               1>::run),
+      (Eigen::internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, false, Scalar, RowMajor, false,
+                                                      ColMajor, 1>::run),
       // array index: ADJ   | (TR   << 2)
-      (internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, RowMajor, false, ColMajor,
-                                               1>::run),
+      (Eigen::internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, RowMajor, false,
+                                                      ColMajor, 1>::run),
       0,
       // array index: NOTR  | (ADJ  << 2)
-      (internal::general_matrix_matrix_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor, Conj, ColMajor,
-                                               1>::run),
+      (Eigen::internal::general_matrix_matrix_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor, Conj,
+                                                      ColMajor, 1>::run),
       // array index: TR    | (ADJ  << 2)
-      (internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, false, Scalar, RowMajor, Conj, ColMajor,
-                                               1>::run),
+      (Eigen::internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, false, Scalar, RowMajor, Conj,
+                                                      ColMajor, 1>::run),
       // array index: ADJ   | (ADJ  << 2)
-      (internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, RowMajor, Conj, ColMajor,
-                                               1>::run),
+      (Eigen::internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, RowMajor, Conj,
+                                                      ColMajor, 1>::run),
       0};
 
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
@@ -86,7 +90,8 @@
 
   if (*k == 0) return;
 
-  internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*m, *n, *k, 1, true);
+  Eigen::internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*m, *n, *k, 1,
+                                                                                                     true);
 
   int code = OP(*opa) | (OP(*opb) << 2);
   func[code](*m, *n, *k, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking, 0);
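gemm dispatches the same way through a 12-entry table indexed by the two op codes; a worked example under the same encoding:

    // code = OP(*opa) | (OP(*opb) << 2)
    // e.g. opa = 'T' (TR = 1), opb = 'C' (ADJ = 2):
    //   code = 1 | (2 << 2) = 9  ->  the "TR | (ADJ << 2)" entry.
    // Indices 3, 7 and 11 stay zero because OP() only yields 0, 1 or 2 for
    // valid op characters.
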
@@ -97,76 +102,97 @@
  const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb) {
   //   std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " "
   //   << *palpha << " " << *lda << " " << *ldb<< "\n";
+  using Eigen::ColMajor;
+  using Eigen::DenseIndex;
+  using Eigen::Dynamic;
+  using Eigen::Lower;
+  using Eigen::OnTheLeft;
+  using Eigen::OnTheRight;
+  using Eigen::RowMajor;
+  using Eigen::UnitDiag;
+  using Eigen::Upper;
   typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, DenseIndex,
                            Eigen::internal::level3_blocking<Scalar, Scalar> &);
   static const functype func[32] = {
       // array index: NOTR  | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | 0, false, ColMajor, ColMajor, 1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | 0, false, ColMajor, ColMajor,
+                                                1>::run),
       // array index: TR    | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | 0, false, RowMajor, ColMajor, 1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | 0, false, RowMajor, ColMajor,
+                                                1>::run),
       // array index: ADJ   | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | 0, Conj, RowMajor, ColMajor, 1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | 0, Conj, RowMajor, ColMajor,
+                                                1>::run),
       0,
       // array index: NOTR  | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | 0, false, ColMajor, ColMajor, 1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | 0, false, ColMajor, ColMajor,
+                                                1>::run),
       // array index: TR    | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | 0, false, RowMajor, ColMajor, 1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | 0, false, RowMajor, ColMajor,
+                                                1>::run),
       // array index: ADJ   | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | 0, Conj, RowMajor, ColMajor, 1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | 0, Conj, RowMajor, ColMajor,
+                                                1>::run),
       0,
       // array index: NOTR  | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | 0, false, ColMajor, ColMajor, 1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | 0, false, ColMajor, ColMajor,
+                                                1>::run),
       // array index: TR    | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | 0, false, RowMajor, ColMajor, 1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | 0, false, RowMajor, ColMajor,
+                                                1>::run),
       // array index: ADJ   | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | 0, Conj, RowMajor, ColMajor, 1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | 0, Conj, RowMajor, ColMajor,
+                                                1>::run),
       0,
       // array index: NOTR  | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | 0, false, ColMajor, ColMajor, 1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | 0, false, ColMajor, ColMajor,
+                                                1>::run),
       // array index: TR    | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | 0, false, RowMajor, ColMajor, 1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | 0, false, RowMajor, ColMajor,
+                                                1>::run),
       // array index: ADJ   | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | 0, Conj, RowMajor, ColMajor, 1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | 0, Conj, RowMajor, ColMajor,
+                                                1>::run),
       0,
       // array index: NOTR  | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | UnitDiag, false, ColMajor, ColMajor,
-                                         1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | UnitDiag, false, ColMajor,
+                                                ColMajor, 1>::run),
       // array index: TR    | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | UnitDiag, false, RowMajor, ColMajor,
-                                         1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | UnitDiag, false, RowMajor,
+                                                ColMajor, 1>::run),
       // array index: ADJ   | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | UnitDiag, Conj, RowMajor, ColMajor,
-                                         1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | UnitDiag, Conj, RowMajor,
+                                                ColMajor, 1>::run),
       0,
       // array index: NOTR  | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | UnitDiag, false, ColMajor, ColMajor,
-                                         1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | UnitDiag, false, ColMajor,
+                                                ColMajor, 1>::run),
       // array index: TR    | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | UnitDiag, false, RowMajor, ColMajor,
-                                         1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | UnitDiag, false, RowMajor,
+                                                ColMajor, 1>::run),
       // array index: ADJ   | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | UnitDiag, Conj, RowMajor, ColMajor,
-                                         1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | UnitDiag, Conj, RowMajor,
+                                                ColMajor, 1>::run),
       0,
       // array index: NOTR  | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | UnitDiag, false, ColMajor, ColMajor,
-                                         1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | UnitDiag, false, ColMajor,
+                                                ColMajor, 1>::run),
       // array index: TR    | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | UnitDiag, false, RowMajor, ColMajor,
-                                         1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | UnitDiag, false, RowMajor,
+                                                ColMajor, 1>::run),
       // array index: ADJ   | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | UnitDiag, Conj, RowMajor, ColMajor,
-                                         1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | UnitDiag, Conj, RowMajor,
+                                                ColMajor, 1>::run),
       0,
       // array index: NOTR  | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | UnitDiag, false, ColMajor, ColMajor,
-                                         1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | UnitDiag, false, ColMajor,
+                                                ColMajor, 1>::run),
       // array index: TR    | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | UnitDiag, false, RowMajor, ColMajor,
-                                         1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | UnitDiag, false, RowMajor,
+                                                ColMajor, 1>::run),
       // array index: ADJ   | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)
-      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | UnitDiag, Conj, RowMajor, ColMajor,
-                                         1>::run),
+      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | UnitDiag, Conj, RowMajor,
+                                                ColMajor, 1>::run),
       0};
 
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
@@ -197,12 +223,12 @@
   int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4);
 
   if (SIDE(*side) == LEFT) {
-    internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic, 4> blocking(*m, *n, *m, 1,
-                                                                                                   false);
+    Eigen::internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic, 4> blocking(*m, *n, *m, 1,
+                                                                                                          false);
     func[code](*m, *n, a, *lda, b, 1, *ldb, blocking);
   } else {
-    internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic, 4> blocking(*m, *n, *n, 1,
-                                                                                                   false);
+    Eigen::internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic, 4> blocking(*m, *n, *n, 1,
+                                                                                                          false);
     func[code](*n, *m, a, *lda, b, 1, *ldb, blocking);
   }
 
@@ -216,89 +242,96 @@
  const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb) {
   //   std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " "
   //   << *lda << " " << *ldb << " " << *palpha << "\n";
+  using Eigen::ColMajor;
+  using Eigen::DenseIndex;
+  using Eigen::Dynamic;
+  using Eigen::Lower;
+  using Eigen::RowMajor;
+  using Eigen::UnitDiag;
+  using Eigen::Upper;
   typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex,
                            Scalar *, DenseIndex, DenseIndex, const Scalar &,
-                           internal::level3_blocking<Scalar, Scalar> &);
+                           Eigen::internal::level3_blocking<Scalar, Scalar> &);
   static const functype func[32] = {
       // array index: NOTR  | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, true, ColMajor, false, ColMajor, false,
-                                                  ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, true, ColMajor, false, ColMajor,
+                                                         false, ColMajor, 1>::run),
       // array index: TR    | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, true, RowMajor, false, ColMajor, false,
-                                                  ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, true, RowMajor, false, ColMajor,
+                                                         false, ColMajor, 1>::run),
       // array index: ADJ   | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, true, RowMajor, Conj, ColMajor, false,
-                                                  ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, true, RowMajor, Conj, ColMajor,
+                                                         false, ColMajor, 1>::run),
       0,
       // array index: NOTR  | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, false, ColMajor, false, ColMajor,
-                                                  false, ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, false, ColMajor, false,
+                                                         ColMajor, false, ColMajor, 1>::run),
       // array index: TR    | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, false, ColMajor, false, RowMajor,
-                                                  false, ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, false, ColMajor, false,
+                                                         RowMajor, false, ColMajor, 1>::run),
       // array index: ADJ   | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, false, ColMajor, false, RowMajor, Conj,
-                                                  ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, false, ColMajor, false,
+                                                         RowMajor, Conj, ColMajor, 1>::run),
       0,
       // array index: NOTR  | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, true, ColMajor, false, ColMajor, false,
-                                                  ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, true, ColMajor, false, ColMajor,
+                                                         false, ColMajor, 1>::run),
       // array index: TR    | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, true, RowMajor, false, ColMajor, false,
-                                                  ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, true, RowMajor, false, ColMajor,
+                                                         false, ColMajor, 1>::run),
       // array index: ADJ   | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, true, RowMajor, Conj, ColMajor, false,
-                                                  ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, true, RowMajor, Conj, ColMajor,
+                                                         false, ColMajor, 1>::run),
       0,
       // array index: NOTR  | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, false, ColMajor, false, ColMajor,
-                                                  false, ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, false, ColMajor, false,
+                                                         ColMajor, false, ColMajor, 1>::run),
       // array index: TR    | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, false, ColMajor, false, RowMajor,
-                                                  false, ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, false, ColMajor, false,
+                                                         RowMajor, false, ColMajor, 1>::run),
       // array index: ADJ   | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, false, ColMajor, false, RowMajor, Conj,
-                                                  ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, false, ColMajor, false,
+                                                         RowMajor, Conj, ColMajor, 1>::run),
       0,
       // array index: NOTR  | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, true, ColMajor, false, ColMajor,
-                                                  false, ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, true, ColMajor, false,
+                                                         ColMajor, false, ColMajor, 1>::run),
       // array index: TR    | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, true, RowMajor, false, ColMajor,
-                                                  false, ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, true, RowMajor, false,
+                                                         ColMajor, false, ColMajor, 1>::run),
       // array index: ADJ   | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, true, RowMajor, Conj, ColMajor,
-                                                  false, ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, true, RowMajor, Conj,
+                                                         ColMajor, false, ColMajor, 1>::run),
       0,
       // array index: NOTR  | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, false, ColMajor, false,
-                                                  ColMajor, false, ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, false, ColMajor, false,
+                                                         ColMajor, false, ColMajor, 1>::run),
       // array index: TR    | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, false, ColMajor, false,
-                                                  RowMajor, false, ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, false, ColMajor, false,
+                                                         RowMajor, false, ColMajor, 1>::run),
       // array index: ADJ   | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, false, ColMajor, false,
-                                                  RowMajor, Conj, ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, false, ColMajor, false,
+                                                         RowMajor, Conj, ColMajor, 1>::run),
       0,
       // array index: NOTR  | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, true, ColMajor, false, ColMajor,
-                                                  false, ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, true, ColMajor, false,
+                                                         ColMajor, false, ColMajor, 1>::run),
       // array index: TR    | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, true, RowMajor, false, ColMajor,
-                                                  false, ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, true, RowMajor, false,
+                                                         ColMajor, false, ColMajor, 1>::run),
       // array index: ADJ   | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, true, RowMajor, Conj, ColMajor,
-                                                  false, ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, true, RowMajor, Conj,
+                                                         ColMajor, false, ColMajor, 1>::run),
       0,
       // array index: NOTR  | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, false, ColMajor, false,
-                                                  ColMajor, false, ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, false, ColMajor, false,
+                                                         ColMajor, false, ColMajor, 1>::run),
       // array index: TR    | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, false, ColMajor, false,
-                                                  RowMajor, false, ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, false, ColMajor, false,
+                                                         RowMajor, false, ColMajor, 1>::run),
       // array index: ADJ   | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)
-      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, false, ColMajor, false,
-                                                  RowMajor, Conj, ColMajor, 1>::run),
+      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, false, ColMajor, false,
+                                                         RowMajor, Conj, ColMajor, 1>::run),
       0};
 
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
@@ -329,16 +362,16 @@
   if (*m == 0 || *n == 0) return;
 
   // FIXME find a way to avoid this copy
-  Matrix<Scalar, Dynamic, Dynamic, ColMajor> tmp = matrix(b, *m, *n, *ldb);
+  Eigen::Matrix<Scalar, Dynamic, Dynamic, ColMajor> tmp = matrix(b, *m, *n, *ldb);
   matrix(b, *m, *n, *ldb).setZero();
 
   if (SIDE(*side) == LEFT) {
-    internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic, 4> blocking(*m, *n, *m, 1,
-                                                                                                   false);
+    Eigen::internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic, 4> blocking(*m, *n, *m, 1,
+                                                                                                          false);
     func[code](*m, *n, *m, a, *lda, tmp.data(), tmp.outerStride(), b, 1, *ldb, alpha, blocking);
   } else {
-    internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic, 4> blocking(*m, *n, *n, 1,
-                                                                                                   false);
+    Eigen::internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic, 4> blocking(*m, *n, *n, 1,
+                                                                                                          false);
     func[code](*m, *n, *n, tmp.data(), tmp.outerStride(), a, *lda, b, 1, *ldb, alpha, blocking);
   }
 }
@@ -383,9 +416,15 @@
   if (*m == 0 || *n == 0) return;
 
   int size = (SIDE(*side) == LEFT) ? (*m) : (*n);
+  using Eigen::ColMajor;
+  using Eigen::DenseIndex;
+  using Eigen::Dynamic;
+  using Eigen::Lower;
+  using Eigen::RowMajor;
+  using Eigen::Upper;
 #if ISCOMPLEX
   // FIXME add support for symmetric complex matrix
-  Matrix<Scalar, Dynamic, Dynamic, ColMajor> matA(size, size);
+  Eigen::Matrix<Scalar, Dynamic, Dynamic, ColMajor> matA(size, size);
   if (UPLO(*uplo) == UP) {
     matA.triangularView<Upper>() = matrix(a, size, size, *lda);
     matA.triangularView<Lower>() = matrix(a, size, size, *lda).transpose();
@@ -398,24 +437,29 @@
   else if (SIDE(*side) == RIGHT)
     matrix(c, *m, *n, *ldc) += alpha * matrix(b, *m, *n, *ldb) * matA;
 #else
-  internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*m, *n, size, 1, false);
+  Eigen::internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*m, *n, size, 1,
+                                                                                                     false);
 
   if (SIDE(*side) == LEFT)
     if (UPLO(*uplo) == UP)
-      internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor, true, false, ColMajor, false, false, ColMajor,
-                                           1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking);
+      Eigen::internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor, true, false, ColMajor, false, false,
+                                                  ColMajor, 1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha,
+                                                                    blocking);
     else if (UPLO(*uplo) == LO)
-      internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, true, false, ColMajor, false, false, ColMajor,
-                                           1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking);
+      Eigen::internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, true, false, ColMajor, false, false,
+                                                  ColMajor, 1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha,
+                                                                    blocking);
     else
       return;
   else if (SIDE(*side) == RIGHT)
     if (UPLO(*uplo) == UP)
-      internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, false, false, RowMajor, true, false, ColMajor,
-                                           1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);
+      Eigen::internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, false, false, RowMajor, true, false,
+                                                  ColMajor, 1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha,
+                                                                    blocking);
     else if (UPLO(*uplo) == LO)
-      internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, false, false, ColMajor, true, false, ColMajor,
-                                           1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);
+      Eigen::internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, false, false, ColMajor, true, false,
+                                                  ColMajor, 1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha,
+                                                                    blocking);
     else
       return;
   else
@@ -430,29 +474,35 @@
  const int *lda, const RealScalar *pbeta, RealScalar *pc, const int *ldc) {
   //   std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " "
   //   << *pbeta << " " << *ldc << "\n";
+  using Eigen::ColMajor;
+  using Eigen::DenseIndex;
+  using Eigen::Dynamic;
+  using Eigen::Lower;
+  using Eigen::RowMajor;
+  using Eigen::Upper;
 #if !ISCOMPLEX
   typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *,
-                           DenseIndex, DenseIndex, const Scalar &, internal::level3_blocking<Scalar, Scalar> &);
+                           DenseIndex, DenseIndex, const Scalar &, Eigen::internal::level3_blocking<Scalar, Scalar> &);
   static const functype func[8] = {
       // array index: NOTR  | (UP << 2)
-      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor, Conj,
-                                                          ColMajor, 1, Upper>::run),
+      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor,
+                                                                 Conj, ColMajor, 1, Upper>::run),
       // array index: TR    | (UP << 2)
-      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, false, Scalar, ColMajor, Conj,
-                                                          ColMajor, 1, Upper>::run),
+      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, false, Scalar, ColMajor,
+                                                                 Conj, ColMajor, 1, Upper>::run),
       // array index: ADJ   | (UP << 2)
-      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor, false,
-                                                          ColMajor, 1, Upper>::run),
+      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor,
+                                                                 false, ColMajor, 1, Upper>::run),
       0,
       // array index: NOTR  | (LO << 2)
-      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor, Conj,
-                                                          ColMajor, 1, Lower>::run),
+      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor,
+                                                                 Conj, ColMajor, 1, Lower>::run),
       // array index: TR    | (LO << 2)
-      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, false, Scalar, ColMajor, Conj,
-                                                          ColMajor, 1, Lower>::run),
+      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, false, Scalar, ColMajor,
+                                                                 Conj, ColMajor, 1, Lower>::run),
       // array index: ADJ   | (LO << 2)
-      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor, false,
-                                                          ColMajor, 1, Lower>::run),
+      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor,
+                                                                 false, ColMajor, 1, Lower>::run),
       0};
 #endif
 
@@ -508,7 +558,8 @@
           alpha * matrix(a, *k, *n, *lda).transpose() * matrix(a, *k, *n, *lda);
   }
 #else
-  internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*n, *n, *k, 1, false);
+  Eigen::internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*n, *n, *k, 1,
+                                                                                                     false);
 
   int code = OP(*op) | (UPLO(*uplo) << 2);
   func[code](*n, *k, a, *lda, a, *lda, c, 1, *ldc, alpha, blocking);
@@ -546,6 +597,8 @@
     info = 12;
   if (info) return xerbla_(SCALAR_SUFFIX_UP "SYR2K", &info);
 
+  using Eigen::Lower;
+  using Eigen::Upper;
   if (beta != Scalar(1)) {
     if (UPLO(*uplo) == UP)
       if (beta == Scalar(0))
@@ -621,16 +674,25 @@
 
   if (*m == 0 || *n == 0) return;
 
+  using Eigen::ColMajor;
+  using Eigen::DenseIndex;
+  using Eigen::Dynamic;
+  using Eigen::RowMajor;
+  using Eigen::Upper;
+
   int size = (SIDE(*side) == LEFT) ? (*m) : (*n);
-  internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*m, *n, size, 1, false);
+  Eigen::internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*m, *n, size, 1,
+                                                                                                     false);
 
   if (SIDE(*side) == LEFT) {
     if (UPLO(*uplo) == UP)
-      internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor, true, Conj, ColMajor, false, false, ColMajor,
-                                           1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking);
+      Eigen::internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor, true, Conj, ColMajor, false, false,
+                                                  ColMajor, 1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha,
+                                                                    blocking);
     else if (UPLO(*uplo) == LO)
-      internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, true, false, ColMajor, false, false, ColMajor,
-                                           1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking);
+      Eigen::internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, true, false, ColMajor, false, false,
+                                                  ColMajor, 1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha,
+                                                                    blocking);
     else
       return;
   } else if (SIDE(*side) == RIGHT) {
@@ -642,8 +704,9 @@
 RowMajor,true,Conj,  ColMajor, 1>
 ::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);*/
     else if (UPLO(*uplo) == LO)
-      internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, false, false, ColMajor, true, false, ColMajor,
-                                           1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);
+      Eigen::internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, false, false, ColMajor, true, false,
+                                                  ColMajor, 1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha,
+                                                                    blocking);
     else
       return;
   } else {
@@ -658,25 +721,32 @@
  const int *lda, const RealScalar *pbeta, RealScalar *pc, const int *ldc) {
   //   std::cerr << "in herk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " "
   //   << *pbeta << " " << *ldc << "\n";
-
+  using Eigen::ColMajor;
+  using Eigen::DenseIndex;
+  using Eigen::Dynamic;
+  using Eigen::Lower;
+  using Eigen::RowMajor;
+  using Eigen::StrictlyLower;
+  using Eigen::StrictlyUpper;
+  using Eigen::Upper;
   typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *,
                            DenseIndex, DenseIndex, const Scalar &, Eigen::internal::level3_blocking<Scalar, Scalar> &);
   static const functype func[8] = {
       // array index: NOTR  | (UP << 2)
-      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor, Conj,
-                                                          ColMajor, 1, Upper>::run),
+      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor,
+                                                                 Conj, ColMajor, 1, Upper>::run),
       0,
       // array index: ADJ   | (UP << 2)
-      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor, false,
-                                                          ColMajor, 1, Upper>::run),
+      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor,
+                                                                 false, ColMajor, 1, Upper>::run),
       0,
       // array index: NOTR  | (LO << 2)
-      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor, Conj,
-                                                          ColMajor, 1, Lower>::run),
+      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor,
+                                                                 Conj, ColMajor, 1, Lower>::run),
       0,
       // array index: ADJ   | (LO << 2)
-      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor, false,
-                                                          ColMajor, 1, Lower>::run),
+      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor,
+                                                                 false, ColMajor, 1, Lower>::run),
       0};
 
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
@@ -722,7 +792,8 @@
   }
 
   if (*k > 0 && alpha != RealScalar(0)) {
-    internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*n, *n, *k, 1, false);
+    Eigen::internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*n, *n, *k, 1,
+                                                                                                       false);
     func[code](*n, *k, a, *lda, a, *lda, c, 1, *ldc, alpha, blocking);
     matrix(c, *n, *n, *ldc).diagonal().imag().setZero();
   }
@@ -759,6 +830,10 @@
     info = 12;
   if (info) return xerbla_(SCALAR_SUFFIX_UP "HER2K", &info);
 
+  using Eigen::Lower;
+  using Eigen::StrictlyLower;
+  using Eigen::StrictlyUpper;
+  using Eigen::Upper;
   if (beta != RealScalar(1)) {
     if (UPLO(*uplo) == UP)
       if (beta == Scalar(0))
@@ -783,20 +858,20 @@
     if (UPLO(*uplo) == UP) {
       matrix(c, *n, *n, *ldc).triangularView<Upper>() +=
           alpha * matrix(a, *n, *k, *lda) * matrix(b, *n, *k, *ldb).adjoint() +
-          numext::conj(alpha) * matrix(b, *n, *k, *ldb) * matrix(a, *n, *k, *lda).adjoint();
+          Eigen::numext::conj(alpha) * matrix(b, *n, *k, *ldb) * matrix(a, *n, *k, *lda).adjoint();
     } else if (UPLO(*uplo) == LO)
       matrix(c, *n, *n, *ldc).triangularView<Lower>() +=
           alpha * matrix(a, *n, *k, *lda) * matrix(b, *n, *k, *ldb).adjoint() +
-          numext::conj(alpha) * matrix(b, *n, *k, *ldb) * matrix(a, *n, *k, *lda).adjoint();
+          Eigen::numext::conj(alpha) * matrix(b, *n, *k, *ldb) * matrix(a, *n, *k, *lda).adjoint();
   } else if (OP(*op) == ADJ) {
     if (UPLO(*uplo) == UP)
       matrix(c, *n, *n, *ldc).triangularView<Upper>() +=
           alpha * matrix(a, *k, *n, *lda).adjoint() * matrix(b, *k, *n, *ldb) +
-          numext::conj(alpha) * matrix(b, *k, *n, *ldb).adjoint() * matrix(a, *k, *n, *lda);
+          Eigen::numext::conj(alpha) * matrix(b, *k, *n, *ldb).adjoint() * matrix(a, *k, *n, *lda);
     else if (UPLO(*uplo) == LO)
       matrix(c, *n, *n, *ldc).triangularView<Lower>() +=
           alpha * matrix(a, *k, *n, *lda).adjoint() * matrix(b, *k, *n, *ldb) +
-          numext::conj(alpha) * matrix(b, *k, *n, *ldb).adjoint() * matrix(a, *k, *n, *lda);
+          Eigen::numext::conj(alpha) * matrix(b, *k, *n, *ldb).adjoint() * matrix(a, *k, *n, *lda);
   }
 }
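
Note on the func[code] dispatch pattern that recurs throughout the level-3 routines above: each BLAS option character is decoded to a small integer and the results are packed into a single table index (two bits for the op, one bit each for side, uplo and diag), so trsm/trmm/symm can jump straight to the right internal kernel instantiation, with impossible combinations holding a null entry. Below is a minimal standalone sketch of that packing; the enum values mirror the conventions of Eigen's blas/common.h and are restated here as assumptions for illustration, not quoted from this patch.

#include <iostream>

// Decoded option values (assumed, following the blas/common.h style):
// op needs two bits, the remaining options one bit each.
enum { NOTR = 0, TR = 1, ADJ = 2 };  // 'N', 'T', 'C'
enum { LEFT = 0, RIGHT = 1 };        // side of the multiplication
enum { UP = 0, LO = 1 };             // which triangle is stored
enum { NUNIT = 0, UNIT = 1 };        // unit or non-unit diagonal

// Same packing as `OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) |
// (DIAG(*diag) << 4)` in the hunks above.
int pack(int op, int side, int uplo, int diag) {
  return op | (side << 2) | (uplo << 3) | (diag << 4);
}

int main() {
  std::cout << pack(NOTR, LEFT, UP, NUNIT) << "\n";  // 0  -> first table entry
  std::cout << pack(ADJ, RIGHT, LO, UNIT) << "\n";   // 30 -> last non-null entry
  // op == 3 never occurs, which is why every fourth slot in the
  // 32-entry tables above is a literal 0.
}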
 
diff --git a/debug/msvc/eigen.natvis b/debug/msvc/eigen.natvis
index da89857..22cf346 100644
--- a/debug/msvc/eigen.natvis
+++ b/debug/msvc/eigen.natvis
@@ -1,235 +1,235 @@
+<?xml version="1.0" encoding="utf-8"?>
+
+<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
+
+  <!-- Fixed x Fixed Matrix -->
+  <Type Name="Eigen::Matrix&lt;*,*,*,*,*,*&gt;">      
+      <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>
+      <DisplayString>[{$T2}, {$T3}] (fixed matrix)</DisplayString>
+      <Expand>
+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+          <Rank>2</Rank>
+          <Size>$i==0 ? $T2 : $T3</Size>
+          <ValuePointer>m_storage.m_data.array</ValuePointer>
+        </ArrayItems>
+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+          <Direction>Backward</Direction>
+          <Rank>2</Rank>
+          <Size>$i==0 ? $T2 : $T3</Size>
+          <ValuePointer>m_storage.m_data.array</ValuePointer>
+        </ArrayItems>
+      </Expand>
+  </Type>
+  
+  <!-- 2 x 2 Matrix -->
+  <Type Name="Eigen::Matrix&lt;*,2,2,*,*,*&gt;">      
+      <AlternativeType Name="Eigen::Array&lt;*,2,2,*,*,*&gt;"/>
+      <DisplayString>[2, 2] (fixed matrix)</DisplayString>
+      <Expand>
+        <Synthetic Name="[row 0]" Condition="Flags%2">
+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 0]" Condition="!(Flags%2)">
+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 1]" Condition="Flags%2">
+          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 1]" Condition="!(Flags%2)">
+          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]})</DisplayString>
+        </Synthetic>        
+      </Expand>
+  </Type>
+  
+  <!-- 3 x 3 Matrix -->
+  <Type Name="Eigen::Matrix&lt;*,3,3,*,*,*&gt;">      
+      <AlternativeType Name="Eigen::Array&lt;*,3,3,*,*,*&gt;"/>
+      <DisplayString>[3, 3] (fixed matrix)</DisplayString>
+      <Expand>
+        <Synthetic Name="[row 0]" Condition="Flags%2">
+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 0]" Condition="!(Flags%2)">
+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 1]" Condition="Flags%2">
+          <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 1]" Condition="!(Flags%2)">
+          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 2]" Condition="Flags%2">
+          <DisplayString>({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 2]" Condition="!(Flags%2)">
+          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]})</DisplayString>
+        </Synthetic>        
+      </Expand>
+  </Type>
+  
+  <!-- 4 x 4 Matrix -->
+  <Type Name="Eigen::Matrix&lt;*,4,4,*,*,*&gt;">      
+      <AlternativeType Name="Eigen::Array&lt;*,4,4,*,*,*&gt;"/>
+      <DisplayString>[4, 4] (fixed matrix)</DisplayString>
+      <Expand>
+        <Synthetic Name="[row 0]" Condition="Flags%2">
+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 0]" Condition="!(Flags%2)">
+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 1]" Condition="Flags%2">
+          <DisplayString>({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 1]" Condition="!(Flags%2)">
+          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 2]" Condition="Flags%2">
+          <DisplayString>({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 2]" Condition="!(Flags%2)">
+          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 3]" Condition="Flags%2">
+          <DisplayString>({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 3]" Condition="!(Flags%2)">
+          <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]})</DisplayString>
+        </Synthetic>        
+      </Expand>
+  </Type>  
+  
+  <!-- Dynamic x Dynamic Matrix -->
+  <Type Name="Eigen::Matrix&lt;*,-1,-1,*,*,*&gt;">      
+      <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>
+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix)</DisplayString>
+      <Expand>
+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+          <Rank>2</Rank>
+          <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
+          <ValuePointer>m_storage.m_data</ValuePointer>
+        </ArrayItems>
+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+          <Direction>Backward</Direction>
+          <Rank>2</Rank>
+          <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
+          <ValuePointer>m_storage.m_data</ValuePointer>
+        </ArrayItems>
+      </Expand>
+  </Type>
+  
+  <!-- Fixed x Dynamic Matrix -->
+  <Type Name="Eigen::Matrix&lt;*,*,-1,*,*,*&gt;">
+      <AlternativeType Name="Eigen::Array&lt;*,*,-1,*,*,*&gt;"/>
+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+      <DisplayString Condition="m_storage.m_data != 0">[{$T2}, {m_storage.m_cols}] (dynamic column matrix)</DisplayString>
+      <Expand>
+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+          <Rank>2</Rank>
+          <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
+          <ValuePointer>m_storage.m_data</ValuePointer>
+        </ArrayItems>
+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+          <Direction>Backward</Direction>
+          <Rank>2</Rank>
+          <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
+          <ValuePointer>m_storage.m_data</ValuePointer>
+        </ArrayItems>
+      </Expand>
+  </Type>
+  
+  <!-- Dynamic x Fixed Matrix -->
+  <Type Name="Eigen::Matrix&lt;*,-1,*,*,*,*&gt;">
+      <AlternativeType Name="Eigen::Array&lt;*,-1,*,*,*,*&gt;"/>
+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {$T2}] (dynamic row matrix)</DisplayString>
+      <Expand>
+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+          <Rank>2</Rank>
+          <Size>$i==0 ? m_storage.m_rows : $T2</Size>
+          <ValuePointer>m_storage.m_data</ValuePointer>
+        </ArrayItems>
+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+          <Direction>Backward</Direction>
+          <Rank>2</Rank>
+          <Size>$i==0 ? m_storage.m_rows : $T2</Size>
+          <ValuePointer>m_storage.m_data</ValuePointer>
+        </ArrayItems>
+      </Expand>
+  </Type>
+  
+  <!-- Dynamic Column Vector -->
+  <Type Name="Eigen::Matrix&lt;*,1,-1,*,*,*&gt;">
+      <AlternativeType Name="Eigen::Array&lt;*,1,-1,*,*,*&gt;"/>
+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_cols}] (dynamic column vector)</DisplayString>
+      <Expand>
+        <Item Name="[size]">m_storage.m_cols</Item>
+        <ArrayItems>
+          <Size>m_storage.m_cols</Size>
+          <ValuePointer>m_storage.m_data</ValuePointer>
+        </ArrayItems>
+      </Expand>
+  </Type>
+  
+  <!-- Dynamic Row Vector -->
+  <Type Name="Eigen::Matrix&lt;*,-1,1,*,*,*&gt;">
+      <AlternativeType Name="Eigen::Array&lt;*,-1,1,*,*,*&gt;"/>
+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}] (dynamic row vector)</DisplayString>
+      <Expand>
+        <Item Name="[size]">m_storage.m_rows</Item>
+        <ArrayItems>
+          <Size>m_storage.m_rows</Size>
+          <ValuePointer>m_storage.m_data</ValuePointer>
+        </ArrayItems>
+      </Expand>
+  </Type>
+  
+  <!-- Fixed Vector -->
+  <Type Name="Eigen::Matrix&lt;*,1,1,*,*,*&gt;">
+      <AlternativeType Name="Eigen::Array&lt;*,1,1,*,*,*&gt;"/>
+      <DisplayString>[1] ({m_storage.m_data.array[0]})</DisplayString>
+      <Expand>
+        <Item Name="[x]">m_storage.m_data.array[0]</Item>
+      </Expand>
+  </Type>
+  
+  <Type Name="Eigen::Matrix&lt;*,2,1,*,*,*&gt;">
+      <AlternativeType Name="Eigen::Matrix&lt;*,1,2,*,*,*&gt;"/>
+      <AlternativeType Name="Eigen::Array&lt;*,2,1,*,*,*&gt;"/>
+      <AlternativeType Name="Eigen::Array&lt;*,1,2,*,*,*&gt;"/>
+      <DisplayString>[2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
+      <Expand>
+        <Item Name="[x]">m_storage.m_data.array[0]</Item>
+        <Item Name="[y]">m_storage.m_data.array[1]</Item>
+      </Expand>
+  </Type>
+  
+  <Type Name="Eigen::Matrix&lt;*,3,1,*,*,*&gt;">
+      <AlternativeType Name="Eigen::Matrix&lt;*,1,3,*,*,*&gt;"/>
+      <AlternativeType Name="Eigen::Array&lt;*,3,1,*,*,*&gt;"/>
+      <AlternativeType Name="Eigen::Array&lt;*,1,3,*,*,*&gt;"/>
+      <DisplayString>[3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
+      <Expand>
+        <Item Name="[x]">m_storage.m_data.array[0]</Item>
+        <Item Name="[y]">m_storage.m_data.array[1]</Item>
+        <Item Name="[z]">m_storage.m_data.array[2]</Item>
+      </Expand>
+  </Type>
+  
+    <Type Name="Eigen::Matrix&lt;*,4,1,*,*,*&gt;">
+      <AlternativeType Name="Eigen::Matrix&lt;*,1,4,*,*,*&gt;"/>
+      <AlternativeType Name="Eigen::Array&lt;*,4,1,*,*,*&gt;"/>
+      <AlternativeType Name="Eigen::Array&lt;*,1,4,*,*,*&gt;"/>
+      <DisplayString>[4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
+      <Expand>
+        <Item Name="[x]">m_storage.m_data.array[0]</Item>
+        <Item Name="[y]">m_storage.m_data.array[1]</Item>
+        <Item Name="[z]">m_storage.m_data.array[2]</Item>
+        <Item Name="[w]">m_storage.m_data.array[3]</Item>
+      </Expand>
+  </Type>
+
+</AutoVisualizer>
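
The visualizer entries above all branch on Flags%2 to choose between row-major and column-major traversal: in Eigen, RowMajorBit is the 0x1 bit of an expression's Flags, so an odd Flags value indicates row-major storage (the autoexp visualizer below tests the same bit as $c.Flags & 0x1). A quick sanity check, assuming the Eigen headers are on the include path:

#include <Eigen/Core>
#include <iostream>

int main() {
  using ColM = Eigen::Matrix<float, 3, 3, Eigen::ColMajor>;
  using RowM = Eigen::Matrix<float, 3, 3, Eigen::RowMajor>;
  // RowMajorBit is the 0x1 bit of Flags, hence the Flags%2 tests above.
  std::cout << (ColM::Flags & Eigen::RowMajorBit) << "\n";  // prints 0
  std::cout << (RowM::Flags & Eigen::RowMajorBit) << "\n";  // prints 1
}
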
diff --git a/debug/msvc/eigen_autoexp_part.dat b/debug/msvc/eigen_autoexp_part.dat
index 35ef580..273c10d 100644
--- a/debug/msvc/eigen_autoexp_part.dat
+++ b/debug/msvc/eigen_autoexp_part.dat
@@ -1,295 +1,295 @@
-; ***************************************************************
-; * Eigen Visualizer
-; *
-; * Author: Hauke Heibel <hauke.heibel@gmail.com>
-; *
-; * Support the enhanced debugging of the following Eigen
-; * types (*: any, +:fixed dimension) :
-; *
-; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*>
-; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*>
-; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*>
-; * - Eigen::Matrix<*,-1,-1,*,*,*>
-; * - Eigen::Matrix<*,+,-1,*,*,*>
-; * - Eigen::Matrix<*,-1,+,*,*,*>
-; * - Eigen::Matrix<*,+,+,*,*,*>
-; *
-; * Matrices are displayed properly independently of the memory
-; * alignment (RowMajor vs. ColMajor).
-; *
-; * This file is distributed WITHOUT ANY WARRANTY. Please ensure
-; * that your original autoexp.dat file is copied to a safe 
-; * place before proceeding with its modification.
-; ***************************************************************
-
-[Visualizer]
-
-; Fixed size 4-vectors
-Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{
-   children
-   (
-      #(
-        [internals]: [$c,!],
-         x : ($c.m_storage.m_data.array)[0],
-         y : ($c.m_storage.m_data.array)[1],
-         z : ($c.m_storage.m_data.array)[2],
-         w : ($c.m_storage.m_data.array)[3]
-      )
-   )
-
-   preview
-   (
-      #(
-        "[",
-        4,
-        "](",
-        #array(expr: $e.m_storage.m_data.array[$i], size: 4),
-        ")"
-      )
-   )
-}
-
-; Fixed size 3-vectors
-Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{
-   children
-   (
-      #(
-        [internals]: [$c,!],
-         x : ($c.m_storage.m_data.array)[0],
-         y : ($c.m_storage.m_data.array)[1],
-         z : ($c.m_storage.m_data.array)[2]
-      )
-   )
-
-   preview
-   (
-      #(
-        "[",
-        3,
-        "](",
-        #array(expr: $e.m_storage.m_data.array[$i], size: 3),
-        ")"
-      )
-   )
-}
-
-; Fixed size 2-vectors
-Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{
-   children
-   (
-      #(
-        [internals]: [$c,!],
-         x : ($c.m_storage.m_data.array)[0],
-         y : ($c.m_storage.m_data.array)[1]
-      )
-   )
-
-   preview
-   (
-      #(
-        "[",
-        2,
-        "](",
-        #array(expr: $e.m_storage.m_data.array[$i], size: 2),
-        ")"
-      )
-   )
-}
-
-; Fixed size 1-vectors
-Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{
-   children
-   (
-      #(
-        [internals]: [$c,!],
-         x : ($c.m_storage.m_data.array)[0]
-      )
-   )
-
-   preview
-   (
-      #(
-        "[",
-        1,
-        "](",
-        #array(expr: $e.m_storage.m_data.array[$i], size: 1),
-        ")"
-      )
-   )
-}
-
-; Dynamic matrices (ColMajor and RowMajor support)
-Eigen::Matrix<*,-1,-1,*,*,*>{
-  children
-   (
-      #(
-         [internals]: [$c,!],
-         rows: $c.m_storage.m_rows,
-         cols: $c.m_storage.m_cols,
-         ; Check for RowMajorBit
-         #if ($c.Flags & 0x1) (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 
-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
-             )
-         ) #else (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[$i],
-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
-             )
-         )
-      )
-   )
-
-   preview
-   (
-     #(
-         "[",
-           $c.m_storage.m_rows,
-         ",",
-           $c.m_storage.m_cols,
-         "](",
-           #array(
-            expr :    [($c.m_storage.m_data)[$i],g],
-            size :    $c.m_storage.m_rows*$c.m_storage.m_cols
-           ),
-         ")"
-      )
-   )
-}
-
-; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support)
-Eigen::Matrix<*,*,-1,*,*,*>{
-  children
-   (
-      #(
-         [internals]: [$c,!],
-         rows: $c.RowsAtCompileTime,
-         cols: $c.m_storage.m_cols,
-         ; Check for RowMajorBit
-         #if ($c.Flags & 0x1) (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)],
-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
-             )
-         ) #else (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[$i],
-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
-             )
-         )
-      )
-   )
-
-   preview
-   (
-     #(
-         "[",
-           $c.RowsAtCompileTime,
-         ",",
-           $c.m_storage.m_cols,
-         "](",
-           #array(
-            expr :    [($c.m_storage.m_data)[$i],g],
-            size :    $c.RowsAtCompileTime*$c.m_storage.m_cols
-           ),
-         ")"
-      )
-   )
-}
-
-; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support)
-Eigen::Matrix<*,-1,*,*,*,*>{
-  children
-   (
-      #(
-         [internals]: [$c,!],
-         rows: $c.m_storage.m_rows,
-         cols: $c.ColsAtCompileTime,
-         ; Check for RowMajorBit
-         #if ($c.Flags & 0x1) (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 
-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
-             )
-         ) #else (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[$i],
-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
-             )
-         )
-      )
-   )
-
-   preview
-   (
-     #(
-         "[",
-           $c.m_storage.m_rows,
-         ",",
-           $c.ColsAtCompileTime,
-         "](",
-           #array(
-            expr :    [($c.m_storage.m_data)[$i],g],
-            size :    $c.m_storage.m_rows*$c.ColsAtCompileTime
-           ),
-         ")"
-      )
-   )
-}
-
-; Fixed size matrix (ColMajor and RowMajor support)
-Eigen::Matrix<*,*,*,*,*,*>{
-  children
-   (
-      #(
-         [internals]: [$c,!],
-         rows: $c.RowsAtCompileTime,
-         cols: $c.ColsAtCompileTime,
-         ; Check for RowMajorBit
-         #if ($c.Flags & 0x1) (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], 
-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
-             )
-         ) #else (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data.array)[$i],
-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
-             )
-         )
-      )
-   )
-
-   preview
-   (
-     #(
-         "[",
-           $c.RowsAtCompileTime,
-         ",",
-           $c.ColsAtCompileTime,
-         "](",
-           #array(
-            expr :    [($c.m_storage.m_data.array)[$i],g],
-            size :    $c.RowsAtCompileTime*$c.ColsAtCompileTime
-           ),
-         ")"
-      )
-   )
-}
+; ***************************************************************
+; * Eigen Visualizer
+; *
+; * Author: Hauke Heibel <hauke.heibel@gmail.com>
+; *
+; * Support the enhanced debugging of the following Eigen
+; * types (*: any, +:fixed dimension) :
+; *
+; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*>
+; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*>
+; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*>
+; * - Eigen::Matrix<*,-1,-1,*,*,*>
+; * - Eigen::Matrix<*,+,-1,*,*,*>
+; * - Eigen::Matrix<*,-1,+,*,*,*>
+; * - Eigen::Matrix<*,+,+,*,*,*>
+; *
+; * Matrices are displayed properly independently of the memory
+; * alignment (RowMajor vs. ColMajor).
+; *
+; * This file is distributed WITHOUT ANY WARRANTY. Please ensure
+; * that your original autoexp.dat file is copied to a safe 
+; * place before proceeding with its modification.
+; ***************************************************************
+
+[Visualizer]
+
+; Fixed size 4-vectors
+Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{
+   children
+   (
+      #(
+        [internals]: [$c,!],
+         x : ($c.m_storage.m_data.array)[0],
+         y : ($c.m_storage.m_data.array)[1],
+         z : ($c.m_storage.m_data.array)[2],
+         w : ($c.m_storage.m_data.array)[3]
+      )
+   )
+
+   preview
+   (
+      #(
+        "[",
+        4,
+        "](",
+        #array(expr: $e.m_storage.m_data.array[$i], size: 4),
+        ")"
+      )
+   )
+}
+
+; Fixed size 3-vectors
+Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{
+   children
+   (
+      #(
+        [internals]: [$c,!],
+         x : ($c.m_storage.m_data.array)[0],
+         y : ($c.m_storage.m_data.array)[1],
+         z : ($c.m_storage.m_data.array)[2]
+      )
+   )
+
+   preview
+   (
+      #(
+        "[",
+        3,
+        "](",
+        #array(expr: $e.m_storage.m_data.array[$i], size: 3),
+        ")"
+      )
+   )
+}
+
+; Fixed size 2-vectors
+Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{
+   children
+   (
+      #(
+        [internals]: [$c,!],
+         x : ($c.m_storage.m_data.array)[0],
+         y : ($c.m_storage.m_data.array)[1]
+      )
+   )
+
+   preview
+   (
+      #(
+        "[",
+        2,
+        "](",
+        #array(expr: $e.m_storage.m_data.array[$i], size: 2),
+        ")"
+      )
+   )
+}
+
+; Fixed size 1-vectors
+Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{
+   children
+   (
+      #(
+        [internals]: [$c,!],
+         x : ($c.m_storage.m_data.array)[0]
+      )
+   )
+
+   preview
+   (
+      #(
+        "[",
+        1,
+        "](",
+        #array(expr: $e.m_storage.m_data.array[$i], size: 1),
+        ")"
+      )
+   )
+}
+
+; Dynamic matrices (ColMajor and RowMajor support)
+Eigen::Matrix<*,-1,-1,*,*,*>{
+  children
+   (
+      #(
+         [internals]: [$c,!],
+         rows: $c.m_storage.m_rows,
+         cols: $c.m_storage.m_cols,
+         ; Check for RowMajorBit
+         #if ($c.Flags & 0x1) (
+             #array(
+                rank: 2,
+                base: 0,
+                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 
+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
+             )
+         ) #else (
+             #array(
+                rank: 2,
+                base: 0,
+                expr: ($c.m_storage.m_data)[$i],
+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
+             )
+         )
+      )
+   )
+
+   preview
+   (
+     #(
+         "[",
+           $c.m_storage.m_rows,
+         ",",
+           $c.m_storage.m_cols,
+         "](",
+           #array(
+            expr :    [($c.m_storage.m_data)[$i],g],
+            size :    $c.m_storage.m_rows*$c.m_storage.m_cols
+           ),
+         ")"
+      )
+   )
+}
+
+; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support)
+Eigen::Matrix<*,*,-1,*,*,*>{
+  children
+   (
+      #(
+         [internals]: [$c,!],
+         rows: $c.RowsAtCompileTime,
+         cols: $c.m_storage.m_cols,
+         ; Check for RowMajorBit
+         #if ($c.Flags & 0x1) (
+             #array(
+                rank: 2,
+                base: 0,
+                expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)],
+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
+             )
+         ) #else (
+             #array(
+                rank: 2,
+                base: 0,
+                expr: ($c.m_storage.m_data)[$i],
+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
+             )
+         )
+      )
+   )
+
+   preview
+   (
+     #(
+         "[",
+           $c.RowsAtCompileTime,
+         ",",
+           $c.m_storage.m_cols,
+         "](",
+           #array(
+            expr :    [($c.m_storage.m_data)[$i],g],
+            size :    $c.RowsAtCompileTime*$c.m_storage.m_cols
+           ),
+         ")"
+      )
+   )
+}
+
+; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support)
+Eigen::Matrix<*,-1,*,*,*,*>{
+  children
+   (
+      #(
+         [internals]: [$c,!],
+         rows: $c.m_storage.m_rows,
+         cols: $c.ColsAtCompileTime,
+         ; Check for RowMajorBit
+         #if ($c.Flags & 0x1) (
+             #array(
+                rank: 2,
+                base: 0,
+                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 
+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
+             )
+         ) #else (
+             #array(
+                rank: 2,
+                base: 0,
+                expr: ($c.m_storage.m_data)[$i],
+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
+             )
+         )
+      )
+   )
+
+   preview
+   (
+     #(
+         "[",
+           $c.m_storage.m_rows,
+         ",",
+           $c.ColsAtCompileTime,
+         "](",
+           #array(
+            expr :    [($c.m_storage.m_data)[$i],g],
+            size :    $c.m_storage.m_rows*$c.ColsAtCompileTime
+           ),
+         ")"
+      )
+   )
+}
+
+; Fixed size matrix (ColMajor and RowMajor support)
+Eigen::Matrix<*,*,*,*,*,*>{
+  children
+   (
+      #(
+         [internals]: [$c,!],
+         rows: $c.RowsAtCompileTime,
+         cols: $c.ColsAtCompileTime,
+         ; Check for RowMajorBit
+         #if ($c.Flags & 0x1) (
+             #array(
+                rank: 2,
+                base: 0,
+                expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], 
+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
+             )
+         ) #else (
+             #array(
+                rank: 2,
+                base: 0,
+                expr: ($c.m_storage.m_data.array)[$i],
+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
+             )
+         )
+      )
+   )
+
+   preview
+   (
+     #(
+         "[",
+           $c.RowsAtCompileTime,
+         ",",
+           $c.ColsAtCompileTime,
+         "](",
+           #array(
+            expr :    [($c.m_storage.m_data.array)[$i],g],
+            size :    $c.RowsAtCompileTime*$c.ColsAtCompileTime
+           ),
+         ")"
+      )
+   )
+}
diff --git a/doc/TutorialSlicingIndexing.dox b/doc/TutorialSlicingIndexing.dox
index 7f89554..6ebaa2d 100644
--- a/doc/TutorialSlicingIndexing.dox
+++ b/doc/TutorialSlicingIndexing.dox
@@ -86,12 +86,12 @@
   <td></td>
 </tr>
 <tr>
-  <td>First \c n odd rows A</td>
+  <td>First \c n odd rows of A</td>
   <td>\code A(seqN(1,n,2), all) \endcode</td>
   <td></td>
 </tr>
 <tr>
-  <td>The last past one column</td>
+  <td>The second-last column</td>
   <td>\code A(all, last-1) \endcode</td>
   <td>\code A.col(A.cols()-2) \endcode</td>
 </tr>
@@ -158,7 +158,7 @@
 \endcode
 
 We can revisit the <i>even columns of A</i> example as follows:
-\code A(all, seq(0,last,fix<2>))
+\code A(all, seq(fix<0>,last,fix<2>))
 \endcode
 
 
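The corrected tutorial example is not just cosmetic: wrapping the bounds in fix<> keeps the slice dimensions compile-time constants when the matrix size is itself fixed. A minimal sketch, mirroring the indexed_view test added later in this patch:

    Eigen::Matrix<double, 11, 21> A;
    auto s1 = A(Eigen::all, Eigen::seq(0, Eigen::last, 2));                          // runtime column count
    auto s2 = A(Eigen::all, Eigen::seq(Eigen::fix<0>, Eigen::last, Eigen::fix<2>));  // compile-time column count
    static_assert(decltype(s2)::ColsAtCompileTime == 11, "(21 + 1) / 2 columns");
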
diff --git a/failtest/cwiseunaryview_on_const_type_actually_const.cpp b/failtest/cwiseunaryview_on_const_type_actually_const.cpp
index 7ecf542..fd3c1d6 100644
--- a/failtest/cwiseunaryview_on_const_type_actually_const.cpp
+++ b/failtest/cwiseunaryview_on_const_type_actually_const.cpp
@@ -10,7 +10,7 @@
 
 void foo() {
   MatrixXf m;
-  CwiseUnaryView<internal::scalar_real_ref_op<double>, CV_QUALIFIER MatrixXf>(m).coeffRef(0, 0) = 1.0f;
+  CwiseUnaryView<internal::scalar_real_ref_op<float>, CV_QUALIFIER MatrixXf>(m).coeffRef(0, 0) = 1.0f;
 }
 
 int main() {}
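
Note that the failtest previously paired MatrixXf with scalar_real_ref_op<double>, so compilation could fail for the wrong reason (a scalar-type mismatch) rather than the const violation under test; with scalar_real_ref_op<float>, the CV_QUALIFIER misuse is the only remaining error.
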
diff --git a/lapack/cholesky.inc b/lapack/cholesky.inc
index dea5bf6..a93a511 100644
--- a/lapack/cholesky.inc
+++ b/lapack/cholesky.inc
@@ -28,9 +28,9 @@
   MatrixType A(a, *n, *n, *lda);
   int ret;
   if (UPLO(*uplo) == UP)
-    ret = int(internal::llt_inplace<Scalar, Upper>::blocked(A));
+    ret = int(Eigen::internal::llt_inplace<Scalar, Eigen::Upper>::blocked(A));
   else
-    ret = int(internal::llt_inplace<Scalar, Lower>::blocked(A));
+    ret = int(Eigen::internal::llt_inplace<Scalar, Eigen::Lower>::blocked(A));
 
   if (ret >= 0) *info = ret + 1;
 }
@@ -61,10 +61,10 @@
   MatrixType B(b, *n, *nrhs, *ldb);
 
   if (UPLO(*uplo) == UP) {
-    A.triangularView<Upper>().adjoint().solveInPlace(B);
-    A.triangularView<Upper>().solveInPlace(B);
+    A.triangularView<Eigen::Upper>().adjoint().solveInPlace(B);
+    A.triangularView<Eigen::Upper>().solveInPlace(B);
   } else {
-    A.triangularView<Lower>().solveInPlace(B);
-    A.triangularView<Lower>().adjoint().solveInPlace(B);
+    A.triangularView<Eigen::Lower>().solveInPlace(B);
+    A.triangularView<Eigen::Lower>().adjoint().solveInPlace(B);
   }
 }
diff --git a/lapack/eigenvalues.inc b/lapack/eigenvalues.inc
index 6f168de..211a7ff 100644
--- a/lapack/eigenvalues.inc
+++ b/lapack/eigenvalues.inc
@@ -47,9 +47,10 @@
     mat = matrix(a, *n, *n, *lda);
 
   bool computeVectors = *jobz == 'V' || *jobz == 'v';
-  SelfAdjointEigenSolver<PlainMatrixType> eig(mat, computeVectors ? ComputeEigenvectors : EigenvaluesOnly);
+  Eigen::SelfAdjointEigenSolver<PlainMatrixType> eig(
+      mat, computeVectors ? Eigen::ComputeEigenvectors : Eigen::EigenvaluesOnly);
 
-  if (eig.info() == NoConvergence) {
+  if (eig.info() == Eigen::NoConvergence) {
     make_vector(w, *n).setZero();
     if (computeVectors) matrix(a, *n, *n, *lda).setIdentity();
     //*info = 1;
diff --git a/lapack/lu.inc b/lapack/lu.inc
index d30c8ce..2ddaf95 100644
--- a/lapack/lu.inc
+++ b/lapack/lu.inc
@@ -62,6 +62,8 @@
   MatrixType lu(a, *n, *n, *lda);
   MatrixType B(b, *n, *nrhs, *ldb);
 
+  using Eigen::UnitLower;
+  using Eigen::Upper;
   for (int i = 0; i < *n; ++i) ipiv[i]--;
   if (OP(*trans) == NOTR) {
     B = PivotsType(ipiv, *n) * B;
diff --git a/lapack/svd.inc b/lapack/svd.inc
index 8e45310..262c5c6 100644
--- a/lapack/svd.inc
+++ b/lapack/svd.inc
@@ -56,12 +56,12 @@
   PlainMatrixType mat(*m, *n);
   mat = matrix(a, *m, *n, *lda);
 
-  int option = *jobz == 'A'   ? ComputeFullU | ComputeFullV
-               : *jobz == 'S' ? ComputeThinU | ComputeThinV
-               : *jobz == 'O' ? ComputeThinU | ComputeThinV
+  int option = *jobz == 'A'   ? Eigen::ComputeFullU | Eigen::ComputeFullV
+               : *jobz == 'S' ? Eigen::ComputeThinU | Eigen::ComputeThinV
+               : *jobz == 'O' ? Eigen::ComputeThinU | Eigen::ComputeThinV
                               : 0;
 
-  BDCSVD<PlainMatrixType> svd(mat, option);
+  Eigen::BDCSVD<PlainMatrixType> svd(mat, option);
 
   make_vector(s, diag_size) = svd.singularValues().head(diag_size);
 
@@ -119,14 +119,14 @@
   PlainMatrixType mat(*m, *n);
   mat = matrix(a, *m, *n, *lda);
 
-  int option = (*jobu == 'A'                   ? ComputeFullU
-                : *jobu == 'S' || *jobu == 'O' ? ComputeThinU
+  int option = (*jobu == 'A'                   ? Eigen::ComputeFullU
+                : *jobu == 'S' || *jobu == 'O' ? Eigen::ComputeThinU
                                                : 0) |
-               (*jobv == 'A'                   ? ComputeFullV
-                : *jobv == 'S' || *jobv == 'O' ? ComputeThinV
+               (*jobv == 'A'                   ? Eigen::ComputeFullV
+                : *jobv == 'S' || *jobv == 'O' ? Eigen::ComputeThinV
                                                : 0);
 
-  JacobiSVD<PlainMatrixType> svd(mat, option);
+  Eigen::JacobiSVD<PlainMatrixType> svd(mat, option);
 
   make_vector(s, diag_size) = svd.singularValues().head(diag_size);
   {
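
The lapack/*.inc edits above all follow from removing "using namespace Eigen" from the shared BLAS/LAPACK headers (see the blas/common.h changelog entry): Eigen types and enumerators such as Upper, ComputeThinU, and BDCSVD must now be written with explicit Eigen:: qualification.
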
diff --git a/test/AnnoyingScalar.h b/test/AnnoyingScalar.h
index 637fdbf..00a20c7 100644
--- a/test/AnnoyingScalar.h
+++ b/test/AnnoyingScalar.h
@@ -184,19 +184,6 @@
   return *x.v;
 }
 
-template <>
-struct random_impl<AnnoyingScalar> {
-  using Impl = random_impl<float>;
-  static EIGEN_DEVICE_FUNC inline AnnoyingScalar run(const AnnoyingScalar& x, const AnnoyingScalar& y) {
-    float result = Impl::run(*x.v, *y.v);
-    return AnnoyingScalar(result);
-  }
-  static EIGEN_DEVICE_FUNC inline AnnoyingScalar run() {
-    float result = Impl::run();
-    return AnnoyingScalar(result);
-  }
-};
-
 }  // namespace internal
 }  // namespace Eigen
 
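With the random-number fixes earlier in this update, the generic Eigen::internal::random_impl path appears to cover custom scalars through their NumTraits, so AnnoyingScalar no longer needs a hand-written specialization. Presumably something like the following now works out of the box (an assumption inferred from the removal, not a test in this patch):

    Eigen::Matrix<AnnoyingScalar, Eigen::Dynamic, Eigen::Dynamic> m =
        Eigen::Matrix<AnnoyingScalar, Eigen::Dynamic, Eigen::Dynamic>::Random(3, 3);
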
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 4c7c3a4..4692584 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -218,9 +218,10 @@
 ei_add_test(smallvectors)
 ei_add_test(mapped_matrix)
 ei_add_test(mapstride)
-ei_add_test(unaryviewstride)
+ei_add_test(unaryview)
 ei_add_test(mapstaticmethods)
 ei_add_test(array_cwise)
+ei_add_test(matrix_cwise)
 ei_add_test(array_for_matrix)
 ei_add_test(array_replicate)
 ei_add_test(array_reverse)
diff --git a/test/MovableScalar.h b/test/MovableScalar.h
index 56a873e..c8bf546 100644
--- a/test/MovableScalar.h
+++ b/test/MovableScalar.h
@@ -26,24 +26,10 @@
   operator Scalar() const { return this->size() > 0 ? this->back() : Scalar(); }
 };
 
-template <>
-struct NumTraits<MovableScalar<float>> : GenericNumTraits<float> {};
-
-namespace internal {
-template <typename T>
-struct random_impl<MovableScalar<T>> {
-  using MoveableT = MovableScalar<T>;
-  using Impl = random_impl<T>;
-  static EIGEN_DEVICE_FUNC inline MoveableT run(const MoveableT& x, const MoveableT& y) {
-    T result = Impl::run(x, y);
-    return MoveableT(result);
-  }
-  static EIGEN_DEVICE_FUNC inline MoveableT run() {
-    T result = Impl::run();
-    return MoveableT(result);
-  }
+template <typename Scalar>
+struct NumTraits<MovableScalar<Scalar>> : GenericNumTraits<Scalar> {
+  enum { RequireInitialization = 1 };
 };
-}  // namespace internal
 
 }  // namespace Eigen
 
diff --git a/test/SafeScalar.h b/test/SafeScalar.h
index 4f4da56..33a54c5 100644
--- a/test/SafeScalar.h
+++ b/test/SafeScalar.h
@@ -4,43 +4,30 @@
 class SafeScalar {
  public:
   SafeScalar() : initialized_(false) {}
-  SafeScalar(const SafeScalar& other) { *this = other; }
-  SafeScalar& operator=(const SafeScalar& other) {
-    val_ = T(other);
-    initialized_ = true;
-    return *this;
-  }
 
-  SafeScalar(T val) : val_(val), initialized_(true) {}
-  SafeScalar& operator=(T val) {
-    val_ = val;
-    initialized_ = true;
-  }
+  SafeScalar(const T& val) : val_(val), initialized_(true) {}
+
+  template <typename Source>
+  explicit SafeScalar(const Source& val) : SafeScalar(T(val)) {}
 
   operator T() const {
     VERIFY(initialized_ && "Uninitialized access.");
     return val_;
   }
 
+  template <typename Target>
+  explicit operator Target() const {
+    return Target(this->operator T());
+  }
+
  private:
   T val_;
   bool initialized_;
 };
 
 namespace Eigen {
-namespace internal {
 template <typename T>
-struct random_impl<SafeScalar<T>> {
-  using SafeT = SafeScalar<T>;
-  using Impl = random_impl<T>;
-  static EIGEN_DEVICE_FUNC inline SafeT run(const SafeT& x, const SafeT& y) {
-    T result = Impl::run(x, y);
-    return SafeT(result);
-  }
-  static EIGEN_DEVICE_FUNC inline SafeT run() {
-    T result = Impl::run();
-    return SafeT(result);
-  }
+struct NumTraits<SafeScalar<T>> : GenericNumTraits<T> {
+  enum { RequireInitialization = 1 };
 };
-}  // namespace internal
-}  // namespace Eigen
+}  // namespace Eigen
\ No newline at end of file
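
SafeScalar exists to trip uninitialized-memory checks: reading a value before one was assigned fires VERIFY. A minimal sketch of the intended behavior:

    SafeScalar<float> s;       // default-constructed: initialized_ == false
    float f = s;               // operator T() -> VERIFY fails: "Uninitialized access."
    SafeScalar<float> t(1.f);  // initialized; conversions succeed
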
diff --git a/test/geo_alignedbox.cpp b/test/geo_alignedbox.cpp
index f335b34..da49c08 100644
--- a/test/geo_alignedbox.cpp
+++ b/test/geo_alignedbox.cpp
@@ -51,6 +51,8 @@
   kill_extra_precision(p0);
   kill_extra_precision(p1);
 
+  VERIFY(numext::equal_strict(b0.volume(), Scalar(0)));
+
   b0.extend(p0);
   b0.extend(p1);
   VERIFY(b0.contains(p0 * s1 + (Scalar(1) - s1) * p1));
@@ -423,6 +425,8 @@
 
   BoxType b0(dim);
 
+  VERIFY(numext::equal_strict(b0.volume(), Scalar(0)));
+
   b0.extend(p0);
   b0.extend(p1);
 
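Both new assertions pin down the "Return 0 volume for empty AlignedBox" changelog entry: a default-constructed (hence empty) box now reports exactly zero volume, e.g. (a sketch):

    Eigen::AlignedBox3d b;                                // empty box
    assert(Eigen::numext::equal_strict(b.volume(), 0.0));
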
diff --git a/test/incomplete_cholesky.cpp b/test/incomplete_cholesky.cpp
index 2b03ba1..fccc207 100644
--- a/test/incomplete_cholesky.cpp
+++ b/test/incomplete_cholesky.cpp
@@ -54,10 +54,28 @@
   }
 }
 
+void test_non_spd() {
+  Eigen::SparseMatrix<double> A(2, 2);
+  A.insert(0, 0) = 0;
+  A.insert(1, 1) = 3;
+
+  Eigen::IncompleteCholesky<double> solver(A);
+
+  // Recover original matrix.
+  Eigen::MatrixXd M = solver.permutationP().transpose() *
+                      (solver.scalingS().asDiagonal().inverse() *
+                       (solver.matrixL() * solver.matrixL().transpose() -
+                        solver.shift() * Eigen::MatrixXd::Identity(A.rows(), A.cols())) *
+                       solver.scalingS().asDiagonal().inverse()) *
+                      solver.permutationP();
+  VERIFY_IS_APPROX(A.toDense(), M);
+}
+
 EIGEN_DECLARE_TEST(incomplete_cholesky) {
   CALL_SUBTEST_1((test_incomplete_cholesky_T<double, int>()));
   CALL_SUBTEST_2((test_incomplete_cholesky_T<std::complex<double>, int>()));
   CALL_SUBTEST_3((test_incomplete_cholesky_T<double, long int>()));
 
-  CALL_SUBTEST_1((bug1150<0>()));
+  CALL_SUBTEST_4((bug1150<0>()));
+  CALL_SUBTEST_4(test_non_spd());
 }
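
The new test_non_spd case exercises the zero-diagonal fix by reconstructing the input from the factorization; it relies on the identity A = P^T * S^-1 * (L * L^T - sigma * I) * S^-1 * P, where P, S, L, and sigma are the solver's permutation, scaling, triangular factor, and diagonal shift, exactly as spelled out in the VERIFY_IS_APPROX above.
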
diff --git a/test/indexed_view.cpp b/test/indexed_view.cpp
index 4040448..f165e8b 100644
--- a/test/indexed_view.cpp
+++ b/test/indexed_view.cpp
@@ -498,12 +498,352 @@
     // A(1, seq(0,2,1)).cwiseAbs().colwise().replicate(2).eval();
     STATIC_CHECK(((internal::evaluator<decltype(A(1, seq(0, 2, 1)))>::Flags & RowMajorBit) == RowMajorBit));
   }
+
+  // Direct access.
+  {
+    int rows = 3;
+    int row_start = internal::random<int>(0, rows - 1);
+    int row_inc = internal::random<int>(1, rows - row_start);
+    int row_size = internal::random<int>(1, (rows - row_start) / row_inc);
+    auto row_seq = seqN(row_start, row_size, row_inc);
+
+    int cols = 3;
+    int col_start = internal::random<int>(0, cols - 1);
+    int col_inc = internal::random<int>(1, cols - col_start);
+    int col_size = internal::random<int>(1, (cols - col_start) / col_inc);
+    auto col_seq = seqN(col_start, col_size, col_inc);
+
+    MatrixXd m1 = MatrixXd::Random(rows, cols);
+    MatrixXd m2 = MatrixXd::Random(cols, rows);
+    VERIFY_IS_APPROX(m1(row_seq, indexing::all) * m2, m1(row_seq, indexing::all).eval() * m2);
+    VERIFY_IS_APPROX(m1 * m2(indexing::all, col_seq), m1 * m2(indexing::all, col_seq).eval());
+    VERIFY_IS_APPROX(m1(row_seq, col_seq) * m2(col_seq, row_seq),
+                     m1(row_seq, col_seq).eval() * m2(col_seq, row_seq).eval());
+
+    VectorXd v1 = VectorXd::Random(cols);
+    VERIFY_IS_APPROX(m1(row_seq, col_seq) * v1(col_seq), m1(row_seq, col_seq).eval() * v1(col_seq).eval());
+    VERIFY_IS_APPROX(v1(col_seq).transpose() * m2(col_seq, row_seq),
+                     v1(col_seq).transpose().eval() * m2(col_seq, row_seq).eval());
+  }
+}
+
+void check_tutorial_examples() {
+  constexpr int kRows = 11;
+  constexpr int kCols = 21;
+  Matrix<double, kRows, kCols> A = Matrix<double, kRows, kCols>::Random();
+  Vector<double, kRows> v = Vector<double, kRows>::Random();
+
+  {
+    auto slice = A(seqN(fix<0>, fix<5>, fix<2>), seqN(fix<2>, fix<7>, fix<1>));
+    EIGEN_UNUSED_VARIABLE(slice);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), 5);
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), 7);
+  }
+  {
+    auto slice = A(seqN(fix<0>, fix<5>, fix<2>), indexing::all);
+    EIGEN_UNUSED_VARIABLE(slice);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), 5);
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), kCols);
+  }
+
+  // Examples from slicing tutorial.
+  // Bottom-left corner.
+  {
+    Index i = 3;
+    Index n = 5;
+    auto slice = A(seq(i, indexing::last), seqN(0, n));
+    auto block = A.bottomLeftCorner(A.rows() - i, n);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), Dynamic);
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), Dynamic);
+    VERIFY_IS_EQUAL(slice, block);
+  }
+  {
+    auto i = fix<3>;
+    auto n = fix<5>;
+    auto slice = A(seq(i, indexing::last), seqN(fix<0>, n));
+    auto block = A.bottomLeftCorner(fix<kRows> - i, n);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), A.RowsAtCompileTime - i);
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), n);
+    VERIFY_IS_EQUAL(slice, block);
+  }
+
+  // Block starting at i,j of size m,n.
+  {
+    Index i = 4;
+    Index j = 2;
+    Index m = 3;
+    Index n = 5;
+    auto slice = A(seqN(i, m), seqN(j, n));
+    auto block = A.block(i, j, m, n);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+  {
+    auto i = fix<4>;
+    auto j = fix<2>;
+    auto m = fix<3>;
+    auto n = fix<5>;
+    auto slice = A(seqN(i, m), seqN(j, n));
+    auto block = A.block(i, j, m, n);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+
+  // Block starting at i0,j0 and ending at i1,j1.
+  {
+    Index i0 = 4;
+    Index i1 = 7;
+    Index j0 = 3;
+    Index j1 = 5;
+    auto slice = A(seq(i0, i1), seq(j0, j1));
+    auto block = A.block(i0, j0, i1 - i0 + 1, j1 - j0 + 1);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+  {
+    auto i0 = fix<4>;
+    auto i1 = fix<7>;
+    auto j0 = fix<3>;
+    auto j1 = fix<5>;
+    auto slice = A(seq(i0, i1), seq(j0, j1));
+    auto block = A.block(i0, j0, i1 - i0 + fix<1>, j1 - j0 + fix<1>);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+
+  // Even columns of A.
+  {
+    auto slice = A(all, seq(0, last, 2));
+    auto block =
+        Eigen::Map<Eigen::Matrix<double, kRows, Dynamic>, 0, OuterStride<2 * kRows>>(A.data(), kRows, (kCols + 1) / 2);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+  {
+    auto slice = A(all, seq(fix<0>, last, fix<2>));
+    auto block = Eigen::Map<Eigen::Matrix<double, kRows, (kCols + 1) / 2>, 0, OuterStride<2 * kRows>>(A.data());
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+
+  // First n odd rows of A.
+  {
+    Index n = 3;
+    auto slice = A(seqN(1, n, 2), all);
+    auto block = Eigen::Map<Eigen::Matrix<double, Dynamic, kCols>, 0, Stride<kRows, 2>>(A.data() + 1, n, kCols);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+  {
+    auto n = fix<3>;
+    auto slice = A(seqN(fix<1>, n, fix<2>), all);
+    auto block = Eigen::Map<Eigen::Matrix<double, 3, kCols>, 0, Stride<kRows, 2>>(A.data() + 1);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+
+  // The second-last column.
+  {
+    auto slice = A(all, last - 1);
+    auto block = A.col(A.cols() - 2);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+  {
+    auto slice = A(all, last - fix<1>);
+    auto block = A.col(fix<kCols> - fix<2>);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+
+  // The middle row.
+  {
+    auto slice = A(last / 2, all);
+    auto block = A.row((A.rows() - 1) / 2);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+  {
+    auto slice = A(last / fix<2>, all);
+    auto block = A.row(fix<(kRows - 1) / 2>);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+
+  // Last elements of v starting at i.
+  {
+    Index i = 7;
+    auto slice = v(seq(i, last));
+    auto block = v.tail(v.size() - i);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+  {
+    auto i = fix<7>;
+    auto slice = v(seq(i, last));
+    auto block = v.tail(fix<kRows> - i);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+
+  // Last n elements of v.
+  {
+    Index n = 6;
+    auto slice = v(seq(last + 1 - n, last));
+    auto block = v.tail(n);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+  {
+    auto n = fix<6>;
+    auto slice = v(seq(last + fix<1> - n, last));
+    auto block = v.tail(n);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+
+  // Last n elements of v, this time via lastN.
+  {
+    Index n = 6;
+    auto slice = v(lastN(n));
+    auto block = v.tail(n);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+  {
+    auto n = fix<6>;
+    auto slice = v(lastN(n));
+    auto block = v.tail(n);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+
+  // Bottom-right corner of A of size m times n.
+  {
+    Index m = 3;
+    Index n = 6;
+    auto slice = A(lastN(m), lastN(n));
+    auto block = A.bottomRightCorner(m, n);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+  {
+    auto m = fix<3>;
+    auto n = fix<6>;
+    auto slice = A(lastN(m), lastN(n));
+    auto block = A.bottomRightCorner(m, n);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+
+  // Last n columns with a stride of 3.
+  {
+    Index n = 4;
+    constexpr Index stride = 3;
+    auto slice = A(all, lastN(n, stride));
+    auto block = Eigen::Map<Eigen::Matrix<double, kRows, Dynamic>, 0, OuterStride<stride * kRows>>(
+        A.data() + (kCols - 1 - (n - 1) * stride) * kRows, A.rows(), n);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+  {
+    constexpr auto n = fix<4>;
+    constexpr auto stride = fix<3>;
+    auto slice = A(all, lastN(n, stride));
+    auto block = Eigen::Map<Eigen::Matrix<double, kRows, n>, 0, OuterStride<stride * kRows>>(
+        A.data() + (kCols - 1 - (n - 1) * stride) * kRows, A.rows(), n);
+    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
+    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
+    VERIFY_IS_EQUAL(slice, block);
+  }
+
+  // Compile time size and increment.
+  {
+    auto slice1 = v(seq(last - fix<7>, last - fix<2>));
+    auto slice2 = v(seqN(last - 7, fix<6>));
+    VERIFY_IS_EQUAL(slice1, slice2);
+    VERIFY_IS_EQUAL(int(slice1.SizeAtCompileTime), 6);
+    VERIFY_IS_EQUAL(int(slice2.SizeAtCompileTime), 6);
+    auto slice3 = A(all, seq(fix<0>, last, fix<2>));
+    VERIFY_IS_EQUAL(int(slice3.RowsAtCompileTime), kRows);
+    VERIFY_IS_EQUAL(int(slice3.ColsAtCompileTime), (kCols + 1) / 2);
+  }
+
+  // Reverse order.
+  {
+    auto slice = A(all, seq(20, 10, fix<-2>));
+    auto block = Eigen::Map<Eigen::Matrix<double, kRows, Dynamic>, 0, OuterStride<-2 * kRows>>(
+        A.data() + 20 * kRows, A.rows(), (20 - 10 + 2) / 2);
+    VERIFY_IS_EQUAL(slice, block);
+  }
+  {
+    Index n = 10;
+    auto slice1 = A(seqN(last, n, fix<-1>), all);
+    auto slice2 = A(lastN(n).reverse(), all);
+    VERIFY_IS_EQUAL(slice1, slice2);
+  }
+
+  // Array of indices.
+  {
+    std::vector<int> ind{4, 2, 5, 5, 3};
+    auto slice1 = A(all, ind);
+    for (int i = 0; i < ind.size(); ++i) {
+      VERIFY_IS_EQUAL(slice1.col(i), A.col(ind[i]));
+    }
+
+    auto slice2 = A(all, {4, 2, 5, 5, 3});
+    VERIFY_IS_EQUAL(slice1, slice2);
+
+    Eigen::ArrayXi indarray(5);
+    indarray << 4, 2, 5, 5, 3;
+    auto slice3 = A(all, indarray);
+    VERIFY_IS_EQUAL(slice1, slice3);
+  }
+
+  // Custom index list.
+  {
+    struct pad {
+      Index size() const { return out_size; }
+      Index operator[](Index i) const { return std::max<Index>(0, i - (out_size - in_size)); }
+      Index in_size, out_size;
+    };
+
+    auto slice = A(pad{3, 5}, pad{3, 5});
+    Eigen::MatrixXd B = slice;
+    VERIFY_IS_EQUAL(B.block(2, 2, 3, 3), A.block(0, 0, 3, 3));
+  }
 }
 
 EIGEN_DECLARE_TEST(indexed_view) {
-  //   for(int i = 0; i < g_repeat; i++) {
-  CALL_SUBTEST_1(check_indexed_view());
-  //   }
+  for (int i = 0; i < g_repeat; i++) {
+    CALL_SUBTEST_1(check_indexed_view());
+  }
+  CALL_SUBTEST_1(check_tutorial_examples());
 
   // static checks of some internals:
   STATIC_CHECK((internal::is_valid_index_type<int>::value));
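
The "Direct access" block corresponds to the "Enable direct access for IndexedView" changelog entry: a regularly strided slice can now feed a matrix product directly, without materializing it first. Roughly, the pattern the test verifies is (a sketch):

    Eigen::MatrixXd m1 = Eigen::MatrixXd::Random(3, 3);
    Eigen::MatrixXd m2 = Eigen::MatrixXd::Random(3, 3);
    Eigen::MatrixXd p  = m1(Eigen::seqN(0, 2, 2), Eigen::all) * m2;  // no .eval() on the slice
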
diff --git a/test/matrix_cwise.cpp b/test/matrix_cwise.cpp
new file mode 100644
index 0000000..56cd2d6
--- /dev/null
+++ b/test/matrix_cwise.cpp
@@ -0,0 +1,302 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include <vector>
+#include "main.h"
+
+template <typename MatrixType, typename NewScalar>
+struct matrix_of {
+  using type = MatrixType;
+};
+
+template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols, typename NewScalar>
+struct matrix_of<Eigen::Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>, NewScalar> {
+  using type = Eigen::Matrix<NewScalar, Rows, Cols, Options, MaxRows, MaxCols>;
+};
+
+// Unary function reference.
+template <typename MatrixType, typename Func,
+          typename OutMatrixType = typename matrix_of<
+              MatrixType, typename Eigen::internal::result_of<Func(typename MatrixType::Scalar)>::type>::type>
+OutMatrixType cwise_ref(const MatrixType& m, Func f = Func()) {
+  OutMatrixType out(m.rows(), m.cols());
+  for (Eigen::Index r = 0; r < m.rows(); ++r) {
+    for (Eigen::Index c = 0; c < m.cols(); ++c) {
+      out(r, c) = f(m(r, c));
+    }
+  }
+  return out;
+}
+
+// Binary function reference.
+template <typename MatrixType, typename Func,
+          typename OutMatrixType = typename matrix_of<
+              MatrixType, typename Eigen::internal::result_of<Func(typename MatrixType::Scalar,
+                                                                   typename MatrixType::Scalar)>::type>::type>
+OutMatrixType cwise_ref(const MatrixType& m1, const MatrixType& m2, Func f = Func()) {
+  OutMatrixType out(m1.rows(), m1.cols());
+  for (Eigen::Index r = 0; r < m1.rows(); ++r) {
+    for (Eigen::Index c = 0; c < m1.cols(); ++c) {
+      out(r, c) = f(m1(r, c), m2(r, c));
+    }
+  }
+  return out;
+}
+
+template <typename MatrixType>
+void test_cwise_real(const MatrixType& m) {
+  using Scalar = typename MatrixType::Scalar;
+  Index rows = m.rows();
+  Index cols = m.cols();
+  MatrixType m1 = MatrixType::Random(rows, cols);
+  MatrixType m2, m3, m4;
+
+  // Supported unary ops.
+  VERIFY_IS_CWISE_APPROX(m1.cwiseAbs(), cwise_ref(m1, [](const Scalar& x) { return Eigen::numext::abs(x); }));
+  VERIFY_IS_CWISE_APPROX(m1.cwiseSign(), cwise_ref(m1, [](const Scalar& x) { return Eigen::numext::sign(x); }));
+  VERIFY_IS_CWISE_APPROX(m1.cwiseCbrt(), cwise_ref(m1, [](const Scalar& x) { return Eigen::numext::cbrt(x); }));
+  // For integers, avoid division by zero.
+  m2 = m1;
+  if (Eigen::NumTraits<Scalar>::IsInteger) {
+    m2 = m1.unaryExpr([](const Scalar& x) { return Eigen::numext::equal_strict(x, Scalar(0)) ? Scalar(1) : x; });
+  }
+  VERIFY_IS_CWISE_APPROX(m2.cwiseInverse(), cwise_ref(m2, [](const Scalar& x) { return Scalar(Scalar(1) / x); }));
+  VERIFY_IS_CWISE_APPROX(m1.cwiseArg(), cwise_ref(m1, [](const Scalar& x) { return Eigen::numext::arg(x); }));
+  // Only take sqrt of positive values.
+  m2 = m1.cwiseAbs();
+  VERIFY_IS_CWISE_APPROX(m2.cwiseSqrt(), cwise_ref(m2, [](const Scalar& x) { return Eigen::numext::sqrt(x); }));
+  // Only find Square/Abs2 of +/- sqrt values so we don't overflow.
+  m2 = m2.cwiseSqrt().array() * m1.cwiseSign().array();
+  VERIFY_IS_CWISE_APPROX(m2.cwiseAbs2(), cwise_ref(m2, [](const Scalar& x) { return Eigen::numext::abs2(x); }));
+  VERIFY_IS_CWISE_APPROX(m2.cwiseSquare(), cwise_ref(m2, [](const Scalar& x) { return Scalar(x * x); }));
+  VERIFY_IS_CWISE_APPROX(m2.cwisePow(Scalar(2)),
+                         cwise_ref(m2, [](const Scalar& x) { return Eigen::numext::pow(x, Scalar(2)); }));
+
+  // Supported binary ops.
+  m1.setRandom(rows, cols);
+  m2.setRandom(rows, cols);
+  VERIFY_IS_CWISE_EQUAL(m1.cwiseMin(m2),
+                        cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Eigen::numext::mini(x, y); }));
+  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMin<PropagateFast>(m2),
+                        cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Eigen::numext::mini(x, y); }));
+  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMin<PropagateNaN>(m2),
+                        cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Eigen::numext::mini(x, y); }));
+  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMin<PropagateNumbers>(m2),
+                        cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Eigen::numext::mini(x, y); }));
+  VERIFY_IS_CWISE_EQUAL(m1.cwiseMax(m2),
+                        cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Eigen::numext::maxi(x, y); }));
+  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMax<PropagateFast>(m2),
+                        cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Eigen::numext::maxi(x, y); }));
+  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMax<PropagateNaN>(m2),
+                        cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Eigen::numext::maxi(x, y); }));
+  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMax<PropagateNumbers>(m2),
+                        cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Eigen::numext::maxi(x, y); }));
+  // Scalar comparison.
+  Scalar mean = Eigen::NumTraits<Scalar>::highest() / Scalar(2) + Eigen::NumTraits<Scalar>::lowest() / Scalar(2);
+  m4.setConstant(rows, cols, mean);
+  VERIFY_IS_CWISE_EQUAL(m1.cwiseMin(mean),
+                        cwise_ref(m1, m4, [](const Scalar& x, const Scalar& y) { return Eigen::numext::mini(x, y); }));
+  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMin<PropagateFast>(mean),
+                        cwise_ref(m1, m4, [](const Scalar& x, const Scalar& y) { return Eigen::numext::mini(x, y); }));
+  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMin<PropagateNaN>(mean),
+                        cwise_ref(m1, m4, [](const Scalar& x, const Scalar& y) { return Eigen::numext::mini(x, y); }));
+  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMin<PropagateNumbers>(mean),
+                        cwise_ref(m1, m4, [](const Scalar& x, const Scalar& y) { return Eigen::numext::mini(x, y); }));
+  VERIFY_IS_CWISE_EQUAL(m1.cwiseMax(mean),
+                        cwise_ref(m1, m4, [](const Scalar& x, const Scalar& y) { return Eigen::numext::maxi(x, y); }));
+  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMax<PropagateFast>(mean),
+                        cwise_ref(m1, m4, [](const Scalar& x, const Scalar& y) { return Eigen::numext::maxi(x, y); }));
+  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMax<PropagateNaN>(mean),
+                        cwise_ref(m1, m4, [](const Scalar& x, const Scalar& y) { return Eigen::numext::maxi(x, y); }));
+  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMax<PropagateNumbers>(mean),
+                        cwise_ref(m1, m4, [](const Scalar& x, const Scalar& y) { return Eigen::numext::maxi(x, y); }));
+  // For products, avoid integer overflow by limiting the input < sqrt(max).
+  m3 = m1;
+  m4 = m2;
+  if (Eigen::NumTraits<Scalar>::IsInteger) {
+    const Scalar kMax = Eigen::numext::sqrt(Eigen::NumTraits<Scalar>::highest());
+    m3 = m1 - ((m1 / kMax) * kMax);
+    m4 = m2 - ((m2 / kMax) * kMax);
+  }
+  VERIFY_IS_CWISE_APPROX(m3.cwiseProduct(m4),
+                         cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) { return Scalar(x * y); }));
+  // For quotients involving integers, avoid division by zero.
+  m4 = m2;
+  if (Eigen::NumTraits<Scalar>::IsInteger) {
+    m4 = m2.unaryExpr([](const Scalar& x) { return Eigen::numext::equal_strict(x, Scalar(0)) ? Scalar(1) : x; });
+  }
+  VERIFY_IS_CWISE_APPROX(m1.cwiseQuotient(m4),
+                         cwise_ref(m1, m4, [](const Scalar& x, const Scalar& y) { return Scalar(x / y); }));
+  // For equality comparisons, limit range to increase number of equalities.
+  if (Eigen::NumTraits<Scalar>::IsInteger) {
+    const Scalar kMax = Scalar(10);
+    m3 = m1 - ((m1 / kMax) * kMax);
+    m4 = m2 - ((m2 / kMax) * kMax);
+    mean = Eigen::NumTraits<Scalar>::IsSigned ? Scalar(0) : kMax / Scalar(2);
+  } else {
+    const Scalar kShift = Scalar(10);
+    m3 = (m1 * kShift).array().floor() / kShift;
+    m4 = (m2 * kShift).array().floor() / kShift;
+    mean = Scalar(0);
+  }
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return Eigen::numext::equal_strict(x, y);
+                        }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseNotEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return !Eigen::numext::equal_strict(x, y);
+                        }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseLess(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) { return x < y; }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseGreater(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) { return x > y; }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseLessOrEqual(m4),
+                        cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) { return x <= y; }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseGreaterOrEqual(m4),
+                        cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) { return x >= y; }));
+  // Typed-Equality.
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return Eigen::numext::equal_strict(x, y) ? Scalar(1) : Scalar(0);
+                        }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedNotEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return !Eigen::numext::equal_strict(x, y) ? Scalar(1) : Scalar(0);
+                        }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedLess(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return x < y ? Scalar(1) : Scalar(0);
+                        }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedGreater(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return x > y ? Scalar(1) : Scalar(0);
+                        }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedLessOrEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return x <= y ? Scalar(1) : Scalar(0);
+                        }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedGreaterOrEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return x >= y ? Scalar(1) : Scalar(0);
+                        }));
+  // Scalar.
+  m4.setConstant(rows, cols, mean);
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return Eigen::numext::equal_strict(x, y);
+                        }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseNotEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return !Eigen::numext::equal_strict(x, y);
+                        }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseLess(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) { return x < y; }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseGreater(mean),
+                        cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) { return x > y; }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseLessOrEqual(mean),
+                        cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) { return x <= y; }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseGreaterOrEqual(mean),
+                        cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) { return x >= y; }));
+  // Typed.
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return Eigen::numext::equal_strict(x, y) ? Scalar(1) : Scalar(0);
+                        }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedNotEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return !Eigen::numext::equal_strict(x, y) ? Scalar(1) : Scalar(0);
+                        }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedLess(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return x < y ? Scalar(1) : Scalar(0);
+                        }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedGreater(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return x > y ? Scalar(1) : Scalar(0);
+                        }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedLessOrEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return x <= y ? Scalar(1) : Scalar(0);
+                        }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedGreaterOrEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return x >= y ? Scalar(1) : Scalar(0);
+                        }));
+}
+
+template <typename MatrixType>
+void test_cwise_complex(const MatrixType& m) {
+  using Scalar = typename MatrixType::Scalar;
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  Index rows = m.rows();
+  Index cols = m.cols();
+  MatrixType m1 = MatrixType::Random(rows, cols);
+  MatrixType m2, m3, m4;
+
+  // Supported unary ops.
+  VERIFY_IS_CWISE_APPROX(m1.cwiseAbs(), cwise_ref(m1, [](const Scalar& x) { return Eigen::numext::abs(x); }));
+  VERIFY_IS_CWISE_APPROX(m1.cwiseSqrt(), cwise_ref(m1, [](const Scalar& x) { return Eigen::numext::sqrt(x); }));
+  VERIFY_IS_CWISE_APPROX(m1.cwiseInverse(), cwise_ref(m1, [](const Scalar& x) { return Scalar(Scalar(1) / x); }));
+  VERIFY_IS_CWISE_APPROX(m1.cwiseArg(), cwise_ref(m1, [](const Scalar& x) { return Eigen::numext::arg(x); }));
+  VERIFY_IS_CWISE_APPROX(m1.cwiseCArg(), cwise_ref(m1, [](const Scalar& x) { return Scalar(Eigen::numext::arg(x)); }));
+  // Only find Square/Abs2 of +/- sqrt values so we don't overflow.
+  m2 = m1.cwiseSqrt().array() * m1.cwiseSign().array();
+  VERIFY_IS_CWISE_APPROX(m2.cwiseAbs2(), cwise_ref(m2, [](const Scalar& x) { return Eigen::numext::abs2(x); }));
+  VERIFY_IS_CWISE_APPROX(m2.cwiseSquare(), cwise_ref(m2, [](const Scalar& x) { return Scalar(x * x); }));
+  VERIFY_IS_CWISE_APPROX(m2.cwisePow(Scalar(2)),
+                         cwise_ref(m2, [](const Scalar& x) { return Eigen::numext::pow(x, Scalar(2)); }));
+
+  // Supported binary ops.
+  m1.setRandom(rows, cols);
+  m2.setRandom(rows, cols);
+  VERIFY_IS_CWISE_APPROX(m1.cwiseProduct(m2),
+                         cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Scalar(x * y); }));
+  VERIFY_IS_CWISE_APPROX(m1.cwiseQuotient(m2),
+                         cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Scalar(x / y); }));
+  // For equality comparisons, limit range to increase number of equalities.
+  {
+    const RealScalar kShift = RealScalar(10);
+    m3 = m1;
+    m4 = m2;
+    m3.real() = (m1.real() * kShift).array().floor() / kShift;
+    m3.imag() = (m1.imag() * kShift).array().floor() / kShift;
+    m4.real() = (m2.real() * kShift).array().floor() / kShift;
+    m4.imag() = (m2.imag() * kShift).array().floor() / kShift;
+  }
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return Eigen::numext::equal_strict(x, y);
+                        }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseNotEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return !Eigen::numext::equal_strict(x, y);
+                        }));
+  // Typed-Equality.
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return Eigen::numext::equal_strict(x, y) ? Scalar(1) : Scalar(0);
+                        }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedNotEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return !Eigen::numext::equal_strict(x, y) ? Scalar(1) : Scalar(0);
+                        }));
+  // Scalar.
+  Scalar mean = Scalar(0);
+  m4.setConstant(rows, cols, mean);
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return Eigen::numext::equal_strict(x, y);
+                        }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseNotEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return !Eigen::numext::equal_strict(x, y);
+                        }));
+  // Typed.
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return Eigen::numext::equal_strict(x, y) ? Scalar(1) : Scalar(0);
+                        }));
+  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedNotEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
+                          return !Eigen::numext::equal_strict(x, y) ? Scalar(1) : Scalar(0);
+                        }));
+}
+
+EIGEN_DECLARE_TEST(matrix_cwise) {
+  for (int i = 0; i < g_repeat; i++) {
+    CALL_SUBTEST_1(test_cwise_real(Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
+    CALL_SUBTEST_1(test_cwise_real(Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
+    CALL_SUBTEST_1(test_cwise_real(Eigen::Matrix<Eigen::half, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
+    CALL_SUBTEST_1(test_cwise_real(Eigen::Matrix<Eigen::bfloat16, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
+    CALL_SUBTEST_2(test_cwise_complex(Eigen::Matrix<std::complex<float>, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
+    CALL_SUBTEST_2(test_cwise_complex(Eigen::Matrix<std::complex<double>, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
+    CALL_SUBTEST_3(test_cwise_real(Eigen::Matrix<int8_t, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
+    CALL_SUBTEST_3(test_cwise_real(Eigen::Matrix<int16_t, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
+    CALL_SUBTEST_3(test_cwise_real(Eigen::Matrix<int32_t, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
+    CALL_SUBTEST_3(test_cwise_real(Eigen::Matrix<int64_t, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
+    CALL_SUBTEST_4(test_cwise_real(Eigen::Matrix<uint8_t, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
+    CALL_SUBTEST_4(test_cwise_real(Eigen::Matrix<uint16_t, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
+    CALL_SUBTEST_4(test_cwise_real(Eigen::Matrix<uint32_t, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
+    CALL_SUBTEST_4(test_cwise_real(Eigen::Matrix<uint64_t, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
+  }
+}
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index bf2970c..db8c9b5 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -277,6 +277,7 @@
 
 template <typename Scalar, typename Packet>
 void packetmath_boolean_mask_ops() {
+  using RealScalar = typename NumTraits<Scalar>::Real;
   const int PacketSize = internal::unpacket_traits<Packet>::size;
   const int size = 2 * PacketSize;
   EIGEN_ALIGN_MAX Scalar data1[size];
@@ -289,7 +290,7 @@
   CHECK_CWISE1(internal::ptrue, internal::ptrue);
   CHECK_CWISE2_IF(true, internal::pandnot, internal::pandnot);
   for (int i = 0; i < PacketSize; ++i) {
-    data1[i] = Scalar(i);
+    data1[i] = Scalar(RealScalar(i));
     data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
   }
 
@@ -1332,6 +1333,141 @@
   VERIFY(test::areApprox(ref, pval, PacketSize) && "conj_helper pmadd");
 }
 
+template <typename Scalar, typename Packet, bool HasExp = internal::packet_traits<Scalar>::HasExp>
+struct exp_complex_test_impl {
+  typedef typename Scalar::value_type RealScalar;
+
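+  // Evaluates the packet pexp on a single value: broadcast x into a packet, apply pexp, extract lane 0.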
+  static Scalar pexp1(const Scalar& x) {
+    Packet px = internal::pset1<Packet>(x);
+    Packet py = internal::pexp(px);
+    return internal::pfirst(py);
+  }
+
+  static Scalar cis(const RealScalar& x) { return Scalar(numext::cos(x), numext::sin(x)); }
+
+  // Verify equality with signed zero.
+  static bool is_exactly_equal(RealScalar a, RealScalar b) {
+    // NaNs carry no meaningful sign and never compare equal directly, so match them explicitly.
+    if ((numext::isnan)(a)) {
+      return (numext::isnan)(b);
+    }
+
+    RealScalar zero(0);
+#ifdef EIGEN_ARCH_ARM
+    // ARM automatically flushes denormals to zero.
+    // Preserve sign by multiplying by +0.
+    if (numext::abs(a) < (std::numeric_limits<RealScalar>::min)()) {
+      a = a * zero;
+    }
+    if (numext::abs(b) < (std::numeric_limits<RealScalar>::min)()) {
+      b = b * zero;
+    }
+#endif
+
+    // Signed zero.
+    if (a == zero) {
+      // numext::signbit returns all-zeros or all-ones bits (a NaN pattern), so compare signs via their equality with zero.
+      return (a == b) && ((numext::signbit(a) == zero) == (numext::signbit(b) == zero));
+    }
+    // Allow _some_ tolerance.
+    return verifyIsApprox(a, b);
+  }
+
+  // Verify equality with signed zero.
+  static bool is_exactly_equal(const Scalar& a, const Scalar& b) {
+    bool result = is_exactly_equal(numext::real_ref(a), numext::real_ref(b)) &&
+                  is_exactly_equal(numext::imag_ref(a), numext::imag_ref(b));
+    if (!result) {
+      std::cout << a << " != " << b << std::endl;
+    }
+    return result;
+  }
+
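+  // Inputs matching the C99 Annex G special cases for cexp in which result signs are unspecified;
+  // for these the test compares only the component magnitudes.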
+  static bool is_sign_exp_unspecified(const Scalar& z) {
+    const RealScalar inf = std::numeric_limits<RealScalar>::infinity();
+    // If z is (-∞,±∞), the result is (±0,±0) (signs are unspecified)
+    if (numext::real_ref(z) == -inf && (numext::isinf)(numext::imag_ref(z))) {
+      return true;
+    }
+    // If z is (+∞,±∞), the result is (±∞,NaN) and FE_INVALID is raised (the sign of the real part is unspecified)
+    if (numext::real_ref(z) == +inf && (numext::isinf)(numext::imag_ref(z))) {
+      return true;
+    }
+    // If z is (-∞,NaN), the result is (±0,±0) (signs are unspecified)
+    if (numext::real_ref(z) == -inf && (numext::isnan)(numext::imag_ref(z))) {
+      return true;
+    }
+    // If z is (+∞,NaN), the result is (±∞,NaN) (the sign of the real part is unspecified)
+    if (numext::real_ref(z) == +inf && (numext::isnan)(numext::imag_ref(z))) {
+      return true;
+    }
+    return false;
+  }
+
+  static void run(Scalar* data1, Scalar* data2, Scalar* ref, int size) {
+    const int PacketSize = internal::unpacket_traits<Packet>::size;
+
+    for (int i = 0; i < size; ++i) {
+      data1[i] = Scalar(internal::random<RealScalar>(), internal::random<RealScalar>());
+    }
+    CHECK_CWISE1_N(std::exp, internal::pexp, size);
+
+    // Test all corner cases (and more).
+    const RealScalar edges[] = {RealScalar(0),
+                                RealScalar(1),
+                                RealScalar(2),
+                                RealScalar(EIGEN_PI / 2),
+                                RealScalar(EIGEN_PI),
+                                RealScalar(3 * EIGEN_PI / 2),
+                                RealScalar(2 * EIGEN_PI),
+                                numext::log(NumTraits<RealScalar>::highest()) - 1,
+                                NumTraits<RealScalar>::highest(),
+                                std::numeric_limits<RealScalar>::infinity(),
+                                std::numeric_limits<RealScalar>::quiet_NaN(),
+                                -RealScalar(0),
+                                -RealScalar(1),
+                                -RealScalar(2),
+                                -RealScalar(EIGEN_PI / 2),
+                                -RealScalar(EIGEN_PI),
+                                -RealScalar(3 * EIGEN_PI / 2),
+                                -RealScalar(2 * EIGEN_PI),
+                                -numext::log(NumTraits<RealScalar>::highest()) + 1,
+                                -NumTraits<RealScalar>::highest(),
+                                -std::numeric_limits<RealScalar>::infinity(),
+                                -std::numeric_limits<RealScalar>::quiet_NaN()};
+
+    for (RealScalar x : edges) {
+      for (RealScalar y : edges) {
+        Scalar z = Scalar(x, y);
+        Scalar w = pexp1(z);
+        if (is_sign_exp_unspecified(z)) {
+          Scalar abs_w = Scalar(numext::abs(numext::real_ref(w)), numext::abs(numext::imag_ref(w)));
+          Scalar expected = numext::exp(z);
+          Scalar abs_expected =
+              Scalar(numext::abs(numext::real_ref(expected)), numext::abs(numext::imag_ref(expected)));
+          VERIFY(is_exactly_equal(abs_w, abs_expected));
+        } else {
+          VERIFY(is_exactly_equal(w, numext::exp(z)));
+        }
+      }
+    }
+  }
+};
+
+template <typename Scalar, typename Packet>
+struct exp_complex_test_impl<Scalar, Packet, false> {
+  typedef typename Scalar::value_type RealScalar;
+  static void run(Scalar*, Scalar*, Scalar*, int) {}
+};
+
+template <typename Scalar, typename Packet>
+void exp_complex_test(Scalar* data1, Scalar* data2, Scalar* ref, int size) {
+  exp_complex_test_impl<Scalar, Packet>::run(data1, data2, ref, size);
+}
+
 template <typename Scalar, typename Packet>
 void packetmath_complex() {
   typedef internal::packet_traits<Scalar> PacketTraits;
@@ -1445,8 +1578,9 @@
     data1[1] = Scalar(-inf, nan);
     data1[2] = Scalar(nan, inf);
     data1[3] = Scalar(nan, -inf);
-    CHECK_CWISE1_IM1ULP_N(std::log, internal::plog, 4);
+    CHECK_CWISE1_IM1ULP_N(numext::log, internal::plog, 4);
   }
+  exp_complex_test<Scalar, Packet>(data1, data2, ref, size);
 }
 
 template <typename Scalar, typename Packet>
diff --git a/test/qr.cpp b/test/qr.cpp
index de470ca..f7f6990 100644
--- a/test/qr.cpp
+++ b/test/qr.cpp
@@ -82,6 +82,7 @@
   m1 = m3 * m1 * m3.adjoint();
   qr.compute(m1);
   VERIFY_IS_APPROX(log(absdet), qr.logAbsDeterminant());
+  VERIFY_IS_APPROX(numext::sign(det), qr.signDeterminant());
   // This test is tricky if the determinant becomes too small.
   // Since we generate random numbers with magnitude range [0,1], the average determinant is 0.5^size
   RealScalar tol =
@@ -102,7 +103,7 @@
   VERIFY_RAISES_ASSERT(qr.householderQ())
   VERIFY_RAISES_ASSERT(qr.determinant())
   VERIFY_RAISES_ASSERT(qr.absDeterminant())
-  VERIFY_RAISES_ASSERT(qr.logAbsDeterminant())
+  VERIFY_RAISES_ASSERT(qr.signDeterminant())
 }
 
 EIGEN_DECLARE_TEST(qr) {
diff --git a/test/qr_colpivoting.cpp b/test/qr_colpivoting.cpp
index 4f8711f..c821304 100644
--- a/test/qr_colpivoting.cpp
+++ b/test/qr_colpivoting.cpp
@@ -21,6 +21,7 @@
   Index rank = internal::random<Index>(1, (std::min)(rows, cols) - 1);
 
   typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar RealScalar;
   typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> MatrixQType;
   MatrixType matrix;
   createRandomPIMatrixOfRank(rank, rows, cols, matrix);
@@ -56,6 +57,23 @@
 
   MatrixType pinv = cod.pseudoInverse();
   VERIFY_IS_APPROX(cod_solution, pinv * rhs);
+
+  // now construct a (square) matrix with prescribed determinant
+  Index size = internal::random<Index>(2, 20);
+  matrix.setZero(size, size);
+  for (int i = 0; i < size; i++) {
+    matrix(i, i) = internal::random<Scalar>();
+  }
+  Scalar det = matrix.diagonal().prod();
+  RealScalar absdet = numext::abs(det);
+  CompleteOrthogonalDecomposition<MatrixType> cod2(matrix);
+  cod2.compute(matrix);
+  q = cod2.householderQ();
+  matrix = q * matrix * q.adjoint();
+  VERIFY_IS_APPROX(det, cod2.determinant());
+  VERIFY_IS_APPROX(absdet, cod2.absDeterminant());
+  VERIFY_IS_APPROX(numext::log(absdet), cod2.logAbsDeterminant());
+  VERIFY_IS_APPROX(numext::sign(det), cod2.signDeterminant());
 }
 
 template <typename MatrixType, int Cols2>
@@ -265,6 +283,7 @@
   VERIFY_IS_APPROX(det, qr.determinant());
   VERIFY_IS_APPROX(absdet, qr.absDeterminant());
   VERIFY_IS_APPROX(log(absdet), qr.logAbsDeterminant());
+  VERIFY_IS_APPROX(numext::sign(det), qr.signDeterminant());
 }
 
 template <typename MatrixType>
@@ -285,6 +304,7 @@
   VERIFY_RAISES_ASSERT(qr.determinant())
   VERIFY_RAISES_ASSERT(qr.absDeterminant())
   VERIFY_RAISES_ASSERT(qr.logAbsDeterminant())
+  VERIFY_RAISES_ASSERT(qr.signDeterminant())
 }
 
 template <typename MatrixType>
@@ -305,6 +325,7 @@
   VERIFY_RAISES_ASSERT(cod.determinant())
   VERIFY_RAISES_ASSERT(cod.absDeterminant())
   VERIFY_RAISES_ASSERT(cod.logAbsDeterminant())
+  VERIFY_RAISES_ASSERT(cod.signDeterminant())
 }
 
 EIGEN_DECLARE_TEST(qr_colpivoting) {
diff --git a/test/qr_fullpivoting.cpp b/test/qr_fullpivoting.cpp
index 71f3a51..2b6ecc5 100644
--- a/test/qr_fullpivoting.cpp
+++ b/test/qr_fullpivoting.cpp
@@ -105,6 +105,7 @@
   VERIFY_IS_APPROX(det, qr.determinant());
   VERIFY_IS_APPROX(absdet, qr.absDeterminant());
   VERIFY_IS_APPROX(log(absdet), qr.logAbsDeterminant());
+  VERIFY_IS_APPROX(numext::sign(det), qr.signDeterminant());
 }
 
 template <typename MatrixType>
@@ -125,6 +126,7 @@
   VERIFY_RAISES_ASSERT(qr.determinant())
   VERIFY_RAISES_ASSERT(qr.absDeterminant())
   VERIFY_RAISES_ASSERT(qr.logAbsDeterminant())
+  VERIFY_RAISES_ASSERT(qr.signDeterminant())
 }
 
 EIGEN_DECLARE_TEST(qr_fullpivoting) {
diff --git a/test/rand.cpp b/test/rand.cpp
index b5cf801..4131f38 100644
--- a/test/rand.cpp
+++ b/test/rand.cpp
@@ -9,6 +9,10 @@
 
 #include <cstdlib>
 #include "main.h"
+#include "SafeScalar.h"
+
+// SafeScalar<T> is used to simulate custom scalar types, which exercise the more general
+// code path for generating random numbers.
 
 // For GCC-6, if this function is inlined then there seems to be an optimization
 // bug that triggers a failure.  This failure goes away if you access `r` in
@@ -25,15 +29,29 @@
 
 template <typename Scalar>
 void check_all_in_range(Scalar x, Scalar y) {
-  Array<int, 1, Dynamic> mask(y - x + 1);
-  mask.fill(0);
-  int64_t n = (y - x + 1) * 32;
-  for (int64_t k = 0; k < n; ++k) {
-    mask(check_in_range(x, y) - x)++;
-  }
+  constexpr int repeats = 32;
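+  // Compute the range in uint64_t: y - x + 1 can overflow Scalar when [x, y] spans the whole type.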
+  uint64_t count = static_cast<uint64_t>(y) - static_cast<uint64_t>(x) + 1;
+  ArrayX<bool> mask(count);
+  // ensure that `count` does not overflow the return type of `mask.size()`
+  VERIFY(count == static_cast<uint64_t>(mask.size()));
+  mask.setConstant(false);
+  for (uint64_t k = 0; k < count; k++)
+    for (int repeat = 0; repeat < repeats; repeat++) {
+      Scalar r = check_in_range(x, y);
+      Index i = static_cast<Index>(r) - static_cast<Index>(x);
+      mask(i) = true;
+    }
   for (Index i = 0; i < mask.size(); ++i)
-    if (mask(i) == 0) std::cout << "WARNING: value " << x + i << " not reached." << std::endl;
-  VERIFY((mask > 0).all());
+    if (mask(i) == false) std::cout << "WARNING: value " << x + i << " not reached." << std::endl;
+  VERIFY(mask.cwiseEqual(true).all());
+}
+
+template <typename Scalar>
+void check_all_in_range() {
+  const Scalar x = NumTraits<Scalar>::lowest();
+  const Scalar y = NumTraits<Scalar>::highest();
+  check_all_in_range(x, y);
 }
 
 template <typename Scalar, typename EnableIf = void>
@@ -66,72 +83,110 @@
   double bin_width_;
 };
 
+// helper trait mapping scalars to an unsigned range type without specializing make_unsigned in a namespace we don't own
+template <typename T>
+struct get_range_type : internal::make_unsigned<T> {};
+template <typename T>
+struct get_range_type<SafeScalar<T>> : internal::make_unsigned<T> {};
+
 template <typename Scalar>
 class HistogramHelper<Scalar, std::enable_if_t<Eigen::NumTraits<Scalar>::IsInteger>> {
  public:
-  using RangeType = typename Eigen::internal::make_unsigned<Scalar>::type;
+  using RangeType = typename get_range_type<Scalar>::type;
   HistogramHelper(int nbins)
       : HistogramHelper(Eigen::NumTraits<Scalar>::lowest(), Eigen::NumTraits<Scalar>::highest(), nbins) {}
   HistogramHelper(Scalar lower, Scalar upper, int nbins)
       : lower_{lower}, upper_{upper}, num_bins_{nbins}, bin_width_{bin_width(lower, upper, nbins)} {}
 
-  int bin(Scalar v) { return static_cast<int>(RangeType(v - lower_) / bin_width_); }
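+  // Subtract in the unsigned RangeType so that v - lower_ cannot trigger signed-integer overflow.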
+  int bin(Scalar v) { return static_cast<int>(RangeType(RangeType(v) - RangeType(lower_)) / bin_width_); }
 
   double uniform_bin_probability(int bin) {
-    // Avoid overflow in computing range.
-    double range = static_cast<double>(RangeType(upper_ - lower_)) + 1.0;
+    // The full range upper - lower + 1 might overflow the RangeType by one.
+    // So instead, we know we have (nbins - 1) bins of width bin_width_,
+    // and the last bin of width:
+    RangeType last_bin_width =
+        RangeType(upper_) - (RangeType(lower_) + RangeType(num_bins_ - 1) * bin_width_) + RangeType(1);
+    double last_bin_ratio = static_cast<double>(last_bin_width) / static_cast<double>(bin_width_);
+    // Total probability = (nbins - 1) * p + last_bin_ratio * p = 1.0
+    // p = 1.0 / (nbins - 1 + last_bin_ratio)
+    double p = 1.0 / (last_bin_ratio + num_bins_ - 1);
     if (bin < num_bins_ - 1) {
-      return static_cast<double>(bin_width_) / range;
+      return p;
     }
-    return static_cast<double>(RangeType(upper_) - RangeType((lower_ + bin * bin_width_)) + 1) / range;
+    return last_bin_ratio * p;
   }
 
  private:
-  static constexpr Scalar bin_width(Scalar lower, Scalar upper, int nbins) {
+  static constexpr RangeType bin_width(Scalar lower, Scalar upper, int nbins) {
     // Avoid overflow in computing the full range.
-    return RangeType(upper - nbins - lower + 1) / nbins + 1;
+    //   floor((upper - lower + 1) / nbins)
+    //     = floor((upper - nbins - lower + 1) / nbins) + 1
+    return RangeType(RangeType(upper - nbins) - RangeType(lower) + 1) / nbins + 1;
   }
 
   Scalar lower_;
   Scalar upper_;
   int num_bins_;
-  Scalar bin_width_;
+  RangeType bin_width_;
 };
 
 template <typename Scalar>
 void check_histogram(Scalar x, Scalar y, int bins) {
+  constexpr int repeats = 10000;
+  double count = double(bins) * double(repeats);
   Eigen::VectorXd hist = Eigen::VectorXd::Zero(bins);
   HistogramHelper<Scalar> hist_helper(x, y, bins);
-  int64_t n = static_cast<int64_t>(bins) * 10000;  // Approx 10000 per bin.
-  for (int64_t k = 0; k < n; ++k) {
-    Scalar r = check_in_range(x, y);
-    int bin = hist_helper.bin(r);
-    hist(bin)++;
-  }
-  // Normalize bins by probability.
+  for (int k = 0; k < bins; k++)
+    for (int repeat = 0; repeat < repeats; repeat++) {
+      Scalar r = check_in_range(x, y);
+      int bin = hist_helper.bin(r);
+      hist(bin)++;
+    }
+  // Normalize bins by probability.
+  hist /= count;
   for (int i = 0; i < bins; ++i) {
-    hist(i) = hist(i) / n / hist_helper.uniform_bin_probability(i);
+    hist(i) = hist(i) / hist_helper.uniform_bin_probability(i);
   }
   VERIFY(((hist.array() - 1.0).abs() < 0.05).all());
 }
 
 template <typename Scalar>
 void check_histogram(int bins) {
+  constexpr int repeats = 10000;
+  double count = double(bins) * double(repeats);
   Eigen::VectorXd hist = Eigen::VectorXd::Zero(bins);
   HistogramHelper<Scalar> hist_helper(bins);
-  int64_t n = static_cast<int64_t>(bins) * 10000;  // Approx 10000 per bin.
-  for (int64_t k = 0; k < n; ++k) {
-    Scalar r = Eigen::internal::random<Scalar>();
-    int bin = hist_helper.bin(r);
-    hist(bin)++;
-  }
-  // Normalize bins by probability.
+  for (int k = 0; k < bins; k++)
+    for (int repeat = 0; repeat < repeats; repeat++) {
+      Scalar r = Eigen::internal::random<Scalar>();
+      int bin = hist_helper.bin(r);
+      hist(bin)++;
+    }
+  // Normalize bins by probability.
+  hist /= count;
   for (int i = 0; i < bins; ++i) {
-    hist(i) = hist(i) / n / hist_helper.uniform_bin_probability(i);
+    hist(i) = hist(i) / hist_helper.uniform_bin_probability(i);
   }
   VERIFY(((hist.array() - 1.0).abs() < 0.05).all());
 }
 
+template <>
+void check_histogram<bool>(int) {
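+  // bool has only two outcomes, so rather than a histogram, check that the fraction of trues is close to 1/2.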
+  constexpr int bins = 2;
+  constexpr int repeats = 10000;
+  double count = double(bins) * double(repeats);
+  double true_count = 0.0;
+  for (int k = 0; k < bins; k++)
+    for (int repeat = 0; repeat < repeats; repeat++) {
+      bool r = Eigen::internal::random<bool>();
+      if (r) true_count += 1.0;
+    }
+  double p = true_count / count;
+  VERIFY(numext::abs(p - 0.5) < 0.05);
+}
+
 EIGEN_DECLARE_TEST(rand) {
   int64_t int64_ref = NumTraits<int64_t>::highest() / 10;
   // the minimum guarantees that these conversions are safe
@@ -182,14 +235,16 @@
   CALL_SUBTEST_7(check_all_in_range<int8_t>(-11 - int8t_offset, -11));
   CALL_SUBTEST_7(check_all_in_range<int8_t>(-126, -126 + int8t_offset));
   CALL_SUBTEST_7(check_all_in_range<int8_t>(126 - int8t_offset, 126));
-  CALL_SUBTEST_7(check_all_in_range<int8_t>(-126, 126));
+  CALL_SUBTEST_7(check_all_in_range<int8_t>());
+  CALL_SUBTEST_7(check_all_in_range<uint8_t>());
 
   CALL_SUBTEST_8(check_all_in_range<int16_t>(11, 11));
   CALL_SUBTEST_8(check_all_in_range<int16_t>(11, 11 + int16t_offset));
   CALL_SUBTEST_8(check_all_in_range<int16_t>(-5, 5));
   CALL_SUBTEST_8(check_all_in_range<int16_t>(-11 - int16t_offset, -11));
   CALL_SUBTEST_8(check_all_in_range<int16_t>(-24345, -24345 + int16t_offset));
-  CALL_SUBTEST_8(check_all_in_range<int16_t>(24345, 24345 + int16t_offset));
+  CALL_SUBTEST_8(check_all_in_range<int16_t>());
+  CALL_SUBTEST_8(check_all_in_range<uint16_t>());
 
   CALL_SUBTEST_9(check_all_in_range<int32_t>(11, 11));
   CALL_SUBTEST_9(check_all_in_range<int32_t>(11, 11 + g_repeat));
@@ -214,6 +269,7 @@
   CALL_SUBTEST_11(check_histogram<int32_t>(-RAND_MAX + 10,
                                            -int64_t(RAND_MAX) + 10 + bins * (2 * int64_t(RAND_MAX) / bins) - 1, bins));
 
+  CALL_SUBTEST_12(check_histogram<bool>(/*bins=*/2));
   CALL_SUBTEST_12(check_histogram<uint8_t>(/*bins=*/16));
   CALL_SUBTEST_12(check_histogram<uint16_t>(/*bins=*/1024));
   CALL_SUBTEST_12(check_histogram<uint32_t>(/*bins=*/1024));
@@ -229,10 +285,16 @@
   CALL_SUBTEST_14(check_histogram<long double>(-10.0L, 10.0L, /*bins=*/1024));
   CALL_SUBTEST_14(check_histogram<half>(half(-10.0f), half(10.0f), /*bins=*/512));
   CALL_SUBTEST_14(check_histogram<bfloat16>(bfloat16(-10.0f), bfloat16(10.0f), /*bins=*/64));
+  CALL_SUBTEST_14(check_histogram<SafeScalar<float>>(-10.0f, 10.0f, /*bins=*/1024));
+  CALL_SUBTEST_14(check_histogram<SafeScalar<half>>(half(-10.0f), half(10.0f), /*bins=*/512));
+  CALL_SUBTEST_14(check_histogram<SafeScalar<bfloat16>>(bfloat16(-10.0f), bfloat16(10.0f), /*bins=*/64));
 
   CALL_SUBTEST_15(check_histogram<float>(/*bins=*/1024));
   CALL_SUBTEST_15(check_histogram<double>(/*bins=*/1024));
   CALL_SUBTEST_15(check_histogram<long double>(/*bins=*/1024));
   CALL_SUBTEST_15(check_histogram<half>(/*bins=*/512));
   CALL_SUBTEST_15(check_histogram<bfloat16>(/*bins=*/64));
+  CALL_SUBTEST_15(check_histogram<SafeScalar<float>>(/*bins=*/1024));
+  CALL_SUBTEST_15(check_histogram<SafeScalar<half>>(/*bins=*/512));
+  CALL_SUBTEST_15(check_histogram<SafeScalar<bfloat16>>(/*bins=*/64));
 }
diff --git a/test/schur_real.cpp b/test/schur_real.cpp
index cd0be92..4a9dd89 100644
--- a/test/schur_real.cpp
+++ b/test/schur_real.cpp
@@ -97,6 +97,14 @@
   }
 }
 
+void test_bug2633() {
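+  // Regression test for bug #2633: RealSchur previously failed to converge on this matrix.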
+  Eigen::MatrixXd A(4, 4);
+  A << 0, 0, 0, -2, 1, 0, 0, -0, 0, 1, 0, 2, 0, 0, 2, -0;
+  RealSchur<Eigen::MatrixXd> schur(A);
+  VERIFY(schur.info() == Eigen::Success);
+}
+
 EIGEN_DECLARE_TEST(schur_real) {
   CALL_SUBTEST_1((schur<Matrix4f>()));
   CALL_SUBTEST_2((schur<MatrixXd>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE / 4))));
@@ -105,4 +112,6 @@
 
   // Test problem size constructors
   CALL_SUBTEST_5(RealSchur<MatrixXf>(10));
+
+  CALL_SUBTEST_6((test_bug2633()));
 }
diff --git a/test/simplicial_cholesky.cpp b/test/simplicial_cholesky.cpp
index ca67496..ed93218 100644
--- a/test/simplicial_cholesky.cpp
+++ b/test/simplicial_cholesky.cpp
@@ -20,6 +20,12 @@
   SimplicialLDLT<SparseMatrixType, Upper> ldlt_colmajor_upper_amd;
   SimplicialLDLT<SparseMatrixType, Lower, NaturalOrdering<I_> > ldlt_colmajor_lower_nat;
   SimplicialLDLT<SparseMatrixType, Upper, NaturalOrdering<I_> > ldlt_colmajor_upper_nat;
+  SimplicialNonHermitianLLT<SparseMatrixType, Lower> nhllt_colmajor_lower_amd;
+  SimplicialNonHermitianLLT<SparseMatrixType, Upper> nhllt_colmajor_upper_amd;
+  SimplicialNonHermitianLDLT<SparseMatrixType, Lower> nhldlt_colmajor_lower_amd;
+  SimplicialNonHermitianLDLT<SparseMatrixType, Upper> nhldlt_colmajor_upper_amd;
+  SimplicialNonHermitianLDLT<SparseMatrixType, Lower, NaturalOrdering<I_> > nhldlt_colmajor_lower_nat;
+  SimplicialNonHermitianLDLT<SparseMatrixType, Upper, NaturalOrdering<I_> > nhldlt_colmajor_upper_nat;
 
   check_sparse_spd_solving(chol_colmajor_lower_amd);
   check_sparse_spd_solving(chol_colmajor_upper_amd);
@@ -27,6 +33,10 @@
   check_sparse_spd_solving(llt_colmajor_upper_amd);
   check_sparse_spd_solving(ldlt_colmajor_lower_amd);
   check_sparse_spd_solving(ldlt_colmajor_upper_amd);
+  check_sparse_nonhermitian_solving(nhllt_colmajor_lower_amd);
+  check_sparse_nonhermitian_solving(nhllt_colmajor_upper_amd);
+  check_sparse_nonhermitian_solving(nhldlt_colmajor_lower_amd);
+  check_sparse_nonhermitian_solving(nhldlt_colmajor_upper_amd);
 
   check_sparse_spd_determinant(chol_colmajor_lower_amd);
   check_sparse_spd_determinant(chol_colmajor_upper_amd);
@@ -34,9 +44,15 @@
   check_sparse_spd_determinant(llt_colmajor_upper_amd);
   check_sparse_spd_determinant(ldlt_colmajor_lower_amd);
   check_sparse_spd_determinant(ldlt_colmajor_upper_amd);
+  check_sparse_nonhermitian_determinant(nhllt_colmajor_lower_amd);
+  check_sparse_nonhermitian_determinant(nhllt_colmajor_upper_amd);
+  check_sparse_nonhermitian_determinant(nhldlt_colmajor_lower_amd);
+  check_sparse_nonhermitian_determinant(nhldlt_colmajor_upper_amd);
 
   check_sparse_spd_solving(ldlt_colmajor_lower_nat, (std::min)(300, EIGEN_TEST_MAX_SIZE), 1000);
   check_sparse_spd_solving(ldlt_colmajor_upper_nat, (std::min)(300, EIGEN_TEST_MAX_SIZE), 1000);
+  check_sparse_nonhermitian_solving(nhldlt_colmajor_lower_nat, (std::min)(300, EIGEN_TEST_MAX_SIZE), 1000);
+  check_sparse_nonhermitian_solving(nhldlt_colmajor_upper_nat, (std::min)(300, EIGEN_TEST_MAX_SIZE), 1000);
 }
 
 EIGEN_DECLARE_TEST(simplicial_cholesky) {
diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp
index 364aac0..a9c6f4c 100644
--- a/test/sparse_basic.cpp
+++ b/test/sparse_basic.cpp
@@ -39,7 +39,7 @@
   typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix;
   typedef Matrix<Scalar, Dynamic, 1> DenseVector;
   typedef Matrix<Scalar, Dynamic, Dynamic, SparseMatrixType::IsRowMajor ? RowMajor : ColMajor> CompatibleDenseMatrix;
-  Scalar eps = 1e-6;
+  Scalar eps = Scalar(1e-6);
 
   Scalar s1 = internal::random<Scalar>();
   {
@@ -948,6 +948,27 @@
     SparseMatrixType m2(rows, 0);
     m2.reserve(ArrayXi::Constant(m2.outerSize(), 1));
   }
+
+  // test move
+  {
+    using TransposedType = SparseMatrix<Scalar, SparseMatrixType::IsRowMajor ? ColMajor : RowMajor,
+                                        typename SparseMatrixType::StorageIndex>;
+    DenseMatrix refMat1 = DenseMatrix::Random(rows, cols);
+    SparseMatrixType m1(rows, cols);
+    initSparse<Scalar>(density, refMat1, m1);
+    // test move ctor
+    SparseMatrixType m2(std::move(m1));
+    VERIFY_IS_APPROX(m2, refMat1);
+    // test move assignment
+    m1 = std::move(m2);
+    VERIFY_IS_APPROX(m1, refMat1);
+    // test move ctor (SparseMatrixBase)
+    TransposedType m3(std::move(m1.transpose()));
+    VERIFY_IS_APPROX(m3, refMat1.transpose());
+    // test move assignment (SparseMatrixBase)
+    m2 = std::move(m3.transpose());
+    VERIFY_IS_APPROX(m2, refMat1);
+  }
 }
 
 template <typename SparseMatrixType>
@@ -994,7 +1015,7 @@
   g_dense_op_sparse_count = 0;  // Suppresses compiler warning.
   for (int i = 0; i < g_repeat; i++) {
     int r = Eigen::internal::random<int>(1, 200), c = Eigen::internal::random<int>(1, 200);
-    if (Eigen::internal::random<int>(0, 4) == 0) {
+    if (Eigen::internal::random<int>(0, 3) == 0) {
       r = c;  // check square matrices in 25% of tries
     }
     EIGEN_UNUSED_VARIABLE(r + c);
@@ -1011,7 +1032,7 @@
 
     r = Eigen::internal::random<int>(1, 100);
     c = Eigen::internal::random<int>(1, 100);
-    if (Eigen::internal::random<int>(0, 4) == 0) {
+    if (Eigen::internal::random<int>(0, 3) == 0) {
       r = c;  // check square matrices in 25% of tries
     }
 
diff --git a/test/sparse_solver.h b/test/sparse_solver.h
index 033df83..50cb463 100644
--- a/test/sparse_solver.h
+++ b/test/sparse_solver.h
@@ -484,6 +484,97 @@
   }
 }
 
+template <typename Solver, typename DenseMat>
+int generate_sparse_nonhermitian_problem(Solver&, typename Solver::MatrixType& A, typename Solver::MatrixType& halfA,
+                                         DenseMat& dA, int maxSize = 300) {
+  typedef typename Solver::MatrixType Mat;
+  typedef typename Mat::Scalar Scalar;
+  typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix;
+
+  int size = internal::random<int>(1, maxSize);
+  double density = (std::max)(8. / static_cast<double>(size * size), 0.01);
+
+  Mat M(size, size);
+  DenseMatrix dM(size, size);
+
+  initSparse<Scalar>(density, dM, M, ForceNonZeroDiag);
+
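+  // M * M^T is symmetric but, for complex Scalar, not Hermitian, which is what the SimplicialNonHermitian* solvers target.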
+  A = M * M.transpose();
+  dA = dM * dM.transpose();
+
+  halfA.resize(size, size);
+  if (Solver::UpLo == (Lower | Upper))
+    halfA = A;
+  else
+    halfA = A.template triangularView<Solver::UpLo>();
+
+  return size;
+}
+
+template <typename Solver>
+void check_sparse_nonhermitian_solving(Solver& solver, int maxSize = (std::min)(300, EIGEN_TEST_MAX_SIZE),
+                                       int maxRealWorldSize = 100000) {
+  typedef typename Solver::MatrixType Mat;
+  typedef typename Mat::Scalar Scalar;
+  typedef typename Mat::StorageIndex StorageIndex;
+  typedef SparseMatrix<Scalar, ColMajor, StorageIndex> SpMat;
+  typedef SparseVector<Scalar, 0, StorageIndex> SpVec;
+  typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix;
+  typedef Matrix<Scalar, Dynamic, 1> DenseVector;
+
+  // generate the problem
+  Mat A, halfA;
+  DenseMatrix dA;
+  for (int i = 0; i < g_repeat; i++) {
+    int size = generate_sparse_nonhermitian_problem(solver, A, halfA, dA, maxSize);
+
+    // generate the right hand sides
+    int rhsCols = internal::random<int>(1, 16);
+    double density = (std::max)(8. / static_cast<double>(size * rhsCols), 0.1);
+    SpMat B(size, rhsCols);
+    DenseVector b = DenseVector::Random(size);
+    DenseMatrix dB(size, rhsCols);
+    initSparse<Scalar>(density, dB, B, ForceNonZeroDiag);
+    SpVec c = B.col(0);
+    DenseVector dc = dB.col(0);
+
+    CALL_SUBTEST(check_sparse_solving(solver, A, b, dA, b));
+    CALL_SUBTEST(check_sparse_solving(solver, halfA, b, dA, b));
+    CALL_SUBTEST(check_sparse_solving(solver, A, dB, dA, dB));
+    CALL_SUBTEST(check_sparse_solving(solver, halfA, dB, dA, dB));
+    CALL_SUBTEST(check_sparse_solving(solver, A, B, dA, dB));
+    CALL_SUBTEST(check_sparse_solving(solver, halfA, B, dA, dB));
+    CALL_SUBTEST(check_sparse_solving(solver, A, c, dA, dc));
+    CALL_SUBTEST(check_sparse_solving(solver, halfA, c, dA, dc));
+
+    // check only once
+    if (i == 0) {
+      b = DenseVector::Zero(size);
+      check_sparse_solving(solver, A, b, dA, b);
+    }
+  }
+
+  EIGEN_UNUSED_VARIABLE(maxRealWorldSize);
+}
+
+template <typename Solver>
+void check_sparse_nonhermitian_determinant(Solver& solver) {
+  typedef typename Solver::MatrixType Mat;
+  typedef typename Mat::Scalar Scalar;
+  typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix;
+
+  // generate the problem
+  Mat A, halfA;
+  DenseMatrix dA;
+  generate_sparse_nonhermitian_problem(solver, A, halfA, dA, 30);
+
+  for (int i = 0; i < g_repeat; i++) {
+    check_sparse_determinant(solver, A, dA);
+    check_sparse_determinant(solver, halfA, dA);
+  }
+}
+
 template <typename Solver>
 void check_sparse_zero_matrix(Solver& solver) {
   typedef typename Solver::MatrixType Mat;
diff --git a/test/sparse_vector.cpp b/test/sparse_vector.cpp
index 83ad324..8d47fb0 100644
--- a/test/sparse_vector.cpp
+++ b/test/sparse_vector.cpp
@@ -108,6 +108,33 @@
   VERIFY_IS_APPROX(refV3 = v1.transpose(), v1.toDense());
   VERIFY_IS_APPROX(DenseVector(v1), v1.toDense());
 
+  // test move
+  {
+    SparseVectorType v3(std::move(v1));
+    VERIFY_IS_APPROX(v3, refV1);
+    v1 = v3;
+  }
+
+  {
+    SparseVectorType v3;
+    v3 = std::move(v1);
+    VERIFY_IS_APPROX(v3, refV1);
+    v1 = v3;
+  }
+
+  {
+    SparseVectorType v3(std::move(mv1));
+    VERIFY_IS_APPROX(v3, refV1);
+    mv1 = v3;
+  }
+
+  {
+    SparseVectorType v3;
+    v3 = std::move(mv1);
+    VERIFY_IS_APPROX(v3, refV1);
+    mv1 = v3;
+  }
+
   // test conservative resize
   {
     std::vector<StorageIndex> inc;
diff --git a/test/unaryview.cpp b/test/unaryview.cpp
new file mode 100644
index 0000000..58e95d6
--- /dev/null
+++ b/test/unaryview.cpp
@@ -0,0 +1,109 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2021 Andrew Johnson <andrew.johnson@arjohnsonau.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+template <int OuterStride, int InnerStride, typename VectorType>
+void unaryview_stride(const VectorType& m) {
+  typedef typename VectorType::Scalar Scalar;
+  Index rows = m.rows();
+  Index cols = m.cols();
+  VectorType vec = VectorType::Random(rows, cols);
+
+  struct view_op {
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const Scalar& v) const { return v; }
+  };
+
+  CwiseUnaryView<view_op, VectorType, Stride<OuterStride, InnerStride>> vec_view(vec);
+  VERIFY(vec_view.outerStride() == (OuterStride == 0 ? 0 : OuterStride));
+  VERIFY(vec_view.innerStride() == (InnerStride == 0 ? 1 : InnerStride));
+}
+
+void test_mutable_unaryview() {
+  struct Vec3 {
+    double x;
+    double y;
+    double z;
+  };
+
+  Eigen::Vector<Vec3, 3> m;
+  auto x_view = m.unaryViewExpr([](Vec3& v) -> double& { return v.x; });
+  auto y_view = m.unaryViewExpr([](Vec3& v) -> double& { return v.y; });
+  auto z_view = m.unaryViewExpr([](Vec3& v) -> double& { return v.z; });
+
+  x_view.setConstant(1);
+  y_view.setConstant(2);
+  z_view.setConstant(3);
+
+  for (int i = 0; i < m.size(); ++i) {
+    VERIFY_IS_EQUAL(m(i).x, 1);
+    VERIFY_IS_EQUAL(m(i).y, 2);
+    VERIFY_IS_EQUAL(m(i).z, 3);
+  }
+}
+
+void test_unaryview_solve() {
+  // Random upper-triangular system.
+  Eigen::MatrixXd A = Eigen::MatrixXd::Random(5, 5);
+  A.triangularView<Eigen::Lower>().setZero();
+  A.diagonal().setRandom();
+  Eigen::VectorXd b = Eigen::VectorXd::Random(5);
+
+  struct trivial_view_op {
+    double& operator()(double& x) const { return x; }
+    const double& operator()(const double& x) const { return x; }
+  };
+
+  // Non-const view:
+  {
+    auto b_view = b.unaryViewExpr(trivial_view_op());
+    b_view(0) = 1;  // Allows modification.
+    Eigen::VectorXd x = A.triangularView<Eigen::Upper>().solve(b_view);
+    VERIFY_IS_APPROX(A * x, b);
+  }
+
+  // Const view:
+  {
+    const auto b_view = b.unaryViewExpr(trivial_view_op());
+    Eigen::VectorXd x = A.triangularView<Eigen::Upper>().solve(b_view);
+    VERIFY_IS_APPROX(A * x, b);
+  }
+
+  // Non-const view of const matrix:
+  {
+    const Eigen::VectorXd const_b = b;
+    auto b_view = const_b.unaryViewExpr(trivial_view_op());
+    Eigen::VectorXd x = A.triangularView<Eigen::Upper>().solve(b_view);
+    VERIFY_IS_APPROX(A * x, b);
+  }
+
+  // Const view of const matrix:
+  {
+    const Eigen::VectorXd const_b = b;
+    const auto b_view = const_b.unaryViewExpr(trivial_view_op());
+    Eigen::VectorXd x = A.triangularView<Eigen::Upper>().solve(b_view);
+    VERIFY_IS_APPROX(A * x, b);
+  }
+
+  // Eigen::MatrixXd out =
+  //       mat_in.real()
+  //             .triangularView<Eigen::Upper>()
+  //             .solve(mat_in.unaryViewExpr([&](const auto& x){ return std::real(x); }));
+}
+
+EIGEN_DECLARE_TEST(unaryviewstride) {
+  CALL_SUBTEST_1((unaryview_stride<1, 2>(MatrixXf())));
+  CALL_SUBTEST_1((unaryview_stride<0, 0>(MatrixXf())));
+  CALL_SUBTEST_2((unaryview_stride<1, 2>(VectorXf())));
+  CALL_SUBTEST_2((unaryview_stride<0, 0>(VectorXf())));
+  CALL_SUBTEST_3((unaryview_stride<1, 2>(RowVectorXf())));
+  CALL_SUBTEST_3((unaryview_stride<0, 0>(RowVectorXf())));
+  CALL_SUBTEST_4(test_mutable_unaryview());
+  CALL_SUBTEST_4(test_unaryview_solve());
+}
diff --git a/test/unaryviewstride.cpp b/test/unaryviewstride.cpp
deleted file mode 100644
index 490a5b7..0000000
--- a/test/unaryviewstride.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2021 Andrew Johnson <andrew.johnson@arjohnsonau.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#include "main.h"
-
-template <int OuterStride, int InnerStride, typename VectorType>
-void unaryview_stride(const VectorType& m) {
-  typedef typename VectorType::Scalar Scalar;
-  Index rows = m.rows();
-  Index cols = m.cols();
-  VectorType vec = VectorType::Random(rows, cols);
-
-  struct view_op {
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const Scalar& v) const { return v; }
-  };
-
-  CwiseUnaryView<view_op, VectorType, Stride<OuterStride, InnerStride>> vec_view(vec);
-  VERIFY(vec_view.outerStride() == (OuterStride == 0 ? 0 : OuterStride));
-  VERIFY(vec_view.innerStride() == (InnerStride == 0 ? 1 : InnerStride));
-}
-
-EIGEN_DECLARE_TEST(unaryviewstride) {
-  CALL_SUBTEST_1((unaryview_stride<1, 2>(MatrixXf())));
-  CALL_SUBTEST_1((unaryview_stride<0, 0>(MatrixXf())));
-  CALL_SUBTEST_2((unaryview_stride<1, 2>(VectorXf())));
-  CALL_SUBTEST_2((unaryview_stride<0, 0>(VectorXf())));
-  CALL_SUBTEST_3((unaryview_stride<1, 2>(RowVectorXf())));
-  CALL_SUBTEST_3((unaryview_stride<0, 0>(RowVectorXf())));
-}
diff --git a/unsupported/Eigen/AutoDiff b/unsupported/Eigen/AutoDiff
index 45078bc..0480c69 100644
--- a/unsupported/Eigen/AutoDiff
+++ b/unsupported/Eigen/AutoDiff
@@ -33,6 +33,7 @@
 #include "../../Eigen/src/Core/util/DisableStupidWarnings.h"
 
 // IWYU pragma: begin_exports
+#include "src/AutoDiff/CoherentPadOp.h"
 #include "src/AutoDiff/AutoDiffScalar.h"
 // #include "src/AutoDiff/AutoDiffVector.h"
 #include "src/AutoDiff/AutoDiffJacobian.h"
diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
index 3b9eff7..9417469 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
@@ -303,12 +303,17 @@
 
   /** Normal Dimension */
   EIGEN_DEVICE_FUNC void resize(const array<Index, NumIndices>& dimensions) {
-    int i;
+#ifndef EIGEN_NO_DEBUG
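+    // Debug builds check every dimension for Index overflow; release builds just take the product.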
     Index size = Index(1);
-    for (i = 0; i < NumIndices; i++) {
+    for (int i = 0; i < NumIndices; i++) {
       internal::check_rows_cols_for_overflow<Dynamic, Dynamic, Dynamic>::run(size, dimensions[i]);
       size *= dimensions[i];
     }
+#else
+    Index size = internal::array_prod(dimensions);
+#endif
+
 #ifdef EIGEN_INITIALIZE_COEFFS
     bool size_changed = size != this->size();
     m_storage.resize(size, dimensions);
@@ -318,15 +322,6 @@
 #endif
   }
 
-  // Why this overload, DSizes is derived from array ??? //
-  EIGEN_DEVICE_FUNC void resize(const DSizes<Index, NumIndices>& dimensions) {
-    array<Index, NumIndices> dims;
-    for (int i = 0; i < NumIndices; ++i) {
-      dims[i] = dimensions[i];
-    }
-    resize(dims);
-  }
-
   EIGEN_DEVICE_FUNC void resize() {
     EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
     // Nothing to do: rank 0 tensors have fixed size
@@ -347,7 +342,6 @@
     resize(internal::customIndices2Array<Index, NumIndices>(dimensions));
   }
 
-#ifndef EIGEN_EMULATE_CXX11_META_H
   template <typename std::ptrdiff_t... Indices>
   EIGEN_DEVICE_FUNC void resize(const Sizes<Indices...>& dimensions) {
     array<Index, NumIndices> dims;
@@ -356,16 +350,6 @@
     }
     resize(dims);
   }
-#else
-  template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5>
-  EIGEN_DEVICE_FUNC void resize(const Sizes<V1, V2, V3, V4, V5>& dimensions) {
-    array<Index, NumIndices> dims;
-    for (int i = 0; i < NumIndices; ++i) {
-      dims[i] = static_cast<Index>(dimensions[i]);
-    }
-    resize(dims);
-  }
-#endif
 
 #ifdef EIGEN_TENSOR_PLUGIN
 #include EIGEN_TENSOR_PLUGIN
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index f9f07d4..f88793e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -999,8 +999,10 @@
     }
 
     // Returns a formatted tensor ready for printing to a stream
-    inline const TensorWithFormat<Derived,DerivedTraits::Layout,DerivedTraits::NumDimensions> format(const TensorIOFormat& fmt) const {
-      return TensorWithFormat<Derived,DerivedTraits::Layout,DerivedTraits::NumDimensions>(derived(), fmt);
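+    // Templating on the concrete Format type lets internal::TensorPrinter dispatch on the format at compile time.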
+    template<typename Format>
+    inline const TensorWithFormat<Derived,DerivedTraits::Layout,DerivedTraits::NumDimensions, Format> format(const Format& fmt) const {
+      return TensorWithFormat<Derived,DerivedTraits::Layout,DerivedTraits::NumDimensions, Format>(derived(), fmt);
     }
 
     #ifdef EIGEN_READONLY_TENSORBASE_PLUGIN
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index 6c91d93..9ef4bbc 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -55,12 +55,10 @@
 struct is_input_scalar<Sizes<>> {
   static const bool value = true;
 };
-#ifndef EIGEN_EMULATE_CXX11_META_H
 template <typename std::ptrdiff_t... Indices>
 struct is_input_scalar<Sizes<Indices...>> {
-  static const bool value = (Sizes<Indices...>::total_size == 1);
+  static constexpr bool value = (Sizes<Indices...>::total_size == 1);
 };
-#endif
 
 }  // end namespace internal
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
index ef553e0..780e896 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
@@ -1057,7 +1057,7 @@
     __syncthreads();
   }  // end loop over k
 
-  #undef add_vals
+#undef add_vals
 
   __syncthreads();
   Index horiz_base = (threadIdx.y / 4) * 8 + base_n;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
index 0493fe9..26984b6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
@@ -898,8 +898,8 @@
         // num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " maxX: " << maxX << " shared_mem: "
         // << shared_mem << " in stream " << m_device.stream() << endl;
 
-        const array<Index, 1> indices(m_indices[0]);
-        const array<Index, 1> kernel_dims(m_kernelImpl.dimensions()[0]);
+        const array<Index, 1> indices{m_indices[0]};
+        const array<Index, 1> kernel_dims{m_kernelImpl.dimensions()[0]};
         internal::IndexMapper<Index, InputDims, 1, Layout> indexMapper(m_inputImpl.dimensions(), kernel_dims, indices);
         switch (kernel_size) {
           case 4: {
@@ -965,8 +965,8 @@
         // " num_blocks.z: " << num_blocks.z << " maxX: " << maxX << " maxY: " << maxY << " maxP: " << maxP << "
         // shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
 
-        const array<Index, 2> indices(m_indices[idxX], m_indices[idxY]);
-        const array<Index, 2> kernel_dims(m_kernelImpl.dimensions()[idxX], m_kernelImpl.dimensions()[idxY]);
+        const array<Index, 2> indices{m_indices[idxX], m_indices[idxY]};
+        const array<Index, 2> kernel_dims{m_kernelImpl.dimensions()[idxX], m_kernelImpl.dimensions()[idxY]};
         internal::IndexMapper<Index, InputDims, 2, Layout> indexMapper(m_inputImpl.dimensions(), kernel_dims, indices);
         switch (kernel_size_x) {
           case 4: {
@@ -1059,9 +1059,9 @@
         // block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y <<
         // " num_blocks.z: " << num_blocks.z  << " shared_mem: " << shared_mem << " in stream " << m_device.stream() <<
         // endl;
-        const array<Index, 3> indices(m_indices[idxX], m_indices[idxY], m_indices[idxZ]);
-        const array<Index, 3> kernel_dims(m_kernelImpl.dimensions()[idxX], m_kernelImpl.dimensions()[idxY],
-                                          m_kernelImpl.dimensions()[idxZ]);
+        const array<Index, 3> indices{m_indices[idxX], m_indices[idxY], m_indices[idxZ]};
+        const array<Index, 3> kernel_dims{m_kernelImpl.dimensions()[idxX], m_kernelImpl.dimensions()[idxY],
+                                          m_kernelImpl.dimensions()[idxZ]};
         internal::IndexMapper<Index, InputDims, 3, Layout> indexMapper(m_inputImpl.dimensions(), kernel_dims, indices);
 
         LAUNCH_GPU_KERNEL((EigenConvolutionKernel3D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims>),
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
index 4043e5e..ae8f25f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
@@ -81,7 +81,6 @@
 }  // end namespace internal
 
 // Fixed size
-#ifndef EIGEN_EMULATE_CXX11_META_H
 template <typename std::ptrdiff_t... Indices>
 struct Sizes {
   typedef internal::numeric_list<std::ptrdiff_t, Indices...> Base;
@@ -133,87 +132,6 @@
 }
 }  // namespace internal
 
-#else
-
-template <std::ptrdiff_t n>
-struct non_zero_size {
-  typedef internal::type2val<std::ptrdiff_t, n> type;
-};
-template <>
-struct non_zero_size<0> {
-  typedef internal::null_type type;
-};
-
-template <std::ptrdiff_t V1 = 0, std::ptrdiff_t V2 = 0, std::ptrdiff_t V3 = 0, std::ptrdiff_t V4 = 0,
-          std::ptrdiff_t V5 = 0>
-struct Sizes {
-  typedef typename internal::make_type_list<typename non_zero_size<V1>::type, typename non_zero_size<V2>::type,
-                                            typename non_zero_size<V3>::type, typename non_zero_size<V4>::type,
-                                            typename non_zero_size<V5>::type>::type Base;
-  static const std::ptrdiff_t count = Base::count;
-  static const std::ptrdiff_t total_size = internal::arg_prod<Base>::value;
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t rank() const { return count; }
-
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t TotalSize() { return internal::arg_prod<Base>::value; }
-
-  Sizes() {}
-  template <typename DenseIndex>
-  explicit Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
-    // todo: add assertion
-  }
-
-  template <typename T>
-  Sizes& operator=(const T& /*other*/) {
-    // add assertion failure if the size of other is different
-    return *this;
-  }
-
-  template <typename... DenseIndex>
-  Sizes(DenseIndex... /*indices*/) {}
-  explicit Sizes(std::initializer_list<std::ptrdiff_t>) {
-    // todo: add assertion
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index operator[](const Index index) const {
-    switch (index) {
-      case 0:
-        return internal::get<0, Base>::value;
-      case 1:
-        return internal::get<1, Base>::value;
-      case 2:
-        return internal::get<2, Base>::value;
-      case 3:
-        return internal::get<3, Base>::value;
-      case 4:
-        return internal::get<4, Base>::value;
-      default:
-        eigen_assert(false && "index overflow");
-        return static_cast<Index>(-1);
-    }
-  }
-
-  template <typename DenseIndex>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const {
-    return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(
-        indices, *reinterpret_cast<const Base*>(this));
-  }
-  template <typename DenseIndex>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const {
-    return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(
-        indices, *reinterpret_cast<const Base*>(this));
-  }
-};
-
-namespace internal {
-template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) {
-  return Sizes<V1, V2, V3, V4, V5>::total_size;
-}
-}  // namespace internal
-
-#endif
-
 // Boilerplate
 namespace internal {
 template <typename Index, std::ptrdiff_t NumIndices, std::ptrdiff_t n, bool RowMajor>
@@ -289,21 +207,12 @@
     }
   }
 
-#ifndef EIGEN_EMULATE_CXX11_META_H
   template <typename std::ptrdiff_t... Indices>
   EIGEN_DEVICE_FUNC DSizes(const Sizes<Indices...>& a) {
     for (int i = 0; i < NumDims; ++i) {
       (*this)[i] = a[i];
     }
   }
-#else
-  template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5>
-  EIGEN_DEVICE_FUNC DSizes(const Sizes<V1, V2, V3, V4, V5>& a) {
-    for (int i = 0; i < NumDims; ++i) {
-      (*this)[i] = a[i];
-    }
-  }
-#endif
 
   template <typename... IndexTypes>
       EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit DSizes(DenseIndex firstDimension, DenseIndex secondDimension,
@@ -374,7 +283,6 @@
 struct array_size<DSizes<DenseIndex, NumDims> > {
   static const ptrdiff_t value = NumDims;
 };
-#ifndef EIGEN_EMULATE_CXX11_META_H
 template <typename std::ptrdiff_t... Indices>
 struct array_size<const Sizes<Indices...> > {
   static const std::ptrdiff_t value = Sizes<Indices...>::count;
@@ -392,22 +300,6 @@
   eigen_assert(false && "should never be called");
   return -1;
 }
-#else
-template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5>
-struct array_size<const Sizes<V1, V2, V3, V4, V5> > {
-  static const ptrdiff_t value = Sizes<V1, V2, V3, V4, V5>::count;
-};
-template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5>
-struct array_size<Sizes<V1, V2, V3, V4, V5> > {
-  static const ptrdiff_t value = Sizes<V1, V2, V3, V4, V5>::count;
-};
-template <std::ptrdiff_t n, std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4,
-          std::ptrdiff_t V5>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<V1, V2, V3, V4, V5>&) {
-  return get<n, typename Sizes<V1, V2, V3, V4, V5>::Base>::value;
-}
-
-#endif
 
 template <typename Dims1, typename Dims2, ptrdiff_t n, ptrdiff_t m>
 struct sizes_match_below_dim {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
index 985e003..b1928c4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
@@ -18,33 +18,24 @@
 struct TensorIOFormat;
 
 namespace internal {
-template <typename Tensor, std::size_t rank>
+template <typename Tensor, std::size_t rank, typename Format, typename EnableIf = void>
 struct TensorPrinter;
 }
 
-struct TensorIOFormat {
-  TensorIOFormat(const std::vector<std::string>& _separator, const std::vector<std::string>& _prefix,
-                 const std::vector<std::string>& _suffix, int _precision = StreamPrecision, int _flags = 0,
-                 const std::string& _tenPrefix = "", const std::string& _tenSuffix = "", const char _fill = ' ')
-      : tenPrefix(_tenPrefix),
-        tenSuffix(_tenSuffix),
-        prefix(_prefix),
-        suffix(_suffix),
-        separator(_separator),
-        fill(_fill),
-        precision(_precision),
-        flags(_flags) {
-    init_spacer();
-  }
-
-  TensorIOFormat(int _precision = StreamPrecision, int _flags = 0, const std::string& _tenPrefix = "",
-                 const std::string& _tenSuffix = "", const char _fill = ' ')
-      : tenPrefix(_tenPrefix), tenSuffix(_tenSuffix), fill(_fill), precision(_precision), flags(_flags) {
-    // default values of prefix, suffix and separator
-    prefix = {"", "["};
-    suffix = {"", "]"};
-    separator = {", ", "\n"};
-
+template <typename Derived_>
+struct TensorIOFormatBase {
+  using Derived = Derived_;
+  TensorIOFormatBase(const std::vector<std::string>& separator, const std::vector<std::string>& prefix,
+                     const std::vector<std::string>& suffix, int precision = StreamPrecision, int flags = 0,
+                     const std::string& tenPrefix = "", const std::string& tenSuffix = "", const char fill = ' ')
+      : tenPrefix(tenPrefix),
+        tenSuffix(tenSuffix),
+        prefix(prefix),
+        suffix(suffix),
+        separator(separator),
+        fill(fill),
+        precision(precision),
+        flags(flags) {
     init_spacer();
   }
 
@@ -67,33 +58,6 @@
     }
   }
 
-  static inline const TensorIOFormat Numpy() {
-    std::vector<std::string> prefix = {"", "["};
-    std::vector<std::string> suffix = {"", "]"};
-    std::vector<std::string> separator = {" ", "\n"};
-    return TensorIOFormat(separator, prefix, suffix, StreamPrecision, 0, "[", "]");
-  }
-
-  static inline const TensorIOFormat Plain() {
-    std::vector<std::string> separator = {" ", "\n", "\n", ""};
-    std::vector<std::string> prefix = {""};
-    std::vector<std::string> suffix = {""};
-    return TensorIOFormat(separator, prefix, suffix, StreamPrecision, 0, "", "", ' ');
-  }
-
-  static inline const TensorIOFormat Native() {
-    std::vector<std::string> separator = {", ", ",\n", "\n"};
-    std::vector<std::string> prefix = {"", "{"};
-    std::vector<std::string> suffix = {"", "}"};
-    return TensorIOFormat(separator, prefix, suffix, StreamPrecision, 0, "{", "}", ' ');
-  }
-
-  static inline const TensorIOFormat Legacy() {
-    TensorIOFormat LegacyFormat(StreamPrecision, 0, "", "", ' ');
-    LegacyFormat.legacy_bit = true;
-    return LegacyFormat;
-  }
-
   std::string tenPrefix;
   std::string tenSuffix;
   std::vector<std::string> prefix;
@@ -103,24 +67,67 @@
   int precision;
   int flags;
   std::vector<std::string> spacer{};
-  bool legacy_bit = false;
 };
 
-template <typename T, int Layout, int rank>
+struct TensorIOFormatNumpy : public TensorIOFormatBase<TensorIOFormatNumpy> {
+  using Base = TensorIOFormatBase<TensorIOFormatNumpy>;
+  TensorIOFormatNumpy()
+      : Base(/*separator=*/{" ", "\n"}, /*prefix=*/{"", "["}, /*suffix=*/{"", "]"}, /*precision=*/StreamPrecision,
+             /*flags=*/0, /*tenPrefix=*/"[", /*tenSuffix=*/"]") {}
+};
+
+struct TensorIOFormatNative : public TensorIOFormatBase<TensorIOFormatNative> {
+  using Base = TensorIOFormatBase<TensorIOFormatNative>;
+  TensorIOFormatNative()
+      : Base(/*separator=*/{", ", ",\n", "\n"}, /*prefix=*/{"", "{"}, /*suffix=*/{"", "}"},
+             /*precision=*/StreamPrecision, /*flags=*/0, /*tenPrefix=*/"{", /*tenSuffix=*/"}") {}
+};
+
+struct TensorIOFormatPlain : public TensorIOFormatBase<TensorIOFormatPlain> {
+  using Base = TensorIOFormatBase<TensorIOFormatPlain>;
+  TensorIOFormatPlain()
+      : Base(/*separator=*/{" ", "\n", "\n", ""}, /*prefix=*/{""}, /*suffix=*/{""}, /*precision=*/StreamPrecision,
+             /*flags=*/0, /*tenPrefix=*/"", /*tenSuffix=*/"") {}
+};
+
+struct TensorIOFormatLegacy : public TensorIOFormatBase<TensorIOFormatLegacy> {
+  using Base = TensorIOFormatBase<TensorIOFormatLegacy>;
+  TensorIOFormatLegacy()
+      : Base(/*separator=*/{", ", "\n"}, /*prefix=*/{"", "["}, /*suffix=*/{"", "]"}, /*precision=*/StreamPrecision,
+             /*flags=*/0, /*tenPrefix=*/"", /*tenSuffix=*/"") {}
+};
+
+struct TensorIOFormat : public TensorIOFormatBase<TensorIOFormat> {
+  using Base = TensorIOFormatBase<TensorIOFormat>;
+  TensorIOFormat(const std::vector<std::string>& separator, const std::vector<std::string>& prefix,
+                 const std::vector<std::string>& suffix, int precision = StreamPrecision, int flags = 0,
+                 const std::string& tenPrefix = "", const std::string& tenSuffix = "", const char fill = ' ')
+      : Base(separator, prefix, suffix, precision, flags, tenPrefix, tenSuffix, fill) {}
+
+  static inline const TensorIOFormatNumpy Numpy() { return TensorIOFormatNumpy{}; }
+
+  static inline const TensorIOFormatPlain Plain() { return TensorIOFormatPlain{}; }
+
+  static inline const TensorIOFormatNative Native() { return TensorIOFormatNative{}; }
+
+  static inline const TensorIOFormatLegacy Legacy() { return TensorIOFormatLegacy{}; }
+};
+
+template <typename T, int Layout, int rank, typename Format>
 class TensorWithFormat;
 // specialize for Layout=ColMajor, Layout=RowMajor and rank=0.
-template <typename T, int rank>
-class TensorWithFormat<T, RowMajor, rank> {
+template <typename T, int rank, typename Format>
+class TensorWithFormat<T, RowMajor, rank, Format> {
  public:
-  TensorWithFormat(const T& tensor, const TensorIOFormat& format) : t_tensor(tensor), t_format(format) {}
+  TensorWithFormat(const T& tensor, const Format& format) : t_tensor(tensor), t_format(format) {}
 
-  friend std::ostream& operator<<(std::ostream& os, const TensorWithFormat<T, RowMajor, rank>& wf) {
+  friend std::ostream& operator<<(std::ostream& os, const TensorWithFormat<T, RowMajor, rank, Format>& wf) {
     // Evaluate the expression if needed
     typedef TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice> Evaluator;
     TensorForcedEvalOp<const T> eval = wf.t_tensor.eval();
     Evaluator tensor(eval, DefaultDevice());
     tensor.evalSubExprsIfNeeded(NULL);
-    internal::TensorPrinter<Evaluator, rank>::run(os, tensor, wf.t_format);
+    internal::TensorPrinter<Evaluator, rank, Format>::run(os, tensor, wf.t_format);
     // Cleanup.
     tensor.cleanup();
     return os;
@@ -128,15 +135,15 @@
 
  protected:
   T t_tensor;
-  TensorIOFormat t_format;
+  Format t_format;
 };
 
-template <typename T, int rank>
-class TensorWithFormat<T, ColMajor, rank> {
+template <typename T, int rank, typename Format>
+class TensorWithFormat<T, ColMajor, rank, Format> {
  public:
-  TensorWithFormat(const T& tensor, const TensorIOFormat& format) : t_tensor(tensor), t_format(format) {}
+  TensorWithFormat(const T& tensor, const Format& format) : t_tensor(tensor), t_format(format) {}
 
-  friend std::ostream& operator<<(std::ostream& os, const TensorWithFormat<T, ColMajor, rank>& wf) {
+  friend std::ostream& operator<<(std::ostream& os, const TensorWithFormat<T, ColMajor, rank, Format>& wf) {
     // Switch to RowMajor storage and print afterwards
     typedef typename T::Index IndexType;
     std::array<IndexType, rank> shuffle;
@@ -150,7 +157,7 @@
     TensorForcedEvalOp<const decltype(tensor_row_major)> eval = tensor_row_major.eval();
     Evaluator tensor(eval, DefaultDevice());
     tensor.evalSubExprsIfNeeded(NULL);
-    internal::TensorPrinter<Evaluator, rank>::run(os, tensor, wf.t_format);
+    internal::TensorPrinter<Evaluator, rank, Format>::run(os, tensor, wf.t_format);
     // Cleanup.
     tensor.cleanup();
     return os;
@@ -158,21 +165,21 @@
 
  protected:
   T t_tensor;
-  TensorIOFormat t_format;
+  Format t_format;
 };
 
-template <typename T>
-class TensorWithFormat<T, ColMajor, 0> {
+template <typename T, typename Format>
+class TensorWithFormat<T, ColMajor, 0, Format> {
  public:
-  TensorWithFormat(const T& tensor, const TensorIOFormat& format) : t_tensor(tensor), t_format(format) {}
+  TensorWithFormat(const T& tensor, const Format& format) : t_tensor(tensor), t_format(format) {}
 
-  friend std::ostream& operator<<(std::ostream& os, const TensorWithFormat<T, ColMajor, 0>& wf) {
+  friend std::ostream& operator<<(std::ostream& os, const TensorWithFormat<T, ColMajor, 0, Format>& wf) {
     // Evaluate the expression if needed
     typedef TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice> Evaluator;
     TensorForcedEvalOp<const T> eval = wf.t_tensor.eval();
     Evaluator tensor(eval, DefaultDevice());
     tensor.evalSubExprsIfNeeded(NULL);
-    internal::TensorPrinter<Evaluator, 0>::run(os, tensor, wf.t_format);
+    internal::TensorPrinter<Evaluator, 0, Format>::run(os, tensor, wf.t_format);
     // Cleanup.
     tensor.cleanup();
     return os;
@@ -180,29 +187,39 @@
 
  protected:
   T t_tensor;
-  TensorIOFormat t_format;
+  Format t_format;
 };
 
 namespace internal {
-template <typename Tensor, std::size_t rank>
-struct TensorPrinter {
-  static void run(std::ostream& s, const Tensor& _t, const TensorIOFormat& fmt) {
-    typedef std::remove_const_t<typename Tensor::Scalar> Scalar;
-    typedef typename Tensor::Index IndexType;
-    static const int layout = Tensor::Layout;
-    // backwards compatibility case: print tensor after reshaping to matrix of size dim(0) x
-    // (dim(1)*dim(2)*...*dim(rank-1)).
-    if (fmt.legacy_bit) {
-      const IndexType total_size = internal::array_prod(_t.dimensions());
-      if (total_size > 0) {
-        const IndexType first_dim = Eigen::internal::array_get<0>(_t.dimensions());
-        Map<const Array<Scalar, Dynamic, Dynamic, layout>> matrix(_t.data(), first_dim, total_size / first_dim);
-        s << matrix;
-        return;
-      }
-    }
 
-    eigen_assert(layout == RowMajor);
+// Default scalar printer.
+template <typename Scalar, typename Format, typename EnableIf = void>
+struct ScalarPrinter {
+  static void run(std::ostream& stream, const Scalar& scalar, const Format& fmt) { stream << scalar; }
+};
+
+template <typename Scalar>
+struct ScalarPrinter<Scalar, TensorIOFormatNumpy, std::enable_if_t<NumTraits<Scalar>::IsComplex>> {
+  static void run(std::ostream& stream, const Scalar& scalar, const TensorIOFormatNumpy& fmt) {
+    stream << numext::real(scalar) << "+" << numext::imag(scalar) << "j";
+  }
+};
+
+template <typename Scalar>
+struct ScalarPrinter<Scalar, TensorIOFormatNative, std::enable_if_t<NumTraits<Scalar>::IsComplex>> {
+  static void run(std::ostream& stream, const Scalar& scalar, const TensorIOFormatNative& fmt) {
+    stream << "{" << numext::real(scalar) << ", " << numext::imag(scalar) << "}";
+  }
+};
+
+template <typename Tensor, std::size_t rank, typename Format, typename EnableIf>
+struct TensorPrinter {
+  using Scalar = std::remove_const_t<typename Tensor::Scalar>;
+
+  static void run(std::ostream& s, const Tensor& tensor, const Format& fmt) {
+    typedef typename Tensor::Index IndexType;
+
+    eigen_assert(Tensor::Layout == RowMajor);
     typedef std::conditional_t<is_same<Scalar, char>::value || is_same<Scalar, unsigned char>::value ||
                                    is_same<Scalar, numext::int8_t>::value || is_same<Scalar, numext::uint8_t>::value,
                                int,
@@ -213,7 +230,7 @@
                                                   std::complex<int>, const Scalar&>>
         PrintType;
 
-    const IndexType total_size = array_prod(_t.dimensions());
+    const IndexType total_size = array_prod(tensor.dimensions());
 
     std::streamsize explicit_precision;
     if (fmt.precision == StreamPrecision) {
@@ -232,20 +249,16 @@
     if (explicit_precision) old_precision = s.precision(explicit_precision);
 
     IndexType width = 0;
-
     bool align_cols = !(fmt.flags & DontAlignCols);
     if (align_cols) {
       // compute the largest width
       for (IndexType i = 0; i < total_size; i++) {
         std::stringstream sstr;
         sstr.copyfmt(s);
-        sstr << static_cast<PrintType>(_t.data()[i]);
+        ScalarPrinter<Scalar, Format>::run(sstr, static_cast<PrintType>(tensor.data()[i]), fmt);
         width = std::max<IndexType>(width, IndexType(sstr.str().length()));
       }
     }
-    std::streamsize old_width = s.width();
-    char old_fill_character = s.fill();
-
     s << fmt.tenPrefix;
     for (IndexType i = 0; i < total_size; i++) {
       std::array<bool, rank> is_at_end{};
@@ -253,7 +266,7 @@
 
       // is the i-th element the end of a coeff (always true), of a row, of a matrix, ...?
       for (std::size_t k = 0; k < rank; k++) {
-        if ((i + 1) % (std::accumulate(_t.dimensions().rbegin(), _t.dimensions().rbegin() + k, 1,
+        if ((i + 1) % (std::accumulate(tensor.dimensions().rbegin(), tensor.dimensions().rbegin() + k, 1,
                                        std::multiplies<IndexType>())) ==
             0) {
           is_at_end[k] = true;
@@ -262,7 +275,7 @@
 
       // is the i-th element the beginning of a coeff (always true), of a row, of a matrix, ...?
       for (std::size_t k = 0; k < rank; k++) {
-        if (i % (std::accumulate(_t.dimensions().rbegin(), _t.dimensions().rbegin() + k, 1,
+        if (i % (std::accumulate(tensor.dimensions().rbegin(), tensor.dimensions().rbegin() + k, 1,
                                  std::multiplies<IndexType>())) ==
             0) {
           is_at_begin[k] = true;
@@ -318,12 +331,20 @@
       }
 
       s << prefix.str();
-      if (width) {
-        s.fill(fmt.fill);
-        s.width(width);
-        s << std::right;
+      // To avoid disturbing the stream's formatting state, write the scalar to a string stream and apply the width/fill manually.
+      std::stringstream sstr;
+      sstr.copyfmt(s);
+      ScalarPrinter<Scalar, Format>::run(sstr, static_cast<PrintType>(tensor.data()[i]), fmt);
+      std::string scalar_str = sstr.str();
+      IndexType scalar_width = scalar_str.length();
+      if (width && scalar_width < width) {
+        std::string filler;
+        for (IndexType j = scalar_width; j < width; ++j) {
+          filler.push_back(fmt.fill);
+        }
+        s << filler;
       }
-      s << _t.data()[i];
+      s << scalar_str;
       s << suffix.str();
       if (i < total_size - 1) {
         s << separator.str();
@@ -331,17 +352,32 @@
     }
     s << fmt.tenSuffix;
     if (explicit_precision) s.precision(old_precision);
-    if (width) {
-      s.fill(old_fill_character);
-      s.width(old_width);
+  }
+};
+
+template <typename Tensor, std::size_t rank>
+struct TensorPrinter<Tensor, rank, TensorIOFormatLegacy, std::enable_if_t<rank != 0>> {
+  using Format = TensorIOFormatLegacy;
+  using Scalar = std::remove_const_t<typename Tensor::Scalar>;
+
+  static void run(std::ostream& s, const Tensor& tensor, const Format& fmt) {
+    typedef typename Tensor::Index IndexType;
+    // backwards compatibility case: print tensor after reshaping to matrix of size dim(0) x
+    // (dim(1)*dim(2)*...*dim(rank-1)).
+    const IndexType total_size = internal::array_prod(tensor.dimensions());
+    if (total_size > 0) {
+      const IndexType first_dim = Eigen::internal::array_get<0>(tensor.dimensions());
+      Map<const Array<Scalar, Dynamic, Dynamic, Tensor::Layout>> matrix(tensor.data(), first_dim, total_size / first_dim);
+      s << matrix;
+      return;
     }
   }
 };
 
-template <typename Tensor>
-struct TensorPrinter<Tensor, 0> {
-  static void run(std::ostream& s, const Tensor& _t, const TensorIOFormat& fmt) {
-    typedef typename Tensor::Scalar Scalar;
+template <typename Tensor, typename Format>
+struct TensorPrinter<Tensor, 0, Format> {
+  static void run(std::ostream& s, const Tensor& tensor, const Format& fmt) {
+    using Scalar = std::remove_const_t<typename Tensor::Scalar>;
 
     std::streamsize explicit_precision;
     if (fmt.precision == StreamPrecision) {
@@ -358,8 +394,9 @@
 
     std::streamsize old_precision = 0;
     if (explicit_precision) old_precision = s.precision(explicit_precision);
-
-    s << fmt.tenPrefix << _t.coeff(0) << fmt.tenSuffix;
+    s << fmt.tenPrefix;
+    ScalarPrinter<Scalar, Format>::run(s, tensor.coeff(0), fmt);
+    s << fmt.tenSuffix;
     if (explicit_precision) s.precision(old_precision);
   }
 };
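The Format template parameter threads the concrete TensorIOFormat type down to ScalarPrinter, so complex scalars can be rendered per format. A minimal usage sketch (the expected strings follow the updated cxx11_tensor_io.cpp test later in this diff):

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <complex>
    #include <iostream>

    int main() {
      Eigen::Tensor<std::complex<float>, 2> t(3, 2);
      t.setValues({{{1, 2}, {12, 3}}, {{-4, 2}, {0, 5}}, {{-1, 4}, {5, 27}}});
      // Numpy format prints complex values as re+imj, e.g. "[[ 1+2j 12+3j] ..."
      std::cout << t.format(Eigen::TensorIOFormat::Numpy()) << "\n";
      // Native format prints them as braced pairs, e.g. "{{ {1, 2}, {12, 3}}, ..."
      std::cout << t.format(Eigen::TensorIOFormat::Native()) << "\n";
    }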
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
index fe83b26..69ab684 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
@@ -37,7 +37,7 @@
 
 template <Index n>
 struct type2index {
-  static const Index value = n;
+  static constexpr Index value = n;
   EIGEN_DEVICE_FUNC constexpr operator Index() const { return n; }
   EIGEN_DEVICE_FUNC void set(Index val) { eigen_assert(val == n); }
 };
@@ -46,8 +46,8 @@
 // such as IndexPairList<type2indexpair<1,2>, type2indexpair<3,4>>().
 template <Index f, Index s>
 struct type2indexpair {
-  static const Index first = f;
-  static const Index second = s;
+  static constexpr Index first = f;
+  static constexpr Index second = s;
 
   constexpr EIGEN_DEVICE_FUNC operator IndexPair<Index>() const { return IndexPair<Index>(f, s); }
 
@@ -134,7 +134,7 @@
   EIGEN_DEVICE_FUNC constexpr IndexTuple() : head(), others() {}
   EIGEN_DEVICE_FUNC constexpr IndexTuple(const T& v, const O... o) : head(v), others(o...) {}
 
-  constexpr static int count = 1 + sizeof...(O);
+  static constexpr int count = 1 + sizeof...(O);
   T head;
   IndexTuple<O...> others;
   typedef T Head;
@@ -194,11 +194,11 @@
 }
 template <typename T, typename... O>
 struct array_size<IndexTuple<T, O...>> {
-  static const size_t value = IndexTuple<T, O...>::count;
+  static constexpr size_t value = IndexTuple<T, O...>::count;
 };
 template <typename T, typename... O>
 struct array_size<const IndexTuple<T, O...>> {
-  static const size_t value = IndexTuple<T, O...>::count;
+  static constexpr size_t value = IndexTuple<T, O...>::count;
 };
 
 template <Index Idx, typename ValueT>
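The switch from static const to static constexpr above is not purely cosmetic: since C++17 a constexpr static data member is implicitly inline, so odr-using it (for example, binding a const reference to it) needs no out-of-line definition, whereas a static const member formally still does. A minimal illustration, independent of Eigen:

    struct OldStyle { static const int value = 3; };      // odr-use needs `const int OldStyle::value;` in some TU
    struct NewStyle { static constexpr int value = 3; };  // implicitly inline since C++17; no definition needed

    int observe(const int& x) { return x; }  // binding the reference odr-uses the member

    int main() { return observe(NewStyle::value); }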
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 86c6bf3..2ecbb7c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -977,11 +977,12 @@
   // Dimensions of the output of the operation.
   Dimensions m_dimensions;
   // Precomputed strides for the output tensor.
-  array<Index, NumOutputDims> m_outputStrides;
-  array<internal::TensorIntDivisor<Index>, NumOutputDims> m_fastOutputStrides;
-  array<Index, NumPreservedStrides> m_preservedStrides;
+  // Avoid zero-sized arrays, since element access fails to compile on GPU.
+  array<Index, (std::max)(NumOutputDims, 1)> m_outputStrides;
+  array<internal::TensorIntDivisor<Index>, (std::max)(NumOutputDims, 1)> m_fastOutputStrides;
+  array<Index, (std::max)(NumPreservedStrides, 1)> m_preservedStrides;
   // Map from output to input dimension index.
-  array<Index, NumOutputDims> m_output_to_input_dim_map;
+  array<Index, (std::max)(NumOutputDims, 1)> m_output_to_input_dim_map;
   // How many values go into each reduction
   Index m_numValuesToReduce;
 
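NumOutputDims is 0 for full reductions, and element access on a zero-sized Eigen::array (now std::array) fails to compile in device code, as the comment above notes. A sketch of the clamping pattern, with hypothetical names:

    #include <array>
    #include <algorithm>

    template <int NumOutputDims>
    struct ReducerState {
      // (std::max) is parenthesized so that a max() macro (e.g. from windows.h) cannot expand here.
      std::array<long, (std::max)(NumOutputDims, 1)> strides;
    };

    int main() {
      ReducerState<0> full_reduction;  // rank-0 output still gets one (unused) slot
      full_reduction.strides[0] = 0;   // element access now compiles on all targets
    }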
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
index 70e74c2..c6ffa0d 100644
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
@@ -17,21 +17,52 @@
 
 namespace internal {
 
-template <typename A, typename B>
-struct make_coherent_impl {
-  static void run(A&, B&) {}
-};
-
-// resize a to match b is a.size()==0, and conversely.
-template <typename A, typename B>
-void make_coherent(const A& a, const B& b) {
-  make_coherent_impl<A, B>::run(a.const_cast_derived(), b.const_cast_derived());
-}
-
 template <typename DerivativeType, bool Enable>
 struct auto_diff_special_op;
 
-}  // end namespace internal
+template <typename DerivativeType, typename OtherDerivativeType, typename EnableIf = void>
+struct maybe_coherent_pad_helper {
+  static constexpr int SizeAtCompileTime = DerivativeType::SizeAtCompileTime == Dynamic ||
+                                                   OtherDerivativeType::SizeAtCompileTime == Dynamic
+                                               ? Dynamic
+                                           : int(DerivativeType::SizeAtCompileTime) >
+                                                   int(OtherDerivativeType::SizeAtCompileTime)
+                                               ? DerivativeType::SizeAtCompileTime
+                                               : OtherDerivativeType::SizeAtCompileTime;
+  using type = CoherentPadOp<DerivativeType, SizeAtCompileTime>;
+  static type pad(const DerivativeType& x, const OtherDerivativeType& y) {
+    // CoherentPadOp uses variable_if_dynamic<SizeAtCompileTime>.  In this case, `SizeAtCompileTime` might
+    // be Dynamic, so we need to take the runtime maximum of x, y.
+    return CoherentPadOp<DerivativeType, SizeAtCompileTime>(x, numext::maxi(x.size(), y.size()));
+  }
+};
+
+// Both sizes are fixed at compile time and DerivativeType is at least as large as
+// OtherDerivativeType, so no padding is needed.
+template <typename DerivativeType, typename OtherDerivativeType>
+struct maybe_coherent_pad_helper<
+    DerivativeType, OtherDerivativeType,
+    std::enable_if_t<DerivativeType::SizeAtCompileTime >= OtherDerivativeType::SizeAtCompileTime &&
+                     DerivativeType::SizeAtCompileTime != Dynamic &&
+                     OtherDerivativeType::SizeAtCompileTime != Dynamic>> {
+  using type = const DerivativeType&;
+  static const DerivativeType& pad(const DerivativeType& x, const OtherDerivativeType& /*y*/) { return x; }
+};
+
+template <typename DerivativeType, typename OtherDerivativeType>
+typename maybe_coherent_pad_helper<DerivativeType, OtherDerivativeType>::type MaybeCoherentPad(
+    const DerivativeType& x, const OtherDerivativeType& y) {
+  return maybe_coherent_pad_helper<DerivativeType, OtherDerivativeType>::pad(x, y);
+}
+
+template <typename Op, typename LhsDerivativeType, typename RhsDerivativeType>
+auto MakeCoherentCwiseBinaryOp(const LhsDerivativeType& x, const RhsDerivativeType& y, Op op = Op()) {
+  const auto& lhs = MaybeCoherentPad(x, y);
+  const auto& rhs = MaybeCoherentPad(y, x);
+  return CwiseBinaryOp<Op, remove_all_t<decltype(lhs)>, remove_all_t<decltype(rhs)>>(lhs, rhs, op);
+}
+
+}  // namespace internal
 
 template <typename DerivativeType>
 class AutoDiffScalar;
@@ -214,13 +245,10 @@
   }
 
   template <typename OtherDerType>
-  inline AutoDiffScalar<
-      CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const DerType, const internal::remove_all_t<OtherDerType>>>
-  operator+(const AutoDiffScalar<OtherDerType>& other) const {
-    internal::make_coherent(m_derivatives, other.derivatives());
-    return AutoDiffScalar<
-        CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const DerType, const internal::remove_all_t<OtherDerType>>>(
-        m_value + other.value(), m_derivatives + other.derivatives());
+  inline auto operator+(const AutoDiffScalar<OtherDerType>& other) const {
+    return MakeAutoDiffScalar(
+        m_value + other.value(),
+        internal::MakeCoherentCwiseBinaryOp<internal::scalar_sum_op<Scalar>>(m_derivatives, other.derivatives()));
   }
 
   template <typename OtherDerType>
@@ -245,13 +273,10 @@
   }
 
   template <typename OtherDerType>
-  inline AutoDiffScalar<
-      CwiseBinaryOp<internal::scalar_difference_op<Scalar>, const DerType, const internal::remove_all_t<OtherDerType>>>
-  operator-(const AutoDiffScalar<OtherDerType>& other) const {
-    internal::make_coherent(m_derivatives, other.derivatives());
-    return AutoDiffScalar<CwiseBinaryOp<internal::scalar_difference_op<Scalar>, const DerType,
-                                        const internal::remove_all_t<OtherDerType>>>(
-        m_value - other.value(), m_derivatives - other.derivatives());
+  inline auto operator-(const AutoDiffScalar<OtherDerType>& other) const {
+    return MakeAutoDiffScalar(m_value - other.value(),
+                              internal::MakeCoherentCwiseBinaryOp<internal::scalar_difference_op<Scalar>>(
+                                  m_derivatives, other.derivatives()));
   }
 
   template <typename OtherDerType>
@@ -264,13 +289,11 @@
     return AutoDiffScalar<CwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const DerType>>(-m_value, -m_derivatives);
   }
 
-  inline AutoDiffScalar<EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType, Scalar, product)> operator*(
-      const Scalar& other) const {
+  inline auto operator*(const Scalar& other) const {
     return MakeAutoDiffScalar(m_value * other, m_derivatives * other);
   }
 
-  friend inline AutoDiffScalar<EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType, Scalar, product)> operator*(
-      const Scalar& other, const AutoDiffScalar& a) {
+  friend inline auto operator*(const Scalar& other, const AutoDiffScalar& a) {
     return MakeAutoDiffScalar(a.value() * other, a.derivatives() * other);
   }
 
@@ -290,13 +313,11 @@
   //         a.derivatives() * other);
   //     }
 
-  inline AutoDiffScalar<EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType, Scalar, product)> operator/(
-      const Scalar& other) const {
+  inline auto operator/(const Scalar& other) const {
     return MakeAutoDiffScalar(m_value / other, (m_derivatives * (Scalar(1) / other)));
   }
 
-  friend inline AutoDiffScalar<EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType, Scalar, product)> operator/(
-      const Scalar& other, const AutoDiffScalar& a) {
+  friend inline auto operator/(const Scalar& other, const AutoDiffScalar& a) {
     return MakeAutoDiffScalar(other / a.value(), a.derivatives() * (Scalar(-other) / (a.value() * a.value())));
   }
 
@@ -317,26 +338,18 @@
   //     }
 
   template <typename OtherDerType>
-  inline AutoDiffScalar<EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(
-      CwiseBinaryOp<internal::scalar_difference_op<Scalar> EIGEN_COMMA const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(
-          DerType, Scalar, product) EIGEN_COMMA const
-                        EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(internal::remove_all_t<OtherDerType>, Scalar, product)>,
-      Scalar, product)>
-  operator/(const AutoDiffScalar<OtherDerType>& other) const {
-    internal::make_coherent(m_derivatives, other.derivatives());
+  inline auto operator/(const AutoDiffScalar<OtherDerType>& other) const {
     return MakeAutoDiffScalar(m_value / other.value(),
-                              ((m_derivatives * other.value()) - (other.derivatives() * m_value)) *
+                              internal::MakeCoherentCwiseBinaryOp<internal::scalar_difference_op<Scalar>>(
+                                  m_derivatives * other.value(), (other.derivatives() * m_value)) *
                                   (Scalar(1) / (other.value() * other.value())));
   }
 
   template <typename OtherDerType>
-  inline AutoDiffScalar<CwiseBinaryOp<
-      internal::scalar_sum_op<Scalar>, const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType, Scalar, product),
-      const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(internal::remove_all_t<OtherDerType>, Scalar, product)>>
-  operator*(const AutoDiffScalar<OtherDerType>& other) const {
-    internal::make_coherent(m_derivatives, other.derivatives());
+  inline auto operator*(const AutoDiffScalar<OtherDerType>& other) const {
     return MakeAutoDiffScalar(m_value * other.value(),
-                              (m_derivatives * other.value()) + (other.derivatives() * m_value));
+                              internal::MakeCoherentCwiseBinaryOp<internal::scalar_sum_op<Scalar>>(
+                                  m_derivatives * other.value(), other.derivatives() * m_value));
   }
 
   inline AutoDiffScalar& operator*=(const Scalar& other) {
@@ -430,64 +443,6 @@
   void operator+() const;
 };
 
-template <typename BinOp, typename A, typename B, typename RefType>
-void make_coherent_expression(CwiseBinaryOp<BinOp, A, B> xpr, const RefType& ref) {
-  make_coherent(xpr.const_cast_derived().lhs(), ref);
-  make_coherent(xpr.const_cast_derived().rhs(), ref);
-}
-
-template <typename UnaryOp, typename A, typename RefType>
-void make_coherent_expression(const CwiseUnaryOp<UnaryOp, A>& xpr, const RefType& ref) {
-  make_coherent(xpr.nestedExpression().const_cast_derived(), ref);
-}
-
-// needed for compilation only
-template <typename UnaryOp, typename A, typename RefType>
-void make_coherent_expression(const CwiseNullaryOp<UnaryOp, A>&, const RefType&) {}
-
-template <typename A_Scalar, int A_Rows, int A_Cols, int A_Options, int A_MaxRows, int A_MaxCols, typename B>
-struct make_coherent_impl<Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols>, B> {
-  typedef Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols> A;
-  static void run(A& a, B& b) {
-    if ((A_Rows == Dynamic || A_Cols == Dynamic) && (a.size() == 0)) {
-      a.resize(b.size());
-      a.setZero();
-    } else if (B::SizeAtCompileTime == Dynamic && a.size() != 0 && b.size() == 0) {
-      make_coherent_expression(b, a);
-    }
-  }
-};
-
-template <typename A, typename B_Scalar, int B_Rows, int B_Cols, int B_Options, int B_MaxRows, int B_MaxCols>
-struct make_coherent_impl<A, Matrix<B_Scalar, B_Rows, B_Cols, B_Options, B_MaxRows, B_MaxCols>> {
-  typedef Matrix<B_Scalar, B_Rows, B_Cols, B_Options, B_MaxRows, B_MaxCols> B;
-  static void run(A& a, B& b) {
-    if ((B_Rows == Dynamic || B_Cols == Dynamic) && (b.size() == 0)) {
-      b.resize(a.size());
-      b.setZero();
-    } else if (A::SizeAtCompileTime == Dynamic && b.size() != 0 && a.size() == 0) {
-      make_coherent_expression(a, b);
-    }
-  }
-};
-
-template <typename A_Scalar, int A_Rows, int A_Cols, int A_Options, int A_MaxRows, int A_MaxCols, typename B_Scalar,
-          int B_Rows, int B_Cols, int B_Options, int B_MaxRows, int B_MaxCols>
-struct make_coherent_impl<Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols>,
-                          Matrix<B_Scalar, B_Rows, B_Cols, B_Options, B_MaxRows, B_MaxCols>> {
-  typedef Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols> A;
-  typedef Matrix<B_Scalar, B_Rows, B_Cols, B_Options, B_MaxRows, B_MaxCols> B;
-  static void run(A& a, B& b) {
-    if ((A_Rows == Dynamic || A_Cols == Dynamic) && (a.size() == 0)) {
-      a.resize(b.size());
-      a.setZero();
-    } else if ((B_Rows == Dynamic || B_Cols == Dynamic) && (b.size() == 0)) {
-      b.resize(a.size());
-      b.setZero();
-    }
-  }
-};
-
 }  // end namespace internal
 
 template <typename DerType, typename BinOp>
@@ -518,10 +473,7 @@
 
 #define EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(FUNC, CODE)                                              \
   template <typename DerType>                                                                        \
-  inline Eigen::AutoDiffScalar<EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(                               \
-      Eigen::internal::remove_all_t<DerType>,                                                        \
-      typename Eigen::internal::traits<Eigen::internal::remove_all_t<DerType>>::Scalar, product)>    \
-  FUNC(const Eigen::AutoDiffScalar<DerType>& x) {                                                    \
+  inline auto FUNC(const Eigen::AutoDiffScalar<DerType>& x) {                                        \
     using namespace Eigen;                                                                           \
     typedef typename Eigen::internal::traits<Eigen::internal::remove_all_t<DerType>>::Scalar Scalar; \
     EIGEN_UNUSED_VARIABLE(sizeof(Scalar));                                                           \
@@ -602,10 +554,8 @@
                                                                      x.derivatives() * (Scalar(1) / x.value()));)
 
 template <typename DerType>
-inline Eigen::AutoDiffScalar<EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(
-    internal::remove_all_t<DerType>, typename internal::traits<internal::remove_all_t<DerType>>::Scalar, product)>
-pow(const Eigen::AutoDiffScalar<DerType>& x,
-    const typename internal::traits<internal::remove_all_t<DerType>>::Scalar& y) {
+inline auto pow(const Eigen::AutoDiffScalar<DerType>& x,
+                const typename internal::traits<internal::remove_all_t<DerType>>::Scalar& y) {
   using namespace Eigen;
   using std::pow;
   return Eigen::MakeAutoDiffScalar(pow(x.value(), y), x.derivatives() * (y * pow(x.value(), y - 1)));
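With make_coherent gone, mixed-size derivative vectors are reconciled lazily: the shorter (possibly empty) side is zero-padded through CoherentPadOp inside the returned expression, and neither operand is resized in place. A minimal sketch of the user-visible behavior:

    #include <unsupported/Eigen/AutoDiff>
    #include <iostream>

    int main() {
      using AD = Eigen::AutoDiffScalar<Eigen::VectorXd>;
      AD x(1.0, Eigen::VectorXd::Unit(3, 0));  // value 1, derivatives (1, 0, 0)
      AD c(2.0);                               // constant: empty derivative vector
      AD y = x * c + c;                        // c's derivatives read as zeros
      std::cout << y.value() << "\n";                    // 4
      std::cout << y.derivatives().transpose() << "\n";  // 2 0 0
    }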
diff --git a/unsupported/Eigen/src/AutoDiff/CoherentPadOp.h b/unsupported/Eigen/src/AutoDiff/CoherentPadOp.h
new file mode 100644
index 0000000..7d3a3fb
--- /dev/null
+++ b/unsupported/Eigen/src/AutoDiff/CoherentPadOp.h
@@ -0,0 +1,152 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2020 The Eigen Team.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COHERENT_PAD_OP_H
+#define EIGEN_COHERENT_PAD_OP_H
+
+#include "./InternalHeaderCheck.h"
+
+namespace Eigen {
+
+namespace internal {
+
+// Pads a vector with zeros to a given size.
+template <typename XprType, int SizeAtCompileTime_>
+struct CoherentPadOp;
+
+template <typename XprType, int SizeAtCompileTime_>
+struct traits<CoherentPadOp<XprType, SizeAtCompileTime_>> : public traits<XprType> {
+  typedef typename internal::remove_all<XprType>::type PlainXprType;
+  typedef typename internal::ref_selector<XprType>::type XprNested;
+  typedef typename std::remove_reference_t<XprNested> XprNested_;
+  enum : int {
+    IsRowMajor = traits<PlainXprType>::Flags & RowMajorBit,
+    SizeAtCompileTime = SizeAtCompileTime_,
+    RowsAtCompileTime = IsRowMajor ? 1 : SizeAtCompileTime,
+    ColsAtCompileTime = IsRowMajor ? SizeAtCompileTime : 1,
+    MaxRowsAtCompileTime = RowsAtCompileTime,
+    MaxColsAtCompileTime = ColsAtCompileTime,
+    Flags = traits<XprType>::Flags & ~NestByRefBit,
+  };
+};
+
+// Pads a vector with zeros to a given size.
+template <typename XprType, int SizeAtCompileTime_>
+struct CoherentPadOp : public dense_xpr_base<CoherentPadOp<XprType, SizeAtCompileTime_>>::type {
+  typedef typename internal::generic_xpr_base<CoherentPadOp<XprType, SizeAtCompileTime_>>::type Base;
+  EIGEN_GENERIC_PUBLIC_INTERFACE(CoherentPadOp)
+
+  using XprNested = typename traits<CoherentPadOp>::XprNested;
+  using XprNested_ = typename traits<CoherentPadOp>::XprNested_;
+  using NestedExpression = XprNested_;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoherentPadOp() = delete;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoherentPadOp(const CoherentPadOp&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoherentPadOp(CoherentPadOp&& other) = default;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoherentPadOp(const XprType& xpr, Index size) : xpr_(xpr), size_(size) {
+    static_assert(XprNested_::IsVectorAtCompileTime, "input type must be a vector");
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprNested_& nestedExpression() const { return xpr_; }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return size_.value(); }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const {
+    return traits<CoherentPadOp>::IsRowMajor ? Index(1) : size();
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const {
+    return traits<CoherentPadOp>::IsRowMajor ? size() : Index(1);
+  }
+
+ private:
+  XprNested xpr_;
+  const internal::variable_if_dynamic<Index, SizeAtCompileTime> size_;
+};
+
+// Adapted from the Replicate evaluator.
+template <typename ArgType, int SizeAtCompileTime>
+struct unary_evaluator<CoherentPadOp<ArgType, SizeAtCompileTime>>
+    : evaluator_base<CoherentPadOp<ArgType, SizeAtCompileTime>> {
+  typedef CoherentPadOp<ArgType, SizeAtCompileTime> XprType;
+  typedef typename internal::remove_all_t<typename XprType::CoeffReturnType> CoeffReturnType;
+  typedef typename internal::nested_eval<ArgType, 1>::type ArgTypeNested;
+  typedef internal::remove_all_t<ArgTypeNested> ArgTypeNestedCleaned;
+
+  enum {
+    CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
+    LinearAccessMask = XprType::IsVectorAtCompileTime ? LinearAccessBit : 0,
+    Flags = evaluator<ArgTypeNestedCleaned>::Flags & (HereditaryBits | LinearAccessMask | RowMajorBit),
+    Alignment = evaluator<ArgTypeNestedCleaned>::Alignment
+  };
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& pad)
+      : m_arg(pad.nestedExpression()), m_argImpl(m_arg), m_size(pad.nestedExpression().size()) {}
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const {
+    EIGEN_IF_CONSTEXPR(XprType::IsRowMajor) {
+      if (col < m_size.value()) {
+        return m_argImpl.coeff(0, col);
+      }
+    }
+    else {
+      if (row < m_size.value()) {
+        return m_argImpl.coeff(row, 0);
+      }
+    }
+    return CoeffReturnType(0);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
+    if (index < m_size.value()) {
+      return m_argImpl.coeff(index);
+    }
+    return CoeffReturnType(0);
+  }
+
+  template <int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
+    // AutoDiff scalar's derivative must be a vector, which is enforced by static assert.
+    // Defer to linear access for simplicity.
+    EIGEN_IF_CONSTEXPR(XprType::IsRowMajor) { return packet<LoadMode, PacketType>(col); }
+    return packet<LoadMode, PacketType>(row);
+  }
+
+  template <int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE PacketType packet(Index index) const {
+    constexpr int kPacketSize = unpacket_traits<PacketType>::size;
+    if (index + kPacketSize <= m_size.value()) {
+      return m_argImpl.template packet<LoadMode, PacketType>(index);
+    } else if (index < m_size.value()) {
+      // Partial packet.
+      EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[kPacketSize];
+      const int partial = m_size.value() - index;
+      for (int i = 0; i < partial && i < kPacketSize; ++i) {
+        values[i] = m_argImpl.coeff(index + i);
+      }
+      for (int i = partial; i < kPacketSize; ++i) {
+        values[i] = CoeffReturnType(0);
+      }
+      return pload<PacketType>(values);
+    }
+    return pset1<PacketType>(CoeffReturnType(0));
+  }
+
+ protected:
+  const ArgTypeNested m_arg;
+  evaluator<ArgTypeNestedCleaned> m_argImpl;
+  const variable_if_dynamic<Index, ArgTypeNestedCleaned::SizeAtCompileTime> m_size;
+};
+
+}  // namespace internal
+
+}  // namespace Eigen
+
+#endif  // EIGEN_COHERENT_PAD_OP_H
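Conceptually, CoherentPadOp is a zero-extended view of a vector: coefficients at or past nestedExpression().size() read as zero in both the scalar and packet paths. A sketch against the internal API (not part of Eigen's public interface):

    #include <unsupported/Eigen/AutoDiff>
    #include <iostream>

    int main() {
      Eigen::Vector2d small(1.0, 2.0);
      Eigen::internal::CoherentPadOp<Eigen::Vector2d, 4> padded(small, 4);
      Eigen::Vector4d v = padded;  // 1 2 0 0
      std::cout << v.transpose() << "\n";
    }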
diff --git a/unsupported/Eigen/src/Polynomials/PolynomialSolver.h b/unsupported/Eigen/src/Polynomials/PolynomialSolver.h
index ec5bc54..8c0ce3b 100644
--- a/unsupported/Eigen/src/Polynomials/PolynomialSolver.h
+++ b/unsupported/Eigen/src/Polynomials/PolynomialSolver.h
@@ -320,6 +320,7 @@
       internal::companion<Scalar, Deg_> companion(poly);
       companion.balance();
       m_eigenSolver.compute(companion.denseMatrix());
+      eigen_assert(m_eigenSolver.info() == Eigen::Success);
       m_roots = m_eigenSolver.eigenvalues();
       // cleanup noise in imaginary part of real roots:
       // if the imaginary part is rather small compared to the real part
diff --git a/unsupported/test/autodiff.cpp b/unsupported/test/autodiff.cpp
index 9ea4e7d..a885a1e 100644
--- a/unsupported/test/autodiff.cpp
+++ b/unsupported/test/autodiff.cpp
@@ -175,15 +175,11 @@
   jref.setZero();
   yref.setZero();
   f(x, &yref, &jref);
-  //     std::cerr << y.transpose() << "\n\n";;
-  //     std::cerr << j << "\n\n";;
 
   j.setZero();
   y.setZero();
   AutoDiffJacobian<Func> autoj(f);
   autoj(x, &y, &j);
-  //     std::cerr << y.transpose() << "\n\n";;
-  //     std::cerr << j << "\n\n";;
 
   VERIFY_IS_APPROX(y, yref);
   VERIFY_IS_APPROX(j, jref);
@@ -277,8 +273,6 @@
   return denom.value();
 }
 
-#ifdef EIGEN_TEST_PART_5
-
 double bug_1223() {
   using std::min;
   typedef Eigen::AutoDiffScalar<Eigen::Vector3d> AD;
@@ -338,8 +332,6 @@
   return (y1 + y2 + y3).value();
 }
 
-#endif
-
 EIGEN_DECLARE_TEST(autodiff) {
   for (int i = 0; i < g_repeat; i++) {
     CALL_SUBTEST_1(test_autodiff_scalar<1>());
diff --git a/unsupported/test/cxx11_tensor_argmax_gpu.cu b/unsupported/test/cxx11_tensor_argmax_gpu.cu
index 83058be..629e5c0 100644
--- a/unsupported/test/cxx11_tensor_argmax_gpu.cu
+++ b/unsupported/test/cxx11_tensor_argmax_gpu.cu
@@ -20,7 +20,7 @@
 
 template <int Layout>
 void test_gpu_simple_argmax() {
-  Tensor<double, 3, Layout> in(Eigen::array<DenseIndex, 3>(72, 53, 97));
+  Tensor<double, 3, Layout> in(Eigen::array<DenseIndex, 3>{72, 53, 97});
   Tensor<DenseIndex, 0, Layout> out_max;
   Tensor<DenseIndex, 0, Layout> out_min;
   in.setRandom();
@@ -43,7 +43,7 @@
   Eigen::GpuStreamDevice stream;
   Eigen::GpuDevice gpu_device(&stream);
 
-  Eigen::TensorMap<Eigen::Tensor<double, 3, Layout>, Aligned> gpu_in(d_in, Eigen::array<DenseIndex, 3>(72, 53, 97));
+  Eigen::TensorMap<Eigen::Tensor<double, 3, Layout>, Aligned> gpu_in(d_in, Eigen::array<DenseIndex, 3>{72, 53, 97});
   Eigen::TensorMap<Eigen::Tensor<DenseIndex, 0, Layout>, Aligned> gpu_out_max(d_out_max);
   Eigen::TensorMap<Eigen::Tensor<DenseIndex, 0, Layout>, Aligned> gpu_out_min(d_out_min);
 
@@ -113,7 +113,7 @@
     Eigen::GpuDevice gpu_device(&stream);
 
     Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout>, Aligned> gpu_in(d_in,
-                                                                          Eigen::array<DenseIndex, 4>(2, 3, 5, 7));
+                                                                          Eigen::array<DenseIndex, 4>{2, 3, 5, 7});
     Eigen::TensorMap<Eigen::Tensor<DenseIndex, 3, DataLayout>, Aligned> gpu_out(d_out, out_shape);
 
     gpu_out.device(gpu_device) = gpu_in.argmax(dim);
@@ -212,7 +212,7 @@
     Eigen::GpuDevice gpu_device(&stream);
 
     Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout>, Aligned> gpu_in(d_in,
-                                                                          Eigen::array<DenseIndex, 4>(2, 3, 5, 7));
+                                                                          Eigen::array<DenseIndex, 4>{2, 3, 5, 7});
     Eigen::TensorMap<Eigen::Tensor<DenseIndex, 3, DataLayout>, Aligned> gpu_out(d_out, out_shape);
 
     gpu_out.device(gpu_device) = gpu_in.argmin(dim);
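Parenthesized construction such as Eigen::array<DenseIndex, 3>(72, 53, 97) only ever existed on Eigen's emulated array type; now that Eigen::array is an alias of std::array, aggregate braced initialization is the portable spelling, hence the mechanical brace rewrites in these tests. A short sketch:

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::array<Eigen::DenseIndex, 3> dims{72, 53, 97};    // valid for std::array and the emulation
      // Eigen::array<Eigen::DenseIndex, 3> bad(72, 53, 97);  // no such std::array constructor
      Eigen::Tensor<float, 3> t(dims);
      t.setZero();
    }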
diff --git a/unsupported/test/cxx11_tensor_concatenation.cpp b/unsupported/test/cxx11_tensor_concatenation.cpp
index 68455b3..562ac77 100644
--- a/unsupported/test/cxx11_tensor_concatenation.cpp
+++ b/unsupported/test/cxx11_tensor_concatenation.cpp
@@ -47,13 +47,7 @@
 
   // This can be worked around in this case.
   Tensor<int, 3, DataLayout> concatenation = left.reshape(Tensor<int, 3>::Dimensions(2, 3, 1)).concatenate(right, 0);
-  Tensor<int, 2, DataLayout> alternative = left
-                                               // Clang compiler break with {{{}}} with an ambiguous error on copy
-                                               // constructor the variadic DSize constructor added for #ifndef
-                                               // EIGEN_EMULATE_CXX11_META_H. Solution: either the code should change to
-                                               //  Tensor<int, 2>::Dimensions{{2, 3}}
-                                               // or Tensor<int, 2>::Dimensions{Tensor<int, 2>::Dimensions{{2, 3}}}
-                                               .concatenate(right.reshape(Tensor<int, 2>::Dimensions(2, 3)), 0);
+  Tensor<int, 2, DataLayout> alternative = left.concatenate(right.reshape(Tensor<int, 2>::Dimensions(2, 3)), 0);
 }
 
 template <int DataLayout>
diff --git a/unsupported/test/cxx11_tensor_contract_gpu.cu b/unsupported/test/cxx11_tensor_contract_gpu.cu
index 7088d6f..c9eebfc 100644
--- a/unsupported/test/cxx11_tensor_contract_gpu.cu
+++ b/unsupported/test/cxx11_tensor_contract_gpu.cu
@@ -28,7 +28,7 @@
   Tensor<float, 2, DataLayout> t_right(k_size, n_size);
   Tensor<float, 2, DataLayout> t_result(m_size, n_size);
   Tensor<float, 2, DataLayout> t_result_gpu(m_size, n_size);
-  Eigen::array<DimPair, 1> dims(DimPair(1, 0));
+  Eigen::array<DimPair, 1> dims{DimPair(1, 0)};
 
   t_left.setRandom();
   t_right.setRandom();
@@ -51,9 +51,9 @@
   Eigen::GpuStreamDevice stream;
   Eigen::GpuDevice gpu_device(&stream);
 
-  Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > gpu_t_left(d_t_left, Eigen::array<int, 2>(m_size, k_size));
-  Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > gpu_t_right(d_t_right, Eigen::array<int, 2>(k_size, n_size));
-  Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > gpu_t_result(d_t_result, Eigen::array<int, 2>(m_size, n_size));
+  Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > gpu_t_left(d_t_left, Eigen::array<int, 2>{m_size, k_size});
+  Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > gpu_t_right(d_t_right, Eigen::array<int, 2>{k_size, n_size});
+  Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > gpu_t_result(d_t_result, Eigen::array<int, 2>{m_size, n_size});
 
   gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims);
   t_result = t_left.contract(t_right, dims);
@@ -85,7 +85,7 @@
   Tensor<float, 2, DataLayout> t_right(k_size, n_size);
   Tensor<float, 0, DataLayout> t_result;
   Tensor<float, 0, DataLayout> t_result_gpu;
-  Eigen::array<DimPair, 2> dims(DimPair(0, 0), DimPair(1, 1));
+  Eigen::array<DimPair, 2> dims{DimPair(0, 0), DimPair(1, 1)};
 
   t_left.setRandom();
   t_right.setRandom();
diff --git a/unsupported/test/cxx11_tensor_device.cu b/unsupported/test/cxx11_tensor_device.cu
index 0f91b10..0a26ab9 100644
--- a/unsupported/test/cxx11_tensor_device.cu
+++ b/unsupported/test/cxx11_tensor_device.cu
@@ -140,7 +140,7 @@
   dims[0] = std::make_pair(1, 1);
   dims[1] = std::make_pair(2, 2);
 
-  Eigen::array<int, 2> shape(40, 50 * 70);
+  Eigen::array<int, 2> shape{40, 50 * 70};
 
   Eigen::DSizes<int, 2> indices(0, 0);
   Eigen::DSizes<int, 2> sizes(40, 40);
@@ -154,7 +154,7 @@
   Eigen::DSizes<int, 3> indices(0, 0, 0);
   Eigen::DSizes<int, 3> sizes(40, 49, 70);
 
-  Eigen::array<int, 1> dims(1);
+  Eigen::array<int, 1> dims{1};
   context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel1d(), dims);
 }
 
@@ -163,7 +163,7 @@
   Eigen::DSizes<int, 3> indices(0, 0, 0);
   Eigen::DSizes<int, 3> sizes(40, 49, 69);
 
-  Eigen::array<int, 2> dims(1, 2);
+  Eigen::array<int, 2> dims{1, 2};
   context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel2d(), dims);
 }
 
@@ -172,7 +172,7 @@
   Eigen::DSizes<int, 3> indices(0, 0, 0);
   Eigen::DSizes<int, 3> sizes(39, 49, 69);
 
-  Eigen::array<int, 3> dims(0, 1, 2);
+  Eigen::array<int, 3> dims{0, 1, 2};
   context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel3d(), dims);
 }
 
@@ -188,7 +188,7 @@
 template <typename DataType, typename TensorDevice>
 void test_device_memory(const TensorDevice& device) {
   int count = 100;
-  Eigen::array<int, 1> tensorRange = {{count}};
+  Eigen::array<int, 1> tensorRange{count};
   Eigen::Tensor<DataType, 1> host(tensorRange);
   Eigen::Tensor<DataType, 1> expected(tensorRange);
   DataType* device_data = static_cast<DataType*>(device.allocate(count * sizeof(DataType)));
diff --git a/unsupported/test/cxx11_tensor_gpu.cu b/unsupported/test/cxx11_tensor_gpu.cu
index b6ddc98..c5046c0 100644
--- a/unsupported/test/cxx11_tensor_gpu.cu
+++ b/unsupported/test/cxx11_tensor_gpu.cu
@@ -99,9 +99,9 @@
 }
 
 void test_gpu_elementwise_small() {
-  Tensor<float, 1> in1(Eigen::array<Eigen::DenseIndex, 1>(2));
-  Tensor<float, 1> in2(Eigen::array<Eigen::DenseIndex, 1>(2));
-  Tensor<float, 1> out(Eigen::array<Eigen::DenseIndex, 1>(2));
+  Tensor<float, 1> in1(Eigen::array<Eigen::DenseIndex, 1>{2});
+  Tensor<float, 1> in2(Eigen::array<Eigen::DenseIndex, 1>{2});
+  Tensor<float, 1> out(Eigen::array<Eigen::DenseIndex, 1>{2});
   in1.setRandom();
   in2.setRandom();
 
@@ -122,9 +122,9 @@
   Eigen::GpuStreamDevice stream;
   Eigen::GpuDevice gpu_device(&stream);
 
-  Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in1(d_in1, Eigen::array<Eigen::DenseIndex, 1>(2));
-  Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in2(d_in2, Eigen::array<Eigen::DenseIndex, 1>(2));
-  Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_out(d_out, Eigen::array<Eigen::DenseIndex, 1>(2));
+  Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in1(d_in1, Eigen::array<Eigen::DenseIndex, 1>{2});
+  Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in2(d_in2, Eigen::array<Eigen::DenseIndex, 1>{2});
+  Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_out(d_out, Eigen::array<Eigen::DenseIndex, 1>{2});
 
   gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;
 
@@ -132,8 +132,8 @@
   assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
 
   for (int i = 0; i < 2; ++i) {
-    VERIFY_IS_APPROX(out(Eigen::array<Eigen::DenseIndex, 1>(i)),
-                     in1(Eigen::array<Eigen::DenseIndex, 1>(i)) + in2(Eigen::array<Eigen::DenseIndex, 1>(i)));
+    VERIFY_IS_APPROX(out(Eigen::array<Eigen::DenseIndex, 1>{i}),
+                     in1(Eigen::array<Eigen::DenseIndex, 1>{i}) + in2(Eigen::array<Eigen::DenseIndex, 1>{i}));
   }
 
   gpuFree(d_in1);
@@ -142,10 +142,10 @@
 }
 
 void test_gpu_elementwise() {
-  Tensor<float, 3> in1(Eigen::array<Eigen::DenseIndex, 3>(72, 53, 97));
-  Tensor<float, 3> in2(Eigen::array<Eigen::DenseIndex, 3>(72, 53, 97));
-  Tensor<float, 3> in3(Eigen::array<Eigen::DenseIndex, 3>(72, 53, 97));
-  Tensor<float, 3> out(Eigen::array<Eigen::DenseIndex, 3>(72, 53, 97));
+  Tensor<float, 3> in1(Eigen::array<Eigen::DenseIndex, 3>{72, 53, 97});
+  Tensor<float, 3> in2(Eigen::array<Eigen::DenseIndex, 3>{72, 53, 97});
+  Tensor<float, 3> in3(Eigen::array<Eigen::DenseIndex, 3>{72, 53, 97});
+  Tensor<float, 3> out(Eigen::array<Eigen::DenseIndex, 3>{72, 53, 97});
   in1.setRandom();
   in2.setRandom();
   in3.setRandom();
@@ -171,10 +171,10 @@
   Eigen::GpuStreamDevice stream;
   Eigen::GpuDevice gpu_device(&stream);
 
-  Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, Eigen::array<Eigen::DenseIndex, 3>(72, 53, 97));
-  Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, Eigen::array<Eigen::DenseIndex, 3>(72, 53, 97));
-  Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in3(d_in3, Eigen::array<Eigen::DenseIndex, 3>(72, 53, 97));
-  Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<Eigen::DenseIndex, 3>(72, 53, 97));
+  Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, Eigen::array<Eigen::DenseIndex, 3>{72, 53, 97});
+  Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, Eigen::array<Eigen::DenseIndex, 3>{72, 53, 97});
+  Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in3(d_in3, Eigen::array<Eigen::DenseIndex, 3>{72, 53, 97});
+  Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<Eigen::DenseIndex, 3>{72, 53, 97});
 
   gpu_out.device(gpu_device) = gpu_in1 + gpu_in2 * gpu_in3;
 
@@ -185,9 +185,9 @@
     for (int j = 0; j < 53; ++j) {
       for (int k = 0; k < 97; ++k) {
         VERIFY_IS_APPROX(
-            out(Eigen::array<Eigen::DenseIndex, 3>(i, j, k)),
-            in1(Eigen::array<Eigen::DenseIndex, 3>(i, j, k)) +
-                in2(Eigen::array<Eigen::DenseIndex, 3>(i, j, k)) * in3(Eigen::array<Eigen::DenseIndex, 3>(i, j, k)));
+            out(Eigen::array<Eigen::DenseIndex, 3>{i, j, k}),
+            in1(Eigen::array<Eigen::DenseIndex, 3>{i, j, k}) +
+                in2(Eigen::array<Eigen::DenseIndex, 3>{i, j, k}) * in3(Eigen::array<Eigen::DenseIndex, 3>{i, j, k}));
       }
     }
   }
@@ -284,8 +284,8 @@
   // more than 30 * 1024, which is the number of threads in blocks on
   // a 15 SM GK110 GPU
   Tensor<float, 4, DataLayout> t_left(6, 50, 3, 31);
-  Tensor<float, 5, DataLayout> t_right(Eigen::array<Eigen::DenseIndex, 5>(3, 31, 7, 20, 1));
-  Tensor<float, 5, DataLayout> t_result(Eigen::array<Eigen::DenseIndex, 5>(6, 50, 7, 20, 1));
+  Tensor<float, 5, DataLayout> t_right(Eigen::array<Eigen::DenseIndex, 5>{3, 31, 7, 20, 1});
+  Tensor<float, 5, DataLayout> t_result(Eigen::array<Eigen::DenseIndex, 5>{6, 50, 7, 20, 1});
 
   t_left.setRandom();
   t_right.setRandom();
@@ -369,7 +369,7 @@
   Eigen::TensorMap<Eigen::Tensor<float, 1, DataLayout> > gpu_kernel(d_kernel, 4);
   Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out, 74, 34, 11, 137);
 
-  Eigen::array<Eigen::DenseIndex, 1> dims(1);
+  Eigen::array<Eigen::DenseIndex, 1> dims{1};
   gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
 
   assert(gpuMemcpyAsync(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
@@ -421,7 +421,7 @@
   Eigen::TensorMap<Eigen::Tensor<float, 1, ColMajor> > gpu_kernel(d_kernel, 4);
   Eigen::TensorMap<Eigen::Tensor<float, 4, ColMajor> > gpu_out(d_out, 71, 9, 11, 7);
 
-  Eigen::array<Eigen::DenseIndex, 1> dims(0);
+  Eigen::array<Eigen::DenseIndex, 1> dims{0};
   gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
 
   assert(gpuMemcpyAsync(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
@@ -473,7 +473,7 @@
   Eigen::TensorMap<Eigen::Tensor<float, 1, RowMajor> > gpu_kernel(d_kernel, 4);
   Eigen::TensorMap<Eigen::Tensor<float, 4, RowMajor> > gpu_out(d_out, 7, 9, 11, 71);
 
-  Eigen::array<Eigen::DenseIndex, 1> dims(3);
+  Eigen::array<Eigen::DenseIndex, 1> dims{3};
   gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
 
   assert(gpuMemcpyAsync(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
@@ -526,7 +526,7 @@
   Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > gpu_kernel(d_kernel, 3, 4);
   Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out, 74, 35, 8, 137);
 
-  Eigen::array<Eigen::DenseIndex, 2> dims(1, 2);
+  Eigen::array<Eigen::DenseIndex, 2> dims{1, 2};
   gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
 
   assert(gpuMemcpyAsync(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
@@ -556,9 +556,9 @@
 
 template <int DataLayout>
 void test_gpu_convolution_3d() {
-  Tensor<float, 5, DataLayout> input(Eigen::array<Eigen::DenseIndex, 5>(74, 37, 11, 137, 17));
+  Tensor<float, 5, DataLayout> input(Eigen::array<Eigen::DenseIndex, 5>{74, 37, 11, 137, 17});
   Tensor<float, 3, DataLayout> kernel(3, 4, 2);
-  Tensor<float, 5, DataLayout> out(Eigen::array<Eigen::DenseIndex, 5>(74, 35, 8, 136, 17));
+  Tensor<float, 5, DataLayout> out(Eigen::array<Eigen::DenseIndex, 5>{74, 35, 8, 136, 17});
   input = input.constant(10.0f) + input.random();
   kernel = kernel.constant(7.0f) + kernel.random();
 
@@ -583,7 +583,7 @@
   Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> > gpu_kernel(d_kernel, 3, 4, 2);
   Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_out(d_out, 74, 35, 8, 136, 17);
 
-  Eigen::array<Eigen::DenseIndex, 3> dims(1, 2, 3);
+  Eigen::array<Eigen::DenseIndex, 3> dims{1, 2, 3};
   gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
 
   assert(gpuMemcpyAsync(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
@@ -1467,97 +1467,97 @@
   CALL_SUBTEST_1(test_gpu_elementwise_small());
   CALL_SUBTEST_1(test_gpu_elementwise());
   CALL_SUBTEST_1(test_gpu_props());
-  CALL_SUBTEST_1(test_gpu_reduction());
-  CALL_SUBTEST_2(test_gpu_contraction<ColMajor>());
-  CALL_SUBTEST_2(test_gpu_contraction<RowMajor>());
-  CALL_SUBTEST_3(test_gpu_convolution_1d<ColMajor>());
-  CALL_SUBTEST_3(test_gpu_convolution_1d<RowMajor>());
-  CALL_SUBTEST_3(test_gpu_convolution_inner_dim_col_major_1d());
-  CALL_SUBTEST_3(test_gpu_convolution_inner_dim_row_major_1d());
-  CALL_SUBTEST_3(test_gpu_convolution_2d<ColMajor>());
-  CALL_SUBTEST_3(test_gpu_convolution_2d<RowMajor>());
+  CALL_SUBTEST_2(test_gpu_reduction());
+  CALL_SUBTEST_3(test_gpu_contraction<ColMajor>());
+  CALL_SUBTEST_3(test_gpu_contraction<RowMajor>());
+  CALL_SUBTEST_4(test_gpu_convolution_1d<ColMajor>());
+  CALL_SUBTEST_4(test_gpu_convolution_1d<RowMajor>());
+  CALL_SUBTEST_4(test_gpu_convolution_inner_dim_col_major_1d());
+  CALL_SUBTEST_4(test_gpu_convolution_inner_dim_row_major_1d());
+  CALL_SUBTEST_5(test_gpu_convolution_2d<ColMajor>());
+  CALL_SUBTEST_5(test_gpu_convolution_2d<RowMajor>());
 #if !defined(EIGEN_USE_HIP)
   // disable these tests on HIP for now.
   // they hang..need to investigate and fix
-  CALL_SUBTEST_3(test_gpu_convolution_3d<ColMajor>());
-  CALL_SUBTEST_3(test_gpu_convolution_3d<RowMajor>());
+  CALL_SUBTEST_6(test_gpu_convolution_3d<ColMajor>());
+  CALL_SUBTEST_7(test_gpu_convolution_3d<RowMajor>());
 #endif
 
  // std::erf, std::erfc, and so on were only added in c++11. We use them
   // as a golden reference to validate the results produced by Eigen. Therefore
   // we can only run these tests if we use a c++11 compiler.
-  CALL_SUBTEST_4(test_gpu_lgamma<float>(1.0f));
-  CALL_SUBTEST_4(test_gpu_lgamma<float>(100.0f));
-  CALL_SUBTEST_4(test_gpu_lgamma<float>(0.01f));
-  CALL_SUBTEST_4(test_gpu_lgamma<float>(0.001f));
+  CALL_SUBTEST_8(test_gpu_lgamma<float>(1.0f));
+  CALL_SUBTEST_8(test_gpu_lgamma<float>(100.0f));
+  CALL_SUBTEST_8(test_gpu_lgamma<float>(0.01f));
+  CALL_SUBTEST_8(test_gpu_lgamma<float>(0.001f));
 
-  CALL_SUBTEST_4(test_gpu_lgamma<double>(1.0));
-  CALL_SUBTEST_4(test_gpu_lgamma<double>(100.0));
-  CALL_SUBTEST_4(test_gpu_lgamma<double>(0.01));
-  CALL_SUBTEST_4(test_gpu_lgamma<double>(0.001));
+  CALL_SUBTEST_8(test_gpu_lgamma<double>(1.0));
+  CALL_SUBTEST_8(test_gpu_lgamma<double>(100.0));
+  CALL_SUBTEST_8(test_gpu_lgamma<double>(0.01));
+  CALL_SUBTEST_8(test_gpu_lgamma<double>(0.001));
 
-  CALL_SUBTEST_4(test_gpu_erf<float>(1.0f));
-  CALL_SUBTEST_4(test_gpu_erf<float>(100.0f));
-  CALL_SUBTEST_4(test_gpu_erf<float>(0.01f));
-  CALL_SUBTEST_4(test_gpu_erf<float>(0.001f));
+  CALL_SUBTEST_8(test_gpu_erf<float>(1.0f));
+  CALL_SUBTEST_8(test_gpu_erf<float>(100.0f));
+  CALL_SUBTEST_8(test_gpu_erf<float>(0.01f));
+  CALL_SUBTEST_8(test_gpu_erf<float>(0.001f));
 
-  CALL_SUBTEST_4(test_gpu_erfc<float>(1.0f));
-  // CALL_SUBTEST(test_gpu_erfc<float>(100.0f));
-  CALL_SUBTEST_4(test_gpu_erfc<float>(5.0f));  // GPU erfc lacks precision for large inputs
-  CALL_SUBTEST_4(test_gpu_erfc<float>(0.01f));
-  CALL_SUBTEST_4(test_gpu_erfc<float>(0.001f));
+  CALL_SUBTEST_8(test_gpu_erfc<float>(1.0f));
+  // CALL_SUBTEST_8(test_gpu_erfc<float>(100.0f));
+  CALL_SUBTEST_8(test_gpu_erfc<float>(5.0f));  // GPU erfc lacks precision for large inputs
+  CALL_SUBTEST_8(test_gpu_erfc<float>(0.01f));
+  CALL_SUBTEST_8(test_gpu_erfc<float>(0.001f));
 
-  CALL_SUBTEST_4(test_gpu_erf<double>(1.0));
-  CALL_SUBTEST_4(test_gpu_erf<double>(100.0));
-  CALL_SUBTEST_4(test_gpu_erf<double>(0.01));
-  CALL_SUBTEST_4(test_gpu_erf<double>(0.001));
+  CALL_SUBTEST_8(test_gpu_erf<double>(1.0));
+  CALL_SUBTEST_8(test_gpu_erf<double>(100.0));
+  CALL_SUBTEST_8(test_gpu_erf<double>(0.01));
+  CALL_SUBTEST_8(test_gpu_erf<double>(0.001));
 
-  CALL_SUBTEST_4(test_gpu_erfc<double>(1.0));
-  // CALL_SUBTEST(test_gpu_erfc<double>(100.0));
-  CALL_SUBTEST_4(test_gpu_erfc<double>(5.0));  // GPU erfc lacks precision for large inputs
-  CALL_SUBTEST_4(test_gpu_erfc<double>(0.01));
-  CALL_SUBTEST_4(test_gpu_erfc<double>(0.001));
+  CALL_SUBTEST_8(test_gpu_erfc<double>(1.0));
+  // CALL_SUBTEST_8(test_gpu_erfc<double>(100.0));
+  CALL_SUBTEST_8(test_gpu_erfc<double>(5.0));  // GPU erfc lacks precision for large inputs
+  CALL_SUBTEST_8(test_gpu_erfc<double>(0.01));
+  CALL_SUBTEST_8(test_gpu_erfc<double>(0.001));
 
 #if !defined(EIGEN_USE_HIP)
   // disable these tests on HIP for now.
 
-  CALL_SUBTEST_5(test_gpu_ndtri<float>());
-  CALL_SUBTEST_5(test_gpu_ndtri<double>());
+  CALL_SUBTEST_9(test_gpu_ndtri<float>());
+  CALL_SUBTEST_9(test_gpu_ndtri<double>());
 
-  CALL_SUBTEST_5(test_gpu_digamma<float>());
-  CALL_SUBTEST_5(test_gpu_digamma<double>());
+  CALL_SUBTEST_9(test_gpu_digamma<float>());
+  CALL_SUBTEST_9(test_gpu_digamma<double>());
 
-  CALL_SUBTEST_5(test_gpu_polygamma<float>());
-  CALL_SUBTEST_5(test_gpu_polygamma<double>());
+  CALL_SUBTEST_9(test_gpu_polygamma<float>());
+  CALL_SUBTEST_9(test_gpu_polygamma<double>());
 
-  CALL_SUBTEST_5(test_gpu_zeta<float>());
-  CALL_SUBTEST_5(test_gpu_zeta<double>());
+  CALL_SUBTEST_9(test_gpu_zeta<float>());
+  CALL_SUBTEST_9(test_gpu_zeta<double>());
 #endif
 
-  CALL_SUBTEST_5(test_gpu_igamma<float>());
-  CALL_SUBTEST_5(test_gpu_igammac<float>());
+  CALL_SUBTEST_9(test_gpu_igamma<float>());
+  CALL_SUBTEST_9(test_gpu_igammac<float>());
 
-  CALL_SUBTEST_5(test_gpu_igamma<double>());
-  CALL_SUBTEST_5(test_gpu_igammac<double>());
+  CALL_SUBTEST_9(test_gpu_igamma<double>());
+  CALL_SUBTEST_9(test_gpu_igammac<double>());
 
 #if !defined(EIGEN_USE_HIP)
   // disable these tests on HIP for now.
-  CALL_SUBTEST_6(test_gpu_betainc<float>());
-  CALL_SUBTEST_6(test_gpu_betainc<double>());
+  CALL_SUBTEST_9(test_gpu_betainc<float>());
+  CALL_SUBTEST_9(test_gpu_betainc<double>());
 
-  CALL_SUBTEST_6(test_gpu_i0e<float>());
-  CALL_SUBTEST_6(test_gpu_i0e<double>());
+  CALL_SUBTEST_9(test_gpu_i0e<float>());
+  CALL_SUBTEST_9(test_gpu_i0e<double>());
 
-  CALL_SUBTEST_6(test_gpu_i1e<float>());
-  CALL_SUBTEST_6(test_gpu_i1e<double>());
+  CALL_SUBTEST_9(test_gpu_i1e<float>());
+  CALL_SUBTEST_9(test_gpu_i1e<double>());
 
-  CALL_SUBTEST_6(test_gpu_i1e<float>());
-  CALL_SUBTEST_6(test_gpu_i1e<double>());
+  CALL_SUBTEST_9(test_gpu_i1e<float>());
+  CALL_SUBTEST_9(test_gpu_i1e<double>());
 
-  CALL_SUBTEST_6(test_gpu_igamma_der_a<float>());
-  CALL_SUBTEST_6(test_gpu_igamma_der_a<double>());
+  CALL_SUBTEST_9(test_gpu_igamma_der_a<float>());
+  CALL_SUBTEST_9(test_gpu_igamma_der_a<double>());
 
-  CALL_SUBTEST_6(test_gpu_gamma_sample_der_alpha<float>());
-  CALL_SUBTEST_6(test_gpu_gamma_sample_der_alpha<double>());
+  CALL_SUBTEST_9(test_gpu_gamma_sample_der_alpha<float>());
+  CALL_SUBTEST_9(test_gpu_gamma_sample_der_alpha<double>());
 #endif
 }
diff --git a/unsupported/test/cxx11_tensor_io.cpp b/unsupported/test/cxx11_tensor_io.cpp
index 16285c1..27b3230 100644
--- a/unsupported/test/cxx11_tensor_io.cpp
+++ b/unsupported/test/cxx11_tensor_io.cpp
@@ -82,6 +82,16 @@
     std::ostringstream os;
     os << t.format(Eigen::TensorIOFormat::Plain());
     VERIFY(os.str() == " (1,2) (12,3)\n(-4,2)  (0,5)\n(-1,4) (5,27)");
+
+    os.str("");
+    os.clear();
+    os << t.format(Eigen::TensorIOFormat::Numpy());
+    VERIFY(os.str() == "[[ 1+2j 12+3j]\n [-4+2j  0+5j]\n [-1+4j 5+27j]]");
+
+    os.str("");
+    os.clear();
+    os << t.format(Eigen::TensorIOFormat::Native());
+    VERIFY(os.str() == "{{ {1, 2}, {12, 3}},\n {{-4, 2},  {0, 5}},\n {{-1, 4}, {5, 27}}}");
   }
 };
 
diff --git a/unsupported/test/cxx11_tensor_of_float16_gpu.cu b/unsupported/test/cxx11_tensor_of_float16_gpu.cu
index a9a8267..88482de 100644
--- a/unsupported/test/cxx11_tensor_of_float16_gpu.cu
+++ b/unsupported/test/cxx11_tensor_of_float16_gpu.cu
@@ -281,7 +281,7 @@
   gpu_float2.device(gpu_device) = gpu_float2.random() - gpu_float2.constant(0.5f);
 
   typedef Tensor<float, 2>::DimensionPair DimPair;
-  Eigen::array<DimPair, 1> dims(DimPair(1, 0));
+  Eigen::array<DimPair, 1> dims{DimPair(1, 0)};
   gpu_res_float.device(gpu_device) = gpu_float1.contract(gpu_float2, dims).cast<Eigen::half>();
   gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().contract(gpu_float2.cast<Eigen::half>(), dims);
 
diff --git a/unsupported/test/cxx11_tensor_scan_gpu.cu b/unsupported/test/cxx11_tensor_scan_gpu.cu
index c316f1e..c9997ad 100644
--- a/unsupported/test/cxx11_tensor_scan_gpu.cu
+++ b/unsupported/test/cxx11_tensor_scan_gpu.cu
@@ -45,9 +45,9 @@
   Eigen::GpuDevice gpu_device(&stream);
 
   Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> > gpu_t_input(d_t_input,
-                                                                     Eigen::array<int, 3>(m_size, k_size, n_size));
+                                                                     Eigen::array<int, 3>{m_size, k_size, n_size});
   Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> > gpu_t_result(d_t_result,
-                                                                      Eigen::array<int, 3>(m_size, k_size, n_size));
+                                                                      Eigen::array<int, 3>{m_size, k_size, n_size});
 
   gpu_t_result.device(gpu_device) = gpu_t_input.cumsum(1);
   t_result = t_input.cumsum(1);
diff --git a/unsupported/test/kronecker_product.cpp b/unsupported/test/kronecker_product.cpp
index 87d213e..d7396e0 100644
--- a/unsupported/test/kronecker_product.cpp
+++ b/unsupported/test/kronecker_product.cpp
@@ -168,12 +168,12 @@
   CALL_SUBTEST(check_sparse_kronecker_product(SM_ab));
 
   // test dimension of result of DM = kroneckerProduct(DM,DM)
-  MatrixXd DM_a2(2, 1);
-  MatrixXd DM_b2(5, 4);
+  MatrixXd DM_a2 = Eigen::MatrixXd::Random(2, 1);
+  MatrixXd DM_b2 = Eigen::MatrixXd::Random(5, 4);
   MatrixXd DM_ab2 = kroneckerProduct(DM_a2, DM_b2);
   CALL_SUBTEST(check_dimension(DM_ab2, 2 * 5, 1 * 4));
-  DM_a2.resize(10, 9);
-  DM_b2.resize(4, 8);
+  DM_a2 = Eigen::MatrixXd::Random(10, 9);
+  DM_b2 = Eigen::MatrixXd::Random(4, 8);
   DM_ab2 = kroneckerProduct(DM_a2, DM_b2);
   CALL_SUBTEST(check_dimension(DM_ab2, 10 * 4, 9 * 8));