No public description

PiperOrigin-RevId: 622331418
Change-Id: I0d1bdfeece05539a90f17f1aa16f1889fdb87bdf
diff --git a/Eigen/CholmodSupport b/Eigen/CholmodSupport
index adc5f8d..2961863 100644
--- a/Eigen/CholmodSupport
+++ b/Eigen/CholmodSupport
@@ -12,7 +12,9 @@
 
 #include "src/Core/util/DisableStupidWarnings.h"
 
+extern "C" {
 #include <cholmod.h>
+}
 
 /** \ingroup Support_modules
  * \defgroup CholmodSupport_Module CholmodSupport module
diff --git a/Eigen/Core b/Eigen/Core
index ed7d353..f9d9974 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -178,7 +178,6 @@
 
 #include "src/Core/NumTraits.h"
 #include "src/Core/MathFunctions.h"
-#include "src/Core/RandomImpl.h"
 #include "src/Core/GenericPacketMath.h"
 #include "src/Core/MathFunctionsImpl.h"
 #include "src/Core/arch/Default/ConjHelper.h"
diff --git a/Eigen/src/Core/ArithmeticSequence.h b/Eigen/src/Core/ArithmeticSequence.h
index ae6373d..0f45e89 100644
--- a/Eigen/src/Core/ArithmeticSequence.h
+++ b/Eigen/src/Core/ArithmeticSequence.h
@@ -61,28 +61,26 @@
 template <typename FirstType, typename SizeType, typename IncrType>
 class ArithmeticSequence {
  public:
-  constexpr ArithmeticSequence() = default;
-  constexpr ArithmeticSequence(FirstType first, SizeType size) : m_first(first), m_size(size) {}
-  constexpr ArithmeticSequence(FirstType first, SizeType size, IncrType incr)
-      : m_first(first), m_size(size), m_incr(incr) {}
+  ArithmeticSequence(FirstType first, SizeType size) : m_first(first), m_size(size) {}
+  ArithmeticSequence(FirstType first, SizeType size, IncrType incr) : m_first(first), m_size(size), m_incr(incr) {}
 
   enum {
-    // SizeAtCompileTime = internal::get_fixed_value<SizeType>::value,
+    SizeAtCompileTime = internal::get_fixed_value<SizeType>::value,
     IncrAtCompileTime = internal::get_fixed_value<IncrType, DynamicIndex>::value
   };
 
   /** \returns the size, i.e., number of elements, of the sequence */
-  constexpr Index size() const { return m_size; }
+  Index size() const { return m_size; }
 
   /** \returns the first element \f$ a_0 \f$ in the sequence */
-  constexpr Index first() const { return m_first; }
+  Index first() const { return m_first; }
 
   /** \returns the value \f$ a_i \f$ at index \a i in the sequence. */
-  constexpr Index operator[](Index i) const { return m_first + i * m_incr; }
+  Index operator[](Index i) const { return m_first + i * m_incr; }
 
-  constexpr const FirstType& firstObject() const { return m_first; }
-  constexpr const SizeType& sizeObject() const { return m_size; }
-  constexpr const IncrType& incrObject() const { return m_incr; }
+  const FirstType& firstObject() const { return m_first; }
+  const SizeType& sizeObject() const { return m_size; }
+  const IncrType& incrObject() const { return m_incr; }
 
  protected:
   FirstType m_first;
@@ -90,7 +88,7 @@
   IncrType m_incr;
 
  public:
-  constexpr auto reverse() const -> decltype(Eigen::seqN(m_first + (m_size + fix<-1>()) * m_incr, m_size, -m_incr)) {
+  auto reverse() const -> decltype(Eigen::seqN(m_first + (m_size + fix<-1>()) * m_incr, m_size, -m_incr)) {
     return seqN(m_first + (m_size + fix<-1>()) * m_incr, m_size, -m_incr);
   }
 };
@@ -203,6 +201,33 @@
 
 }  // namespace placeholders
 
+namespace internal {
+
+// Convert a symbolic span into a usable one (i.e., remove last/end "keywords")
+template <typename T>
+struct make_size_type {
+  typedef std::conditional_t<symbolic::is_symbolic<T>::value, Index, T> type;
+};
+
+template <typename FirstType, typename SizeType, typename IncrType, int XprSize>
+struct IndexedViewCompatibleType<ArithmeticSequence<FirstType, SizeType, IncrType>, XprSize> {
+  typedef ArithmeticSequence<Index, typename make_size_type<SizeType>::type, IncrType> type;
+};
+
+template <typename FirstType, typename SizeType, typename IncrType>
+ArithmeticSequence<Index, typename make_size_type<SizeType>::type, IncrType> makeIndexedViewCompatible(
+    const ArithmeticSequence<FirstType, SizeType, IncrType>& ids, Index size, SpecializedType) {
+  return ArithmeticSequence<Index, typename make_size_type<SizeType>::type, IncrType>(
+      eval_expr_given_size(ids.firstObject(), size), eval_expr_given_size(ids.sizeObject(), size), ids.incrObject());
+}
+
+template <typename FirstType, typename SizeType, typename IncrType>
+struct get_compile_time_incr<ArithmeticSequence<FirstType, SizeType, IncrType> > {
+  enum { value = get_fixed_value<IncrType, DynamicIndex>::value };
+};
+
+}  // end namespace internal
+
 /** \namespace Eigen::indexing
   * \ingroup Core_Module
   *
diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h
index 49b1410..725b337 100644
--- a/Eigen/src/Core/CwiseUnaryView.h
+++ b/Eigen/src/Core/CwiseUnaryView.h
@@ -18,9 +18,7 @@
 namespace internal {
 template <typename ViewOp, typename MatrixType, typename StrideType>
 struct traits<CwiseUnaryView<ViewOp, MatrixType, StrideType> > : traits<MatrixType> {
-  typedef typename result_of<ViewOp(typename traits<MatrixType>::Scalar&)>::type1 ScalarRef;
-  static_assert(std::is_reference<ScalarRef>::value, "Views must return a reference type.");
-  typedef remove_all_t<ScalarRef> Scalar;
+  typedef typename result_of<ViewOp(const typename traits<MatrixType>::Scalar&)>::type Scalar;
   typedef typename MatrixType::Nested MatrixTypeNested;
   typedef remove_all_t<MatrixTypeNested> MatrixTypeNested_;
   enum {
@@ -46,77 +44,11 @@
                                    : int(StrideType::OuterStrideAtCompileTime)
   };
 };
-
-// Generic API dispatcher
-template <typename ViewOp, typename XprType, typename StrideType, typename StorageKind,
-          bool Mutable = !std::is_const<XprType>::value>
-class CwiseUnaryViewImpl : public generic_xpr_base<CwiseUnaryView<ViewOp, XprType, StrideType> >::type {
- public:
-  typedef typename generic_xpr_base<CwiseUnaryView<ViewOp, XprType, StrideType> >::type Base;
-};
-
-template <typename ViewOp, typename MatrixType, typename StrideType>
-class CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType, Dense, false>
-    : public dense_xpr_base<CwiseUnaryView<ViewOp, MatrixType, StrideType> >::type {
- public:
-  typedef CwiseUnaryView<ViewOp, MatrixType, StrideType> Derived;
-  typedef typename dense_xpr_base<CwiseUnaryView<ViewOp, MatrixType, StrideType> >::type Base;
-  EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
-  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl)
-
-  EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeffRef(0)); }
-
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const {
-    return StrideType::InnerStrideAtCompileTime != 0 ? int(StrideType::InnerStrideAtCompileTime)
-                                                     : derived().nestedExpression().innerStride() *
-                                                           sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const {
-    return StrideType::OuterStrideAtCompileTime != 0 ? int(StrideType::OuterStrideAtCompileTime)
-                                                     : derived().nestedExpression().outerStride() *
-                                                           sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar);
-  }
-
- protected:
-  EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CwiseUnaryViewImpl)
-
-  // Allow const access to coeffRef for the case of direct access being enabled.
-  EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const {
-    return internal::evaluator<Derived>(derived()).coeffRef(index);
-  }
-
-  EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index row, Index col) const {
-    return internal::evaluator<Derived>(derived()).coeffRef(row, col);
-  }
-};
-
-template <typename ViewOp, typename MatrixType, typename StrideType>
-class CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType, Dense, true>
-    : public CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType, Dense, false> {
- public:
-  typedef CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType, Dense, false> Base;
-  typedef CwiseUnaryView<ViewOp, MatrixType, StrideType> Derived;
-  EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
-  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl)
-
-  using Base::data;
-  EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) {
-    return internal::evaluator<Derived>(derived()).coeffRef(row, col);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
-    return internal::evaluator<Derived>(derived()).coeffRef(index);
-  }
-
- protected:
-  EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CwiseUnaryViewImpl)
-};
-
 }  // namespace internal
 
+template <typename ViewOp, typename MatrixType, typename StrideType, typename StorageKind>
+class CwiseUnaryViewImpl;
+
 /** \class CwiseUnaryView
  * \ingroup Core_Module
  *
@@ -131,11 +63,11 @@
  * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
  */
 template <typename ViewOp, typename MatrixType, typename StrideType>
-class CwiseUnaryView : public internal::CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType,
-                                                           typename internal::traits<MatrixType>::StorageKind> {
+class CwiseUnaryView
+    : public CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType, typename internal::traits<MatrixType>::StorageKind> {
  public:
-  typedef typename internal::CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType,
-                                                typename internal::traits<MatrixType>::StorageKind>::Base Base;
+  typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType,
+                                      typename internal::traits<MatrixType>::StorageKind>::Base Base;
   EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
   typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
   typedef internal::remove_all_t<MatrixType> NestedExpression;
@@ -162,6 +94,44 @@
   ViewOp m_functor;
 };
 
-}  // namespace Eigen
+// Generic API dispatcher
+template <typename ViewOp, typename XprType, typename StrideType, typename StorageKind>
+class CwiseUnaryViewImpl : public internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType, StrideType> >::type {
+ public:
+  typedef typename internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType, StrideType> >::type Base;
+};
+
+template <typename ViewOp, typename MatrixType, typename StrideType>
+class CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType, Dense>
+    : public internal::dense_xpr_base<CwiseUnaryView<ViewOp, MatrixType, StrideType> >::type {
+ public:
+  typedef CwiseUnaryView<ViewOp, MatrixType, StrideType> Derived;
+  typedef typename internal::dense_xpr_base<CwiseUnaryView<ViewOp, MatrixType, StrideType> >::type Base;
+
+  EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl)
+
+  EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); }
+  EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); }
+
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const {
+    return StrideType::InnerStrideAtCompileTime != 0
+               ? int(StrideType::InnerStrideAtCompileTime)
+               : derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) /
+                     sizeof(Scalar);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const {
+    return StrideType::OuterStrideAtCompileTime != 0
+               ? int(StrideType::OuterStrideAtCompileTime)
+               : derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) /
+                     sizeof(Scalar);
+  }
+
+ protected:
+  EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CwiseUnaryViewImpl)
+};
+
+}  // end namespace Eigen
 
 #endif  // EIGEN_CWISE_UNARY_VIEW_H
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index 1220073..3ec6852 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -242,18 +242,26 @@
 
 template <typename Scalar, int Size, int MaxSize>
 struct gemv_static_vector_if<Scalar, Size, MaxSize, true> {
+  enum {
+    ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
+    PacketSize = internal::packet_traits<Scalar>::size
+  };
 #if EIGEN_MAX_STATIC_ALIGN_BYTES != 0
-  internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize), 0, AlignedMax>
+  internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize), 0,
+                        internal::plain_enum_min(AlignedMax, PacketSize)>
       m_data;
   EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
 #else
   // Some architectures cannot align on the stack,
   // => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
   internal::plain_array<
-      Scalar, internal::min_size_prefer_fixed(Size, MaxSize) + EIGEN_MAX_ALIGN_BYTES, 0>
+      Scalar, internal::min_size_prefer_fixed(Size, MaxSize) + (ForceAlignment ? EIGEN_MAX_ALIGN_BYTES : 0), 0>
       m_data;
   EIGEN_STRONG_INLINE Scalar* data() {
-    return reinterpret_cast<Scalar*>((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) + EIGEN_MAX_ALIGN_BYTES);
+    return ForceAlignment
+               ? reinterpret_cast<Scalar*>((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) +
+                                           EIGEN_MAX_ALIGN_BYTES)
+               : m_data.array;
   }
 #endif
 };
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 58a197f..4b56f0f 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -44,30 +44,24 @@
 
 struct default_packet_traits {
   enum {
-    // Ops that are implemented for most types.
     HasAdd = 1,
     HasSub = 1,
     HasShift = 1,
     HasMul = 1,
     HasNegate = 1,
     HasAbs = 1,
+    HasArg = 0,
     HasAbs2 = 1,
+    HasAbsDiff = 0,
     HasMin = 1,
     HasMax = 1,
     HasConj = 1,
     HasSetLinear = 1,
     HasSign = 1,
-
-    HasArg = 0,
-    HasAbsDiff = 0,
     HasBlend = 0,
     // This flag is used to indicate whether packet comparison is supported.
     // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true.
     HasCmp = 0,
-    HasRound = 0,
-    HasRint = 0,
-    HasFloor = 0,
-    HasCeil = 0,
 
     HasDiv = 0,
     HasReciprocal = 0,
@@ -79,6 +73,7 @@
     HasLog1p = 0,
     HasLog10 = 0,
     HasPow = 0,
+
     HasSin = 0,
     HasCos = 0,
     HasTan = 0,
@@ -101,7 +96,12 @@
     HasIGammaDerA = 0,
     HasGammaSampleDerAlpha = 0,
     HasIGammac = 0,
-    HasBetaInc = 0
+    HasBetaInc = 0,
+
+    HasRound = 0,
+    HasRint = 0,
+    HasFloor = 0,
+    HasCeil = 0
   };
 };
 
diff --git a/Eigen/src/Core/IndexedView.h b/Eigen/src/Core/IndexedView.h
index 454e560..0a02417 100644
--- a/Eigen/src/Core/IndexedView.h
+++ b/Eigen/src/Core/IndexedView.h
@@ -20,8 +20,8 @@
 template <typename XprType, typename RowIndices, typename ColIndices>
 struct traits<IndexedView<XprType, RowIndices, ColIndices>> : traits<XprType> {
   enum {
-    RowsAtCompileTime = int(IndexedViewHelper<RowIndices>::SizeAtCompileTime),
-    ColsAtCompileTime = int(IndexedViewHelper<ColIndices>::SizeAtCompileTime),
+    RowsAtCompileTime = int(array_size<RowIndices>::value),
+    ColsAtCompileTime = int(array_size<ColIndices>::value),
     MaxRowsAtCompileTime = RowsAtCompileTime,
     MaxColsAtCompileTime = ColsAtCompileTime,
 
@@ -30,8 +30,8 @@
                  : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1) ? 0
                                                                             : XprTypeIsRowMajor,
 
-    RowIncr = int(IndexedViewHelper<RowIndices>::IncrAtCompileTime),
-    ColIncr = int(IndexedViewHelper<ColIndices>::IncrAtCompileTime),
+    RowIncr = int(get_compile_time_incr<RowIndices>::value),
+    ColIncr = int(get_compile_time_incr<ColIndices>::value),
     InnerIncr = IsRowMajor ? ColIncr : RowIncr,
     OuterIncr = IsRowMajor ? RowIncr : ColIncr,
 
@@ -47,23 +47,24 @@
                     is_same<AllRange<InnerSize>, std::conditional_t<XprTypeIsRowMajor, ColIndices, RowIndices>>::value,
 
     InnerStrideAtCompileTime =
-        InnerIncr < 0 || InnerIncr == DynamicIndex || XprInnerStride == Dynamic || InnerIncr == Undefined
+        InnerIncr < 0 || InnerIncr == DynamicIndex || XprInnerStride == Dynamic || InnerIncr == UndefinedIncr
             ? Dynamic
             : XprInnerStride * InnerIncr,
     OuterStrideAtCompileTime =
-        OuterIncr < 0 || OuterIncr == DynamicIndex || XprOuterstride == Dynamic || OuterIncr == Undefined
+        OuterIncr < 0 || OuterIncr == DynamicIndex || XprOuterstride == Dynamic || OuterIncr == UndefinedIncr
             ? Dynamic
             : XprOuterstride * OuterIncr,
 
-    ReturnAsScalar = is_single_range<RowIndices>::value && is_single_range<ColIndices>::value,
+    ReturnAsScalar = is_same<RowIndices, SingleRange>::value && is_same<ColIndices, SingleRange>::value,
     ReturnAsBlock = (!ReturnAsScalar) && IsBlockAlike,
     ReturnAsIndexedView = (!ReturnAsScalar) && (!ReturnAsBlock),
 
     // FIXME we deal with compile-time strides if and only if we have DirectAccessBit flag,
     // but this is too strict regarding negative strides...
-    DirectAccessMask = (int(InnerIncr) != Undefined && int(OuterIncr) != Undefined && InnerIncr >= 0 && OuterIncr >= 0)
-                           ? DirectAccessBit
-                           : 0,
+    DirectAccessMask =
+        (int(InnerIncr) != UndefinedIncr && int(OuterIncr) != UndefinedIncr && InnerIncr >= 0 && OuterIncr >= 0)
+            ? DirectAccessBit
+            : 0,
     FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
     FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
     FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
@@ -74,11 +75,11 @@
   typedef Block<XprType, RowsAtCompileTime, ColsAtCompileTime, IsInnerPannel> BlockType;
 };
 
-template <typename XprType, typename RowIndices, typename ColIndices, typename StorageKind, bool DirectAccess>
-class IndexedViewImpl;
-
 }  // namespace internal
 
+template <typename XprType, typename RowIndices, typename ColIndices, typename StorageKind>
+class IndexedViewImpl;
+
 /** \class IndexedView
  * \ingroup Core_Module
  *
@@ -119,43 +120,26 @@
  */
 template <typename XprType, typename RowIndices, typename ColIndices>
 class IndexedView
-    : public internal::IndexedViewImpl<XprType, RowIndices, ColIndices, typename internal::traits<XprType>::StorageKind,
-                                       (internal::traits<IndexedView<XprType, RowIndices, ColIndices>>::Flags &
-                                        DirectAccessBit) != 0> {
+    : public IndexedViewImpl<XprType, RowIndices, ColIndices, typename internal::traits<XprType>::StorageKind> {
  public:
-  typedef typename internal::IndexedViewImpl<
-      XprType, RowIndices, ColIndices, typename internal::traits<XprType>::StorageKind,
-      (internal::traits<IndexedView<XprType, RowIndices, ColIndices>>::Flags & DirectAccessBit) != 0>
-      Base;
+  typedef
+      typename IndexedViewImpl<XprType, RowIndices, ColIndices, typename internal::traits<XprType>::StorageKind>::Base
+          Base;
   EIGEN_GENERIC_PUBLIC_INTERFACE(IndexedView)
   EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedView)
 
-  template <typename T0, typename T1>
-  IndexedView(XprType& xpr, const T0& rowIndices, const T1& colIndices) : Base(xpr, rowIndices, colIndices) {}
-};
-
-namespace internal {
-
-// Generic API dispatcher
-template <typename XprType, typename RowIndices, typename ColIndices, typename StorageKind, bool DirectAccess>
-class IndexedViewImpl : public internal::generic_xpr_base<IndexedView<XprType, RowIndices, ColIndices>>::type {
- public:
-  typedef typename internal::generic_xpr_base<IndexedView<XprType, RowIndices, ColIndices>>::type Base;
   typedef typename internal::ref_selector<XprType>::non_const_type MatrixTypeNested;
   typedef internal::remove_all_t<XprType> NestedExpression;
-  typedef typename XprType::Scalar Scalar;
-
-  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedViewImpl)
 
   template <typename T0, typename T1>
-  IndexedViewImpl(XprType& xpr, const T0& rowIndices, const T1& colIndices)
+  IndexedView(XprType& xpr, const T0& rowIndices, const T1& colIndices)
       : m_xpr(xpr), m_rowIndices(rowIndices), m_colIndices(colIndices) {}
 
   /** \returns number of rows */
-  Index rows() const { return IndexedViewHelper<RowIndices>::size(m_rowIndices); }
+  Index rows() const { return internal::index_list_size(m_rowIndices); }
 
   /** \returns number of columns */
-  Index cols() const { return IndexedViewHelper<ColIndices>::size(m_colIndices); }
+  Index cols() const { return internal::index_list_size(m_colIndices); }
 
   /** \returns the nested expression */
   const internal::remove_all_t<XprType>& nestedExpression() const { return m_xpr; }
@@ -169,77 +153,21 @@
   /** \returns a const reference to the object storing/generating the column indices */
   const ColIndices& colIndices() const { return m_colIndices; }
 
-  constexpr Scalar& coeffRef(Index rowId, Index colId) {
-    return nestedExpression().coeffRef(m_rowIndices[rowId], m_colIndices[colId]);
-  }
-
-  constexpr const Scalar& coeffRef(Index rowId, Index colId) const {
-    return nestedExpression().coeffRef(m_rowIndices[rowId], m_colIndices[colId]);
-  }
-
  protected:
   MatrixTypeNested m_xpr;
   RowIndices m_rowIndices;
   ColIndices m_colIndices;
 };
 
+// Generic API dispatcher
 template <typename XprType, typename RowIndices, typename ColIndices, typename StorageKind>
-class IndexedViewImpl<XprType, RowIndices, ColIndices, StorageKind, true>
-    : public IndexedViewImpl<XprType, RowIndices, ColIndices, StorageKind, false> {
+class IndexedViewImpl : public internal::generic_xpr_base<IndexedView<XprType, RowIndices, ColIndices>>::type {
  public:
-  using Base = internal::IndexedViewImpl<XprType, RowIndices, ColIndices,
-                                         typename internal::traits<XprType>::StorageKind, false>;
-  using Derived = IndexedView<XprType, RowIndices, ColIndices>;
-
-  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedViewImpl)
-
-  template <typename T0, typename T1>
-  IndexedViewImpl(XprType& xpr, const T0& rowIndices, const T1& colIndices) : Base(xpr, rowIndices, colIndices) {}
-
-  Index rowIncrement() const {
-    if (traits<Derived>::RowIncr != DynamicIndex && traits<Derived>::RowIncr != Undefined) {
-      return traits<Derived>::RowIncr;
-    }
-    return IndexedViewHelper<RowIndices>::incr(this->rowIndices());
-  }
-  Index colIncrement() const {
-    if (traits<Derived>::ColIncr != DynamicIndex && traits<Derived>::ColIncr != Undefined) {
-      return traits<Derived>::ColIncr;
-    }
-    return IndexedViewHelper<ColIndices>::incr(this->colIndices());
-  }
-
-  Index innerIncrement() const { return traits<Derived>::IsRowMajor ? colIncrement() : rowIncrement(); }
-
-  Index outerIncrement() const { return traits<Derived>::IsRowMajor ? rowIncrement() : colIncrement(); }
-
-  std::decay_t<typename XprType::Scalar>* data() {
-    Index row_offset = this->rowIndices()[0] * this->nestedExpression().rowStride();
-    Index col_offset = this->colIndices()[0] * this->nestedExpression().colStride();
-    return this->nestedExpression().data() + row_offset + col_offset;
-  }
-
-  const std::decay_t<typename XprType::Scalar>* data() const {
-    Index row_offset = this->rowIndices()[0] * this->nestedExpression().rowStride();
-    Index col_offset = this->colIndices()[0] * this->nestedExpression().colStride();
-    return this->nestedExpression().data() + row_offset + col_offset;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const EIGEN_NOEXCEPT {
-    if (traits<Derived>::InnerStrideAtCompileTime != Dynamic) {
-      return traits<Derived>::InnerStrideAtCompileTime;
-    }
-    return innerIncrement() * this->nestedExpression().innerStride();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const EIGEN_NOEXCEPT {
-    if (traits<Derived>::OuterStrideAtCompileTime != Dynamic) {
-      return traits<Derived>::OuterStrideAtCompileTime;
-    }
-    return outerIncrement() * this->nestedExpression().outerStride();
-  }
+  typedef typename internal::generic_xpr_base<IndexedView<XprType, RowIndices, ColIndices>>::type Base;
 };
 
+namespace internal {
+
 template <typename ArgType, typename RowIndices, typename ColIndices>
 struct unary_evaluator<IndexedView<ArgType, RowIndices, ColIndices>, IndexBased>
     : evaluator_base<IndexedView<ArgType, RowIndices, ColIndices>> {
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index f907d1e..c92572f 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -164,7 +164,7 @@
   typedef typename NumTraits<Scalar>::Real RealScalar;
   EIGEN_DEVICE_FUNC static inline RealScalar& run(Scalar& x) { return reinterpret_cast<RealScalar*>(&x)[1]; }
   EIGEN_DEVICE_FUNC static inline const RealScalar& run(const Scalar& x) {
-    return reinterpret_cast<const RealScalar*>(&x)[1];
+    return reinterpret_cast<RealScalar*>(&x)[1];
   }
 };
 
@@ -604,6 +604,7 @@
 struct count_bits_impl {
   static_assert(std::is_integral<BitsType>::value && std::is_unsigned<BitsType>::value,
                 "BitsType must be an unsigned integer");
+
   static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
     int n = CHAR_BIT * sizeof(BitsType);
     int shift = n / 2;
@@ -654,9 +655,9 @@
 #if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
 
 template <typename BitsType>
-struct count_bits_impl<
-    BitsType, std::enable_if_t<std::is_integral<BitsType>::value && sizeof(BitsType) <= sizeof(unsigned int)>> {
+struct count_bits_impl<BitsType, std::enable_if_t<sizeof(BitsType) <= sizeof(unsigned int)>> {
   static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
   static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
     static constexpr int kLeadingBitsOffset = (sizeof(unsigned int) - sizeof(BitsType)) * CHAR_BIT;
     return bits == 0 ? kNumBits : __builtin_clz(static_cast<unsigned int>(bits)) - kLeadingBitsOffset;
@@ -668,10 +669,10 @@
 };
 
 template <typename BitsType>
-struct count_bits_impl<BitsType,
-                       std::enable_if_t<std::is_integral<BitsType>::value && sizeof(unsigned int) < sizeof(BitsType) &&
-                                        sizeof(BitsType) <= sizeof(unsigned long)>> {
+struct count_bits_impl<
+    BitsType, std::enable_if_t<sizeof(unsigned int) < sizeof(BitsType) && sizeof(BitsType) <= sizeof(unsigned long)>> {
   static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
   static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
     static constexpr int kLeadingBitsOffset = (sizeof(unsigned long) - sizeof(BitsType)) * CHAR_BIT;
     return bits == 0 ? kNumBits : __builtin_clzl(static_cast<unsigned long>(bits)) - kLeadingBitsOffset;
@@ -683,10 +684,10 @@
 };
 
 template <typename BitsType>
-struct count_bits_impl<BitsType,
-                       std::enable_if_t<std::is_integral<BitsType>::value && sizeof(unsigned long) < sizeof(BitsType) &&
-                                        sizeof(BitsType) <= sizeof(unsigned long long)>> {
+struct count_bits_impl<BitsType, std::enable_if_t<sizeof(unsigned long) < sizeof(BitsType) &&
+                                                  sizeof(BitsType) <= sizeof(unsigned long long)>> {
   static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
   static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
     static constexpr int kLeadingBitsOffset = (sizeof(unsigned long long) - sizeof(BitsType)) * CHAR_BIT;
     return bits == 0 ? kNumBits : __builtin_clzll(static_cast<unsigned long long>(bits)) - kLeadingBitsOffset;
@@ -700,9 +701,9 @@
 #elif EIGEN_COMP_MSVC
 
 template <typename BitsType>
-struct count_bits_impl<
-    BitsType, std::enable_if_t<std::is_integral<BitsType>::value && sizeof(BitsType) <= sizeof(unsigned long)>> {
+struct count_bits_impl<BitsType, std::enable_if_t<sizeof(BitsType) <= sizeof(unsigned long)>> {
   static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
   static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
     unsigned long out;
     _BitScanReverse(&out, static_cast<unsigned long>(bits));
@@ -719,10 +720,10 @@
 #ifdef _WIN64
 
 template <typename BitsType>
-struct count_bits_impl<BitsType,
-                       std::enable_if_t<std::is_integral<BitsType>::value && sizeof(unsigned long) < sizeof(BitsType) &&
-                                        sizeof(BitsType) <= sizeof(__int64)>> {
+struct count_bits_impl<
+    BitsType, std::enable_if_t<sizeof(unsigned long) < sizeof(BitsType) && sizeof(BitsType) <= sizeof(__int64)>> {
   static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
   static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
     unsigned long out;
     _BitScanReverse64(&out, static_cast<unsigned __int64>(bits));
@@ -741,27 +742,186 @@
 #endif  // EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
 
 template <typename BitsType>
-struct log_2_impl {
-  static constexpr int kTotalBits = sizeof(BitsType) * CHAR_BIT;
-  static EIGEN_DEVICE_FUNC inline int run_ceil(const BitsType& x) {
-    const int n = kTotalBits - clz(x);
-    bool power_of_two = (x & (x - 1)) == 0;
-    return x == 0 ? 0 : power_of_two ? (n - 1) : n;
-  }
-  static EIGEN_DEVICE_FUNC inline int run_floor(const BitsType& x) {
-    const int n = kTotalBits - clz(x);
-    return x == 0 ? 0 : n - 1;
-  }
-};
-
-template <typename BitsType>
-int log2_ceil(const BitsType& x) {
-  return log_2_impl<BitsType>::run_ceil(x);
+int log2_ceil(BitsType x) {
+  int n = CHAR_BIT * sizeof(BitsType) - clz(x);
+  bool powerOfTwo = (x & (x - 1)) == 0;
+  return x == 0 ? 0 : powerOfTwo ? n - 1 : n;
 }
 
 template <typename BitsType>
-int log2_floor(const BitsType& x) {
-  return log_2_impl<BitsType>::run_floor(x);
+int log2_floor(BitsType x) {
+  int n = CHAR_BIT * sizeof(BitsType) - clz(x);
+  return x == 0 ? 0 : n - 1;
+}
+
+/****************************************************************************
+ * Implementation of random                                               *
+ ****************************************************************************/
+
+// Returns a Scalar whose numRandomBits lowest bits are filled with random bits, starting from the least significant bit
+template <typename Scalar>
+Scalar getRandomBits(int numRandomBits) {
+  using BitsType = typename numext::get_integer_by_size<sizeof(Scalar)>::unsigned_type;
+  enum : int {
+    StdRandBits = meta_floor_log2<(unsigned int)(RAND_MAX) + 1>::value,
+    ScalarBits = sizeof(Scalar) * CHAR_BIT
+  };
+  eigen_assert((numRandomBits >= 0) && (numRandomBits <= ScalarBits));
+  const BitsType mask = BitsType(-1) >> ((ScalarBits - numRandomBits) & (ScalarBits - 1));
+  BitsType randomBits = BitsType(0);
+  for (int shift = 0; shift < numRandomBits; shift += StdRandBits) {
+    int r = std::rand();
+    randomBits |= static_cast<BitsType>(r) << shift;
+  }
+  // clear the excess bits
+  randomBits &= mask;
+  return numext::bit_cast<Scalar, BitsType>(randomBits);
+}
+
+template <typename Scalar, bool IsComplex, bool IsInteger>
+struct random_default_impl {};
+
+template <typename Scalar>
+struct random_impl : random_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
+
+template <typename Scalar>
+struct random_retval {
+  typedef Scalar type;
+};
+
+template <typename Scalar>
+inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y);
+template <typename Scalar>
+inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random();
+
+template <typename Scalar>
+struct random_default_impl<Scalar, false, false> {
+  using BitsType = typename numext::get_integer_by_size<sizeof(Scalar)>::unsigned_type;
+  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y, int numRandomBits) {
+    Scalar half_x = Scalar(0.5) * x;
+    Scalar half_y = Scalar(0.5) * y;
+    Scalar result = (half_x + half_y) + (half_y - half_x) * run(numRandomBits);
+    // result is in the half-open interval [x, y) -- provided that x < y
+    return result;
+  }
+  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) {
+    const int mantissa_bits = NumTraits<Scalar>::digits() - 1;
+    return run(x, y, mantissa_bits);
+  }
+  static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) {
+    const int mantissa_bits = NumTraits<Scalar>::digits() - 1;
+    eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissa_bits);
+    BitsType randomBits = getRandomBits<BitsType>(numRandomBits);
+    // if fewer than mantissa_bits bits are requested, shift them to the left
+    randomBits <<= (mantissa_bits - numRandomBits);
+    // randomBits is in the half-open interval [2,4)
+    randomBits |= numext::bit_cast<BitsType>(Scalar(2));
+    // result is in the half-open interval [-1,1)
+    Scalar result = numext::bit_cast<Scalar>(randomBits) - Scalar(3);
+    return result;
+  }
+  static EIGEN_DEVICE_FUNC inline Scalar run() {
+    const int mantissa_bits = NumTraits<Scalar>::digits() - 1;
+    return run(mantissa_bits);
+  }
+};
+
+// TODO: fix this for PPC
+template <bool Specialize = sizeof(long double) == 2 * sizeof(uint64_t) && !EIGEN_ARCH_PPC>
+struct random_longdouble_impl {
+  enum : int {
+    Size = sizeof(long double),
+    MantissaBits = NumTraits<long double>::digits() - 1,
+    LowBits = MantissaBits > 64 ? 64 : MantissaBits,
+    HighBits = MantissaBits > 64 ? MantissaBits - 64 : 0
+  };
+  static EIGEN_DEVICE_FUNC inline long double run() {
+    EIGEN_USING_STD(memcpy)
+    uint64_t randomBits[2];
+    long double result = 2.0L;
+    memcpy(&randomBits, &result, Size);
+    randomBits[0] |= getRandomBits<uint64_t>(LowBits);
+    randomBits[1] |= getRandomBits<uint64_t>(HighBits);
+    memcpy(&result, &randomBits, Size);
+    result -= 3.0L;
+    return result;
+  }
+};
+template <>
+struct random_longdouble_impl<false> {
+  using Impl = random_impl<double>;
+  static EIGEN_DEVICE_FUNC inline long double run() { return static_cast<long double>(Impl::run()); }
+};
+
+template <>
+struct random_impl<long double> {
+  static EIGEN_DEVICE_FUNC inline long double run(const long double& x, const long double& y) {
+    long double half_x = 0.5L * x;
+    long double half_y = 0.5L * y;
+    long double result = (half_x + half_y) + (half_y - half_x) * run();
+    return result;
+  }
+  static EIGEN_DEVICE_FUNC inline long double run() { return random_longdouble_impl<>::run(); }
+};
+
+template <typename Scalar>
+struct random_default_impl<Scalar, false, true> {
+  using BitsType = typename numext::get_integer_by_size<sizeof(Scalar)>::unsigned_type;
+  enum : int { ScalarBits = sizeof(Scalar) * CHAR_BIT };
+  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) {
+    if (y <= x) return x;
+    const BitsType range = static_cast<BitsType>(y) - static_cast<BitsType>(x) + 1;
+    // handle edge case where [x,y] spans the entire range of Scalar
+    if (range == 0) return getRandomBits<Scalar>(ScalarBits);
+    // calculate the number of random bits needed to fill range
+    const int numRandomBits = log2_ceil(range);
+    BitsType randomBits;
+    do {
+      randomBits = getRandomBits<BitsType>(numRandomBits);
+      // if the random draw is outside [0, range), try again (rejection sampling)
+      // in the worst-case scenario, the probability of rejection is: 1/2 - 1/2^numRandomBits < 50%
+    } while (randomBits >= range);
+    Scalar result = x + static_cast<Scalar>(randomBits);
+    return result;
+  }
+
+  static EIGEN_DEVICE_FUNC inline Scalar run() {
+#ifdef EIGEN_MAKING_DOCS
+    return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
+#else
+    return getRandomBits<Scalar>(ScalarBits);
+#endif
+  }
+};
+
+template <>
+struct random_impl<bool> {
+  static EIGEN_DEVICE_FUNC inline bool run(const bool& x, const bool& y) {
+    if (y <= x) return x;
+    return run();
+  }
+  static EIGEN_DEVICE_FUNC inline bool run() { return getRandomBits<int>(1) ? true : false; }
+};
+
+template <typename Scalar>
+struct random_default_impl<Scalar, true, false> {
+  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) {
+    return Scalar(random(x.real(), y.real()), random(x.imag(), y.imag()));
+  }
+  static EIGEN_DEVICE_FUNC inline Scalar run() {
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    return Scalar(random<RealScalar>(), random<RealScalar>());
+  }
+};
+
+template <typename Scalar>
+inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y) {
+  return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y);
+}
+
+template <typename Scalar>
+inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() {
+  return EIGEN_MATHFUNC_IMPL(random, Scalar)::run();
 }
 
 // Implementation of is* functions
@@ -820,7 +980,7 @@
 template <typename T>
 EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);
 template <typename T>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS T ptanh_float(const T& a_x);
+T generic_fast_tanh_float(const T& a_x);
 
 /****************************************************************************
  * Implementation of sign                                                 *
@@ -1375,25 +1535,6 @@
   return exp(x);
 }
 
-// MSVC screws up some edge-cases for std::exp(complex).
-#ifdef EIGEN_COMP_MSVC
-template <typename RealScalar>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex<RealScalar> exp(const std::complex<RealScalar>& x) {
-  EIGEN_USING_STD(exp);
-  // If z is (x,±∞) (for any finite x), the result is (NaN,NaN) and FE_INVALID is raised.
-  // If z is (x,NaN) (for any finite x), the result is (NaN,NaN) and FE_INVALID may be raised.
-  if ((isfinite)(real_ref(x)) && !(isfinite)(imag_ref(x))) {
-    return std::complex<RealScalar>(NumTraits<RealScalar>::quiet_NaN(), NumTraits<RealScalar>::quiet_NaN());
-  }
-  // If z is (+∞,±∞), the result is (±∞,NaN) and FE_INVALID is raised (the sign of the real part is unspecified)
-  // If z is (+∞,NaN), the result is (±∞,NaN) (the sign of the real part is unspecified)
-  if ((real_ref(x) == NumTraits<RealScalar>::infinity() && !(isfinite)(imag_ref(x)))) {
-    return std::complex<RealScalar>(NumTraits<RealScalar>::infinity(), NumTraits<RealScalar>::quiet_NaN());
-  }
-  return exp(x);
-}
-#endif
-
 #if defined(SYCL_DEVICE_ONLY)
 SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(exp, exp)
 #endif
@@ -1657,7 +1798,7 @@
 }
 
 #if (!defined(EIGEN_GPUCC)) && EIGEN_FAST_MATH && !defined(SYCL_DEVICE_ONLY)
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(float x) { return internal::ptanh_float(x); }
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(float x) { return internal::generic_fast_tanh_float(x); }
 #endif
 
 #if defined(SYCL_DEVICE_ONLY)
diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h
index 689c6d8..ed44089 100644
--- a/Eigen/src/Core/MathFunctionsImpl.h
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -146,6 +146,65 @@
   }
 };
 
+/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
+    Doesn't do anything fancy, just a 13/6-degree rational interpolant which
+    is accurate up to a couple of ulps in the (approximate) range [-8, 8],
+    outside of which tanh(x) = +/-1 in single precision. The input is clamped
+    to the range [-c, c]. The value c is chosen as the smallest value where
+    the approximation evaluates to exactly 1. In the range [-0.0004, 0.0004]
+    the approximation tanh(x) ~= x is used for better accuracy as x tends to zero.
+
+    This implementation works on both scalars and packets.
+*/
+template <typename T>
+T generic_fast_tanh_float(const T& a_x) {
+  // Clamp the inputs to the range [-c, c]
+#ifdef EIGEN_VECTORIZE_FMA
+  const T plus_clamp = pset1<T>(7.99881172180175781f);
+  const T minus_clamp = pset1<T>(-7.99881172180175781f);
+#else
+  const T plus_clamp = pset1<T>(7.90531110763549805f);
+  const T minus_clamp = pset1<T>(-7.90531110763549805f);
+#endif
+  const T tiny = pset1<T>(0.0004f);
+  const T x = pmax(pmin(a_x, plus_clamp), minus_clamp);
+  const T tiny_mask = pcmp_lt(pabs(a_x), tiny);
+  // The monomial coefficients of the numerator polynomial (odd).
+  const T alpha_1 = pset1<T>(4.89352455891786e-03f);
+  const T alpha_3 = pset1<T>(6.37261928875436e-04f);
+  const T alpha_5 = pset1<T>(1.48572235717979e-05f);
+  const T alpha_7 = pset1<T>(5.12229709037114e-08f);
+  const T alpha_9 = pset1<T>(-8.60467152213735e-11f);
+  const T alpha_11 = pset1<T>(2.00018790482477e-13f);
+  const T alpha_13 = pset1<T>(-2.76076847742355e-16f);
+
+  // The monomial coefficients of the denominator polynomial (even).
+  const T beta_0 = pset1<T>(4.89352518554385e-03f);
+  const T beta_2 = pset1<T>(2.26843463243900e-03f);
+  const T beta_4 = pset1<T>(1.18534705686654e-04f);
+  const T beta_6 = pset1<T>(1.19825839466702e-06f);
+
+  // Since the polynomials are odd/even, we need x^2.
+  const T x2 = pmul(x, x);
+
+  // Evaluate the numerator polynomial p.
+  T p = pmadd(x2, alpha_13, alpha_11);
+  p = pmadd(x2, p, alpha_9);
+  p = pmadd(x2, p, alpha_7);
+  p = pmadd(x2, p, alpha_5);
+  p = pmadd(x2, p, alpha_3);
+  p = pmadd(x2, p, alpha_1);
+  p = pmul(x, p);
+
+  // Evaluate the denominator polynomial q.
+  T q = pmadd(x2, beta_6, beta_4);
+  q = pmadd(x2, q, beta_2);
+  q = pmadd(x2, q, beta_0);
+
+  // Divide the numerator by the denominator.
+  return pselect(tiny_mask, x, pdiv(p, q));
+}
+
 template <typename RealScalar>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) {
   // IEEE IEC 6059 special cases.
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index af6afaf..ce0e4e6 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -30,7 +30,7 @@
     actual_alignment = ((Options_ & DontAlign) == 0) ? default_alignment : 0,
     required_alignment = unpacket_traits<PacketScalar>::alignment,
     packet_access_bit = (packet_traits<Scalar_>::Vectorizable &&
-                         (EIGEN_UNALIGNED_VECTORIZE || (int(actual_alignment) >= int(required_alignment))))
+                         (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment >= required_alignment)))
                             ? PacketAccessBit
                             : 0
   };
@@ -48,7 +48,7 @@
     Flags = compute_matrix_flags(Options_),
     Options = Options_,
     InnerStrideAtCompileTime = 1,
-    OuterStrideAtCompileTime = (int(Options) & int(RowMajor)) ? ColsAtCompileTime : RowsAtCompileTime,
+    OuterStrideAtCompileTime = (Options & RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
 
     // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
     EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
@@ -207,7 +207,7 @@
    *
    * \callgraph
    */
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix& operator=(const Matrix& other) { return Base::_set(other); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other) { return Base::_set(other); }
 
   /** \internal
    * \brief Copies the value of the expression \a other into \c *this with automatic resizing.
@@ -250,18 +250,17 @@
    *
    * \sa resize(Index,Index)
    */
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix()
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix()
       : Base(){EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED}
 
         // FIXME is it still needed
-        EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr explicit Matrix(
-            internal::constructor_without_unaligned_array_assert)
+        EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Matrix(internal::constructor_without_unaligned_array_assert)
       : Base(internal::constructor_without_unaligned_array_assert()){EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED}
 
-        EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix(Matrix && other)
+        EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(Matrix && other)
             EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
       : Base(std::move(other)) {}
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix& operator=(Matrix&& other)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(Matrix&& other)
       EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value) {
     Base::operator=(std::move(other));
     return *this;
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 5f846a0..f9bf737 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -31,7 +31,6 @@
 
 namespace internal {
 
-#ifndef EIGEN_NO_DEBUG
 template <int MaxSizeAtCompileTime, int MaxRowsAtCompileTime, int MaxColsAtCompileTime>
 struct check_rows_cols_for_overflow {
   EIGEN_STATIC_ASSERT(MaxRowsAtCompileTime* MaxColsAtCompileTime == MaxSizeAtCompileTime,
@@ -45,7 +44,7 @@
   template <typename Index>
   EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index, Index cols) {
     constexpr Index MaxIndex = NumTraits<Index>::highest();
-    bool error = cols > (MaxIndex / MaxRowsAtCompileTime);
+    bool error = cols > MaxIndex / MaxRowsAtCompileTime;
     if (error) throw_std_bad_alloc();
   }
 };
@@ -55,7 +54,7 @@
   template <typename Index>
   EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index rows, Index) {
     constexpr Index MaxIndex = NumTraits<Index>::highest();
-    bool error = rows > (MaxIndex / MaxColsAtCompileTime);
+    bool error = rows > MaxIndex / MaxColsAtCompileTime;
     if (error) throw_std_bad_alloc();
   }
 };
@@ -65,11 +64,10 @@
   template <typename Index>
   EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index rows, Index cols) {
     constexpr Index MaxIndex = NumTraits<Index>::highest();
-    bool error = cols == 0 ? false : (rows > (MaxIndex / cols));
+    bool error = cols == 0 ? false : (rows > MaxIndex / cols);
     if (error) throw_std_bad_alloc();
   }
 };
-#endif
 
 template <typename Derived, typename OtherDerived = Derived,
           bool IsVector = bool(Derived::IsVectorAtCompileTime) && bool(OtherDerived::IsVectorAtCompileTime)>
@@ -299,10 +297,8 @@
                  internal::check_implication(ColsAtCompileTime == Dynamic && MaxColsAtCompileTime != Dynamic,
                                              cols <= MaxColsAtCompileTime) &&
                  rows >= 0 && cols >= 0 && "Invalid sizes when resizing a matrix or array.");
-#ifndef EIGEN_NO_DEBUG
     internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime>::run(rows,
                                                                                                                   cols);
-#endif
 #ifdef EIGEN_INITIALIZE_COEFFS
     Index size = rows * cols;
     bool size_changed = size != this->size();
@@ -371,10 +367,8 @@
   template <typename OtherDerived>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other) {
     const OtherDerived& other = _other.derived();
-#ifndef EIGEN_NO_DEBUG
     internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime>::run(
         other.rows(), other.cols());
-#endif
     const Index othersize = other.rows() * other.cols();
     if (RowsAtCompileTime == 1) {
       eigen_assert(other.rows() == 1 || other.cols() == 1);
@@ -452,9 +446,7 @@
   /** This is a special case of the templated operator=. Its purpose is to
    * prevent a default operator= from hiding the templated operator=.
    */
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Derived& operator=(const PlainObjectBase& other) {
-    return _set(other);
-  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other) { return _set(other); }
 
   /** \sa MatrixBase::lazyAssign() */
   template <typename OtherDerived>
@@ -472,29 +464,28 @@
   // Prevent user from trying to instantiate PlainObjectBase objects
   // by making all its constructor protected. See bug 1074.
  protected:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase() : m_storage() {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase() : m_storage() {
     //       EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
   }
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN
   // FIXME is it still needed ?
   /** \internal */
-  EIGEN_DEVICE_FUNC constexpr explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert)
+  EIGEN_DEVICE_FUNC explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert)
       : m_storage(internal::constructor_without_unaligned_array_assert()) {
     // EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
   }
 #endif
 
-  EIGEN_DEVICE_FUNC constexpr PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT
-      : m_storage(std::move(other.m_storage)) {}
+  EIGEN_DEVICE_FUNC PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT : m_storage(std::move(other.m_storage)) {}
 
-  EIGEN_DEVICE_FUNC constexpr PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT {
+  EIGEN_DEVICE_FUNC PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT {
     m_storage = std::move(other.m_storage);
     return *this;
   }
 
   /** Copy constructor */
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(const PlainObjectBase& other)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other)
       : Base(), m_storage(other.m_storage) {}
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols)
       : m_storage(size, rows, cols) {
@@ -752,7 +743,7 @@
   // aliasing is dealt once in internal::call_assignment
   // so at this stage we have to assume aliasing... and resising has to be done later.
   template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Derived& _set(const DenseBase<OtherDerived>& other) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other) {
     internal::call_assignment(this->derived(), other.derived());
     return this->derived();
   }
@@ -763,7 +754,7 @@
    * \sa operator=(const MatrixBase<OtherDerived>&), _set()
    */
   template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Derived& _set_noalias(const DenseBase<OtherDerived>& other) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase<OtherDerived>& other) {
     // I don't think we need this resize call since the lazyAssign will anyways resize
     // and lazyAssign will be called by the assign selector.
     //_resize_to_match(other);
@@ -950,10 +941,8 @@
         ((Derived::IsRowMajor && _this.cols() == cols) ||  // row-major and we change only the number of rows
          (!Derived::IsRowMajor && _this.rows() == rows)))  // column-major and we change only the number of columns
     {
-#ifndef EIGEN_NO_DEBUG
       internal::check_rows_cols_for_overflow<Derived::MaxSizeAtCompileTime, Derived::MaxRowsAtCompileTime,
                                              Derived::MaxColsAtCompileTime>::run(rows, cols);
-#endif
       _this.derived().m_storage.conservativeResize(rows * cols, rows, cols);
     } else {
       // The storage order does not allow us to use reallocation.
diff --git a/Eigen/src/Core/RandomImpl.h b/Eigen/src/Core/RandomImpl.h
deleted file mode 100644
index 445376c..0000000
--- a/Eigen/src/Core/RandomImpl.h
+++ /dev/null
@@ -1,253 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2024 Charles Schlosser <cs.schlosser@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_RANDOM_IMPL_H
-#define EIGEN_RANDOM_IMPL_H
-
-// IWYU pragma: private
-#include "./InternalHeaderCheck.h"
-
-namespace Eigen {
-
-namespace internal {
-
-/****************************************************************************
- * Implementation of random                                               *
- ****************************************************************************/
-
-template <typename Scalar, bool IsComplex, bool IsInteger>
-struct random_default_impl {};
-
-template <typename Scalar>
-struct random_impl : random_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
-
-template <typename Scalar>
-struct random_retval {
-  typedef Scalar type;
-};
-
-template <typename Scalar>
-inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y) {
-  return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y);
-}
-
-template <typename Scalar>
-inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() {
-  return EIGEN_MATHFUNC_IMPL(random, Scalar)::run();
-}
-
-// TODO: replace or provide alternatives to this, e.g. std::random_device
-struct eigen_random_device {
-  using ReturnType = int;
-  static constexpr int Entropy = meta_floor_log2<(unsigned int)(RAND_MAX) + 1>::value;
-  static constexpr ReturnType Highest = RAND_MAX;
-  static EIGEN_DEVICE_FUNC inline ReturnType run() { return std::rand(); };
-};
-
-// Fill a built-in unsigned integer with numRandomBits beginning with the least significant bit
-template <typename Scalar>
-struct random_bits_impl {
-  EIGEN_STATIC_ASSERT(std::is_unsigned<Scalar>::value, SCALAR MUST BE A BUILT - IN UNSIGNED INTEGER)
-  using RandomDevice = eigen_random_device;
-  using RandomReturnType = typename RandomDevice::ReturnType;
-  static constexpr int kEntropy = RandomDevice::Entropy;
-  static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT;
-  // return a Scalar filled with numRandomBits beginning from the least significant bit
-  static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) {
-    eigen_assert((numRandomBits >= 0) && (numRandomBits <= kTotalBits));
-    const Scalar mask = Scalar(-1) >> ((kTotalBits - numRandomBits) & (kTotalBits - 1));
-    Scalar randomBits = 0;
-    for (int shift = 0; shift < numRandomBits; shift += kEntropy) {
-      RandomReturnType r = RandomDevice::run();
-      randomBits |= static_cast<Scalar>(r) << shift;
-    }
-    // clear the excess bits
-    randomBits &= mask;
-    return randomBits;
-  }
-};
-
-template <typename BitsType>
-EIGEN_DEVICE_FUNC inline BitsType getRandomBits(int numRandomBits) {
-  return random_bits_impl<BitsType>::run(numRandomBits);
-}
-
-// random implementation for a built-in floating point type
-template <typename Scalar, bool BuiltIn = std::is_floating_point<Scalar>::value>
-struct random_float_impl {
-  using BitsType = typename numext::get_integer_by_size<sizeof(Scalar)>::unsigned_type;
-  static constexpr EIGEN_DEVICE_FUNC inline int mantissaBits() {
-    const int digits = NumTraits<Scalar>::digits();
-    return digits - 1;
-  }
-  static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) {
-    eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissaBits());
-    BitsType randomBits = getRandomBits<BitsType>(numRandomBits);
-    // if fewer than MantissaBits is requested, shift them to the left
-    randomBits <<= (mantissaBits() - numRandomBits);
-    // randomBits is in the half-open interval [2,4)
-    randomBits |= numext::bit_cast<BitsType>(Scalar(2));
-    // result is in the half-open interval [-1,1)
-    Scalar result = numext::bit_cast<Scalar>(randomBits) - Scalar(3);
-    return result;
-  }
-};
-// random implementation for a custom floating point type
-// uses double as the implementation with a mantissa with a size equal to either the target scalar's mantissa or that of
-// double, whichever is smaller
-template <typename Scalar>
-struct random_float_impl<Scalar, false> {
-  static EIGEN_DEVICE_FUNC inline int mantissaBits() {
-    const int digits = NumTraits<Scalar>::digits();
-    constexpr int kDoubleDigits = NumTraits<double>::digits();
-    return numext::mini(digits, kDoubleDigits) - 1;
-  }
-  static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) {
-    eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissaBits());
-    Scalar result = static_cast<Scalar>(random_float_impl<double>::run(numRandomBits));
-    return result;
-  }
-};
-
-// random implementation for long double
-// this specialization is not compatible with double-double scalars
-template <bool Specialize = (sizeof(long double) == 2 * sizeof(uint64_t)) &&
-                            ((std::numeric_limits<long double>::digits != (2 * std::numeric_limits<double>::digits)))>
-struct random_longdouble_impl {
-  static constexpr int Size = sizeof(long double);
-  static constexpr EIGEN_DEVICE_FUNC inline int mantissaBits() { return NumTraits<long double>::digits() - 1; }
-  static EIGEN_DEVICE_FUNC inline long double run(int numRandomBits) {
-    eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissaBits());
-    EIGEN_USING_STD(memcpy);
-    int numLowBits = numext::mini(numRandomBits, 64);
-    int numHighBits = numext::maxi(numRandomBits - 64, 0);
-    uint64_t randomBits[2];
-    long double result = 2.0L;
-    memcpy(&randomBits, &result, Size);
-    randomBits[0] |= getRandomBits<uint64_t>(numLowBits);
-    randomBits[1] |= getRandomBits<uint64_t>(numHighBits);
-    memcpy(&result, &randomBits, Size);
-    result -= 3.0L;
-    return result;
-  }
-};
-template <>
-struct random_longdouble_impl<false> {
-  static constexpr EIGEN_DEVICE_FUNC inline int mantissaBits() { return NumTraits<double>::digits() - 1; }
-  static EIGEN_DEVICE_FUNC inline long double run(int numRandomBits) {
-    return static_cast<long double>(random_float_impl<double>::run(numRandomBits));
-  }
-};
-template <>
-struct random_float_impl<long double> : random_longdouble_impl<> {};
-
-template <typename Scalar>
-struct random_default_impl<Scalar, false, false> {
-  using Impl = random_float_impl<Scalar>;
-  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y, int numRandomBits) {
-    Scalar half_x = Scalar(0.5) * x;
-    Scalar half_y = Scalar(0.5) * y;
-    Scalar result = (half_x + half_y) + (half_y - half_x) * run(numRandomBits);
-    // result is in the half-open interval [x, y) -- provided that x < y
-    return result;
-  }
-  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) {
-    return run(x, y, Impl::mantissaBits());
-  }
-  static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) { return Impl::run(numRandomBits); }
-  static EIGEN_DEVICE_FUNC inline Scalar run() { return run(Impl::mantissaBits()); }
-};
-
-template <typename Scalar, bool IsSigned = NumTraits<Scalar>::IsSigned, bool BuiltIn = std::is_integral<Scalar>::value>
-struct random_int_impl;
-
-// random implementation for a built-in unsigned integer type
-template <typename Scalar>
-struct random_int_impl<Scalar, false, true> {
-  static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT;
-  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) {
-    if (y <= x) return x;
-    Scalar range = y - x;
-    // handle edge case where [x,y] spans the entire range of Scalar
-    if (range == NumTraits<Scalar>::highest()) return run();
-    Scalar count = range + 1;
-    // calculate the number of random bits needed to fill range
-    int numRandomBits = log2_ceil(count);
-    Scalar randomBits;
-    do {
-      randomBits = getRandomBits<Scalar>(numRandomBits);
-      // if the random draw is outside [0, range), try again (rejection sampling)
-      // in the worst-case scenario, the probability of rejection is: 1/2 - 1/2^numRandomBits < 50%
-    } while (randomBits >= count);
-    Scalar result = x + randomBits;
-    return result;
-  }
-  static EIGEN_DEVICE_FUNC inline Scalar run() { return getRandomBits<Scalar>(kTotalBits); }
-};
-
-// random implementation for a built-in signed integer type
-template <typename Scalar>
-struct random_int_impl<Scalar, true, true> {
-  static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT;
-  using BitsType = typename make_unsigned<Scalar>::type;
-  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) {
-    if (y <= x) return x;
-    // Avoid overflow by representing `range` as an unsigned type
-    BitsType range = static_cast<BitsType>(y) - static_cast<BitsType>(x);
-    BitsType randomBits = random_int_impl<BitsType>::run(0, range);
-    // Avoid overflow in the case where `x` is negative and there is a large range so
-    // `randomBits` would also be negative if cast to `Scalar` first.
-    Scalar result = static_cast<Scalar>(static_cast<BitsType>(x) + randomBits);
-    return result;
-  }
-  static EIGEN_DEVICE_FUNC inline Scalar run() { return static_cast<Scalar>(getRandomBits<BitsType>(kTotalBits)); }
-};
-
-// todo: custom integers
-template <typename Scalar, bool IsSigned>
-struct random_int_impl<Scalar, IsSigned, false> {
-  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar&, const Scalar&) { return run(); }
-  static EIGEN_DEVICE_FUNC inline Scalar run() {
-    eigen_assert(std::false_type::value && "RANDOM FOR CUSTOM INTEGERS NOT YET SUPPORTED");
-    return Scalar(0);
-  }
-};
-
-template <typename Scalar>
-struct random_default_impl<Scalar, false, true> : random_int_impl<Scalar> {};
-
-template <>
-struct random_impl<bool> {
-  static EIGEN_DEVICE_FUNC inline bool run(const bool& x, const bool& y) {
-    if (y <= x) return x;
-    return run();
-  }
-  static EIGEN_DEVICE_FUNC inline bool run() { return getRandomBits<unsigned>(1) ? true : false; }
-};
-
-template <typename Scalar>
-struct random_default_impl<Scalar, true, false> {
-  typedef typename NumTraits<Scalar>::Real RealScalar;
-  using Impl = random_impl<RealScalar>;
-  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y, int numRandomBits) {
-    return Scalar(Impl::run(x.real(), y.real(), numRandomBits), Impl::run(x.imag(), y.imag(), numRandomBits));
-  }
-  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) {
-    return Scalar(Impl::run(x.real(), y.real()), Impl::run(x.imag(), y.imag()));
-  }
-  static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) {
-    return Scalar(Impl::run(numRandomBits), Impl::run(numRandomBits));
-  }
-  static EIGEN_DEVICE_FUNC inline Scalar run() { return Scalar(Impl::run(), Impl::run()); }
-};
-
-}  // namespace internal
-}  // namespace Eigen
-
-#endif  // EIGEN_RANDOM_IMPL_H
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index 2b1683b..afdb242 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -184,8 +184,7 @@
   enum {
     Mode = Mode_,
     Flags = internal::traits<TriangularView>::Flags,
-    TransposeMode = (int(Mode) & int(Upper) ? Lower : 0) | (int(Mode) & int(Lower) ? Upper : 0) |
-                    (int(Mode) & int(UnitDiag)) | (int(Mode) & int(ZeroDiag)),
+    TransposeMode = (Mode & Upper ? Lower : 0) | (Mode & Lower ? Upper : 0) | (Mode & (UnitDiag)) | (Mode & (ZeroDiag)),
     IsVectorAtCompileTime = false
   };
 
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index bae5714..6a8bee8 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -41,7 +41,6 @@
     HasNegate = 1,
     HasSqrt = 1,
     HasLog = 1,
-    HasExp = 1,
     HasAbs = 0,
     HasAbs2 = 0,
     HasMin = 0,
@@ -444,11 +443,6 @@
   return plog_complex<Packet4cf>(a);
 }
 
-template <>
-EIGEN_STRONG_INLINE Packet4cf pexp<Packet4cf>(const Packet4cf& a) {
-  return pexp_complex<Packet4cf>(a);
-}
-
 }  // end namespace internal
 
 }  // end namespace Eigen
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 2383e46..d752f06 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -270,7 +270,9 @@
 template <>
 struct packet_traits<int64_t> : default_packet_traits {
   typedef Packet4l type;
-  typedef Packet2l half;
+  // There is no half-size packet for current Packet4l.
+  // TODO: support as SSE path.
+  typedef Packet4l half;
   enum { Vectorizable = 1, AlignedOnScalar = 1, HasCmp = 1, size = 4 };
 };
 template <>
@@ -330,9 +332,6 @@
 struct unpacket_traits<Packet4d> {
   typedef double type;
   typedef Packet2d half;
-#ifdef EIGEN_VECTORIZE_AVX2
-  typedef Packet4l integer_packet;
-#endif
   enum {
     size = 4,
     alignment = Aligned32,
@@ -369,7 +368,7 @@
 template <>
 struct unpacket_traits<Packet4l> {
   typedef int64_t type;
-  typedef Packet2l half;
+  typedef Packet4l half;
   enum {
     size = 4,
     alignment = Aligned32,
@@ -562,7 +561,7 @@
 }
 template <int N>
 EIGEN_STRONG_INLINE std::enable_if_t<(N == 63), Packet4l> parithmetic_shift_right(Packet4l a) {
-  return _mm256_cmpgt_epi64(_mm256_setzero_si256(), a);
+  return _mm256_shuffle_epi32(_mm256_srai_epi32(a, 31), (shuffle_mask<1, 1, 3, 3>::mask));
 }
 template <int N>
 EIGEN_STRONG_INLINE std::enable_if_t<(N < 0) || (N > 63), Packet4l> parithmetic_shift_right(Packet4l a) {
@@ -624,22 +623,22 @@
 template <>
 EIGEN_DEVICE_FUNC inline void pscatter<int64_t, Packet4l>(int64_t* to, const Packet4l& from, Index stride) {
   __m128i low = _mm256_extractf128_si256(from, 0);
-  to[stride * 0] = _mm_extract_epi64_0(low);
-  to[stride * 1] = _mm_extract_epi64_1(low);
+  to[stride * 0] = _mm_extract_epi64(low, 0);
+  to[stride * 1] = _mm_extract_epi64(low, 1);
 
   __m128i high = _mm256_extractf128_si256(from, 1);
-  to[stride * 2] = _mm_extract_epi64_0(high);
-  to[stride * 3] = _mm_extract_epi64_1(high);
+  to[stride * 2] = _mm_extract_epi64(high, 0);
+  to[stride * 3] = _mm_extract_epi64(high, 1);
 }
 template <>
 EIGEN_DEVICE_FUNC inline void pscatter<uint64_t, Packet4ul>(uint64_t* to, const Packet4ul& from, Index stride) {
   __m128i low = _mm256_extractf128_si256(from, 0);
-  to[stride * 0] = _mm_extract_epi64_0(low);
-  to[stride * 1] = _mm_extract_epi64_1(low);
+  to[stride * 0] = _mm_extract_epi64(low, 0);
+  to[stride * 1] = _mm_extract_epi64(low, 1);
 
   __m128i high = _mm256_extractf128_si256(from, 1);
-  to[stride * 2] = _mm_extract_epi64_0(high);
-  to[stride * 3] = _mm_extract_epi64_1(high);
+  to[stride * 2] = _mm_extract_epi64(high, 0);
+  to[stride * 3] = _mm_extract_epi64(high, 1);
 }
 template <>
 EIGEN_STRONG_INLINE void pstore1<Packet4l>(int64_t* to, const int64_t& a) {
@@ -653,21 +652,21 @@
 }
 template <>
 EIGEN_STRONG_INLINE int64_t pfirst<Packet4l>(const Packet4l& a) {
-  return _mm_extract_epi64_0(_mm256_castsi256_si128(a));
+  return _mm_cvtsi128_si64(_mm256_castsi256_si128(a));
 }
 template <>
 EIGEN_STRONG_INLINE uint64_t pfirst<Packet4ul>(const Packet4ul& a) {
-  return _mm_extract_epi64_0(_mm256_castsi256_si128(a));
+  return _mm_cvtsi128_si64(_mm256_castsi256_si128(a));
 }
 template <>
 EIGEN_STRONG_INLINE int64_t predux<Packet4l>(const Packet4l& a) {
   __m128i r = _mm_add_epi64(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1));
-  return _mm_extract_epi64_0(r) + _mm_extract_epi64_1(r);
+  return _mm_extract_epi64(r, 0) + _mm_extract_epi64(r, 1);
 }
 template <>
 EIGEN_STRONG_INLINE uint64_t predux<Packet4ul>(const Packet4ul& a) {
   __m128i r = _mm_add_epi64(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1));
-  return numext::bit_cast<uint64_t>(_mm_extract_epi64_0(r) + _mm_extract_epi64_1(r));
+  return numext::bit_cast<uint64_t>(_mm_extract_epi64(r, 0) + _mm_extract_epi64(r, 1));
 }
 #define MM256_SHUFFLE_EPI64(A, B, M) _mm256_shuffle_pd(_mm256_castsi256_pd(A), _mm256_castsi256_pd(B), M)
 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4l, 4>& kernel) {
@@ -1804,12 +1803,14 @@
 // pabs should be ok
 template <>
 EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a) {
-  const Packet8f mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF));
+  const Packet8f mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+                                                              0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF));
   return _mm256_and_ps(a, mask);
 }
 template <>
 EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a) {
-  const Packet4d mask = _mm256_castsi256_pd(_mm256_set1_epi64x(0x7FFFFFFFFFFFFFFF));
+  const Packet4d mask = _mm256_castsi256_pd(_mm256_setr_epi32(0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF,
+                                                              0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF));
   return _mm256_and_pd(a, mask);
 }
 template <>
@@ -1829,32 +1830,28 @@
 
 template <>
 EIGEN_STRONG_INLINE Packet8h psignbit(const Packet8h& a) {
-  return _mm_cmpgt_epi16(_mm_setzero_si128(), a);
+  return _mm_srai_epi16(a, 15);
 }
 template <>
 EIGEN_STRONG_INLINE Packet8bf psignbit(const Packet8bf& a) {
-  return _mm_cmpgt_epi16(_mm_setzero_si128(), a);
+  return _mm_srai_epi16(a, 15);
 }
 template <>
 EIGEN_STRONG_INLINE Packet8f psignbit(const Packet8f& a) {
-#ifdef EIGEN_VECTORIZE_AVX2
-  return _mm256_castsi256_ps(_mm256_cmpgt_epi32(_mm256_setzero_si256(), _mm256_castps_si256(a)));
-#else
-  return _mm256_castsi256_ps(parithmetic_shift_right<31>(Packet8i(_mm256_castps_si256(a))));
-#endif
+  return _mm256_castsi256_ps(parithmetic_shift_right<31>((Packet8i)_mm256_castps_si256(a)));
 }
 template <>
 EIGEN_STRONG_INLINE Packet8ui psignbit(const Packet8ui& a) {
-  return _mm256_setzero_si256();
+  return pzero(a);
 }
 #ifdef EIGEN_VECTORIZE_AVX2
 template <>
 EIGEN_STRONG_INLINE Packet4d psignbit(const Packet4d& a) {
-  return _mm256_castsi256_pd(_mm256_cmpgt_epi64(_mm256_setzero_si256(), _mm256_castpd_si256(a)));
+  return _mm256_castsi256_pd(parithmetic_shift_right<63>((Packet4l)_mm256_castpd_si256(a)));
 }
 template <>
 EIGEN_STRONG_INLINE Packet4ul psignbit(const Packet4ul& a) {
-  return _mm256_setzero_si256();
+  return pzero(a);
 }
 #endif
 
diff --git a/Eigen/src/Core/arch/AVX/TypeCasting.h b/Eigen/src/Core/arch/AVX/TypeCasting.h
index 9dcd6ef..3688f8d 100644
--- a/Eigen/src/Core/arch/AVX/TypeCasting.h
+++ b/Eigen/src/Core/arch/AVX/TypeCasting.h
@@ -47,13 +47,6 @@
 struct type_casting_traits<bfloat16, float> : vectorized_type_casting_traits<bfloat16, float> {};
 template <>
 struct type_casting_traits<float, bfloat16> : vectorized_type_casting_traits<float, bfloat16> {};
-
-#ifdef EIGEN_VECTORIZE_AVX2
-template <>
-struct type_casting_traits<double, int64_t> : vectorized_type_casting_traits<double, int64_t> {};
-template <>
-struct type_casting_traits<int64_t, double> : vectorized_type_casting_traits<int64_t, double> {};
-#endif
 #endif
 
 template <>
@@ -196,63 +189,6 @@
 
 #ifdef EIGEN_VECTORIZE_AVX2
 template <>
-EIGEN_STRONG_INLINE Packet4l pcast<Packet4d, Packet4l>(const Packet4d& a) {
-#if defined(EIGEN_VECTORIZE_AVX512DQ) && defined(EIGEN_VECTORIZE_AVS512VL)
-  return _mm256_cvttpd_epi64(a);
-#else
-
-  // if 'a' exceeds the numerical limits of int64_t, the behavior is undefined
-
-  // e <= 0 corresponds to |a| < 1, which should result in zero. incidentally, intel intrinsics with shift arguments
-  // greater than or equal to 64 produce zero. furthermore, negative shifts appear to be interpreted as large positive
-  // shifts (two's complement), which also result in zero. therefore, e does not need to be clamped to [0, 64)
-
-  constexpr int kTotalBits = sizeof(double) * CHAR_BIT, kMantissaBits = std::numeric_limits<double>::digits - 1,
-                kExponentBits = kTotalBits - kMantissaBits - 1, kBias = (1 << (kExponentBits - 1)) - 1;
-
-  const __m256i cst_one = _mm256_set1_epi64x(1);
-  const __m256i cst_total_bits = _mm256_set1_epi64x(kTotalBits);
-  const __m256i cst_bias = _mm256_set1_epi64x(kBias);
-
-  __m256i a_bits = _mm256_castpd_si256(a);
-  // shift left by 1 to clear the sign bit, and shift right by kMantissaBits + 1 to recover biased exponent
-  __m256i biased_e = _mm256_srli_epi64(_mm256_slli_epi64(a_bits, 1), kMantissaBits + 1);
-  __m256i e = _mm256_sub_epi64(biased_e, cst_bias);
-
-  // shift to the left by kExponentBits + 1 to clear the sign and exponent bits
-  __m256i shifted_mantissa = _mm256_slli_epi64(a_bits, kExponentBits + 1);
-  // shift to the right by kTotalBits - e to convert the significand to an integer
-  __m256i result_significand = _mm256_srlv_epi64(shifted_mantissa, _mm256_sub_epi64(cst_total_bits, e));
-
-  // add the implied bit
-  __m256i result_exponent = _mm256_sllv_epi64(cst_one, e);
-  // e <= 0 is interpreted as a large positive shift (2's complement), which also conveniently results in zero
-  __m256i result = _mm256_add_epi64(result_significand, result_exponent);
-  // handle negative arguments
-  __m256i sign_mask = _mm256_cmpgt_epi64(_mm256_setzero_si256(), a_bits);
-  result = _mm256_sub_epi64(_mm256_xor_si256(result, sign_mask), sign_mask);
-  return result;
-#endif
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet4d pcast<Packet4l, Packet4d>(const Packet4l& a) {
-#if defined(EIGEN_VECTORIZE_AVX512DQ) && defined(EIGEN_VECTORIZE_AVS512VL)
-  return _mm256_cvtepi64_pd(a);
-#else
-  EIGEN_ALIGN16 int64_t aux[4];
-  pstore(aux, a);
-  return _mm256_set_pd(static_cast<double>(aux[3]), static_cast<double>(aux[2]), static_cast<double>(aux[1]),
-                       static_cast<double>(aux[0]));
-#endif
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet4d pcast<Packet2l, Packet4d>(const Packet2l& a, const Packet2l& b) {
-  return _mm256_set_m128d((pcast<Packet2l, Packet2d>(b)), (pcast<Packet2l, Packet2d>(a)));
-}
-
-template <>
 EIGEN_STRONG_INLINE Packet4ul preinterpret<Packet4ul, Packet4l>(const Packet4l& a) {
   return Packet4ul(a);
 }
@@ -262,21 +198,6 @@
   return Packet4l(a);
 }
 
-template <>
-EIGEN_STRONG_INLINE Packet4l preinterpret<Packet4l, Packet4d>(const Packet4d& a) {
-  return _mm256_castpd_si256(a);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet4d preinterpret<Packet4d, Packet4l>(const Packet4l& a) {
-  return _mm256_castsi256_pd(a);
-}
-
-// truncation operations
-template <>
-EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet4l>(const Packet4l& a) {
-  return _mm256_castsi256_si128(a);
-}
 #endif
 
 template <>
diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h
index b70c7fe..c14b4a0 100644
--- a/Eigen/src/Core/arch/AVX512/Complex.h
+++ b/Eigen/src/Core/arch/AVX512/Complex.h
@@ -40,7 +40,6 @@
     HasNegate = 1,
     HasSqrt = 1,
     HasLog = 1,
-    HasExp = 1,
     HasAbs = 0,
     HasAbs2 = 0,
     HasMin = 0,
@@ -461,11 +460,6 @@
   return plog_complex<Packet8cf>(a);
 }
 
-template <>
-EIGEN_STRONG_INLINE Packet8cf pexp<Packet8cf>(const Packet8cf& a) {
-  return pexp_complex<Packet8cf>(a);
-}
-
 }  // end namespace internal
 }  // end namespace Eigen
 
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index ed2f189..b6d2d98 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -34,7 +34,6 @@
 typedef __m512 Packet16f;
 typedef __m512i Packet16i;
 typedef __m512d Packet8d;
-// TODO(rmlarsen): Add support for Packet8l.
 #ifndef EIGEN_VECTORIZE_AVX512FP16
 typedef eigen_packet_wrapper<__m256i, 1> Packet16h;
 #endif
diff --git a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
index fc11174..131e6f1 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
@@ -1,870 +1,870 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-//
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_PACKET_MATH_FP16_AVX512_H
-#define EIGEN_PACKET_MATH_FP16_AVX512_H
-
-// IWYU pragma: private
-#include "../../InternalHeaderCheck.h"
-
-namespace Eigen {
-
-namespace internal {
-
-typedef __m512h Packet32h;
-typedef eigen_packet_wrapper<__m256i, 1> Packet16h;
-typedef eigen_packet_wrapper<__m128i, 2> Packet8h;
-
-template <>
-struct is_arithmetic<Packet8h> {
-  enum { value = true };
-};
-
-template <>
-struct packet_traits<half> : default_packet_traits {
-  typedef Packet32h type;
-  typedef Packet16h half;
-  enum {
-    Vectorizable = 1,
-    AlignedOnScalar = 1,
-    size = 32,
-
-    HasCmp = 1,
-    HasAdd = 1,
-    HasSub = 1,
-    HasMul = 1,
-    HasDiv = 1,
-    HasNegate = 1,
-    HasAbs = 1,
-    HasAbs2 = 0,
-    HasMin = 1,
-    HasMax = 1,
-    HasConj = 1,
-    HasSetLinear = 0,
-    HasLog = 1,
-    HasLog1p = 1,
-    HasExp = 1,
-    HasExpm1 = 1,
-    HasSqrt = 1,
-    HasRsqrt = 1,
-    // These ones should be implemented in future
-    HasBessel = 0,
-    HasNdtri = 0,
-    HasSin = EIGEN_FAST_MATH,
-    HasCos = EIGEN_FAST_MATH,
-    HasTanh = EIGEN_FAST_MATH,
-    HasErf = 0,  // EIGEN_FAST_MATH,
-    HasBlend = 0,
-    HasRound = 1,
-    HasFloor = 1,
-    HasCeil = 1,
-    HasRint = 1
-  };
-};
-
-template <>
-struct unpacket_traits<Packet32h> {
-  typedef Eigen::half type;
-  typedef Packet16h half;
-  enum {
-    size = 32,
-    alignment = Aligned64,
-    vectorizable = true,
-    masked_load_available = false,
-    masked_store_available = false
-  };
-};
-
-template <>
-struct unpacket_traits<Packet16h> {
-  typedef Eigen::half type;
-  typedef Packet8h half;
-  enum {
-    size = 16,
-    alignment = Aligned32,
-    vectorizable = true,
-    masked_load_available = false,
-    masked_store_available = false
-  };
-};
-
-template <>
-struct unpacket_traits<Packet8h> {
-  typedef Eigen::half type;
-  typedef Packet8h half;
-  enum {
-    size = 8,
-    alignment = Aligned16,
-    vectorizable = true,
-    masked_load_available = false,
-    masked_store_available = false
-  };
-};
-
-// Memory functions
-
-// pset1
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pset1<Packet32h>(const Eigen::half& from) {
-  return _mm512_set1_ph(static_cast<_Float16>(from));
-}
-
-// pset1frombits
-template <>
-EIGEN_STRONG_INLINE Packet32h pset1frombits<Packet32h>(unsigned short from) {
-  return _mm512_castsi512_ph(_mm512_set1_epi16(from));
-}
-
-// pfirst
-
-template <>
-EIGEN_STRONG_INLINE Eigen::half pfirst<Packet32h>(const Packet32h& from) {
-#ifdef EIGEN_VECTORIZE_AVX512DQ
-  return half_impl::raw_uint16_to_half(
-      static_cast<unsigned short>(_mm256_extract_epi16(_mm512_extracti32x8_epi32(_mm512_castph_si512(from), 0), 0)));
-#else
-  Eigen::half dest[32];
-  _mm512_storeu_ph(dest, from);
-  return dest[0];
-#endif
-}
-
-// pload
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pload<Packet32h>(const Eigen::half* from) {
-  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ph(from);
-}
-
-// ploadu
-
-template <>
-EIGEN_STRONG_INLINE Packet32h ploadu<Packet32h>(const Eigen::half* from) {
-  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ph(from);
-}
-
-// pstore
-
-template <>
-EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet32h& from) {
-  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ph(to, from);
-}
-
-// pstoreu
-
-template <>
-EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet32h& from) {
-  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ph(to, from);
-}
-
-// ploaddup
-template <>
-EIGEN_STRONG_INLINE Packet32h ploaddup<Packet32h>(const Eigen::half* from) {
-  __m512h a = _mm512_castph256_ph512(_mm256_loadu_ph(from));
-  return _mm512_permutexvar_ph(_mm512_set_epi16(15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6,
-                                                5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0),
-                               a);
-}
-
-// ploadquad
-template <>
-EIGEN_STRONG_INLINE Packet32h ploadquad<Packet32h>(const Eigen::half* from) {
-  __m512h a = _mm512_castph128_ph512(_mm_loadu_ph(from));
-  return _mm512_permutexvar_ph(
-      _mm512_set_epi16(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0),
-      a);
-}
-
-// pabs
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pabs<Packet32h>(const Packet32h& a) {
-  return _mm512_abs_ph(a);
-}
-
-// psignbit
-
-template <>
-EIGEN_STRONG_INLINE Packet32h psignbit<Packet32h>(const Packet32h& a) {
-  return _mm512_castsi512_ph(_mm512_srai_epi16(_mm512_castph_si512(a), 15));
-}
-
-// pmin
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmin<Packet32h>(const Packet32h& a, const Packet32h& b) {
-  return _mm512_min_ph(a, b);
-}
-
-// pmax
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmax<Packet32h>(const Packet32h& a, const Packet32h& b) {
-  return _mm512_max_ph(a, b);
-}
-
-// plset
-template <>
-EIGEN_STRONG_INLINE Packet32h plset<Packet32h>(const half& a) {
-  return _mm512_add_ph(_mm512_set1_ph(a),
-                       _mm512_set_ph(31.0f, 30.0f, 29.0f, 28.0f, 27.0f, 26.0f, 25.0f, 24.0f, 23.0f, 22.0f, 21.0f, 20.0f,
-                                     19.0f, 18.0f, 17.0f, 16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f,
-                                     7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f));
-}
-
-// por
-
-template <>
-EIGEN_STRONG_INLINE Packet32h por(const Packet32h& a, const Packet32h& b) {
-  return _mm512_castsi512_ph(_mm512_or_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
-}
-
-// pxor
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pxor(const Packet32h& a, const Packet32h& b) {
-  return _mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
-}
-
-// pand
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pand(const Packet32h& a, const Packet32h& b) {
-  return _mm512_castsi512_ph(_mm512_and_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
-}
-
-// pandnot
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pandnot(const Packet32h& a, const Packet32h& b) {
-  return _mm512_castsi512_ph(_mm512_andnot_si512(_mm512_castph_si512(b), _mm512_castph_si512(a)));
-}
-
-// pselect
-
-template <>
-EIGEN_DEVICE_FUNC inline Packet32h pselect(const Packet32h& mask, const Packet32h& a, const Packet32h& b) {
-  __mmask32 mask32 = _mm512_cmp_epi16_mask(_mm512_castph_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ);
-  return _mm512_mask_blend_ph(mask32, a, b);
-}
-
-// pcmp_eq
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pcmp_eq(const Packet32h& a, const Packet32h& b) {
-  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_EQ_OQ);
-  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));
-}
-
-// pcmp_le
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pcmp_le(const Packet32h& a, const Packet32h& b) {
-  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LE_OQ);
-  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));
-}
-
-// pcmp_lt
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pcmp_lt(const Packet32h& a, const Packet32h& b) {
-  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LT_OQ);
-  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));
-}
-
-// pcmp_lt_or_nan
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pcmp_lt_or_nan(const Packet32h& a, const Packet32h& b) {
-  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_NGE_UQ);
-  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi16(0), mask, 0xffffu));
-}
-
-// padd
-
-template <>
-EIGEN_STRONG_INLINE Packet32h padd<Packet32h>(const Packet32h& a, const Packet32h& b) {
-  return _mm512_add_ph(a, b);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) {
-  return _mm256_castph_si256(_mm256_add_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {
-  return _mm_castph_si128(_mm_add_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
-}
-
-// psub
-
-template <>
-EIGEN_STRONG_INLINE Packet32h psub<Packet32h>(const Packet32h& a, const Packet32h& b) {
-  return _mm512_sub_ph(a, b);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h psub<Packet16h>(const Packet16h& a, const Packet16h& b) {
-  return _mm256_castph_si256(_mm256_sub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h psub<Packet8h>(const Packet8h& a, const Packet8h& b) {
-  return _mm_castph_si128(_mm_sub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
-}
-
-// pmul
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmul<Packet32h>(const Packet32h& a, const Packet32h& b) {
-  return _mm512_mul_ph(a, b);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) {
-  return _mm256_castph_si256(_mm256_mul_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) {
-  return _mm_castph_si128(_mm_mul_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
-}
-
-// pdiv
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pdiv<Packet32h>(const Packet32h& a, const Packet32h& b) {
-  return _mm512_div_ph(a, b);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pdiv<Packet16h>(const Packet16h& a, const Packet16h& b) {
-  return _mm256_castph_si256(_mm256_div_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const Packet8h& b) {
-  return _mm_castph_si128(_mm_div_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
-}
-
-// pround
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pround<Packet32h>(const Packet32h& a) {
-  // Work-around for default std::round rounding mode.
-
-  // Mask for the sign bit
-  const Packet32h signMask = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x8000u));
-  // The largest half-preicision float less than 0.5
-  const Packet32h prev0dot5 = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x37FFu));
-
-  return _mm512_roundscale_ph(padd(por(pand(a, signMask), prev0dot5), a), _MM_FROUND_TO_ZERO);
-}
-
-// print
-
-template <>
-EIGEN_STRONG_INLINE Packet32h print<Packet32h>(const Packet32h& a) {
-  return _mm512_roundscale_ph(a, _MM_FROUND_CUR_DIRECTION);
-}
-
-// pceil
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pceil<Packet32h>(const Packet32h& a) {
-  return _mm512_roundscale_ph(a, _MM_FROUND_TO_POS_INF);
-}
-
-// pfloor
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pfloor<Packet32h>(const Packet32h& a) {
-  return _mm512_roundscale_ph(a, _MM_FROUND_TO_NEG_INF);
-}
-
-// predux
-template <>
-EIGEN_STRONG_INLINE half predux<Packet32h>(const Packet32h& a) {
-  return (half)_mm512_reduce_add_ph(a);
-}
-
-template <>
-EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& a) {
-  return (half)_mm256_reduce_add_ph(_mm256_castsi256_ph(a));
-}
-
-template <>
-EIGEN_STRONG_INLINE half predux<Packet8h>(const Packet8h& a) {
-  return (half)_mm_reduce_add_ph(_mm_castsi128_ph(a));
-}
-
-// predux_half_dowto4
-template <>
-EIGEN_STRONG_INLINE Packet16h predux_half_dowto4<Packet32h>(const Packet32h& a) {
-#ifdef EIGEN_VECTORIZE_AVX512DQ
-  __m256i lowHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 0));
-  __m256i highHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 1));
-
-  return Packet16h(padd<Packet16h>(lowHalf, highHalf));
-#else
-  Eigen::half data[32];
-  _mm512_storeu_ph(data, a);
-
-  __m256i lowHalf = _mm256_castph_si256(_mm256_loadu_ph(data));
-  __m256i highHalf = _mm256_castph_si256(_mm256_loadu_ph(data + 16));
-
-  return Packet16h(padd<Packet16h>(lowHalf, highHalf));
-#endif
-}
-
-// predux_max
-
-// predux_min
-
-// predux_mul
-
-#ifdef EIGEN_VECTORIZE_FMA
-
-// pmadd
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
-  return _mm512_fmadd_ph(a, b, c);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
-  return _mm256_castph_si256(_mm256_fmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
-  return _mm_castph_si128(_mm_fmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
-}
-
-// pmsub
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
-  return _mm512_fmsub_ph(a, b, c);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
-  return _mm256_castph_si256(_mm256_fmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
-  return _mm_castph_si128(_mm_fmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
-}
-
-// pnmadd
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pnmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
-  return _mm512_fnmadd_ph(a, b, c);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pnmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
-  return _mm256_castph_si256(_mm256_fnmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pnmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
-  return _mm_castph_si128(_mm_fnmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
-}
-
-// pnmsub
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pnmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
-  return _mm512_fnmsub_ph(a, b, c);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pnmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
-  return _mm256_castph_si256(_mm256_fnmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pnmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
-  return _mm_castph_si128(_mm_fnmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
-}
-
-#endif
-
-// pnegate
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pnegate<Packet32h>(const Packet32h& a) {
-  return _mm512_sub_ph(_mm512_set1_ph(0.0), a);
-}
-
-// pconj
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pconj<Packet32h>(const Packet32h& a) {
-  return a;
-}
-
-// psqrt
-
-template <>
-EIGEN_STRONG_INLINE Packet32h psqrt<Packet32h>(const Packet32h& a) {
-  return _mm512_sqrt_ph(a);
-}
-
-// prsqrt
-
-template <>
-EIGEN_STRONG_INLINE Packet32h prsqrt<Packet32h>(const Packet32h& a) {
-  return _mm512_rsqrt_ph(a);
-}
-
-// preciprocal
-
-template <>
-EIGEN_STRONG_INLINE Packet32h preciprocal<Packet32h>(const Packet32h& a) {
-  return _mm512_rcp_ph(a);
-}
-
-// ptranspose
-
-EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 32>& a) {
-  __m512i t[32];
-
-  EIGEN_UNROLL_LOOP
-  for (int i = 0; i < 16; i++) {
-    t[2 * i] = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));
-    t[2 * i + 1] =
-        _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));
-  }
-
-  __m512i p[32];
-
-  EIGEN_UNROLL_LOOP
-  for (int i = 0; i < 8; i++) {
-    p[4 * i] = _mm512_unpacklo_epi32(t[4 * i], t[4 * i + 2]);
-    p[4 * i + 1] = _mm512_unpackhi_epi32(t[4 * i], t[4 * i + 2]);
-    p[4 * i + 2] = _mm512_unpacklo_epi32(t[4 * i + 1], t[4 * i + 3]);
-    p[4 * i + 3] = _mm512_unpackhi_epi32(t[4 * i + 1], t[4 * i + 3]);
-  }
-
-  __m512i q[32];
-
-  EIGEN_UNROLL_LOOP
-  for (int i = 0; i < 4; i++) {
-    q[8 * i] = _mm512_unpacklo_epi64(p[8 * i], p[8 * i + 4]);
-    q[8 * i + 1] = _mm512_unpackhi_epi64(p[8 * i], p[8 * i + 4]);
-    q[8 * i + 2] = _mm512_unpacklo_epi64(p[8 * i + 1], p[8 * i + 5]);
-    q[8 * i + 3] = _mm512_unpackhi_epi64(p[8 * i + 1], p[8 * i + 5]);
-    q[8 * i + 4] = _mm512_unpacklo_epi64(p[8 * i + 2], p[8 * i + 6]);
-    q[8 * i + 5] = _mm512_unpackhi_epi64(p[8 * i + 2], p[8 * i + 6]);
-    q[8 * i + 6] = _mm512_unpacklo_epi64(p[8 * i + 3], p[8 * i + 7]);
-    q[8 * i + 7] = _mm512_unpackhi_epi64(p[8 * i + 3], p[8 * i + 7]);
-  }
-
-  __m512i f[32];
-
-#define PACKET32H_TRANSPOSE_HELPER(X, Y)                                                            \
-  do {                                                                                              \
-    f[Y * 8] = _mm512_inserti32x4(f[Y * 8], _mm512_extracti32x4_epi32(q[X * 8], Y), X);             \
-    f[Y * 8 + 1] = _mm512_inserti32x4(f[Y * 8 + 1], _mm512_extracti32x4_epi32(q[X * 8 + 1], Y), X); \
-    f[Y * 8 + 2] = _mm512_inserti32x4(f[Y * 8 + 2], _mm512_extracti32x4_epi32(q[X * 8 + 2], Y), X); \
-    f[Y * 8 + 3] = _mm512_inserti32x4(f[Y * 8 + 3], _mm512_extracti32x4_epi32(q[X * 8 + 3], Y), X); \
-    f[Y * 8 + 4] = _mm512_inserti32x4(f[Y * 8 + 4], _mm512_extracti32x4_epi32(q[X * 8 + 4], Y), X); \
-    f[Y * 8 + 5] = _mm512_inserti32x4(f[Y * 8 + 5], _mm512_extracti32x4_epi32(q[X * 8 + 5], Y), X); \
-    f[Y * 8 + 6] = _mm512_inserti32x4(f[Y * 8 + 6], _mm512_extracti32x4_epi32(q[X * 8 + 6], Y), X); \
-    f[Y * 8 + 7] = _mm512_inserti32x4(f[Y * 8 + 7], _mm512_extracti32x4_epi32(q[X * 8 + 7], Y), X); \
-  } while (false);
-
-  PACKET32H_TRANSPOSE_HELPER(0, 0);
-  PACKET32H_TRANSPOSE_HELPER(1, 1);
-  PACKET32H_TRANSPOSE_HELPER(2, 2);
-  PACKET32H_TRANSPOSE_HELPER(3, 3);
-
-  PACKET32H_TRANSPOSE_HELPER(1, 0);
-  PACKET32H_TRANSPOSE_HELPER(2, 0);
-  PACKET32H_TRANSPOSE_HELPER(3, 0);
-  PACKET32H_TRANSPOSE_HELPER(2, 1);
-  PACKET32H_TRANSPOSE_HELPER(3, 1);
-  PACKET32H_TRANSPOSE_HELPER(3, 2);
-
-  PACKET32H_TRANSPOSE_HELPER(0, 1);
-  PACKET32H_TRANSPOSE_HELPER(0, 2);
-  PACKET32H_TRANSPOSE_HELPER(0, 3);
-  PACKET32H_TRANSPOSE_HELPER(1, 2);
-  PACKET32H_TRANSPOSE_HELPER(1, 3);
-  PACKET32H_TRANSPOSE_HELPER(2, 3);
-
-#undef PACKET32H_TRANSPOSE_HELPER
-
-  EIGEN_UNROLL_LOOP
-  for (int i = 0; i < 32; i++) {
-    a.packet[i] = _mm512_castsi512_ph(f[i]);
-  }
-}
-
-EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 4>& a) {
-  __m512i p0, p1, p2, p3, t0, t1, t2, t3, a0, a1, a2, a3;
-  t0 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));
-  t1 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));
-  t2 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));
-  t3 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));
-
-  p0 = _mm512_unpacklo_epi32(t0, t2);
-  p1 = _mm512_unpackhi_epi32(t0, t2);
-  p2 = _mm512_unpacklo_epi32(t1, t3);
-  p3 = _mm512_unpackhi_epi32(t1, t3);
-
-  a0 = p0;
-  a1 = p1;
-  a2 = p2;
-  a3 = p3;
-
-  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p1, 0), 1);
-  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p0, 1), 0);
-
-  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p2, 0), 2);
-  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p0, 2), 0);
-
-  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p3, 0), 3);
-  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p0, 3), 0);
-
-  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p2, 1), 2);
-  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p1, 2), 1);
-
-  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p3, 2), 3);
-  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p2, 3), 2);
-
-  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p3, 1), 3);
-  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p1, 3), 1);
-
-  a.packet[0] = _mm512_castsi512_ph(a0);
-  a.packet[1] = _mm512_castsi512_ph(a1);
-  a.packet[2] = _mm512_castsi512_ph(a2);
-  a.packet[3] = _mm512_castsi512_ph(a3);
-}
-
-// preverse
-
-template <>
-EIGEN_STRONG_INLINE Packet32h preverse(const Packet32h& a) {
-  return _mm512_permutexvar_ph(_mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
-                                                20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),
-                               a);
-}
-
-// pscatter
-
-template <>
-EIGEN_STRONG_INLINE void pscatter<half, Packet32h>(half* to, const Packet32h& from, Index stride) {
-  EIGEN_ALIGN64 half aux[32];
-  pstore(aux, from);
-
-  EIGEN_UNROLL_LOOP
-  for (int i = 0; i < 32; i++) {
-    to[stride * i] = aux[i];
-  }
-}
-
-// pgather
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pgather<Eigen::half, Packet32h>(const Eigen::half* from, Index stride) {
-  return _mm512_castsi512_ph(_mm512_set_epi16(
-      from[31 * stride].x, from[30 * stride].x, from[29 * stride].x, from[28 * stride].x, from[27 * stride].x,
-      from[26 * stride].x, from[25 * stride].x, from[24 * stride].x, from[23 * stride].x, from[22 * stride].x,
-      from[21 * stride].x, from[20 * stride].x, from[19 * stride].x, from[18 * stride].x, from[17 * stride].x,
-      from[16 * stride].x, from[15 * stride].x, from[14 * stride].x, from[13 * stride].x, from[12 * stride].x,
-      from[11 * stride].x, from[10 * stride].x, from[9 * stride].x, from[8 * stride].x, from[7 * stride].x,
-      from[6 * stride].x, from[5 * stride].x, from[4 * stride].x, from[3 * stride].x, from[2 * stride].x,
-      from[1 * stride].x, from[0 * stride].x));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pcos<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h psin<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h plog<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h plog2<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h plog1p<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h pexp<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h pexpm1<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h ptanh<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h pfrexp<Packet16h>(const Packet16h&, Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h pldexp<Packet16h>(const Packet16h&, const Packet16h&);
-
-EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h& a, const Packet16h& b) {
-  __m512d result = _mm512_undefined_pd();
-  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(a), 0);
-  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(b), 1);
-  return _mm512_castpd_ph(result);
-}
-
-EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h& x, Packet16h& a, Packet16h& b) {
-  a = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 0));
-  b = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 1));
-}
-
-// psin
-template <>
-EIGEN_STRONG_INLINE Packet32h psin<Packet32h>(const Packet32h& a) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h lowOut = psin(low);
-  Packet16h highOut = psin(high);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-// pcos
-template <>
-EIGEN_STRONG_INLINE Packet32h pcos<Packet32h>(const Packet32h& a) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h lowOut = pcos(low);
-  Packet16h highOut = pcos(high);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-// plog
-template <>
-EIGEN_STRONG_INLINE Packet32h plog<Packet32h>(const Packet32h& a) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h lowOut = plog(low);
-  Packet16h highOut = plog(high);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-// plog2
-template <>
-EIGEN_STRONG_INLINE Packet32h plog2<Packet32h>(const Packet32h& a) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h lowOut = plog2(low);
-  Packet16h highOut = plog2(high);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-// plog1p
-template <>
-EIGEN_STRONG_INLINE Packet32h plog1p<Packet32h>(const Packet32h& a) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h lowOut = plog1p(low);
-  Packet16h highOut = plog1p(high);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-// pexp
-template <>
-EIGEN_STRONG_INLINE Packet32h pexp<Packet32h>(const Packet32h& a) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h lowOut = pexp(low);
-  Packet16h highOut = pexp(high);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-// pexpm1
-template <>
-EIGEN_STRONG_INLINE Packet32h pexpm1<Packet32h>(const Packet32h& a) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h lowOut = pexpm1(low);
-  Packet16h highOut = pexpm1(high);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-// ptanh
-template <>
-EIGEN_STRONG_INLINE Packet32h ptanh<Packet32h>(const Packet32h& a) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h lowOut = ptanh(low);
-  Packet16h highOut = ptanh(high);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-// pfrexp
-template <>
-EIGEN_STRONG_INLINE Packet32h pfrexp<Packet32h>(const Packet32h& a, Packet32h& exponent) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h exp1 = _mm256_undefined_si256();
-  Packet16h exp2 = _mm256_undefined_si256();
-
-  Packet16h lowOut = pfrexp(low, exp1);
-  Packet16h highOut = pfrexp(high, exp2);
-
-  exponent = combine2Packet16h(exp1, exp2);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-// pldexp
-template <>
-EIGEN_STRONG_INLINE Packet32h pldexp<Packet32h>(const Packet32h& a, const Packet32h& exponent) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h exp1;
-  Packet16h exp2;
-  extract2Packet16h(exponent, exp1, exp2);
-
-  Packet16h lowOut = pldexp(low, exp1);
-  Packet16h highOut = pldexp(high, exp2);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-}  // end namespace internal
-}  // end namespace Eigen
-
-#endif  // EIGEN_PACKET_MATH_FP16_AVX512_H
+// This file is part of Eigen, a lightweight C++ template library

+// for linear algebra.

+//

+//

+//

+// This Source Code Form is subject to the terms of the Mozilla

+// Public License v. 2.0. If a copy of the MPL was not distributed

+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

+

+#ifndef EIGEN_PACKET_MATH_FP16_AVX512_H

+#define EIGEN_PACKET_MATH_FP16_AVX512_H

+

+// IWYU pragma: private

+#include "../../InternalHeaderCheck.h"

+

+namespace Eigen {

+

+namespace internal {

+

+typedef __m512h Packet32h;

+typedef eigen_packet_wrapper<__m256i, 1> Packet16h;

+typedef eigen_packet_wrapper<__m128i, 2> Packet8h;

+

+template <>

+struct is_arithmetic<Packet8h> {

+  enum { value = true };

+};

+

+template <>

+struct packet_traits<half> : default_packet_traits {

+  typedef Packet32h type;

+  typedef Packet16h half;

+  enum {

+    Vectorizable = 1,

+    AlignedOnScalar = 1,

+    size = 32,

+

+    HasCmp = 1,

+    HasAdd = 1,

+    HasSub = 1,

+    HasMul = 1,

+    HasDiv = 1,

+    HasNegate = 1,

+    HasAbs = 1,

+    HasAbs2 = 0,

+    HasMin = 1,

+    HasMax = 1,

+    HasConj = 1,

+    HasSetLinear = 0,

+    HasLog = 1,

+    HasLog1p = 1,

+    HasExp = 1,

+    HasExpm1 = 1,

+    HasSqrt = 1,

+    HasRsqrt = 1,

+    // These ones should be implemented in future

+    HasBessel = 0,

+    HasNdtri = 0,

+    HasSin = EIGEN_FAST_MATH,

+    HasCos = EIGEN_FAST_MATH,

+    HasTanh = EIGEN_FAST_MATH,

+    HasErf = 0,  // EIGEN_FAST_MATH,

+    HasBlend = 0,

+    HasRound = 1,

+    HasFloor = 1,

+    HasCeil = 1,

+    HasRint = 1

+  };

+};

+

+template <>

+struct unpacket_traits<Packet32h> {

+  typedef Eigen::half type;

+  typedef Packet16h half;

+  enum {

+    size = 32,

+    alignment = Aligned64,

+    vectorizable = true,

+    masked_load_available = false,

+    masked_store_available = false

+  };

+};

+

+template <>

+struct unpacket_traits<Packet16h> {

+  typedef Eigen::half type;

+  typedef Packet8h half;

+  enum {

+    size = 16,

+    alignment = Aligned32,

+    vectorizable = true,

+    masked_load_available = false,

+    masked_store_available = false

+  };

+};

+

+template <>

+struct unpacket_traits<Packet8h> {

+  typedef Eigen::half type;

+  typedef Packet8h half;

+  enum {

+    size = 8,

+    alignment = Aligned16,

+    vectorizable = true,

+    masked_load_available = false,

+    masked_store_available = false

+  };

+};

+

+// Memory functions

+

+// pset1

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pset1<Packet32h>(const Eigen::half& from) {

+  return _mm512_set1_ph(static_cast<_Float16>(from));

+}

+

+// pset1frombits

+template <>

+EIGEN_STRONG_INLINE Packet32h pset1frombits<Packet32h>(unsigned short from) {

+  return _mm512_castsi512_ph(_mm512_set1_epi16(from));

+}

+

+// pfirst

+

+template <>

+EIGEN_STRONG_INLINE Eigen::half pfirst<Packet32h>(const Packet32h& from) {

+#ifdef EIGEN_VECTORIZE_AVX512DQ

+  return half_impl::raw_uint16_to_half(

+      static_cast<unsigned short>(_mm256_extract_epi16(_mm512_extracti32x8_epi32(_mm512_castph_si512(from), 0), 0)));

+#else

+  Eigen::half dest[32];

+  _mm512_storeu_ph(dest, from);

+  return dest[0];

+#endif

+}

+

+// pload

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pload<Packet32h>(const Eigen::half* from) {

+  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ph(from);

+}

+

+// ploadu

+

+template <>

+EIGEN_STRONG_INLINE Packet32h ploadu<Packet32h>(const Eigen::half* from) {

+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ph(from);

+}

+

+// pstore

+

+template <>

+EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet32h& from) {

+  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ph(to, from);

+}

+

+// pstoreu

+

+template <>

+EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet32h& from) {

+  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ph(to, from);

+}

+

+// ploaddup

+template <>

+EIGEN_STRONG_INLINE Packet32h ploaddup<Packet32h>(const Eigen::half* from) {

+  __m512h a = _mm512_castph256_ph512(_mm256_loadu_ph(from));

+  return _mm512_permutexvar_ph(_mm512_set_epi16(15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6,

+                                                5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0),

+                               a);

+}

+

+// ploadquad

+template <>

+EIGEN_STRONG_INLINE Packet32h ploadquad<Packet32h>(const Eigen::half* from) {

+  __m512h a = _mm512_castph128_ph512(_mm_loadu_ph(from));

+  return _mm512_permutexvar_ph(

+      _mm512_set_epi16(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0),

+      a);

+}

+

+// pabs

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pabs<Packet32h>(const Packet32h& a) {

+  return _mm512_abs_ph(a);

+}

+

+// psignbit

+

+template <>

+EIGEN_STRONG_INLINE Packet32h psignbit<Packet32h>(const Packet32h& a) {

+  return _mm512_castsi512_ph(_mm512_srai_epi16(_mm512_castph_si512(a), 15));

+}

+

+// pmin

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pmin<Packet32h>(const Packet32h& a, const Packet32h& b) {

+  return _mm512_min_ph(a, b);

+}

+

+// pmax

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pmax<Packet32h>(const Packet32h& a, const Packet32h& b) {

+  return _mm512_max_ph(a, b);

+}

+

+// plset

+template <>

+EIGEN_STRONG_INLINE Packet32h plset<Packet32h>(const half& a) {

+  return _mm512_add_ph(_mm512_set1_ph(a),

+                       _mm512_set_ph(31.0f, 30.0f, 29.0f, 28.0f, 27.0f, 26.0f, 25.0f, 24.0f, 23.0f, 22.0f, 21.0f, 20.0f,

+                                     19.0f, 18.0f, 17.0f, 16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f,

+                                     7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f));

+}

+

+// por

+

+template <>

+EIGEN_STRONG_INLINE Packet32h por(const Packet32h& a, const Packet32h& b) {

+  return _mm512_castsi512_ph(_mm512_or_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));

+}

+

+// pxor

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pxor(const Packet32h& a, const Packet32h& b) {

+  return _mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));

+}

+

+// pand

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pand(const Packet32h& a, const Packet32h& b) {

+  return _mm512_castsi512_ph(_mm512_and_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));

+}

+

+// pandnot

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pandnot(const Packet32h& a, const Packet32h& b) {

+  return _mm512_castsi512_ph(_mm512_andnot_si512(_mm512_castph_si512(b), _mm512_castph_si512(a)));

+}

+

+// pselect

+

+template <>

+EIGEN_DEVICE_FUNC inline Packet32h pselect(const Packet32h& mask, const Packet32h& a, const Packet32h& b) {

+  __mmask32 mask32 = _mm512_cmp_epi16_mask(_mm512_castph_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ);

+  return _mm512_mask_blend_ph(mask32, a, b);

+}

+

+// pcmp_eq

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pcmp_eq(const Packet32h& a, const Packet32h& b) {

+  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_EQ_OQ);

+  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));

+}

+

+// pcmp_le

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pcmp_le(const Packet32h& a, const Packet32h& b) {

+  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LE_OQ);

+  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));

+}

+

+// pcmp_lt

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pcmp_lt(const Packet32h& a, const Packet32h& b) {

+  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LT_OQ);

+  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));

+}

+

+// pcmp_lt_or_nan

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pcmp_lt_or_nan(const Packet32h& a, const Packet32h& b) {

+  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_NGE_UQ);

+  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi16(0), mask, 0xffffu));

+}

+

+// padd

+

+template <>

+EIGEN_STRONG_INLINE Packet32h padd<Packet32h>(const Packet32h& a, const Packet32h& b) {

+  return _mm512_add_ph(a, b);

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) {

+  return _mm256_castph_si256(_mm256_add_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {

+  return _mm_castph_si128(_mm_add_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));

+}

+

+// psub

+

+template <>

+EIGEN_STRONG_INLINE Packet32h psub<Packet32h>(const Packet32h& a, const Packet32h& b) {

+  return _mm512_sub_ph(a, b);

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet16h psub<Packet16h>(const Packet16h& a, const Packet16h& b) {

+  return _mm256_castph_si256(_mm256_sub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet8h psub<Packet8h>(const Packet8h& a, const Packet8h& b) {

+  return _mm_castph_si128(_mm_sub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));

+}

+

+// pmul

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pmul<Packet32h>(const Packet32h& a, const Packet32h& b) {

+  return _mm512_mul_ph(a, b);

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) {

+  return _mm256_castph_si256(_mm256_mul_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) {

+  return _mm_castph_si128(_mm_mul_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));

+}

+

+// pdiv

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pdiv<Packet32h>(const Packet32h& a, const Packet32h& b) {

+  return _mm512_div_ph(a, b);

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet16h pdiv<Packet16h>(const Packet16h& a, const Packet16h& b) {

+  return _mm256_castph_si256(_mm256_div_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const Packet8h& b) {

+  return _mm_castph_si128(_mm_div_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));

+}

+

+// pround

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pround<Packet32h>(const Packet32h& a) {

+  // Work-around for default std::round rounding mode.

+

+  // Mask for the sign bit

+  const Packet32h signMask = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x8000u));

+  // The largest half-precision float less than 0.5

+  const Packet32h prev0dot5 = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x37FFu));

+

+  return _mm512_roundscale_ph(padd(por(pand(a, signMask), prev0dot5), a), _MM_FROUND_TO_ZERO);

+}

+

+// print

+

+template <>

+EIGEN_STRONG_INLINE Packet32h print<Packet32h>(const Packet32h& a) {

+  return _mm512_roundscale_ph(a, _MM_FROUND_CUR_DIRECTION);

+}

+

+// pceil

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pceil<Packet32h>(const Packet32h& a) {

+  return _mm512_roundscale_ph(a, _MM_FROUND_TO_POS_INF);

+}

+

+// pfloor

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pfloor<Packet32h>(const Packet32h& a) {

+  return _mm512_roundscale_ph(a, _MM_FROUND_TO_NEG_INF);

+}

+

+// predux

+template <>

+EIGEN_STRONG_INLINE half predux<Packet32h>(const Packet32h& a) {

+  return (half)_mm512_reduce_add_ph(a);

+}

+

+template <>

+EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& a) {

+  return (half)_mm256_reduce_add_ph(_mm256_castsi256_ph(a));

+}

+

+template <>

+EIGEN_STRONG_INLINE half predux<Packet8h>(const Packet8h& a) {

+  return (half)_mm_reduce_add_ph(_mm_castsi128_ph(a));

+}

+

+// predux_half_dowto4

+template <>

+EIGEN_STRONG_INLINE Packet16h predux_half_dowto4<Packet32h>(const Packet32h& a) {

+#ifdef EIGEN_VECTORIZE_AVX512DQ

+  __m256i lowHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 0));

+  __m256i highHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 1));

+

+  return Packet16h(padd<Packet16h>(lowHalf, highHalf));

+#else

+  Eigen::half data[32];

+  _mm512_storeu_ph(data, a);

+

+  __m256i lowHalf = _mm256_castph_si256(_mm256_loadu_ph(data));

+  __m256i highHalf = _mm256_castph_si256(_mm256_loadu_ph(data + 16));

+

+  return Packet16h(padd<Packet16h>(lowHalf, highHalf));

+#endif

+}

+

+// predux_max

+

+// predux_min

+

+// predux_mul

+

+#ifdef EIGEN_VECTORIZE_FMA

+

+// pmadd

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {

+  return _mm512_fmadd_ph(a, b, c);

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet16h pmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {

+  return _mm256_castph_si256(_mm256_fmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet8h pmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {

+  return _mm_castph_si128(_mm_fmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));

+}

+

+// pmsub

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {

+  return _mm512_fmsub_ph(a, b, c);

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet16h pmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {

+  return _mm256_castph_si256(_mm256_fmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet8h pmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {

+  return _mm_castph_si128(_mm_fmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));

+}

+

+// pnmadd

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pnmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {

+  return _mm512_fnmadd_ph(a, b, c);

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet16h pnmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {

+  return _mm256_castph_si256(_mm256_fnmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet8h pnmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {

+  return _mm_castph_si128(_mm_fnmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));

+}

+

+// pnmsub

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pnmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {

+  return _mm512_fnmsub_ph(a, b, c);

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet16h pnmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {

+  return _mm256_castph_si256(_mm256_fnmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet8h pnmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {

+  return _mm_castph_si128(_mm_fnmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));

+}

+

+#endif

+

+// pnegate

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pnegate<Packet32h>(const Packet32h& a) {

+  return _mm512_sub_ph(_mm512_set1_ph(0.0), a);

+}

+

+// pconj

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pconj<Packet32h>(const Packet32h& a) {

+  return a;

+}

+

+// psqrt

+

+template <>

+EIGEN_STRONG_INLINE Packet32h psqrt<Packet32h>(const Packet32h& a) {

+  return _mm512_sqrt_ph(a);

+}

+

+// prsqrt

+

+template <>

+EIGEN_STRONG_INLINE Packet32h prsqrt<Packet32h>(const Packet32h& a) {

+  return _mm512_rsqrt_ph(a);

+}

+

+// preciprocal

+

+template <>

+EIGEN_STRONG_INLINE Packet32h preciprocal<Packet32h>(const Packet32h& a) {

+  return _mm512_rcp_ph(a);

+}

+

+// ptranspose

+

+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 32>& a) {

+  __m512i t[32];

+

+  EIGEN_UNROLL_LOOP

+  for (int i = 0; i < 16; i++) {

+    t[2 * i] = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));

+    t[2 * i + 1] =

+        _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));

+  }

+

+  __m512i p[32];

+

+  EIGEN_UNROLL_LOOP

+  for (int i = 0; i < 8; i++) {

+    p[4 * i] = _mm512_unpacklo_epi32(t[4 * i], t[4 * i + 2]);

+    p[4 * i + 1] = _mm512_unpackhi_epi32(t[4 * i], t[4 * i + 2]);

+    p[4 * i + 2] = _mm512_unpacklo_epi32(t[4 * i + 1], t[4 * i + 3]);

+    p[4 * i + 3] = _mm512_unpackhi_epi32(t[4 * i + 1], t[4 * i + 3]);

+  }

+

+  __m512i q[32];

+

+  EIGEN_UNROLL_LOOP

+  for (int i = 0; i < 4; i++) {

+    q[8 * i] = _mm512_unpacklo_epi64(p[8 * i], p[8 * i + 4]);

+    q[8 * i + 1] = _mm512_unpackhi_epi64(p[8 * i], p[8 * i + 4]);

+    q[8 * i + 2] = _mm512_unpacklo_epi64(p[8 * i + 1], p[8 * i + 5]);

+    q[8 * i + 3] = _mm512_unpackhi_epi64(p[8 * i + 1], p[8 * i + 5]);

+    q[8 * i + 4] = _mm512_unpacklo_epi64(p[8 * i + 2], p[8 * i + 6]);

+    q[8 * i + 5] = _mm512_unpackhi_epi64(p[8 * i + 2], p[8 * i + 6]);

+    q[8 * i + 6] = _mm512_unpacklo_epi64(p[8 * i + 3], p[8 * i + 7]);

+    q[8 * i + 7] = _mm512_unpackhi_epi64(p[8 * i + 3], p[8 * i + 7]);

+  }

+

+  __m512i f[32];

+

+#define PACKET32H_TRANSPOSE_HELPER(X, Y)                                                            \

+  do {                                                                                              \

+    f[Y * 8] = _mm512_inserti32x4(f[Y * 8], _mm512_extracti32x4_epi32(q[X * 8], Y), X);             \

+    f[Y * 8 + 1] = _mm512_inserti32x4(f[Y * 8 + 1], _mm512_extracti32x4_epi32(q[X * 8 + 1], Y), X); \

+    f[Y * 8 + 2] = _mm512_inserti32x4(f[Y * 8 + 2], _mm512_extracti32x4_epi32(q[X * 8 + 2], Y), X); \

+    f[Y * 8 + 3] = _mm512_inserti32x4(f[Y * 8 + 3], _mm512_extracti32x4_epi32(q[X * 8 + 3], Y), X); \

+    f[Y * 8 + 4] = _mm512_inserti32x4(f[Y * 8 + 4], _mm512_extracti32x4_epi32(q[X * 8 + 4], Y), X); \

+    f[Y * 8 + 5] = _mm512_inserti32x4(f[Y * 8 + 5], _mm512_extracti32x4_epi32(q[X * 8 + 5], Y), X); \

+    f[Y * 8 + 6] = _mm512_inserti32x4(f[Y * 8 + 6], _mm512_extracti32x4_epi32(q[X * 8 + 6], Y), X); \

+    f[Y * 8 + 7] = _mm512_inserti32x4(f[Y * 8 + 7], _mm512_extracti32x4_epi32(q[X * 8 + 7], Y), X); \

+  } while (false);

+

+  PACKET32H_TRANSPOSE_HELPER(0, 0);

+  PACKET32H_TRANSPOSE_HELPER(1, 1);

+  PACKET32H_TRANSPOSE_HELPER(2, 2);

+  PACKET32H_TRANSPOSE_HELPER(3, 3);

+

+  PACKET32H_TRANSPOSE_HELPER(1, 0);

+  PACKET32H_TRANSPOSE_HELPER(2, 0);

+  PACKET32H_TRANSPOSE_HELPER(3, 0);

+  PACKET32H_TRANSPOSE_HELPER(2, 1);

+  PACKET32H_TRANSPOSE_HELPER(3, 1);

+  PACKET32H_TRANSPOSE_HELPER(3, 2);

+

+  PACKET32H_TRANSPOSE_HELPER(0, 1);

+  PACKET32H_TRANSPOSE_HELPER(0, 2);

+  PACKET32H_TRANSPOSE_HELPER(0, 3);

+  PACKET32H_TRANSPOSE_HELPER(1, 2);

+  PACKET32H_TRANSPOSE_HELPER(1, 3);

+  PACKET32H_TRANSPOSE_HELPER(2, 3);

+

+#undef PACKET32H_TRANSPOSE_HELPER

+

+  EIGEN_UNROLL_LOOP

+  for (int i = 0; i < 32; i++) {

+    a.packet[i] = _mm512_castsi512_ph(f[i]);

+  }

+}

+

+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 4>& a) {

+  __m512i p0, p1, p2, p3, t0, t1, t2, t3, a0, a1, a2, a3;

+  t0 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));

+  t1 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));

+  t2 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));

+  t3 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));

+

+  p0 = _mm512_unpacklo_epi32(t0, t2);

+  p1 = _mm512_unpackhi_epi32(t0, t2);

+  p2 = _mm512_unpacklo_epi32(t1, t3);

+  p3 = _mm512_unpackhi_epi32(t1, t3);

+

+  a0 = p0;

+  a1 = p1;

+  a2 = p2;

+  a3 = p3;

+

+  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p1, 0), 1);

+  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p0, 1), 0);

+

+  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p2, 0), 2);

+  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p0, 2), 0);

+

+  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p3, 0), 3);

+  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p0, 3), 0);

+

+  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p2, 1), 2);

+  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p1, 2), 1);

+

+  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p3, 2), 3);

+  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p2, 3), 2);

+

+  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p3, 1), 3);

+  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p1, 3), 1);

+

+  a.packet[0] = _mm512_castsi512_ph(a0);

+  a.packet[1] = _mm512_castsi512_ph(a1);

+  a.packet[2] = _mm512_castsi512_ph(a2);

+  a.packet[3] = _mm512_castsi512_ph(a3);

+}

+

+// preverse

+

+template <>

+EIGEN_STRONG_INLINE Packet32h preverse(const Packet32h& a) {

+  return _mm512_permutexvar_ph(_mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,

+                                                20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),

+                               a);

+}

+

+// pscatter

+

+template <>

+EIGEN_STRONG_INLINE void pscatter<half, Packet32h>(half* to, const Packet32h& from, Index stride) {

+  EIGEN_ALIGN64 half aux[32];

+  pstore(aux, from);

+

+  EIGEN_UNROLL_LOOP

+  for (int i = 0; i < 32; i++) {

+    to[stride * i] = aux[i];

+  }

+}

+

+// pgather

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pgather<Eigen::half, Packet32h>(const Eigen::half* from, Index stride) {

+  return _mm512_castsi512_ph(_mm512_set_epi16(

+      from[31 * stride].x, from[30 * stride].x, from[29 * stride].x, from[28 * stride].x, from[27 * stride].x,

+      from[26 * stride].x, from[25 * stride].x, from[24 * stride].x, from[23 * stride].x, from[22 * stride].x,

+      from[21 * stride].x, from[20 * stride].x, from[19 * stride].x, from[18 * stride].x, from[17 * stride].x,

+      from[16 * stride].x, from[15 * stride].x, from[14 * stride].x, from[13 * stride].x, from[12 * stride].x,

+      from[11 * stride].x, from[10 * stride].x, from[9 * stride].x, from[8 * stride].x, from[7 * stride].x,

+      from[6 * stride].x, from[5 * stride].x, from[4 * stride].x, from[3 * stride].x, from[2 * stride].x,

+      from[1 * stride].x, from[0 * stride].x));

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet16h pcos<Packet16h>(const Packet16h&);

+template <>

+EIGEN_STRONG_INLINE Packet16h psin<Packet16h>(const Packet16h&);

+template <>

+EIGEN_STRONG_INLINE Packet16h plog<Packet16h>(const Packet16h&);

+template <>

+EIGEN_STRONG_INLINE Packet16h plog2<Packet16h>(const Packet16h&);

+template <>

+EIGEN_STRONG_INLINE Packet16h plog1p<Packet16h>(const Packet16h&);

+template <>

+EIGEN_STRONG_INLINE Packet16h pexp<Packet16h>(const Packet16h&);

+template <>

+EIGEN_STRONG_INLINE Packet16h pexpm1<Packet16h>(const Packet16h&);

+template <>

+EIGEN_STRONG_INLINE Packet16h ptanh<Packet16h>(const Packet16h&);

+template <>

+EIGEN_STRONG_INLINE Packet16h pfrexp<Packet16h>(const Packet16h&, Packet16h&);

+template <>

+EIGEN_STRONG_INLINE Packet16h pldexp<Packet16h>(const Packet16h&, const Packet16h&);

+

+EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h& a, const Packet16h& b) {

+  __m512d result = _mm512_undefined_pd();

+  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(a), 0);

+  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(b), 1);

+  return _mm512_castpd_ph(result);

+}

+

+EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h& x, Packet16h& a, Packet16h& b) {

+  a = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 0));

+  b = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 1));

+}

+

+// psin

+template <>

+EIGEN_STRONG_INLINE Packet32h psin<Packet32h>(const Packet32h& a) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h lowOut = psin(low);

+  Packet16h highOut = psin(high);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+// pcos

+template <>

+EIGEN_STRONG_INLINE Packet32h pcos<Packet32h>(const Packet32h& a) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h lowOut = pcos(low);

+  Packet16h highOut = pcos(high);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+// plog

+template <>

+EIGEN_STRONG_INLINE Packet32h plog<Packet32h>(const Packet32h& a) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h lowOut = plog(low);

+  Packet16h highOut = plog(high);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+// plog2

+template <>

+EIGEN_STRONG_INLINE Packet32h plog2<Packet32h>(const Packet32h& a) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h lowOut = plog2(low);

+  Packet16h highOut = plog2(high);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+// plog1p

+template <>

+EIGEN_STRONG_INLINE Packet32h plog1p<Packet32h>(const Packet32h& a) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h lowOut = plog1p(low);

+  Packet16h highOut = plog1p(high);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+// pexp

+template <>

+EIGEN_STRONG_INLINE Packet32h pexp<Packet32h>(const Packet32h& a) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h lowOut = pexp(low);

+  Packet16h highOut = pexp(high);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+// pexpm1

+template <>

+EIGEN_STRONG_INLINE Packet32h pexpm1<Packet32h>(const Packet32h& a) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h lowOut = pexpm1(low);

+  Packet16h highOut = pexpm1(high);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+// ptanh

+template <>

+EIGEN_STRONG_INLINE Packet32h ptanh<Packet32h>(const Packet32h& a) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h lowOut = ptanh(low);

+  Packet16h highOut = ptanh(high);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+// pfrexp

+template <>

+EIGEN_STRONG_INLINE Packet32h pfrexp<Packet32h>(const Packet32h& a, Packet32h& exponent) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h exp1 = _mm256_undefined_si256();

+  Packet16h exp2 = _mm256_undefined_si256();

+

+  Packet16h lowOut = pfrexp(low, exp1);

+  Packet16h highOut = pfrexp(high, exp2);

+

+  exponent = combine2Packet16h(exp1, exp2);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+// pldexp

+template <>

+EIGEN_STRONG_INLINE Packet32h pldexp<Packet32h>(const Packet32h& a, const Packet32h& exponent) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h exp1;

+  Packet16h exp2;

+  extract2Packet16h(exponent, exp1, exp2);

+

+  Packet16h lowOut = pldexp(low, exp1);

+  Packet16h highOut = pldexp(high, exp2);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+}  // end namespace internal

+}  // end namespace Eigen

+

+#endif  // EIGEN_PACKET_MATH_FP16_AVX512_H

diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h
index 0252efa..e3c4436 100644
--- a/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -99,7 +99,6 @@
     HasMax = 0,
     HasSqrt = 1,
     HasLog = 1,
-    HasExp = 1,
 #ifdef EIGEN_VECTORIZE_VSX
     HasBlend = 1,
 #endif
@@ -376,11 +375,6 @@
   return plog_complex<Packet2cf>(a);
 }
 
-template <>
-EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a) {
-  return pexp_complex<Packet2cf>(a);
-}
-
 //---------- double ----------
 #ifdef EIGEN_VECTORIZE_VSX
 struct Packet1cd {
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 6a2f0e6..a4b134c 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -94,7 +94,9 @@
 
 static Packet16uc p16uc_REVERSE32 = {12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3};
 static Packet16uc p16uc_REVERSE16 = {14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1};
+#ifndef _ARCH_PWR9
 static Packet16uc p16uc_REVERSE8 = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
+#endif
 
 #ifdef _BIG_ENDIAN
 static Packet16uc p16uc_DUPLICATE32_HI = {0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7};
@@ -1926,11 +1928,19 @@
 }
 template <>
 EIGEN_STRONG_INLINE Packet16c preverse(const Packet16c& a) {
+#ifdef _ARCH_PWR9
+  return vec_revb(a);
+#else
   return vec_perm(a, a, p16uc_REVERSE8);
+#endif
 }
 template <>
 EIGEN_STRONG_INLINE Packet16uc preverse(const Packet16uc& a) {
+#ifdef _ARCH_PWR9
+  return vec_revb(a);
+#else
   return vec_perm(a, a, p16uc_REVERSE8);
+#endif
 }
 template <>
 EIGEN_STRONG_INLINE Packet8bf preverse(const Packet8bf& a) {
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 78dbf20..118426f 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -555,7 +555,7 @@
   return float(double(int64_t(p)) * pio2_62);
 }
 
-template <bool ComputeSine, typename Packet, bool ComputeBoth = false>
+template <bool ComputeSine, typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 #if EIGEN_COMP_GNUC_STRICT
     __attribute__((optimize("-fno-unsafe-math-optimizations")))
@@ -669,21 +669,10 @@
   y2 = pmadd(y2, x, x);
 
   // Select the correct result from the two polynomials.
-  if (ComputeBoth) {
-    Packet peven = peven_mask(x);
-    Packet ysin = pselect(poly_mask, y2, y1);
-    Packet ycos = pselect(poly_mask, y1, y2);
-    Packet sign_bit_sin = pxor(_x, preinterpret<Packet>(plogical_shift_left<30>(y_int)));
-    Packet sign_bit_cos = preinterpret<Packet>(plogical_shift_left<30>(padd(y_int, csti_1)));
-    sign_bit_sin = pand(sign_bit_sin, cst_sign_mask);  // clear all but left most bit
-    sign_bit_cos = pand(sign_bit_cos, cst_sign_mask);  // clear all but left most bit
-    y = pselect(peven, pxor(ysin, sign_bit_sin), pxor(ycos, sign_bit_cos));
-  } else {
-    y = ComputeSine ? pselect(poly_mask, y2, y1) : pselect(poly_mask, y1, y2);
-    y = pxor(y, sign_bit);
-  }
+  y = ComputeSine ? pselect(poly_mask, y2, y1) : pselect(poly_mask, y1, y2);
+
   // Update the sign and filter huge inputs
-  return y;
+  return pxor(y, sign_bit);
 }
 
 template <typename Packet>
@@ -928,65 +917,6 @@
   return pxor(p, x_signmask);
 }
 
-/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
-    Doesn't do anything fancy, just a 13/6-degree rational interpolant which
-    is accurate up to a couple of ulps in the (approximate) range [-8, 8],
-    outside of which tanh(x) = +/-1 in single precision. The input is clamped
-    to the range [-c, c]. The value c is chosen as the smallest value where
-    the approximation evaluates to exactly 1. In the reange [-0.0004, 0.0004]
-    the approximation tanh(x) ~= x is used for better accuracy as x tends to zero.
-
-    This implementation works on both scalars and packets.
-*/
-template <typename T>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS T ptanh_float(const T& a_x) {
-  // Clamp the inputs to the range [-c, c]
-#ifdef EIGEN_VECTORIZE_FMA
-  const T plus_clamp = pset1<T>(7.99881172180175781f);
-  const T minus_clamp = pset1<T>(-7.99881172180175781f);
-#else
-  const T plus_clamp = pset1<T>(7.90531110763549805f);
-  const T minus_clamp = pset1<T>(-7.90531110763549805f);
-#endif
-  const T tiny = pset1<T>(0.0004f);
-  const T x = pmax(pmin(a_x, plus_clamp), minus_clamp);
-  const T tiny_mask = pcmp_lt(pabs(a_x), tiny);
-  // The monomial coefficients of the numerator polynomial (odd).
-  const T alpha_1 = pset1<T>(4.89352455891786e-03f);
-  const T alpha_3 = pset1<T>(6.37261928875436e-04f);
-  const T alpha_5 = pset1<T>(1.48572235717979e-05f);
-  const T alpha_7 = pset1<T>(5.12229709037114e-08f);
-  const T alpha_9 = pset1<T>(-8.60467152213735e-11f);
-  const T alpha_11 = pset1<T>(2.00018790482477e-13f);
-  const T alpha_13 = pset1<T>(-2.76076847742355e-16f);
-
-  // The monomial coefficients of the denominator polynomial (even).
-  const T beta_0 = pset1<T>(4.89352518554385e-03f);
-  const T beta_2 = pset1<T>(2.26843463243900e-03f);
-  const T beta_4 = pset1<T>(1.18534705686654e-04f);
-  const T beta_6 = pset1<T>(1.19825839466702e-06f);
-
-  // Since the polynomials are odd/even, we need x^2.
-  const T x2 = pmul(x, x);
-
-  // Evaluate the numerator polynomial p.
-  T p = pmadd(x2, alpha_13, alpha_11);
-  p = pmadd(x2, p, alpha_9);
-  p = pmadd(x2, p, alpha_7);
-  p = pmadd(x2, p, alpha_5);
-  p = pmadd(x2, p, alpha_3);
-  p = pmadd(x2, p, alpha_1);
-  p = pmul(x, p);
-
-  // Evaluate the denominator polynomial q.
-  T q = pmadd(x2, beta_6, beta_4);
-  q = pmadd(x2, q, beta_2);
-  q = pmadd(x2, q, beta_0);
-
-  // Divide the numerator by the denominator.
-  return pselect(tiny_mask, x, pdiv(p, q));
-}
-
 template <typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_float(const Packet& x) {
   typedef typename unpacket_traits<Packet>::type Scalar;
@@ -1063,49 +993,6 @@
 }
 
 template <typename Packet>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp_complex(const Packet& a) {
-  typedef typename unpacket_traits<Packet>::as_real RealPacket;
-  typedef typename unpacket_traits<Packet>::type Scalar;
-  typedef typename Scalar::value_type RealScalar;
-  const RealPacket even_mask = peven_mask(a.v);
-  const RealPacket odd_mask = pcplxflip(Packet(even_mask)).v;
-
-  // Let a = x + iy.
-  // exp(a) = exp(x) * cis(y), plus some special edge-case handling.
-
-  // exp(x):
-  RealPacket x = pand(a.v, even_mask);
-  x = por(x, pcplxflip(Packet(x)).v);
-  RealPacket expx = pexp(x);  // exp(x);
-
-  // cis(y):
-  RealPacket y = pand(odd_mask, a.v);
-  y = por(y, pcplxflip(Packet(y)).v);
-  RealPacket cisy = psincos_float<false, RealPacket, true>(y);
-  cisy = pcplxflip(Packet(cisy)).v;  // cos(y) + i * sin(y)
-
-  const RealPacket cst_pos_inf = pset1<RealPacket>(NumTraits<RealScalar>::infinity());
-  const RealPacket cst_neg_inf = pset1<RealPacket>(-NumTraits<RealScalar>::infinity());
-
-  // If x is -inf, we know that cossin(y) is bounded,
-  //   so the result is (0, +/-0), where the sign of the imaginary part comes
-  //   from the sign of cossin(y).
-  RealPacket cisy_sign = por(pandnot(cisy, pabs(cisy)), pset1<RealPacket>(RealScalar(1)));
-  cisy = pselect(pcmp_eq(x, cst_neg_inf), cisy_sign, cisy);
-
-  // If x is inf, and cos(y) has unknown sign (y is inf or NaN), the result
-  // is (+/-inf, NaN), where the signs are undetermined (take the sign of y).
-  RealPacket y_sign = por(pandnot(y, pabs(y)), pset1<RealPacket>(RealScalar(1)));
-  cisy = pselect(pand(pcmp_eq(x, cst_pos_inf), pisnan(cisy)), pand(y_sign, even_mask), cisy);
-  Packet result = Packet(pmul(expx, cisy));
-
-  // If y is +/- 0, the input is real, so take the real result for consistency.
-  result = pselect(Packet(pcmp_eq(y, pzero(y))), Packet(por(pand(expx, even_mask), pand(y, odd_mask))), result);
-
-  return result;
-}
-
-template <typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt_complex(const Packet& a) {
   typedef typename unpacket_traits<Packet>::type Scalar;
   typedef typename Scalar::value_type RealScalar;
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
index 9560de2..960bb67 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
@@ -98,10 +98,6 @@
 template <typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_double(const Packet& x);
 
-/** \internal \returns tanh(x) for single precision float */
-template <typename Packet>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh_float(const Packet& x);
-
 /** \internal \returns atanh(x) for single precision float */
 template <typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_float(const Packet& x);
@@ -121,10 +117,6 @@
 template <typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog_complex(const Packet& x);
 
-/** \internal \returns exp(x) for complex types */
-template <typename Packet>
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp_complex(const Packet& x);
-
 // Macros for instantiating these generic functions for different backends.
 #define EIGEN_PACKET_FUNCTION(METHOD, SCALAR, PACKET)                                             \
   template <>                                                                                     \
@@ -141,7 +133,6 @@
   EIGEN_FLOAT_PACKET_FUNCTION(asin, PACKET)                                                    \
   EIGEN_FLOAT_PACKET_FUNCTION(acos, PACKET)                                                    \
   EIGEN_FLOAT_PACKET_FUNCTION(atan, PACKET)                                                    \
-  EIGEN_FLOAT_PACKET_FUNCTION(tanh, PACKET)                                                    \
   EIGEN_FLOAT_PACKET_FUNCTION(atanh, PACKET)                                                   \
   EIGEN_FLOAT_PACKET_FUNCTION(log, PACKET)                                                     \
   EIGEN_FLOAT_PACKET_FUNCTION(log2, PACKET)                                                    \
@@ -153,6 +144,10 @@
   template <>                                                                                  \
   EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET plog1p<PACKET>(const PACKET& _x) { \
     return internal::generic_plog1p(_x);                                                       \
+  }                                                                                            \
+  template <>                                                                                  \
+  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET ptanh<PACKET>(const PACKET& _x) {  \
+    return internal::generic_fast_tanh_float(_x);                                              \
   }
 
 #define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(PACKET) \
diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index 5257c03..22c7765 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -63,7 +63,6 @@
     HasNegate = 1,
     HasSqrt = 1,
     HasLog = 1,
-    HasExp = 1,
     HasAbs = 0,
     HasAbs2 = 0,
     HasMin = 0,
@@ -448,16 +447,6 @@
   return plog_complex(a);
 }
 
-template <>
-EIGEN_STRONG_INLINE Packet1cf pexp<Packet1cf>(const Packet1cf& a) {
-  return pexp_complex(a);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a) {
-  return pexp_complex(a);
-}
-
 //---------- double ----------
 #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
 
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index 0e70f03..76c3a05 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -43,7 +43,6 @@
     HasNegate = 1,
     HasSqrt = 1,
     HasLog = 1,
-    HasExp = 1,
     HasAbs = 0,
     HasAbs2 = 0,
     HasMin = 0,
@@ -425,11 +424,6 @@
   return plog_complex<Packet2cf>(a);
 }
 
-template <>
-EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a) {
-  return pexp_complex<Packet2cf>(a);
-}
-
 }  // end namespace internal
 }  // end namespace Eigen
 
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 008109a..bdbf759 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -52,7 +52,6 @@
 typedef eigen_packet_wrapper<__m128i, 0> Packet4i;
 typedef eigen_packet_wrapper<__m128i, 1> Packet16b;
 typedef eigen_packet_wrapper<__m128i, 4> Packet4ui;
-typedef eigen_packet_wrapper<__m128i, 5> Packet2l;
 
 template <>
 struct is_arithmetic<__m128> {
@@ -70,10 +69,6 @@
 struct is_arithmetic<Packet4i> {
   enum { value = true };
 };
-template <>
-struct is_arithmetic<Packet2l> {
-  enum { value = true };
-};
 // Note that `Packet4ui` uses the underlying type `__m128i`, which is
 // interpreted as a vector of _signed_ `int32`s, which breaks some arithmetic
 // operations used in `GenericPacketMath.h`.
@@ -145,27 +140,6 @@
 
 #define EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = pset1<Packet4ui>(X)
 
-// Work around lack of extract/cvt for epi64 when compiling for 32-bit.
-#if EIGEN_ARCH_x86_64
-EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_0(const __m128i& a) { return _mm_cvtsi128_si64(a); }
-#ifdef EIGEN_VECTORIZE_SSE4_1
-EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) { return _mm_extract_epi64(a, 1); }
-#else
-EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) {
-  return _mm_cvtsi128_si64(_mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(a), 0x1)));
-}
-#endif
-#else
-// epi64 instructions are not available.  The following seems to generate the same instructions
-// with -O2 in GCC/Clang.
-EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_0(const __m128i& a) {
-  return numext::bit_cast<int64_t>(_mm_cvtsd_f64(_mm_castsi128_pd(a)));
-}
-EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) {
-  return numext::bit_cast<int64_t>(_mm_cvtsd_f64(_mm_shuffle_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(a), 0x1)));
-}
-#endif
-
 // Use the packet_traits defined in AVX/PacketMath.h instead if we're going
 // to leverage AVX instructions.
 #ifndef EIGEN_VECTORIZE_AVX
@@ -239,10 +213,10 @@
   enum {
     Vectorizable = 1,
     AlignedOnScalar = 1,
-    size = 4,
-
     HasCmp = 1,
     HasDiv = 1,
+    size = 4,
+
     HasShift = 1,
     HasBlend = 1
   };
@@ -258,22 +232,10 @@
 
     HasDiv = 0,
     HasNegate = 0,
+    HasSqrt = 0,
     HasCmp = 1,
-    HasShift = 1,
-    HasBlend = 1
-  };
-};
-template <>
-struct packet_traits<int64_t> : default_packet_traits {
-  typedef Packet2l type;
-  typedef Packet2l half;
-  enum {
-    Vectorizable = 1,
-    AlignedOnScalar = 1,
-    size = 2,
-
-    HasDiv = 0,
-    HasCmp = 1,
+    HasMin = 1,
+    HasMax = 1,
     HasShift = 1,
     HasBlend = 1
   };
@@ -288,8 +250,12 @@
     AlignedOnScalar = 1,
     size = 16,
 
+    HasAdd = 1,
+    HasSub = 1,
     HasCmp = 1,  // note -- only pcmp_eq is defined
     HasShift = 0,
+    HasMul = 1,
+    HasNegate = 1,
     HasAbs = 0,
     HasAbs2 = 0,
     HasMin = 0,
@@ -317,19 +283,6 @@
 struct unpacket_traits<Packet2d> {
   typedef double type;
   typedef Packet2d half;
-  typedef Packet2l integer_packet;
-  enum {
-    size = 2,
-    alignment = Aligned16,
-    vectorizable = true,
-    masked_load_available = false,
-    masked_store_available = false
-  };
-};
-template <>
-struct unpacket_traits<Packet2l> {
-  typedef int64_t type;
-  typedef Packet2l half;
   enum {
     size = 2,
     alignment = Aligned16,
@@ -395,10 +348,6 @@
   return _mm_set1_pd(from);
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l pset1<Packet2l>(const int64_t& from) {
-  return _mm_set1_epi64x(from);
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
   return _mm_set1_epi32(from);
 }
@@ -425,10 +374,6 @@
   return _mm_castsi128_ps(_mm_set_epi32(0, -1, 0, -1));
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l peven_mask(const Packet2l& /*a*/) {
-  return _mm_set_epi32(0, 0, -1, -1);
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i peven_mask(const Packet4i& /*a*/) {
   return _mm_set_epi32(0, -1, 0, -1);
 }
@@ -450,10 +395,6 @@
   return _mm_setzero_pd();
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l pzero(const Packet2l& /*a*/) {
-  return _mm_setzero_si128();
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i pzero(const Packet4i& /*a*/) {
   return _mm_setzero_si128();
 }
@@ -483,10 +424,6 @@
   return _mm_add_pd(pset1<Packet2d>(a), _mm_set_pd(1, 0));
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l plset<Packet2l>(const int64_t& a) {
-  return _mm_add_epi32(pset1<Packet2l>(a), _mm_set_epi64x(1, 0));
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) {
   return _mm_add_epi32(pset1<Packet4i>(a), _mm_set_epi32(3, 2, 1, 0));
 }
@@ -504,10 +441,6 @@
   return _mm_add_pd(a, b);
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l padd<Packet2l>(const Packet2l& a, const Packet2l& b) {
-  return _mm_add_epi64(a, b);
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) {
   return _mm_add_epi32(a, b);
 }
@@ -541,10 +474,6 @@
   return _mm_sub_pd(a, b);
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l psub<Packet2l>(const Packet2l& a, const Packet2l& b) {
-  return _mm_sub_epi64(a, b);
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) {
   return _mm_sub_epi32(a, b);
 }
@@ -592,13 +521,8 @@
   return _mm_xor_pd(a, mask);
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l pnegate(const Packet2l& a) {
-  return psub(pzero(a), a);
-}
-
-template <>
 EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) {
-  return psub(pzero(a), a);
+  return psub(Packet4i(_mm_setr_epi32(0, 0, 0, 0)), a);
 }
 
 template <>
@@ -615,10 +539,6 @@
   return a;
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l pconj(const Packet2l& a) {
-  return a;
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) {
   return a;
 }
@@ -632,21 +552,6 @@
   return _mm_mul_pd(a, b);
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l pmul<Packet2l>(const Packet2l& a, const Packet2l& b) {
-  // 64-bit mul requires avx512, so do this with 32-bit multiplication
-  __m128i upper32_a = _mm_srli_epi64(a, 32);
-  __m128i upper32_b = _mm_srli_epi64(b, 32);
-
-  // upper * lower
-  __m128i mul1 = _mm_mul_epu32(upper32_a, b);
-  __m128i mul2 = _mm_mul_epu32(upper32_b, a);
-  // Gives us both upper*upper and lower*lower
-  __m128i mul3 = _mm_mul_epu32(a, b);
-
-  __m128i high = _mm_slli_epi64(_mm_add_epi64(mul1, mul2), 32);
-  return _mm_add_epi64(high, mul3);
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) {
 #ifdef EIGEN_VECTORIZE_SSE4_1
   return _mm_mullo_epi32(a, b);
@@ -697,6 +602,15 @@
 #endif
 }
 
+// for some weird raisons, it has to be overloaded for packet of integers
+template <>
+EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) {
+  return padd(pmul(a, b), c);
+}
+template <>
+EIGEN_STRONG_INLINE Packet4ui pmadd(const Packet4ui& a, const Packet4ui& b, const Packet4ui& c) {
+  return padd(pmul(a, b), c);
+}
 #ifdef EIGEN_VECTORIZE_FMA
 template <>
 EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
@@ -745,36 +659,27 @@
 
 #ifdef EIGEN_VECTORIZE_SSE4_1
 template <>
-EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) {
+EIGEN_DEVICE_FUNC inline Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) {
   return _mm_blendv_ps(b, a, mask);
 }
 
 template <>
-EIGEN_STRONG_INLINE Packet2l pselect(const Packet2l& mask, const Packet2l& a, const Packet2l& b) {
-  return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(b), _mm_castsi128_pd(a), _mm_castsi128_pd(mask)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet4i pselect(const Packet4i& mask, const Packet4i& a, const Packet4i& b) {
+EIGEN_DEVICE_FUNC inline Packet4i pselect(const Packet4i& mask, const Packet4i& a, const Packet4i& b) {
   return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(b), _mm_castsi128_ps(a), _mm_castsi128_ps(mask)));
 }
 
 template <>
-EIGEN_STRONG_INLINE Packet4ui pselect(const Packet4ui& mask, const Packet4ui& a, const Packet4ui& b) {
+EIGEN_DEVICE_FUNC inline Packet4ui pselect(const Packet4ui& mask, const Packet4ui& a, const Packet4ui& b) {
   return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(b), _mm_castsi128_ps(a), _mm_castsi128_ps(mask)));
 }
 
 template <>
-EIGEN_STRONG_INLINE Packet2d pselect(const Packet2d& mask, const Packet2d& a, const Packet2d& b) {
+EIGEN_DEVICE_FUNC inline Packet2d pselect(const Packet2d& mask, const Packet2d& a, const Packet2d& b) {
   return _mm_blendv_pd(b, a, mask);
 }
 #endif
 
 template <>
-EIGEN_STRONG_INLINE Packet2l ptrue<Packet2l>(const Packet2l& a) {
-  return _mm_cmpeq_epi32(a, a);
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i ptrue<Packet4i>(const Packet4i& a) {
   return _mm_cmpeq_epi32(a, a);
 }
@@ -802,10 +707,6 @@
   return _mm_and_pd(a, b);
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l pand<Packet2l>(const Packet2l& a, const Packet2l& b) {
-  return _mm_and_si128(a, b);
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) {
   return _mm_and_si128(a, b);
 }
@@ -827,10 +728,6 @@
   return _mm_or_pd(a, b);
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l por<Packet2l>(const Packet2l& a, const Packet2l& b) {
-  return _mm_or_si128(a, b);
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) {
   return _mm_or_si128(a, b);
 }
@@ -852,10 +749,6 @@
   return _mm_xor_pd(a, b);
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l pxor<Packet2l>(const Packet2l& a, const Packet2l& b) {
-  return _mm_xor_si128(a, b);
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) {
   return _mm_xor_si128(a, b);
 }
@@ -877,10 +770,6 @@
   return _mm_andnot_pd(b, a);
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l pandnot<Packet2l>(const Packet2l& a, const Packet2l& b) {
-  return _mm_andnot_si128(b, a);
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) {
   return _mm_andnot_si128(b, a);
 }
@@ -922,6 +811,7 @@
 EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) {
   return _mm_cmpeq_pd(a, b);
 }
+
 template <>
 EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i& a, const Packet4i& b) {
   return _mm_cmplt_epi32(a, b);
@@ -931,35 +821,8 @@
   return _mm_cmpeq_epi32(a, b);
 }
 template <>
-EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) {
-  return por(pcmp_lt(a, b), pcmp_eq(a, b));
-}
-template <>
-EIGEN_STRONG_INLINE Packet2l pcmp_lt(const Packet2l& a, const Packet2l& b) {
-#ifdef EIGEN_VECTORIZE_SSE4_2
-  return _mm_cmpgt_epi64(b, a);
-#else
-  Packet4i eq = pcmp_eq<Packet4i>(Packet4i(a), Packet4i(b));
-  Packet2l hi_eq = Packet2l(_mm_shuffle_epi32(eq, (shuffle_mask<1, 1, 3, 3>::mask)));
-  Packet4i lt = pcmp_lt<Packet4i>(Packet4i(a), Packet4i(b));
-  Packet2l hi_lt = Packet2l(_mm_shuffle_epi32(lt, (shuffle_mask<1, 1, 3, 3>::mask)));
-  Packet2l lo_lt = Packet2l(_mm_shuffle_epi32(lt, (shuffle_mask<0, 0, 2, 2>::mask)));
-  // return hi(a) < hi(b) || (hi(a) == hi(b) && lo(a) < lo(b))
-  return por(hi_lt, pand(hi_eq, lo_lt));
-#endif
-}
-template <>
-EIGEN_STRONG_INLINE Packet2l pcmp_eq(const Packet2l& a, const Packet2l& b) {
-#ifdef EIGEN_VECTORIZE_SSE4_1
-  return _mm_cmpeq_epi64(a, b);
-#else
-  Packet4i tmp = pcmp_eq<Packet4i>(Packet4i(a), Packet4i(b));
-  return Packet2l(pand<Packet4i>(tmp, _mm_shuffle_epi32(tmp, (shuffle_mask<1, 0, 3, 2>::mask))));
-#endif
-}
-template <>
-EIGEN_STRONG_INLINE Packet2l pcmp_le(const Packet2l& a, const Packet2l& b) {
-  return por(pcmp_lt(a, b), pcmp_eq(a, b));
+EIGEN_STRONG_INLINE Packet4ui pcmp_eq(const Packet4ui& a, const Packet4ui& b) {
+  return _mm_cmpeq_epi32(a, b);
 }
 template <>
 EIGEN_STRONG_INLINE Packet16b pcmp_eq(const Packet16b& a, const Packet16b& b) {
@@ -968,8 +831,8 @@
   return _mm_and_si128(_mm_cmpeq_epi8(a, b), kBoolMask);
 }
 template <>
-EIGEN_STRONG_INLINE Packet4ui pcmp_eq(const Packet4ui& a, const Packet4ui& b) {
-  return _mm_cmpeq_epi32(a, b);
+EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) {
+  return por(pcmp_lt(a, b), pcmp_eq(a, b));
 }
 
 template <>
@@ -1013,11 +876,6 @@
 #endif
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l pmin<Packet2l>(const Packet2l& a, const Packet2l& b) {
-  Packet2l a_lt_mask = pcmp_lt(a, b);
-  return por(pandnot(b, a_lt_mask), pand(a, a_lt_mask));
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) {
 #ifdef EIGEN_VECTORIZE_SSE4_1
   return _mm_min_epi32(a, b);
@@ -1079,11 +937,6 @@
 #endif
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l pmax<Packet2l>(const Packet2l& a, const Packet2l& b) {
-  Packet2l a_lt_mask = pcmp_lt(a, b);
-  return por(pandnot(a, a_lt_mask), pand(b, a_lt_mask));
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) {
 #ifdef EIGEN_VECTORIZE_SSE4_1
   return _mm_max_epi32(a, b);
@@ -1175,46 +1028,6 @@
   return pminmax_propagate_nan(a, b, pmax<Packet2d>);
 }
 
-template <>
-EIGEN_STRONG_INLINE Packet4f psignbit(const Packet4f& a) {
-  return _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(a), 31));
-}
-template <>
-EIGEN_STRONG_INLINE Packet2d psignbit(const Packet2d& a) {
-  Packet4f tmp = psignbit<Packet4f>(_mm_castpd_ps(a));
-#ifdef EIGEN_VECTORIZE_AVX
-  return _mm_castps_pd(_mm_permute_ps(tmp, (shuffle_mask<1, 1, 3, 3>::mask)));
-#else
-  return _mm_castps_pd(_mm_shuffle_ps(tmp, tmp, (shuffle_mask<1, 1, 3, 3>::mask)));
-#endif  // EIGEN_VECTORIZE_AVX
-}
-template <>
-EIGEN_STRONG_INLINE Packet4i psignbit(const Packet4i& a) {
-  return _mm_srai_epi32(a, 31);
-}
-template <>
-EIGEN_STRONG_INLINE Packet4ui psignbit(const Packet4ui& a) {
-  return pzero(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet2l psignbit(const Packet2l& a) {
-  Packet4i tmp = psignbit<Packet4i>(Packet4i(a));
-  return Packet2l(_mm_shuffle_epi32(tmp, (shuffle_mask<1, 1, 3, 3>::mask)));
-}
-
-template <int N>
-EIGEN_STRONG_INLINE Packet2l parithmetic_shift_right(const Packet2l& a) {
-  Packet2l signbit = psignbit(a);
-  return por(_mm_slli_epi64(signbit, 64 - N), _mm_srli_epi64(a, N));
-}
-template <int N>
-EIGEN_STRONG_INLINE Packet2l plogical_shift_right(const Packet2l& a) {
-  return _mm_srli_epi64(a, N);
-}
-template <int N>
-EIGEN_STRONG_INLINE Packet2l plogical_shift_left(const Packet2l& a) {
-  return _mm_slli_epi64(a, N);
-}
 template <int N>
 EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i& a) {
   return _mm_srai_epi32(a, N);
@@ -1227,6 +1040,7 @@
 EIGEN_STRONG_INLINE Packet4i plogical_shift_left(const Packet4i& a) {
   return _mm_slli_epi32(a, N);
 }
+
 template <int N>
 EIGEN_STRONG_INLINE Packet4ui parithmetic_shift_right(const Packet4ui& a) {
   return _mm_srli_epi32(a, N);
@@ -1251,17 +1065,12 @@
   return _mm_and_pd(a, mask);
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l pabs(const Packet2l& a) {
-  Packet2l signbit = psignbit(a);
-  return _mm_sub_epi64(_mm_xor_si128(a, signbit), signbit);
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) {
 #ifdef EIGEN_VECTORIZE_SSSE3
   return _mm_abs_epi32(a);
 #else
-  Packet4i signbit = psignbit(a);
-  return _mm_sub_epi32(_mm_xor_si128(a, signbit), signbit);
+  Packet4i aux = _mm_srai_epi32(a, 31);
+  return _mm_sub_epi32(_mm_xor_si128(a, aux), aux);
 #endif
 }
 template <>
@@ -1269,6 +1078,24 @@
   return a;
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet4f psignbit(const Packet4f& a) {
+  return _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(a), 31));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2d psignbit(const Packet2d& a) {
+  Packet4f tmp = psignbit<Packet4f>(_mm_castpd_ps(a));
+#ifdef EIGEN_VECTORIZE_AVX
+  return _mm_castps_pd(_mm_permute_ps(tmp, (shuffle_mask<1, 1, 3, 3>::mask)));
+#else
+  return _mm_castps_pd(_mm_shuffle_ps(tmp, tmp, (shuffle_mask<1, 1, 3, 3>::mask)));
+#endif  // EIGEN_VECTORIZE_AVX
+}
+template <>
+EIGEN_STRONG_INLINE Packet4ui psignbit(const Packet4ui& a) {
+  return pzero(a);
+}
+
 #ifdef EIGEN_VECTORIZE_SSE4_1
 template <>
 EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) {
@@ -1390,10 +1217,6 @@
   EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from);
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l pload<Packet2l>(const int64_t* from) {
-  EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from));
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) {
   EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from));
 }
@@ -1428,11 +1251,6 @@
   return _mm_loadu_pd(from);
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l ploadu<Packet2l>(const int64_t* from) {
-  EIGEN_DEBUG_UNALIGNED_LOAD
-  return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) {
   EIGEN_DEBUG_UNALIGNED_LOAD
   return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
@@ -1481,10 +1299,6 @@
   return pset1<Packet2d>(from[0]);
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l ploaddup<Packet2l>(const int64_t* from) {
-  return pset1<Packet2l>(from[0]);
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from) {
   Packet4i tmp;
   tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));
@@ -1523,10 +1337,6 @@
   EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from);
 }
 template <>
-EIGEN_STRONG_INLINE void pstore<int64_t>(int64_t* to, const Packet2l& from) {
-  EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from);
-}
-template <>
 EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) {
   EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from);
 }
@@ -1548,10 +1358,6 @@
   EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from);
 }
 template <>
-EIGEN_STRONG_INLINE void pstoreu<int64_t>(int64_t* to, const Packet2l& from) {
-  EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from);
-}
-template <>
 EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) {
   EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from);
 }
@@ -1587,142 +1393,25 @@
 }
 
 template <>
-EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
-  return _mm_shuffle_ps(a, a, 0x1B);
-}
-template <>
-EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) {
-  return _mm_shuffle_pd(a, a, 0x1);
-}
-template <>
-EIGEN_STRONG_INLINE Packet2l preverse(const Packet2l& a) {
-  return _mm_castpd_si128(preverse(_mm_castsi128_pd(a)));
-}
-template <>
-EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
-  return _mm_shuffle_epi32(a, 0x1B);
-}
-template <>
-EIGEN_STRONG_INLINE Packet4ui preverse(const Packet4ui& a) {
-  return _mm_shuffle_epi32(a, 0x1B);
-}
-template <>
-EIGEN_STRONG_INLINE Packet16b preverse(const Packet16b& a) {
-#ifdef EIGEN_VECTORIZE_SSSE3
-  __m128i mask = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-  return _mm_shuffle_epi8(a, mask);
-#else
-  Packet16b tmp = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3));
-  tmp = _mm_shufflehi_epi16(_mm_shufflelo_epi16(tmp, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1));
-  return _mm_or_si128(_mm_slli_epi16(tmp, 8), _mm_srli_epi16(tmp, 8));
-#endif
-}
-
-#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
-// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
-// Direct of the struct members fixed bug #62.
-template <>
-EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
-  return a.m128_f32[0];
-}
-template <>
-EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
-  return a.m128d_f64[0];
-}
-template <>
-EIGEN_STRONG_INLINE int64_t pfirst<Packet2l>(const Packet2l& a) {
-  int64_t x = _mm_extract_epi64_0(a);
-  return x;
-}
-template <>
-EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) {
-  int x = _mm_cvtsi128_si32(a);
-  return x;
-}
-template <>
-EIGEN_STRONG_INLINE uint32_t pfirst<Packet4ui>(const Packet4ui& a) {
-  uint32_t x = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(a));
-  return x;
-}
-#elif EIGEN_COMP_MSVC_STRICT
-// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
-template <>
-EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
-  float x = _mm_cvtss_f32(a);
-  return x;
-}
-template <>
-EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
-  double x = _mm_cvtsd_f64(a);
-  return x;
-}
-template <>
-EIGEN_STRONG_INLINE int64_t pfirst<Packet2l>(const Packet2l& a) {
-  int64_t x = _mm_extract_epi64_0(a);
-  return x;
-}
-template <>
-EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) {
-  int x = _mm_cvtsi128_si32(a);
-  return x;
-}
-template <>
-EIGEN_STRONG_INLINE uint32_t pfirst<Packet4ui>(const Packet4ui& a) {
-  uint32_t x = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(a));
-  return x;
-}
-#else
-template <>
-EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
-  return _mm_cvtss_f32(a);
-}
-template <>
-EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
-  return _mm_cvtsd_f64(a);
-}
-template <>
-EIGEN_STRONG_INLINE int64_t pfirst<Packet2l>(const Packet2l& a) {
-  return _mm_extract_epi64_0(a);
-}
-template <>
-EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) {
-  return _mm_cvtsi128_si32(a);
-}
-template <>
-EIGEN_STRONG_INLINE uint32_t pfirst<Packet4ui>(const Packet4ui& a) {
-  return numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(a));
-}
-#endif
-template <>
-EIGEN_STRONG_INLINE bool pfirst<Packet16b>(const Packet16b& a) {
-  int x = _mm_cvtsi128_si32(a);
-  return static_cast<bool>(x & 1);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet4f pgather<float, Packet4f>(const float* from, Index stride) {
+EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride) {
   return _mm_set_ps(from[3 * stride], from[2 * stride], from[1 * stride], from[0 * stride]);
 }
 template <>
-EIGEN_STRONG_INLINE Packet2d pgather<double, Packet2d>(const double* from, Index stride) {
+EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride) {
   return _mm_set_pd(from[1 * stride], from[0 * stride]);
 }
 template <>
-EIGEN_STRONG_INLINE Packet2l pgather<int64_t, Packet2l>(const int64_t* from, Index stride) {
-  return _mm_set_epi64x(from[1 * stride], from[0 * stride]);
-}
-template <>
-EIGEN_STRONG_INLINE Packet4i pgather<int, Packet4i>(const int* from, Index stride) {
+EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride) {
   return _mm_set_epi32(from[3 * stride], from[2 * stride], from[1 * stride], from[0 * stride]);
 }
 template <>
-EIGEN_STRONG_INLINE Packet4ui pgather<uint32_t, Packet4ui>(const uint32_t* from, Index stride) {
+EIGEN_DEVICE_FUNC inline Packet4ui pgather<uint32_t, Packet4ui>(const uint32_t* from, Index stride) {
   return _mm_set_epi32(numext::bit_cast<int32_t>(from[3 * stride]), numext::bit_cast<int32_t>(from[2 * stride]),
                        numext::bit_cast<int32_t>(from[1 * stride]), numext::bit_cast<int32_t>(from[0 * stride]));
 }
 
 template <>
-EIGEN_STRONG_INLINE Packet16b pgather<bool, Packet16b>(const bool* from, Index stride) {
+EIGEN_DEVICE_FUNC inline Packet16b pgather<bool, Packet16b>(const bool* from, Index stride) {
   return _mm_set_epi8(from[15 * stride], from[14 * stride], from[13 * stride], from[12 * stride], from[11 * stride],
                       from[10 * stride], from[9 * stride], from[8 * stride], from[7 * stride], from[6 * stride],
                       from[5 * stride], from[4 * stride], from[3 * stride], from[2 * stride], from[1 * stride],
@@ -1730,38 +1419,33 @@
 }
 
 template <>
-EIGEN_STRONG_INLINE void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride) {
-  to[stride * 0] = pfirst(from);
-  to[stride * 1] = pfirst(_mm_shuffle_ps(from, from, 1));
-  to[stride * 2] = pfirst(_mm_shuffle_ps(from, from, 2));
-  to[stride * 3] = pfirst(_mm_shuffle_ps(from, from, 3));
+EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride) {
+  to[stride * 0] = _mm_cvtss_f32(from);
+  to[stride * 1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
+  to[stride * 2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
+  to[stride * 3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
 }
 template <>
-EIGEN_STRONG_INLINE void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride) {
-  to[stride * 0] = pfirst(from);
-  to[stride * 1] = pfirst(preverse(from));
+EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride) {
+  to[stride * 0] = _mm_cvtsd_f64(from);
+  to[stride * 1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
 }
 template <>
-EIGEN_STRONG_INLINE void pscatter<int64_t, Packet2l>(int64_t* to, const Packet2l& from, Index stride) {
-  to[stride * 0] = pfirst(from);
-  to[stride * 1] = pfirst(preverse(from));
-}
-template <>
-EIGEN_STRONG_INLINE void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride) {
+EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride) {
   to[stride * 0] = _mm_cvtsi128_si32(from);
   to[stride * 1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
   to[stride * 2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
   to[stride * 3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
 }
 template <>
-EIGEN_STRONG_INLINE void pscatter<uint32_t, Packet4ui>(uint32_t* to, const Packet4ui& from, Index stride) {
+EIGEN_DEVICE_FUNC inline void pscatter<uint32_t, Packet4ui>(uint32_t* to, const Packet4ui& from, Index stride) {
   to[stride * 0] = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(from));
   to[stride * 1] = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1)));
   to[stride * 2] = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2)));
   to[stride * 3] = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3)));
 }
 template <>
-EIGEN_STRONG_INLINE void pscatter<bool, Packet16b>(bool* to, const Packet16b& from, Index stride) {
+EIGEN_DEVICE_FUNC inline void pscatter<bool, Packet16b>(bool* to, const Packet16b& from, Index stride) {
   to[4 * stride * 0] = _mm_cvtsi128_si32(from);
   to[4 * stride * 1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
   to[4 * stride * 2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
@@ -1801,15 +1485,106 @@
   _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0);
 }
 template <>
-EIGEN_STRONG_INLINE void prefetch<int64_t>(const int64_t* addr) {
-  _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0);
-}
-template <>
 EIGEN_STRONG_INLINE void prefetch<uint32_t>(const uint32_t* addr) {
   _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0);
 }
 #endif
 
+#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
+// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
+// Direct of the struct members fixed bug #62.
+template <>
+EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
+  return a.m128_f32[0];
+}
+template <>
+EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
+  return a.m128d_f64[0];
+}
+template <>
+EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) {
+  int x = _mm_cvtsi128_si32(a);
+  return x;
+}
+template <>
+EIGEN_STRONG_INLINE uint32_t pfirst<Packet4ui>(const Packet4ui& a) {
+  uint32_t x = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(a));
+  return x;
+}
+#elif EIGEN_COMP_MSVC_STRICT
+// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
+template <>
+EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
+  float x = _mm_cvtss_f32(a);
+  return x;
+}
+template <>
+EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
+  double x = _mm_cvtsd_f64(a);
+  return x;
+}
+template <>
+EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) {
+  int x = _mm_cvtsi128_si32(a);
+  return x;
+}
+template <>
+EIGEN_STRONG_INLINE uint32_t pfirst<Packet4ui>(const Packet4ui& a) {
+  uint32_t x = numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(a));
+  return x;
+}
+#else
+template <>
+EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
+  return _mm_cvtss_f32(a);
+}
+template <>
+EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
+  return _mm_cvtsd_f64(a);
+}
+template <>
+EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) {
+  return _mm_cvtsi128_si32(a);
+}
+template <>
+EIGEN_STRONG_INLINE uint32_t pfirst<Packet4ui>(const Packet4ui& a) {
+  return numext::bit_cast<uint32_t>(_mm_cvtsi128_si32(a));
+}
+#endif
+template <>
+EIGEN_STRONG_INLINE bool pfirst<Packet16b>(const Packet16b& a) {
+  int x = _mm_cvtsi128_si32(a);
+  return static_cast<bool>(x & 1);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
+  return _mm_shuffle_ps(a, a, 0x1B);
+}
+template <>
+EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) {
+  return _mm_shuffle_pd(a, a, 0x1);
+}
+template <>
+EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
+  return _mm_shuffle_epi32(a, 0x1B);
+}
+template <>
+EIGEN_STRONG_INLINE Packet4ui preverse(const Packet4ui& a) {
+  return _mm_shuffle_epi32(a, 0x1B);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16b preverse(const Packet16b& a) {
+#ifdef EIGEN_VECTORIZE_SSSE3
+  __m128i mask = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+  return _mm_shuffle_epi8(a, mask);
+#else
+  Packet16b tmp = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3));
+  tmp = _mm_shufflehi_epi16(_mm_shufflelo_epi16(tmp, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1));
+  return _mm_or_si128(_mm_slli_epi16(tmp, 8), _mm_srli_epi16(tmp, 8));
+#endif
+}
+
 template <>
 EIGEN_STRONG_INLINE Packet4f pfrexp<Packet4f>(const Packet4f& a, Packet4f& exponent) {
   return pfrexp_generic(a, exponent);
@@ -1835,7 +1610,6 @@
 
 // We specialize pldexp here, since the generic implementation uses Packet2l, which is not well
 // supported by SSE, and has more range than is needed for exponents.
-// TODO(rmlarsen): Remove this specialization once Packet2l has support or casting.
 template <>
 EIGEN_STRONG_INLINE Packet2d pldexp<Packet2d>(const Packet2d& a, const Packet2d& exponent) {
   // Clamp exponent to [-2099, 2099]
@@ -1916,11 +1690,6 @@
   // #endif
 }
 
-template <>
-EIGEN_STRONG_INLINE int64_t predux<Packet2l>(const Packet2l& a) {
-  return pfirst<Packet2l>(_mm_add_epi64(a, _mm_unpackhi_epi64(a, a)));
-}
-
 #ifdef EIGEN_VECTORIZE_SSSE3
 template <>
 EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a) {
@@ -1932,6 +1701,7 @@
   Packet4ui tmp0 = _mm_hadd_epi32(a, a);
   return pfirst<Packet4ui>(_mm_hadd_epi32(tmp0, tmp0));
 }
+
 #else
 template <>
 EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a) {
@@ -1964,15 +1734,9 @@
   return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a, a)));
 }
 template <>
-EIGEN_STRONG_INLINE int64_t predux_mul<Packet2l>(const Packet2l& a) {
-  EIGEN_ALIGN16 int64_t aux[2];
-  pstore(aux, a);
-  return aux[0] * aux[1];
-}
-template <>
 EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a) {
   // after some experiments, it is seems this is the fastest way to implement it
-  // for GCC (e.g., reusing pmul is very slow!)
+  // for GCC (eg., reusing pmul is very slow !)
   // TODO try to call _mm_mul_epu32 directly
   EIGEN_ALIGN16 int aux[4];
   pstore(aux, a);
@@ -2083,21 +1847,11 @@
 // }
 
 template <>
-EIGEN_STRONG_INLINE bool predux_any(const Packet2d& x) {
-  return _mm_movemask_pd(x) != 0x0;
-}
-
-template <>
 EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x) {
   return _mm_movemask_ps(x) != 0x0;
 }
 
 template <>
-EIGEN_STRONG_INLINE bool predux_any(const Packet2l& x) {
-  return _mm_movemask_pd(_mm_castsi128_pd(x)) != 0x0;
-}
-
-template <>
 EIGEN_STRONG_INLINE bool predux_any(const Packet4i& x) {
   return _mm_movemask_ps(_mm_castsi128_ps(x)) != 0x0;
 }
@@ -2106,23 +1860,17 @@
   return _mm_movemask_ps(_mm_castsi128_ps(x)) != 0x0;
 }
 
-EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4f, 4>& kernel) {
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f, 4>& kernel) {
   _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
 }
 
-EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2d, 2>& kernel) {
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2d, 2>& kernel) {
   __m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
   kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
   kernel.packet[1] = tmp;
 }
 
-EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2l, 2>& kernel) {
-  __m128i tmp = _mm_unpackhi_epi64(kernel.packet[0], kernel.packet[1]);
-  kernel.packet[0] = _mm_unpacklo_epi64(kernel.packet[0], kernel.packet[1]);
-  kernel.packet[1] = tmp;
-}
-
-EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4i, 4>& kernel) {
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4i, 4>& kernel) {
   __m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
   __m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
   __m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
@@ -2133,11 +1881,11 @@
   kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
   kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
 }
-EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4ui, 4>& kernel) {
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4ui, 4>& kernel) {
   ptranspose((PacketBlock<Packet4i, 4>&)kernel);
 }
 
-EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16b, 4>& kernel) {
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet16b, 4>& kernel) {
   __m128i T0 = _mm_unpacklo_epi8(kernel.packet[0], kernel.packet[1]);
   __m128i T1 = _mm_unpackhi_epi8(kernel.packet[0], kernel.packet[1]);
   __m128i T2 = _mm_unpacklo_epi8(kernel.packet[2], kernel.packet[3]);
@@ -2148,7 +1896,7 @@
   kernel.packet[3] = _mm_unpackhi_epi16(T1, T3);
 }
 
-EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16b, 16>& kernel) {
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet16b, 16>& kernel) {
   // If we number the elements in the input thus:
   // kernel.packet[ 0] = {00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 0a, 0b, 0c, 0d, 0e, 0f}
   // kernel.packet[ 1] = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f}
@@ -2235,18 +1983,6 @@
 }
 
 template <>
-EIGEN_STRONG_INLINE Packet2l pblend(const Selector<2>& ifPacket, const Packet2l& thenPacket,
-                                    const Packet2l& elsePacket) {
-  const __m128i zero = _mm_setzero_si128();
-  const __m128i select = _mm_set_epi64x(ifPacket.select[1], ifPacket.select[0]);
-  __m128i false_mask = pcmp_eq<Packet2l>(select, zero);
-#ifdef EIGEN_VECTORIZE_SSE4_1
-  return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
-#else
-  return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
-#endif
-}
-template <>
 EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket,
                                     const Packet4i& elsePacket) {
   const __m128i zero = _mm_setzero_si128();
@@ -2453,6 +2189,11 @@
     HasMax    = 0,
     HasConj   = 0,
     HasSetLinear = 0,
+    HasSqrt = 0,
+    HasRsqrt = 0,
+    HasExp = 0,
+    HasLog = 0,
+    HasBlend = 0
   };
 };
 
diff --git a/Eigen/src/Core/arch/SSE/TypeCasting.h b/Eigen/src/Core/arch/SSE/TypeCasting.h
index 9a7732a..cbc6d47 100644
--- a/Eigen/src/Core/arch/SSE/TypeCasting.h
+++ b/Eigen/src/Core/arch/SSE/TypeCasting.h
@@ -37,13 +37,6 @@
 struct type_casting_traits<double, int> : vectorized_type_casting_traits<double, int> {};
 template <>
 struct type_casting_traits<int, double> : vectorized_type_casting_traits<int, double> {};
-
-#ifndef EIGEN_VECTORIZE_AVX2
-template <>
-struct type_casting_traits<double, int64_t> : vectorized_type_casting_traits<double, int64_t> {};
-template <>
-struct type_casting_traits<int64_t, double> : vectorized_type_casting_traits<int64_t, double> {};
-#endif
 #endif
 
 template <>
@@ -87,22 +80,6 @@
 }
 
 template <>
-EIGEN_STRONG_INLINE Packet2l pcast<Packet2d, Packet2l>(const Packet2d& a) {
-#if EIGEN_ARCH_x86_64
-  return _mm_set_epi64x(_mm_cvttsd_si64(preverse(a)), _mm_cvttsd_si64(a));
-#else
-  return _mm_set_epi64x(static_cast<int64_t>(pfirst(preverse(a))), static_cast<int64_t>(pfirst(a)));
-#endif
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet2d pcast<Packet2l, Packet2d>(const Packet2l& a) {
-  EIGEN_ALIGN16 int64_t aux[2];
-  pstore(aux, a);
-  return _mm_set_pd(static_cast<double>(aux[1]), static_cast<double>(aux[0]));
-}
-
-template <>
 EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
   return _mm_cvtepi32_ps(a);
 }
@@ -150,15 +127,6 @@
 }
 
 template <>
-EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2l>(const Packet2l& a) {
-  return _mm_castsi128_pd(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2d>(const Packet2d& a) {
-  return _mm_castpd_si128(a);
-}
-
-template <>
 EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet2d>(const Packet2d& a) {
   return _mm_castpd_si128(a);
 }
@@ -172,7 +140,6 @@
 EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4ui>(const Packet4ui& a) {
   return Packet4i(a);
 }
-
 // Disable the following code since it's broken on too many platforms / compilers.
 // #elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)
 #if 0
diff --git a/Eigen/src/Core/arch/SVE/MathFunctions.h b/Eigen/src/Core/arch/SVE/MathFunctions.h
index 8c8ed84..b095275 100644
--- a/Eigen/src/Core/arch/SVE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SVE/MathFunctions.h
@@ -39,9 +39,8 @@
 // Hyperbolic Tangent function.
 template <>
 EIGEN_STRONG_INLINE PacketXf ptanh<PacketXf>(const PacketXf& x) {
-  return ptanh_float(x);
+  return internal::generic_fast_tanh_float(x);
 }
-
 }  // end namespace internal
 }  // end namespace Eigen
 
diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h
index 9b89747..e8bd17d 100644
--- a/Eigen/src/Core/arch/ZVector/Complex.h
+++ b/Eigen/src/Core/arch/ZVector/Complex.h
@@ -61,7 +61,6 @@
     HasMul = 1,
     HasDiv = 1,
     HasLog = 1,
-    HasExp = 1,
     HasNegate = 1,
     HasAbs = 0,
     HasAbs2 = 0,
@@ -437,11 +436,6 @@
   return plog_complex(a, b);
 }
 
-template <>
-EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
-  return pexp_complex(a, b);
-}
-
 EIGEN_STRONG_INLINE Packet2cf pcplxflip /*<Packet2cf>*/ (const Packet2cf& x) {
   Packet2cf res;
   res.cd[0] = pcplxflip(x.cd[0]);
diff --git a/Eigen/src/Core/arch/ZVector/MathFunctions.h b/Eigen/src/Core/arch/ZVector/MathFunctions.h
index 32e0425..5c55350 100644
--- a/Eigen/src/Core/arch/ZVector/MathFunctions.h
+++ b/Eigen/src/Core/arch/ZVector/MathFunctions.h
@@ -220,7 +220,7 @@
 // Hyperbolic Tangent function.
 template <>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f ptanh<Packet4f>(const Packet4f& x) {
-  return ptanh_float(x);
+  return internal::generic_fast_tanh_float(x);
 }
 
 }  // end namespace internal
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index 2f9b920..a3fc44c 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -286,10 +286,9 @@
 template <typename Scalar>
 struct scalar_real_ref_op {
   typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type& operator()(const Scalar& a) const {
-    return numext::real_ref(a);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type& operator()(const Scalar& a) const {
+    return numext::real_ref(*const_cast<Scalar*>(&a));
   }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type& operator()(Scalar& a) const { return numext::real_ref(a); }
 };
 template <typename Scalar>
 struct functor_traits<scalar_real_ref_op<Scalar>> {
@@ -304,9 +303,8 @@
 template <typename Scalar>
 struct scalar_imag_ref_op {
   typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type& operator()(Scalar& a) const { return numext::imag_ref(a); }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type& operator()(const Scalar& a) const {
-    return numext::imag_ref(a);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type& operator()(const Scalar& a) const {
+    return numext::imag_ref(*const_cast<Scalar*>(&a));
   }
 };
 template <typename Scalar>
@@ -1126,7 +1124,7 @@
 
 // TODO(rmlarsen): Enable the following on host when integer_packet is defined
 // for the relevant packet types.
-#ifndef EIGEN_GPUCC
+#ifdef EIGEN_GPU_CC
 
 /** \internal
  * \brief Template specialization of the logistic function for float.
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index e9d0cae..55fa5ff 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -69,7 +69,7 @@
     gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
     gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
 
-#if !defined(EIGEN_USE_BLAS) && (defined(EIGEN_HAS_OPENMP) || defined(EIGEN_GEMM_THREADPOOL))
+#if defined(EIGEN_HAS_OPENMP) || defined(EIGEN_GEMM_THREADPOOL)
     if (info) {
       // this is the parallel version!
       int tid = info->logical_thread_id;
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
index e138535..f569907 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
@@ -84,7 +84,7 @@
                                         const EIGTYPE* /*rhs*/, Index /*rhsStride*/, EIGTYPE* res, Index resStride, \
                                         EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) {           \
       /* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/                                    \
-      if (size == 0 || depth == 0) return;                                                                          \
+                                                                                                                    \
       BlasIndex lda = convert_index<BlasIndex>(lhsStride), ldc = convert_index<BlasIndex>(resStride),               \
                 n = convert_index<BlasIndex>(size), k = convert_index<BlasIndex>(depth);                            \
       char uplo = ((IsLower) ? 'L' : 'U'), trans = ((AStorageOrder == RowMajor) ? 'T' : 'N');                       \
@@ -107,7 +107,7 @@
                                         const EIGTYPE* /*rhs*/, Index /*rhsStride*/, EIGTYPE* res, Index resStride, \
                                         EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) {           \
       typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType;                                          \
-      if (size == 0 || depth == 0) return;                                                                          \
+                                                                                                                    \
       BlasIndex lda = convert_index<BlasIndex>(lhsStride), ldc = convert_index<BlasIndex>(resStride),               \
                 n = convert_index<BlasIndex>(size), k = convert_index<BlasIndex>(depth);                            \
       char uplo = ((IsLower) ? 'L' : 'U'), trans = ((AStorageOrder == RowMajor) ? 'C' : 'N');                       \
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h b/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
index 56743da..af64fd2 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
@@ -59,7 +59,7 @@
                     Index rhsStride, EIGTYPE* res, Index resIncr, Index resStride, EIGTYPE alpha,                   \
                     level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/, GemmParallelInfo<Index>* /*info = 0*/) {       \
       using std::conj;                                                                                              \
-      if (rows == 0 || cols == 0 || depth == 0) return;                                                             \
+                                                                                                                    \
       EIGEN_ONLY_USED_FOR_DEBUG(resIncr);                                                                           \
       eigen_assert(resIncr == 1);                                                                                   \
       char transa, transb;                                                                                          \
diff --git a/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h b/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
index 4010a0a..556c6ac 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
@@ -95,7 +95,6 @@
                                                                                                                     \
     static void run(Index rows, Index cols, const EIGTYPE* lhs, Index lhsStride, const EIGTYPE* rhs, Index rhsIncr, \
                     EIGTYPE* res, Index resIncr, EIGTYPE alpha) {                                                   \
-      if (rows == 0 || cols == 0) return;                                                                           \
       BlasIndex m = convert_index<BlasIndex>(rows), n = convert_index<BlasIndex>(cols),                             \
                 lda = convert_index<BlasIndex>(lhsStride), incx = convert_index<BlasIndex>(rhsIncr),                \
                 incy = convert_index<BlasIndex>(resIncr);                                                           \
@@ -112,9 +111,8 @@
         x_tmp = map_x.conjugate();                                                                                  \
         x_ptr = x_tmp.data();                                                                                       \
         incx = 1;                                                                                                   \
-      } else {                                                                                                      \
+      } else                                                                                                        \
         x_ptr = rhs;                                                                                                \
-      }                                                                                                             \
       BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda,               \
                (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy);     \
     }                                                                                                               \
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
index c0dbfd1..25daba6 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
@@ -49,7 +49,6 @@
     static void run(Index rows, Index cols, const EIGTYPE* _lhs, Index lhsStride, const EIGTYPE* _rhs,           \
                     Index rhsStride, EIGTYPE* res, Index resIncr, Index resStride, EIGTYPE alpha,                \
                     level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) {                                           \
-      if (rows == 0 || cols == 0) return;                                                                        \
       EIGEN_ONLY_USED_FOR_DEBUG(resIncr);                                                                        \
       eigen_assert(resIncr == 1);                                                                                \
       char side = 'L', uplo = 'L';                                                                               \
@@ -92,7 +91,6 @@
     static void run(Index rows, Index cols, const EIGTYPE* _lhs, Index lhsStride, const EIGTYPE* _rhs,             \
                     Index rhsStride, EIGTYPE* res, Index resIncr, Index resStride, EIGTYPE alpha,                  \
                     level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) {                                             \
-      if (rows == 0 || cols == 0) return;                                                                          \
       EIGEN_ONLY_USED_FOR_DEBUG(resIncr);                                                                          \
       eigen_assert(resIncr == 1);                                                                                  \
       char side = 'L', uplo = 'L';                                                                                 \
@@ -166,7 +164,6 @@
     static void run(Index rows, Index cols, const EIGTYPE* _lhs, Index lhsStride, const EIGTYPE* _rhs,           \
                     Index rhsStride, EIGTYPE* res, Index resIncr, Index resStride, EIGTYPE alpha,                \
                     level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) {                                           \
-      if (rows == 0 || cols == 0) return;                                                                        \
       EIGEN_ONLY_USED_FOR_DEBUG(resIncr);                                                                        \
       eigen_assert(resIncr == 1);                                                                                \
       char side = 'R', uplo = 'L';                                                                               \
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h b/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
index 187c911..c3311da 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
@@ -78,7 +78,6 @@
                                                                                                                    \
     static void run(Index size, const EIGTYPE* lhs, Index lhsStride, const EIGTYPE* _rhs, EIGTYPE* res,            \
                     EIGTYPE alpha) {                                                                               \
-      if (size == 0) return;                                                                                       \
       enum { IsRowMajor = StorageOrder == RowMajor ? 1 : 0, IsLower = UpLo == Lower ? 1 : 0 };                     \
       BlasIndex n = convert_index<BlasIndex>(size), lda = convert_index<BlasIndex>(lhsStride), incx = 1, incy = 1; \
       EIGTYPE beta(1);                                                                                             \
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h b/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
index 3d612b0..78e48ad 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
@@ -90,7 +90,6 @@
     static void run(Index _rows, Index _cols, Index _depth, const EIGTYPE* _lhs, Index lhsStride, const EIGTYPE* _rhs, \
                     Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha,                                     \
                     level3_blocking<EIGTYPE, EIGTYPE>& blocking) {                                                     \
-      if (_rows == 0 || _cols == 0 || _depth == 0) return;                                                             \
       Index diagSize = (std::min)(_rows, _depth);                                                                      \
       Index rows = IsLower ? _rows : diagSize;                                                                         \
       Index depth = IsLower ? diagSize : _depth;                                                                       \
@@ -212,7 +211,6 @@
     static void run(Index _rows, Index _cols, Index _depth, const EIGTYPE* _lhs, Index lhsStride, const EIGTYPE* _rhs, \
                     Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha,                                     \
                     level3_blocking<EIGTYPE, EIGTYPE>& blocking) {                                                     \
-      if (_rows == 0 || _cols == 0 || _depth == 0) return;                                                             \
       Index diagSize = (std::min)(_cols, _depth);                                                                      \
       Index rows = _rows;                                                                                              \
       Index depth = IsLower ? _depth : diagSize;                                                                       \
diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h
index 05a5827..413f0ee 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -287,39 +287,21 @@
 
     constexpr bool DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime == 1;
 
-    const RhsScalar* actualRhsPtr = actualRhs.data();
-
-    // Potentially create a temporary buffer to copy RHS to contiguous memory.
     gemv_static_vector_if<RhsScalar, ActualRhsTypeCleaned::SizeAtCompileTime,
                           ActualRhsTypeCleaned::MaxSizeAtCompileTime, !DirectlyUseRhs>
-        static_rhs;  // Fixed-sized array.
-    RhsScalar* buffer = nullptr;
+        static_rhs;
+
+    ei_declare_aligned_stack_constructed_variable(
+        RhsScalar, actualRhsPtr, actualRhs.size(),
+        DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
+
     if (!DirectlyUseRhs) {
-      // Maybe used fixed-sized buffer, otherwise allocate.
-      if (static_rhs.data() != nullptr) {
-        buffer = static_rhs.data();
-      } else {
-        // Allocate either with alloca or malloc.
-        Eigen::internal::check_size_for_overflow<RhsScalar>(actualRhs.size());
-#ifdef EIGEN_ALLOCA
-        buffer = static_cast<RhsScalar*>((sizeof(RhsScalar) * actualRhs.size() <= EIGEN_STACK_ALLOCATION_LIMIT)
-                                             ? EIGEN_ALIGNED_ALLOCA(sizeof(RhsScalar) * actualRhs.size())
-                                             : Eigen::internal::aligned_malloc(sizeof(RhsScalar) * actualRhs.size()));
-#else
-        buffer = static_cast<RhsScalar*>(Eigen::internal::aligned_malloc(sizeof(RhsScalar) * actualRhs.size()));
-#endif
-      }
 #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
       Index size = actualRhs.size();
       EIGEN_DENSE_STORAGE_CTOR_PLUGIN
 #endif
-      Map<typename ActualRhsTypeCleaned::PlainObject, Eigen::AlignedMax>(buffer, actualRhs.size()) = actualRhs;
-      actualRhsPtr = buffer;
+      Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
     }
-    // Deallocate only if malloced.
-    Eigen::internal::aligned_stack_memory_handler<RhsScalar> buffer_stack_memory_destructor(
-        buffer, actualRhs.size(),
-        !DirectlyUseRhs && static_rhs.data() == nullptr && actualRhs.size() > EIGEN_STACK_ALLOCATION_LIMIT);
 
     internal::triangular_matrix_vector_product<Index, Mode, LhsScalar, LhsBlasTraits::NeedToConjugate, RhsScalar,
                                                RhsBlasTraits::NeedToConjugate, RowMajor>::run(actualLhs.rows(),
diff --git a/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h b/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
index 1de6880..0c1d56b 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
@@ -87,7 +87,6 @@
     };                                                                                                               \
     static void run(Index rows_, Index cols_, const EIGTYPE* lhs_, Index lhsStride, const EIGTYPE* rhs_,             \
                     Index rhsIncr, EIGTYPE* res_, Index resIncr, EIGTYPE alpha) {                                    \
-      if (rows_ == 0 || cols_ == 0) return;                                                                          \
       if (ConjLhs || IsZeroDiag) {                                                                                   \
         triangular_matrix_vector_product<Index, Mode, EIGTYPE, ConjLhs, EIGTYPE, ConjRhs, ColMajor, BuiltIn>::run(   \
             rows_, cols_, lhs_, lhsStride, rhs_, rhsIncr, res_, resIncr, alpha);                                     \
@@ -184,7 +183,6 @@
     };                                                                                                               \
     static void run(Index rows_, Index cols_, const EIGTYPE* lhs_, Index lhsStride, const EIGTYPE* rhs_,             \
                     Index rhsIncr, EIGTYPE* res_, Index resIncr, EIGTYPE alpha) {                                    \
-      if (rows_ == 0 || cols_ == 0) return;                                                                          \
       if (IsZeroDiag) {                                                                                              \
         triangular_matrix_vector_product<Index, Mode, EIGTYPE, ConjLhs, EIGTYPE, ConjRhs, RowMajor, BuiltIn>::run(   \
             rows_, cols_, lhs_, lhsStride, rhs_, rhsIncr, res_, resIncr, alpha);                                     \
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h b/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
index 9cc15fb..ce8fcb9 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
@@ -52,7 +52,6 @@
     };                                                                                                              \
     static void run(Index size, Index otherSize, const EIGTYPE* _tri, Index triStride, EIGTYPE* _other,             \
                     Index otherIncr, Index otherStride, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) {          \
-      if (size == 0 || otherSize == 0) return;                                                                      \
       EIGEN_ONLY_USED_FOR_DEBUG(otherIncr);                                                                         \
       eigen_assert(otherIncr == 1);                                                                                 \
       BlasIndex m = convert_index<BlasIndex>(size), n = convert_index<BlasIndex>(otherSize), lda, ldb;              \
@@ -111,7 +110,6 @@
     };                                                                                                              \
     static void run(Index size, Index otherSize, const EIGTYPE* _tri, Index triStride, EIGTYPE* _other,             \
                     Index otherIncr, Index otherStride, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) {          \
-      if (size == 0 || otherSize == 0) return;                                                                      \
       EIGEN_ONLY_USED_FOR_DEBUG(otherIncr);                                                                         \
       eigen_assert(otherIncr == 1);                                                                                 \
       BlasIndex m = convert_index<BlasIndex>(otherSize), n = convert_index<BlasIndex>(size), lda, ldb;              \
diff --git a/Eigen/src/Core/util/ConfigureVectorization.h b/Eigen/src/Core/util/ConfigureVectorization.h
index 1c72173..e692438 100644
--- a/Eigen/src/Core/util/ConfigureVectorization.h
+++ b/Eigen/src/Core/util/ConfigureVectorization.h
@@ -266,9 +266,6 @@
 #ifdef __AVX512BF16__
 #define EIGEN_VECTORIZE_AVX512BF16
 #endif
-#ifdef __AVX512VL__
-#define EIGEN_VECTORIZE_AVX512VL
-#endif
 #ifdef __AVX512FP16__
 #ifdef __AVX512VL__
 #define EIGEN_VECTORIZE_AVX512FP16
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index 9f4a2d8..8b06c67 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -29,9 +29,9 @@
  */
 const int DynamicIndex = 0xffffff;
 
-/** This value means that the requested value is not defined.
+/** This value means that the increment to go from one value to another in a sequence is not constant for each step.
  */
-const int Undefined = 0xfffffe;
+const int UndefinedIncr = 0xfffffe;
 
 /** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm<int>().
  * The value Infinity there means the L-infinity norm.
diff --git a/Eigen/src/Core/util/EmulateArray.h b/Eigen/src/Core/util/EmulateArray.h
index f2fd10b..2b11552 100644
--- a/Eigen/src/Core/util/EmulateArray.h
+++ b/Eigen/src/Core/util/EmulateArray.h
@@ -27,14 +27,16 @@
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE iterator end() { return values + n; }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const_iterator end() const { return values + n; }
 
+#if !defined(EIGEN_GPUCC)
   typedef std::reverse_iterator<iterator> reverse_iterator;
   typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
 
-  EIGEN_STRONG_INLINE reverse_iterator rbegin() { return reverse_iterator(end()); }
-  EIGEN_STRONG_INLINE const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE reverse_iterator rbegin() { return reverse_iterator(end()); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
 
-  EIGEN_STRONG_INLINE reverse_iterator rend() { return reverse_iterator(begin()); }
-  EIGEN_STRONG_INLINE const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE reverse_iterator rend() { return reverse_iterator(begin()); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
+#endif
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& operator[](size_t index) {
     eigen_internal_assert(index < size());
@@ -202,19 +204,19 @@
 
 template <class T, std::size_t N>
 struct array_size<array<T, N> > {
-  static constexpr Index value = N;
+  enum { value = N };
 };
 template <class T, std::size_t N>
 struct array_size<array<T, N>&> {
-  static constexpr Index value = N;
+  enum { value = N };
 };
 template <class T, std::size_t N>
 struct array_size<const array<T, N> > {
-  static constexpr Index value = N;
+  enum { value = N };
 };
 template <class T, std::size_t N>
 struct array_size<const array<T, N>&> {
-  static constexpr Index value = N;
+  enum { value = N };
 };
 
 }  // end namespace internal
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index 2f2ba9b..c312939 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -91,8 +91,6 @@
 class IndexedView;
 template <typename XprType, int Rows = Dynamic, int Cols = Dynamic, int Order = 0>
 class Reshaped;
-template <typename FirstType, typename SizeType, typename IncrType>
-class ArithmeticSequence;
 
 template <typename MatrixType, int Size = Dynamic>
 class VectorBlock;
diff --git a/Eigen/src/Core/util/IndexedViewHelper.h b/Eigen/src/Core/util/IndexedViewHelper.h
index c187002..3b45108 100644
--- a/Eigen/src/Core/util/IndexedViewHelper.h
+++ b/Eigen/src/Core/util/IndexedViewHelper.h
@@ -17,9 +17,6 @@
 
 namespace internal {
 struct symbolic_last_tag {};
-
-struct all_t {};
-
 }  // namespace internal
 
 namespace placeholders {
@@ -45,7 +42,126 @@
  *
  * \sa end
  */
-static constexpr const last_t last;
+static const last_t last;
+
+}  // namespace placeholders
+
+namespace internal {
+
+// Replace symbolic last/end "keywords" by their true runtime value
+inline Index eval_expr_given_size(Index x, Index /* size */) { return x; }
+
+template <int N>
+FixedInt<N> eval_expr_given_size(FixedInt<N> x, Index /*size*/) {
+  return x;
+}
+
+template <typename Derived>
+Index eval_expr_given_size(const symbolic::BaseExpr<Derived>& x, Index size) {
+  return x.derived().eval(Eigen::placeholders::last = size - 1);
+}
+
+// Extract increment/step at compile time
+template <typename T, typename EnableIf = void>
+struct get_compile_time_incr {
+  enum { value = UndefinedIncr };
+};
+
+// Analogue of std::get<0>(x), but tailored for our needs.
+template <typename T>
+EIGEN_CONSTEXPR Index first(const T& x) EIGEN_NOEXCEPT {
+  return x.first();
+}
+
+// IndexedViewCompatibleType/makeIndexedViewCompatible turn an arbitrary object of type T into something usable by
+// MatrixSlice The generic implementation is a no-op
+template <typename T, int XprSize, typename EnableIf = void>
+struct IndexedViewCompatibleType {
+  typedef T type;
+};
+
+template <typename T, typename Q>
+const T& makeIndexedViewCompatible(const T& x, Index /*size*/, Q) {
+  return x;
+}
+
+//--------------------------------------------------------------------------------
+// Handling of a single Index
+//--------------------------------------------------------------------------------
+
+struct SingleRange {
+  enum { SizeAtCompileTime = 1 };
+  SingleRange(Index val) : m_value(val) {}
+  Index operator[](Index) const { return m_value; }
+  static EIGEN_CONSTEXPR Index size() EIGEN_NOEXCEPT { return 1; }
+  Index first() const EIGEN_NOEXCEPT { return m_value; }
+  Index m_value;
+};
+
+template <>
+struct get_compile_time_incr<SingleRange> {
+  enum { value = 1 };  // 1 or 0 ??
+};
+
+// Turn a single index into something that looks like an array (i.e., that exposes a .size(), and operator[](int)
+// methods)
+template <typename T, int XprSize>
+struct IndexedViewCompatibleType<T, XprSize, std::enable_if_t<internal::is_integral<T>::value>> {
+  // Here we could simply use Array, but maybe it's less work for the compiler to use
+  // a simpler wrapper as SingleRange
+  // typedef Eigen::Array<Index,1,1> type;
+  typedef SingleRange type;
+};
+
+template <typename T, int XprSize>
+struct IndexedViewCompatibleType<T, XprSize, std::enable_if_t<symbolic::is_symbolic<T>::value>> {
+  typedef SingleRange type;
+};
+
+template <typename T>
+std::enable_if_t<symbolic::is_symbolic<T>::value, SingleRange> makeIndexedViewCompatible(const T& id, Index size,
+                                                                                         SpecializedType) {
+  return eval_expr_given_size(id, size);
+}
+
+//--------------------------------------------------------------------------------
+// Handling of all
+//--------------------------------------------------------------------------------
+
+struct all_t {
+  all_t() {}
+};
+
+// Convert a symbolic 'all' into a usable range type
+template <int XprSize>
+struct AllRange {
+  enum { SizeAtCompileTime = XprSize };
+  AllRange(Index size = XprSize) : m_size(size) {}
+  EIGEN_CONSTEXPR Index operator[](Index i) const EIGEN_NOEXCEPT { return i; }
+  EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_size.value(); }
+  EIGEN_CONSTEXPR Index first() const EIGEN_NOEXCEPT { return 0; }
+  variable_if_dynamic<Index, XprSize> m_size;
+};
+
+template <int XprSize>
+struct IndexedViewCompatibleType<all_t, XprSize> {
+  typedef AllRange<XprSize> type;
+};
+
+template <typename XprSizeType>
+inline AllRange<get_fixed_value<XprSizeType>::value> makeIndexedViewCompatible(all_t, XprSizeType size,
+                                                                               SpecializedType) {
+  return AllRange<get_fixed_value<XprSizeType>::value>(size);
+}
+
+template <int Size>
+struct get_compile_time_incr<AllRange<Size>> {
+  enum { value = 1 };
+};
+
+}  // end namespace internal
+
+namespace placeholders {
 
 typedef symbolic::AddExpr<symbolic::SymbolExpr<internal::symbolic_last_tag>,
                           symbolic::ValueExpr<Eigen::internal::FixedInt<1>>>
@@ -65,251 +181,28 @@
  * \sa last
  */
 #ifdef EIGEN_PARSED_BY_DOXYGEN
-static constexpr auto lastp1 = last + fix<1>;
+static const auto lastp1 = last + fix<1>;
 #else
 // Using a FixedExpr<1> expression is important here to make sure the compiler
 // can fully optimize the computation starting indices with zero overhead.
-static constexpr lastp1_t lastp1(last + fix<1>());
+static const lastp1_t lastp1(last + fix<1>());
 #endif
 
 /** \var end
  * \ingroup Core_Module
  * \sa lastp1
  */
-static constexpr lastp1_t end = lastp1;
+static const lastp1_t end = lastp1;
 
 /** \var all
  * \ingroup Core_Module
  * Can be used as a parameter to DenseBase::operator()(const RowIndices&, const ColIndices&) to index all rows or
  * columns
  */
-static constexpr Eigen::internal::all_t all;
+static const Eigen::internal::all_t all;
 
 }  // namespace placeholders
 
-namespace internal {
-
-// Evaluate a symbolic expression or constant given the "size" of an object, allowing
-// any symbols like `last` to be evaluated.  The default here assumes a dynamic constant.
-template <typename Expr, int SizeAtCompileTime, typename EnableIf = void>
-struct SymbolicExpressionEvaluator {
-  static constexpr Index ValueAtCompileTime = Undefined;
-  static Index eval(const Expr& expr, Index /*size*/) { return static_cast<Index>(expr); }
-};
-
-// Symbolic expression with size known at compile-time.
-template <typename Expr, int SizeAtCompileTime>
-struct SymbolicExpressionEvaluator<Expr, SizeAtCompileTime, std::enable_if_t<symbolic::is_symbolic<Expr>::value>> {
-  static constexpr Index ValueAtCompileTime =
-      Expr::Derived::eval_at_compile_time(Eigen::placeholders::last = fix<SizeAtCompileTime - 1>);
-  static Index eval(const Expr& expr, Index /*size*/) {
-    return expr.eval(Eigen::placeholders::last = fix<SizeAtCompileTime - 1>);
-  }
-};
-
-// Symbolic expression with dynamic size.
-template <typename Expr>
-struct SymbolicExpressionEvaluator<Expr, Dynamic, std::enable_if_t<symbolic::is_symbolic<Expr>::value>> {
-  static constexpr Index ValueAtCompileTime = Undefined;
-  static Index eval(const Expr& expr, Index size) { return expr.eval(Eigen::placeholders::last = size - 1); }
-};
-
-// Fixed int.
-template <int N, int SizeAtCompileTime>
-struct SymbolicExpressionEvaluator<FixedInt<N>, SizeAtCompileTime, void> {
-  static constexpr Index ValueAtCompileTime = static_cast<Index>(N);
-  static Index eval(const FixedInt<N>& /*expr*/, Index /*size*/) { return ValueAtCompileTime; }
-};
-
-//--------------------------------------------------------------------------------
-// Handling of generic indices (e.g. array)
-//--------------------------------------------------------------------------------
-
-// Potentially wrap indices in a type that is better-suited for IndexedView evaluation.
-template <typename Indices, int NestedSizeAtCompileTime, typename EnableIf = void>
-struct IndexedViewHelperIndicesWrapper {
-  using type = Indices;
-  static const type& CreateIndexSequence(const Indices& indices, Index /*nested_size*/) { return indices; }
-};
-
-// Extract compile-time and runtime first, size, increments.
-template <typename Indices, typename EnableIf = void>
-struct IndexedViewHelper {
-  static constexpr Index FirstAtCompileTime = Undefined;
-  static constexpr Index SizeAtCompileTime = array_size<Indices>::value;
-  static constexpr Index IncrAtCompileTime = Undefined;
-
-  static constexpr Index first(const Indices& indices) { return static_cast<Index>(indices[0]); }
-  static constexpr Index size(const Indices& indices) { return index_list_size(indices); }
-  static constexpr Index incr(const Indices& /*indices*/) { return Undefined; }
-};
-
-//--------------------------------------------------------------------------------
-// Handling of ArithmeticSequence
-//--------------------------------------------------------------------------------
-
-template <Index FirstAtCompileTime_, Index SizeAtCompileTime_, Index IncrAtCompileTime_>
-class ArithmeticSequenceRange {
- public:
-  static constexpr Index FirstAtCompileTime = FirstAtCompileTime_;
-  static constexpr Index SizeAtCompileTime = SizeAtCompileTime_;
-  static constexpr Index IncrAtCompileTime = IncrAtCompileTime_;
-
-  constexpr ArithmeticSequenceRange(Index first, Index size, Index incr) : first_{first}, size_{size}, incr_{incr} {}
-  constexpr Index operator[](Index i) const { return first() + i * incr(); }
-  constexpr Index first() const noexcept { return first_.value(); }
-  constexpr Index size() const noexcept { return size_.value(); }
-  constexpr Index incr() const noexcept { return incr_.value(); }
-
- private:
-  variable_if_dynamicindex<Index, int(FirstAtCompileTime)> first_;
-  variable_if_dynamic<Index, int(SizeAtCompileTime)> size_;
-  variable_if_dynamicindex<Index, int(IncrAtCompileTime)> incr_;
-};
-
-template <typename FirstType, typename SizeType, typename IncrType, int NestedSizeAtCompileTime>
-struct IndexedViewHelperIndicesWrapper<ArithmeticSequence<FirstType, SizeType, IncrType>, NestedSizeAtCompileTime,
-                                       void> {
-  static constexpr Index EvalFirstAtCompileTime =
-      SymbolicExpressionEvaluator<FirstType, NestedSizeAtCompileTime>::ValueAtCompileTime;
-  static constexpr Index EvalSizeAtCompileTime =
-      SymbolicExpressionEvaluator<SizeType, NestedSizeAtCompileTime>::ValueAtCompileTime;
-  static constexpr Index EvalIncrAtCompileTime =
-      SymbolicExpressionEvaluator<IncrType, NestedSizeAtCompileTime>::ValueAtCompileTime;
-
-  static constexpr Index FirstAtCompileTime =
-      (int(EvalFirstAtCompileTime) == Undefined) ? Index(DynamicIndex) : EvalFirstAtCompileTime;
-  static constexpr Index SizeAtCompileTime =
-      (int(EvalSizeAtCompileTime) == Undefined) ? Index(Dynamic) : EvalSizeAtCompileTime;
-  static constexpr Index IncrAtCompileTime =
-      (int(EvalIncrAtCompileTime) == Undefined) ? Index(DynamicIndex) : EvalIncrAtCompileTime;
-
-  using Indices = ArithmeticSequence<FirstType, SizeType, IncrType>;
-  using type = ArithmeticSequenceRange<FirstAtCompileTime, SizeAtCompileTime, IncrAtCompileTime>;
-
-  static type CreateIndexSequence(const Indices& indices, Index nested_size) {
-    Index first =
-        SymbolicExpressionEvaluator<FirstType, NestedSizeAtCompileTime>::eval(indices.firstObject(), nested_size);
-    Index size =
-        SymbolicExpressionEvaluator<SizeType, NestedSizeAtCompileTime>::eval(indices.sizeObject(), nested_size);
-    Index incr =
-        SymbolicExpressionEvaluator<IncrType, NestedSizeAtCompileTime>::eval(indices.incrObject(), nested_size);
-    return type(first, size, incr);
-  }
-};
-
-template <Index FirstAtCompileTime_, Index SizeAtCompileTime_, Index IncrAtCompileTime_>
-struct IndexedViewHelper<ArithmeticSequenceRange<FirstAtCompileTime_, SizeAtCompileTime_, IncrAtCompileTime_>, void> {
- public:
-  using Indices = ArithmeticSequenceRange<FirstAtCompileTime_, SizeAtCompileTime_, IncrAtCompileTime_>;
-  static constexpr Index FirstAtCompileTime = Indices::FirstAtCompileTime;
-  static constexpr Index SizeAtCompileTime = Indices::SizeAtCompileTime;
-  static constexpr Index IncrAtCompileTime = Indices::IncrAtCompileTime;
-  static Index first(const Indices& indices) { return indices.first(); }
-  static Index size(const Indices& indices) { return indices.size(); }
-  static Index incr(const Indices& indices) { return indices.incr(); }
-};
-
-//--------------------------------------------------------------------------------
-// Handling of a single index.
-//--------------------------------------------------------------------------------
-
-template <Index ValueAtCompileTime>
-class SingleRange {
- public:
-  static constexpr Index FirstAtCompileTime = ValueAtCompileTime;
-  static constexpr Index SizeAtCompileTime = Index(1);
-  static constexpr Index IncrAtCompileTime = Index(1);  // Needs to be 1 to be treated as block-like.
-
-  constexpr SingleRange(Index v) noexcept : value_(v) {}
-  constexpr Index operator[](Index) const noexcept { return first(); }
-  constexpr Index first() const noexcept { return value_.value(); }
-  constexpr Index size() const noexcept { return SizeAtCompileTime; }
-  constexpr Index incr() const noexcept { return IncrAtCompileTime; }
-
- private:
-  variable_if_dynamicindex<Index, int(ValueAtCompileTime)> value_;
-};
-
-template <typename T>
-struct is_single_range : public std::false_type {};
-
-template <Index ValueAtCompileTime>
-struct is_single_range<SingleRange<ValueAtCompileTime>> : public std::true_type {};
-
-template <typename SingleIndex, int NestedSizeAtCompileTime>
-struct IndexedViewHelperIndicesWrapper<
-    SingleIndex, NestedSizeAtCompileTime,
-    std::enable_if_t<std::is_integral<SingleIndex>::value || symbolic::is_symbolic<SingleIndex>::value>> {
-  static constexpr Index EvalValueAtCompileTime =
-      SymbolicExpressionEvaluator<SingleIndex, NestedSizeAtCompileTime>::ValueAtCompileTime;
-  static constexpr Index ValueAtCompileTime =
-      (int(EvalValueAtCompileTime) == Undefined) ? Index(DynamicIndex) : EvalValueAtCompileTime;
-  using type = SingleRange<ValueAtCompileTime>;
-  static type CreateIndexSequence(const SingleIndex& index, Index nested_size) {
-    return type(SymbolicExpressionEvaluator<SingleIndex, NestedSizeAtCompileTime>::eval(index, nested_size));
-  }
-};
-
-template <int N, int NestedSizeAtCompileTime>
-struct IndexedViewHelperIndicesWrapper<FixedInt<N>, NestedSizeAtCompileTime, void> {
-  using type = SingleRange<Index(N)>;
-  static type CreateIndexSequence(const FixedInt<N>& /*index*/) { return type(Index(N)); }
-};
-
-template <Index ValueAtCompileTime>
-struct IndexedViewHelper<SingleRange<ValueAtCompileTime>, void> {
-  using Indices = SingleRange<ValueAtCompileTime>;
-  static constexpr Index FirstAtCompileTime = Indices::FirstAtCompileTime;
-  static constexpr Index SizeAtCompileTime = Indices::SizeAtCompileTime;
-  static constexpr Index IncrAtCompileTime = Indices::IncrAtCompileTime;
-
-  static constexpr Index first(const Indices& indices) { return indices.first(); }
-  static constexpr Index size(const Indices& /*indices*/) { return SizeAtCompileTime; }
-  static constexpr Index incr(const Indices& /*indices*/) { return IncrAtCompileTime; }
-};
-
-//--------------------------------------------------------------------------------
-// Handling of all
-//--------------------------------------------------------------------------------
-
-// Convert a symbolic 'all' into a usable range type
-template <Index SizeAtCompileTime_>
-class AllRange {
- public:
-  static constexpr Index FirstAtCompileTime = Index(0);
-  static constexpr Index SizeAtCompileTime = SizeAtCompileTime_;
-  static constexpr Index IncrAtCompileTime = Index(1);
-  constexpr AllRange(Index size) : size_(size) {}
-  constexpr Index operator[](Index i) const noexcept { return i; }
-  constexpr Index first() const noexcept { return FirstAtCompileTime; }
-  constexpr Index size() const noexcept { return size_.value(); }
-  constexpr Index incr() const noexcept { return IncrAtCompileTime; }
-
- private:
-  variable_if_dynamic<Index, int(SizeAtCompileTime)> size_;
-};
-
-template <int NestedSizeAtCompileTime>
-struct IndexedViewHelperIndicesWrapper<all_t, NestedSizeAtCompileTime, void> {
-  using type = AllRange<Index(NestedSizeAtCompileTime)>;
-  static type CreateIndexSequence(const all_t& /*indices*/, Index nested_size) { return type(nested_size); }
-};
-
-template <Index SizeAtCompileTime_>
-struct IndexedViewHelper<AllRange<SizeAtCompileTime_>, void> {
-  using Indices = AllRange<SizeAtCompileTime_>;
-  static constexpr Index FirstAtCompileTime = Indices::FirstAtCompileTime;
-  static constexpr Index SizeAtCompileTime = Indices::SizeAtCompileTime;
-  static constexpr Index IncrAtCompileTime = Indices::IncrAtCompileTime;
-
-  static Index first(const Indices& indices) { return indices.first(); }
-  static Index size(const Indices& indices) { return indices.size(); }
-  static Index incr(const Indices& indices) { return indices.incr(); }
-};
-
-}  // end namespace internal
-
 }  // end namespace Eigen
 
 #endif  // EIGEN_INDEXED_VIEW_HELPER_H
diff --git a/Eigen/src/Core/util/IntegralConstant.h b/Eigen/src/Core/util/IntegralConstant.h
index 2eb5fd9..279d553 100644
--- a/Eigen/src/Core/util/IntegralConstant.h
+++ b/Eigen/src/Core/util/IntegralConstant.h
@@ -54,60 +54,65 @@
 template <int N>
 class FixedInt {
  public:
-  static constexpr int value = N;
-  constexpr operator int() const { return N; }
+  static const int value = N;
+  EIGEN_CONSTEXPR operator int() const { return value; }
 
-  constexpr FixedInt() = default;
-  constexpr FixedInt(std::integral_constant<int, N>) {}
+  EIGEN_CONSTEXPR
+  FixedInt() = default;
 
-  constexpr FixedInt(VariableAndFixedInt<N> other) {
+  EIGEN_CONSTEXPR
+  FixedInt(std::integral_constant<int, N>) {}
+
+  EIGEN_CONSTEXPR
+  FixedInt(VariableAndFixedInt<N> other) {
 #ifndef EIGEN_INTERNAL_DEBUGGING
     EIGEN_UNUSED_VARIABLE(other);
 #endif
     eigen_internal_assert(int(other) == N);
   }
 
-  constexpr FixedInt<-N> operator-() const { return FixedInt<-N>(); }
+  EIGEN_CONSTEXPR
+  FixedInt<-N> operator-() const { return FixedInt<-N>(); }
 
   template <int M>
-  constexpr FixedInt<N + M> operator+(FixedInt<M>) const {
+  EIGEN_CONSTEXPR FixedInt<N + M> operator+(FixedInt<M>) const {
     return FixedInt<N + M>();
   }
 
   template <int M>
-  constexpr FixedInt<N - M> operator-(FixedInt<M>) const {
+  EIGEN_CONSTEXPR FixedInt<N - M> operator-(FixedInt<M>) const {
     return FixedInt<N - M>();
   }
 
   template <int M>
-  constexpr FixedInt<N * M> operator*(FixedInt<M>) const {
+  EIGEN_CONSTEXPR FixedInt<N * M> operator*(FixedInt<M>) const {
     return FixedInt<N * M>();
   }
 
   template <int M>
-  constexpr FixedInt<N / M> operator/(FixedInt<M>) const {
+  EIGEN_CONSTEXPR FixedInt<N / M> operator/(FixedInt<M>) const {
     return FixedInt<N / M>();
   }
 
   template <int M>
-  constexpr FixedInt<N % M> operator%(FixedInt<M>) const {
+  EIGEN_CONSTEXPR FixedInt<N % M> operator%(FixedInt<M>) const {
     return FixedInt<N % M>();
   }
 
   template <int M>
-  constexpr FixedInt<N | M> operator|(FixedInt<M>) const {
+  EIGEN_CONSTEXPR FixedInt<N | M> operator|(FixedInt<M>) const {
     return FixedInt<N | M>();
   }
 
   template <int M>
-  constexpr FixedInt<N & M> operator&(FixedInt<M>) const {
+  EIGEN_CONSTEXPR FixedInt<N & M> operator&(FixedInt<M>) const {
     return FixedInt<N & M>();
   }
 
   // Needed in C++14 to allow fix<N>():
-  constexpr FixedInt operator()() const { return *this; }
+  EIGEN_CONSTEXPR FixedInt operator()() const { return *this; }
 
-  constexpr VariableAndFixedInt<N> operator()(int val) const { return VariableAndFixedInt<N>(val); }
+  VariableAndFixedInt<N> operator()(int val) const { return VariableAndFixedInt<N>(val); }
 };
 
 /** \internal
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 0236b51..030d99f 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -710,7 +710,7 @@
     (EIGEN_COMP_ICC && EIGEN_COMP_ICC < 1500) || (EIGEN_COMP_NVCC && EIGEN_COMP_NVCC < 80000) ||       \
     (EIGEN_COMP_CLANG_STRICT && EIGEN_COMP_CLANG < 390) ||                                             \
     (EIGEN_COMP_CLANGAPPLE && EIGEN_COMP_CLANGAPPLE < 9000000) || (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC < 510)
-#error Eigen requires at least c++14 support.
+#error This compiler appears to be too old to be supported by Eigen
 #endif
 
 // Does the compiler support C99?
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index d2336ce..859d2f1 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -303,30 +303,30 @@
  */
 template <typename T, typename EnableIf = void>
 struct array_size {
-  static constexpr Index value = Dynamic;
+  enum { value = Dynamic };
 };
 
 template <typename T>
 struct array_size<T, std::enable_if_t<((T::SizeAtCompileTime & 0) == 0)>> {
-  static constexpr Index value = T::SizeAtCompileTime;
+  enum { value = T::SizeAtCompileTime };
 };
 
 template <typename T, int N>
 struct array_size<const T (&)[N]> {
-  static constexpr Index value = N;
+  enum { value = N };
 };
 template <typename T, int N>
 struct array_size<T (&)[N]> {
-  static constexpr Index value = N;
+  enum { value = N };
 };
 
 template <typename T, std::size_t N>
 struct array_size<const std::array<T, N>> {
-  static constexpr Index value = N;
+  enum { value = N };
 };
 template <typename T, std::size_t N>
 struct array_size<std::array<T, N>> {
-  static constexpr Index value = N;
+  enum { value = N };
 };
 
 /** \internal
diff --git a/Eigen/src/Core/util/SymbolicIndex.h b/Eigen/src/Core/util/SymbolicIndex.h
index befb485..136942c 100644
--- a/Eigen/src/Core/util/SymbolicIndex.h
+++ b/Eigen/src/Core/util/SymbolicIndex.h
@@ -44,8 +44,6 @@
 
 template <typename Tag>
 class Symbol;
-template <typename Tag, typename Type>
-class SymbolValue;
 template <typename Arg0>
 class NegateExpr;
 template <typename Arg1, typename Arg2>
@@ -54,123 +52,136 @@
 class ProductExpr;
 template <typename Arg1, typename Arg2>
 class QuotientExpr;
+
+// A simple wrapper around an integral value to provide the eval method.
+// We could also use a free-function symbolic_eval...
 template <typename IndexType = Index>
-class ValueExpr;
+class ValueExpr {
+ public:
+  ValueExpr(IndexType val) : m_value(val) {}
+  template <typename T>
+  IndexType eval_impl(const T&) const {
+    return m_value;
+  }
+
+ protected:
+  IndexType m_value;
+};
+
+// Specialization for compile-time value,
+// It is similar to ValueExpr(N) but this version helps the compiler to generate better code.
+template <int N>
+class ValueExpr<internal::FixedInt<N> > {
+ public:
+  ValueExpr() {}
+  template <typename T>
+  EIGEN_CONSTEXPR Index eval_impl(const T&) const {
+    return N;
+  }
+};
 
 /** \class BaseExpr
  * \ingroup Core_Module
  * Common base class of any symbolic expressions
  */
-template <typename Derived_>
+template <typename Derived>
 class BaseExpr {
  public:
-  using Derived = Derived_;
-  constexpr const Derived& derived() const { return *static_cast<const Derived*>(this); }
+  const Derived& derived() const { return *static_cast<const Derived*>(this); }
 
   /** Evaluate the expression given the \a values of the symbols.
    *
-   * \param values defines the values of the symbols, as constructed by SymbolExpr::operator= operator.
+   * \param values defines the values of the symbols, it can either be a SymbolValue or a std::tuple of SymbolValue
+   *               as constructed by SymbolExpr::operator= operator.
    *
    */
-  template <typename... Tags, typename... Types>
-  constexpr Index eval(const SymbolValue<Tags, Types>&... values) const {
-    return derived().eval_impl(values...);
+  template <typename T>
+  Index eval(const T& values) const {
+    return derived().eval_impl(values);
   }
 
-  /** Evaluate the expression at compile time given the \a values of the symbols.
-   *
-   * If a value is not known at compile-time, returns Eigen::Undefined.
-   *
-   */
-  template <typename... Tags, typename... Types>
-  static constexpr Index eval_at_compile_time(const SymbolValue<Tags, Types>&...) {
-    return Derived::eval_at_compile_time_impl(SymbolValue<Tags, Types>{}...);
+  template <typename... Types>
+  Index eval(Types&&... values) const {
+    return derived().eval_impl(std::make_tuple(values...));
   }
 
-  constexpr NegateExpr<Derived> operator-() const { return NegateExpr<Derived>(derived()); }
+  NegateExpr<Derived> operator-() const { return NegateExpr<Derived>(derived()); }
 
-  constexpr AddExpr<Derived, ValueExpr<>> operator+(Index b) const {
-    return AddExpr<Derived, ValueExpr<>>(derived(), b);
-  }
-  constexpr AddExpr<Derived, ValueExpr<>> operator-(Index a) const {
-    return AddExpr<Derived, ValueExpr<>>(derived(), -a);
-  }
-  constexpr ProductExpr<Derived, ValueExpr<>> operator*(Index a) const {
+  AddExpr<Derived, ValueExpr<> > operator+(Index b) const { return AddExpr<Derived, ValueExpr<> >(derived(), b); }
+  AddExpr<Derived, ValueExpr<> > operator-(Index a) const { return AddExpr<Derived, ValueExpr<> >(derived(), -a); }
+  ProductExpr<Derived, ValueExpr<> > operator*(Index a) const {
     return ProductExpr<Derived, ValueExpr<> >(derived(), a);
   }
-  constexpr QuotientExpr<Derived, ValueExpr<>> operator/(Index a) const {
+  QuotientExpr<Derived, ValueExpr<> > operator/(Index a) const {
     return QuotientExpr<Derived, ValueExpr<> >(derived(), a);
   }
 
-  friend constexpr AddExpr<Derived, ValueExpr<>> operator+(Index a, const BaseExpr& b) {
+  friend AddExpr<Derived, ValueExpr<> > operator+(Index a, const BaseExpr& b) {
     return AddExpr<Derived, ValueExpr<> >(b.derived(), a);
   }
-  friend constexpr AddExpr<NegateExpr<Derived>, ValueExpr<>> operator-(Index a, const BaseExpr& b) {
+  friend AddExpr<NegateExpr<Derived>, ValueExpr<> > operator-(Index a, const BaseExpr& b) {
     return AddExpr<NegateExpr<Derived>, ValueExpr<> >(-b.derived(), a);
   }
-  friend constexpr ProductExpr<ValueExpr<>, Derived> operator*(Index a, const BaseExpr& b) {
+  friend ProductExpr<ValueExpr<>, Derived> operator*(Index a, const BaseExpr& b) {
     return ProductExpr<ValueExpr<>, Derived>(a, b.derived());
   }
-  friend constexpr QuotientExpr<ValueExpr<>, Derived> operator/(Index a, const BaseExpr& b) {
+  friend QuotientExpr<ValueExpr<>, Derived> operator/(Index a, const BaseExpr& b) {
     return QuotientExpr<ValueExpr<>, Derived>(a, b.derived());
   }
 
   template <int N>
-  constexpr AddExpr<Derived, ValueExpr<internal::FixedInt<N>>> operator+(internal::FixedInt<N>) const {
+  AddExpr<Derived, ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N>) const {
     return AddExpr<Derived, ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >());
   }
   template <int N>
-  constexpr AddExpr<Derived, ValueExpr<internal::FixedInt<-N>>> operator-(internal::FixedInt<N>) const {
+  AddExpr<Derived, ValueExpr<internal::FixedInt<-N> > > operator-(internal::FixedInt<N>) const {
     return AddExpr<Derived, ValueExpr<internal::FixedInt<-N> > >(derived(), ValueExpr<internal::FixedInt<-N> >());
   }
   template <int N>
-  constexpr ProductExpr<Derived, ValueExpr<internal::FixedInt<N>>> operator*(internal::FixedInt<N>) const {
+  ProductExpr<Derived, ValueExpr<internal::FixedInt<N> > > operator*(internal::FixedInt<N>) const {
     return ProductExpr<Derived, ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >());
   }
   template <int N>
-  constexpr QuotientExpr<Derived, ValueExpr<internal::FixedInt<N>>> operator/(internal::FixedInt<N>) const {
+  QuotientExpr<Derived, ValueExpr<internal::FixedInt<N> > > operator/(internal::FixedInt<N>) const {
     return QuotientExpr<Derived, ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >());
   }
 
   template <int N>
-  friend constexpr AddExpr<Derived, ValueExpr<internal::FixedInt<N>>> operator+(internal::FixedInt<N>,
-                                                                                const BaseExpr& b) {
+  friend AddExpr<Derived, ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N>, const BaseExpr& b) {
     return AddExpr<Derived, ValueExpr<internal::FixedInt<N> > >(b.derived(), ValueExpr<internal::FixedInt<N> >());
   }
   template <int N>
-  friend constexpr AddExpr<NegateExpr<Derived>, ValueExpr<internal::FixedInt<N>>> operator-(internal::FixedInt<N>,
-                                                                                            const BaseExpr& b) {
+  friend AddExpr<NegateExpr<Derived>, ValueExpr<internal::FixedInt<N> > > operator-(internal::FixedInt<N>,
+                                                                                    const BaseExpr& b) {
     return AddExpr<NegateExpr<Derived>, ValueExpr<internal::FixedInt<N> > >(-b.derived(),
                                                                             ValueExpr<internal::FixedInt<N> >());
   }
   template <int N>
-  friend constexpr ProductExpr<ValueExpr<internal::FixedInt<N>>, Derived> operator*(internal::FixedInt<N>,
-                                                                                    const BaseExpr& b) {
+  friend ProductExpr<ValueExpr<internal::FixedInt<N> >, Derived> operator*(internal::FixedInt<N>, const BaseExpr& b) {
     return ProductExpr<ValueExpr<internal::FixedInt<N> >, Derived>(ValueExpr<internal::FixedInt<N> >(), b.derived());
   }
   template <int N>
-  friend constexpr QuotientExpr<ValueExpr<internal::FixedInt<N>>, Derived> operator/(internal::FixedInt<N>,
-                                                                                     const BaseExpr& b) {
+  friend QuotientExpr<ValueExpr<internal::FixedInt<N> >, Derived> operator/(internal::FixedInt<N>, const BaseExpr& b) {
     return QuotientExpr<ValueExpr<internal::FixedInt<N> >, Derived>(ValueExpr<internal::FixedInt<N> >(), b.derived());
   }
 
   template <typename OtherDerived>
-  constexpr AddExpr<Derived, OtherDerived> operator+(const BaseExpr<OtherDerived>& b) const {
+  AddExpr<Derived, OtherDerived> operator+(const BaseExpr<OtherDerived>& b) const {
     return AddExpr<Derived, OtherDerived>(derived(), b.derived());
   }
 
   template <typename OtherDerived>
-  constexpr AddExpr<Derived, NegateExpr<OtherDerived>> operator-(const BaseExpr<OtherDerived>& b) const {
+  AddExpr<Derived, NegateExpr<OtherDerived> > operator-(const BaseExpr<OtherDerived>& b) const {
     return AddExpr<Derived, NegateExpr<OtherDerived> >(derived(), -b.derived());
   }
 
   template <typename OtherDerived>
-  constexpr ProductExpr<Derived, OtherDerived> operator*(const BaseExpr<OtherDerived>& b) const {
+  ProductExpr<Derived, OtherDerived> operator*(const BaseExpr<OtherDerived>& b) const {
     return ProductExpr<Derived, OtherDerived>(derived(), b.derived());
   }
 
   template <typename OtherDerived>
-  constexpr QuotientExpr<Derived, OtherDerived> operator/(const BaseExpr<OtherDerived>& b) const {
+  QuotientExpr<Derived, OtherDerived> operator/(const BaseExpr<OtherDerived>& b) const {
     return QuotientExpr<Derived, OtherDerived>(derived(), b.derived());
   }
 };
@@ -182,137 +193,21 @@
   enum { value = internal::is_convertible<T, BaseExpr<T> >::value };
 };
 
-// A simple wrapper around an integral value to provide the eval method.
-// We could also use a free-function symbolic_eval...
-template <typename IndexType>
-class ValueExpr : BaseExpr<ValueExpr<IndexType>> {
- public:
-  constexpr ValueExpr() = default;
-  constexpr ValueExpr(IndexType val) : value_(val) {}
-  template <typename... Tags, typename... Types>
-  constexpr IndexType eval_impl(const SymbolValue<Tags, Types>&...) const {
-    return value_;
-  }
-  template <typename... Tags, typename... Types>
-  static constexpr IndexType eval_at_compile_time_impl(const SymbolValue<Tags, Types>&...) {
-    return IndexType(Undefined);
-  }
-
- protected:
-  IndexType value_;
-};
-
-// Specialization for compile-time value,
-// It is similar to ValueExpr(N) but this version helps the compiler to generate better code.
-template <int N>
-class ValueExpr<internal::FixedInt<N>> : public BaseExpr<ValueExpr<internal::FixedInt<N>>> {
- public:
-  constexpr ValueExpr() = default;
-  constexpr ValueExpr(internal::FixedInt<N>) {}
-  template <typename... Tags, typename... Types>
-  constexpr Index eval_impl(const SymbolValue<Tags, Types>&...) const {
-    return Index(N);
-  }
-  template <typename... Tags, typename... Types>
-  static constexpr Index eval_at_compile_time_impl(const SymbolValue<Tags, Types>&...) {
-    return Index(N);
-  }
-};
-
 /** Represents the actual value of a symbol identified by its tag
  *
  * It is the return type of SymbolValue::operator=, and most of the time this is only way it is used.
  */
-template <typename Tag, typename Type>
-class SymbolValue : public BaseExpr<SymbolValue<Tag, Type>> {};
-
 template <typename Tag>
-class SymbolValue<Tag, Index> : public BaseExpr<SymbolValue<Tag, Index>> {
+class SymbolValue {
  public:
-  constexpr SymbolValue() = default;
-
   /** Default constructor from the value \a val */
-  constexpr SymbolValue(Index val) : value_(val) {}
+  SymbolValue(Index val) : m_value(val) {}
 
   /** \returns the stored value of the symbol */
-  constexpr Index value() const { return value_; }
-
-  /** \returns the stored value of the symbol at compile time, or Undefined if not known. */
-  static constexpr Index value_at_compile_time() { return Index(Undefined); }
-
-  template <typename... Tags, typename... Types>
-  constexpr Index eval_impl(const SymbolValue<Tags, Types>&...) const {
-    return value();
-  }
-
-  template <typename... Tags, typename... Types>
-  static constexpr Index eval_at_compile_time_impl(const SymbolValue<Tags, Types>&...) {
-    return value_at_compile_time();
-  }
+  Index value() const { return m_value; }
 
  protected:
-  Index value_;
-};
-
-template <typename Tag, int N>
-class SymbolValue<Tag, internal::FixedInt<N>> : public BaseExpr<SymbolValue<Tag, internal::FixedInt<N>>> {
- public:
-  constexpr SymbolValue() = default;
-
-  /** Default constructor from the value \a val */
-  constexpr SymbolValue(internal::FixedInt<N>){};
-
-  /** \returns the stored value of the symbol */
-  constexpr Index value() const { return static_cast<Index>(N); }
-
-  /** \returns the stored value of the symbol at compile time, or Undefined if not known. */
-  static constexpr Index value_at_compile_time() { return static_cast<Index>(N); }
-
-  template <typename... Tags, typename... Types>
-  constexpr Index eval_impl(const SymbolValue<Tags, Types>&...) const {
-    return value();
-  }
-
-  template <typename... Tags, typename... Types>
-  static constexpr Index eval_at_compile_time_impl(const SymbolValue<Tags, Types>&...) {
-    return value_at_compile_time();
-  }
-};
-
-// Find and return a symbol value based on the tag.
-template <typename Tag, typename... Types>
-struct EvalSymbolValueHelper;
-
-// Empty base case, symbol not found.
-template <typename Tag>
-struct EvalSymbolValueHelper<Tag> {
-  static constexpr Index eval_impl() {
-    eigen_assert(false && "Symbol not found.");
-    return Index(Undefined);
-  }
-  static constexpr Index eval_at_compile_time_impl() { return Index(Undefined); }
-};
-
-// We found a symbol value matching the provided Tag!
-template <typename Tag, typename Type, typename... OtherTypes>
-struct EvalSymbolValueHelper<Tag, SymbolValue<Tag, Type>, OtherTypes...> {
-  static constexpr Index eval_impl(const SymbolValue<Tag, Type>& symbol, const OtherTypes&...) {
-    return symbol.value();
-  }
-  static constexpr Index eval_at_compile_time_impl(const SymbolValue<Tag, Type>& symbol, const OtherTypes&...) {
-    return symbol.value_at_compile_time();
-  }
-};
-
-// No symbol value in first value, recursive search starting with next.
-template <typename Tag, typename T1, typename... OtherTypes>
-struct EvalSymbolValueHelper<Tag, T1, OtherTypes...> {
-  static constexpr Index eval_impl(const T1&, const OtherTypes&... values) {
-    return EvalSymbolValueHelper<Tag, OtherTypes...>::eval_impl(values...);
-  }
-  static constexpr Index eval_at_compile_time_impl(const T1&, const OtherTypes&...) {
-    return EvalSymbolValueHelper<Tag, OtherTypes...>::eval_at_compile_time_impl(OtherTypes{}...);
-  }
+  Index m_value;
 };
 
 /** Expression of a symbol uniquely identified by the template parameter type \c tag */
@@ -322,47 +217,32 @@
   /** Alias to the template parameter \c tag */
   typedef tag Tag;
 
-  constexpr SymbolExpr() = default;
+  SymbolExpr() {}
 
   /** Associate the value \a val to the given symbol \c *this, uniquely identified by its \c Tag.
    *
    * The returned object should be passed to ExprBase::eval() to evaluate a given expression with this specified
    * runtime-time value.
    */
-  constexpr SymbolValue<Tag, Index> operator=(Index val) const { return SymbolValue<Tag, Index>(val); }
+  SymbolValue<Tag> operator=(Index val) const { return SymbolValue<Tag>(val); }
 
-  template <int N>
-  constexpr SymbolValue<Tag, internal::FixedInt<N>> operator=(internal::FixedInt<N>) const {
-    return SymbolValue<Tag, internal::FixedInt<N>>{internal::FixedInt<N>{}};
-  }
+  Index eval_impl(const SymbolValue<Tag>& values) const { return values.value(); }
 
-  template <typename... Tags, typename... Types>
-  constexpr Index eval_impl(const SymbolValue<Tags, Types>&... values) const {
-    return EvalSymbolValueHelper<Tag, SymbolValue<Tags, Types>...>::eval_impl(values...);
-  }
-
-  template <typename... Tags, typename... Types>
-  static constexpr Index eval_at_compile_time_impl(const SymbolValue<Tags, Types>&...) {
-    return EvalSymbolValueHelper<Tag, SymbolValue<Tags, Types>...>::eval_at_compile_time_impl(
-        SymbolValue<Tags, Types>{}...);
+  // C++14 versions suitable for multiple symbols
+  template <typename... Types>
+  Index eval_impl(const std::tuple<Types...>& values) const {
+    return std::get<SymbolValue<Tag> >(values).value();
   }
 };
 
 template <typename Arg0>
 class NegateExpr : public BaseExpr<NegateExpr<Arg0> > {
  public:
-  constexpr NegateExpr() = default;
-  constexpr NegateExpr(const Arg0& arg0) : m_arg0(arg0) {}
+  NegateExpr(const Arg0& arg0) : m_arg0(arg0) {}
 
-  template <typename... Tags, typename... Types>
-  constexpr Index eval_impl(const SymbolValue<Tags, Types>&... values) const {
-    return -m_arg0.eval_impl(values...);
-  }
-
-  template <typename... Tags, typename... Types>
-  static constexpr Index eval_at_compile_time_impl(const SymbolValue<Tags, Types>&...) {
-    constexpr Index v = Arg0::eval_at_compile_time_impl(SymbolValue<Tags, Types>{}...);
-    return (v == Undefined) ? Undefined : -v;
+  template <typename T>
+  Index eval_impl(const T& values) const {
+    return -m_arg0.eval_impl(values);
   }
 
  protected:
@@ -372,19 +252,11 @@
 template <typename Arg0, typename Arg1>
 class AddExpr : public BaseExpr<AddExpr<Arg0, Arg1> > {
  public:
-  constexpr AddExpr() = default;
-  constexpr AddExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
+  AddExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
 
-  template <typename... Tags, typename... Types>
-  constexpr Index eval_impl(const SymbolValue<Tags, Types>&... values) const {
-    return m_arg0.eval_impl(values...) + m_arg1.eval_impl(values...);
-  }
-
-  template <typename... Tags, typename... Types>
-  static constexpr Index eval_at_compile_time_impl(const SymbolValue<Tags, Types>&...) {
-    constexpr Index v0 = Arg0::eval_at_compile_time_impl(SymbolValue<Tags, Types>{}...);
-    constexpr Index v1 = Arg1::eval_at_compile_time_impl(SymbolValue<Tags, Types>{}...);
-    return (v0 == Undefined || v1 == Undefined) ? Undefined : v0 + v1;
+  template <typename T>
+  Index eval_impl(const T& values) const {
+    return m_arg0.eval_impl(values) + m_arg1.eval_impl(values);
   }
 
  protected:
@@ -395,19 +267,11 @@
 template <typename Arg0, typename Arg1>
 class ProductExpr : public BaseExpr<ProductExpr<Arg0, Arg1> > {
  public:
-  constexpr ProductExpr() = default;
-  constexpr ProductExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
+  ProductExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
 
-  template <typename... Tags, typename... Types>
-  constexpr Index eval_impl(const SymbolValue<Tags, Types>&... values) const {
-    return m_arg0.eval_impl(values...) * m_arg1.eval_impl(values...);
-  }
-
-  template <typename... Tags, typename... Types>
-  static constexpr Index eval_at_compile_time_impl(const SymbolValue<Tags, Types>&...) {
-    constexpr Index v0 = Arg0::eval_at_compile_time_impl(SymbolValue<Tags, Types>{}...);
-    constexpr Index v1 = Arg1::eval_at_compile_time_impl(SymbolValue<Tags, Types>{}...);
-    return (v0 == Undefined || v1 == Undefined) ? Undefined : v0 * v1;
+  template <typename T>
+  Index eval_impl(const T& values) const {
+    return m_arg0.eval_impl(values) * m_arg1.eval_impl(values);
   }
 
  protected:
@@ -418,19 +282,11 @@
 template <typename Arg0, typename Arg1>
 class QuotientExpr : public BaseExpr<QuotientExpr<Arg0, Arg1> > {
  public:
-  constexpr QuotientExpr() = default;
-  constexpr QuotientExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
+  QuotientExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
 
-  template <typename... Tags, typename... Types>
-  constexpr Index eval_impl(const SymbolValue<Tags, Types>&... values) const {
-    return m_arg0.eval_impl(values...) / m_arg1.eval_impl(values...);
-  }
-
-  template <typename... Tags, typename... Types>
-  static constexpr Index eval_at_compile_time_impl(const SymbolValue<Tags, Types>&...) {
-    constexpr Index v0 = Arg0::eval_at_compile_time_impl(SymbolValue<Tags, Types>{}...);
-    constexpr Index v1 = Arg1::eval_at_compile_time_impl(SymbolValue<Tags, Types>{}...);
-    return (v0 == Undefined || v1 == Undefined) ? Undefined : v0 / v1;
+  template <typename T>
+  Index eval_impl(const T& values) const {
+    return m_arg0.eval_impl(values) / m_arg1.eval_impl(values);
   }
 
  protected:
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 555faa1..5b7bdc0 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -484,7 +484,7 @@
                                       //      solution could be to count the number of temps?
     NAsInteger = n == Dynamic ? HugeCost : n,
     CostEval = (NAsInteger + 1) * ScalarReadCost + CoeffReadCost,
-    CostNoEval = int(NAsInteger) * int(CoeffReadCost),
+    CostNoEval = NAsInteger * CoeffReadCost,
     Evaluate = (int(evaluator<T>::Flags) & EvalBeforeNestingBit) || (int(CostEval) < int(CostNoEval))
   };
 
diff --git a/Eigen/src/Eigenvalues/RealSchur.h b/Eigen/src/Eigenvalues/RealSchur.h
index 5cef658..970500c 100644
--- a/Eigen/src/Eigenvalues/RealSchur.h
+++ b/Eigen/src/Eigenvalues/RealSchur.h
@@ -408,29 +408,28 @@
   shiftInfo.coeffRef(1) = m_matT.coeff(iu - 1, iu - 1);
   shiftInfo.coeffRef(2) = m_matT.coeff(iu, iu - 1) * m_matT.coeff(iu - 1, iu);
 
-  // Alternate exceptional shifting strategy every 16 iterations.
-  if (iter % 16 == 0) {
-    // Wilkinson's original ad hoc shift
-    if (iter % 32 != 0) {
-      exshift += shiftInfo.coeff(0);
-      for (Index i = 0; i <= iu; ++i) m_matT.coeffRef(i, i) -= shiftInfo.coeff(0);
-      Scalar s = abs(m_matT.coeff(iu, iu - 1)) + abs(m_matT.coeff(iu - 1, iu - 2));
-      shiftInfo.coeffRef(0) = Scalar(0.75) * s;
-      shiftInfo.coeffRef(1) = Scalar(0.75) * s;
-      shiftInfo.coeffRef(2) = Scalar(-0.4375) * s * s;
-    } else {
-      // MATLAB's new ad hoc shift
-      Scalar s = (shiftInfo.coeff(1) - shiftInfo.coeff(0)) / Scalar(2.0);
-      s = s * s + shiftInfo.coeff(2);
-      if (s > Scalar(0)) {
-        s = sqrt(s);
-        if (shiftInfo.coeff(1) < shiftInfo.coeff(0)) s = -s;
-        s = s + (shiftInfo.coeff(1) - shiftInfo.coeff(0)) / Scalar(2.0);
-        s = shiftInfo.coeff(0) - shiftInfo.coeff(2) / s;
-        exshift += s;
-        for (Index i = 0; i <= iu; ++i) m_matT.coeffRef(i, i) -= s;
-        shiftInfo.setConstant(Scalar(0.964));
-      }
+  // Wilkinson's original ad hoc shift
+  if (iter == 10) {
+    exshift += shiftInfo.coeff(0);
+    for (Index i = 0; i <= iu; ++i) m_matT.coeffRef(i, i) -= shiftInfo.coeff(0);
+    Scalar s = abs(m_matT.coeff(iu, iu - 1)) + abs(m_matT.coeff(iu - 1, iu - 2));
+    shiftInfo.coeffRef(0) = Scalar(0.75) * s;
+    shiftInfo.coeffRef(1) = Scalar(0.75) * s;
+    shiftInfo.coeffRef(2) = Scalar(-0.4375) * s * s;
+  }
+
+  // MATLAB's new ad hoc shift
+  if (iter == 30) {
+    Scalar s = (shiftInfo.coeff(1) - shiftInfo.coeff(0)) / Scalar(2.0);
+    s = s * s + shiftInfo.coeff(2);
+    if (s > Scalar(0)) {
+      s = sqrt(s);
+      if (shiftInfo.coeff(1) < shiftInfo.coeff(0)) s = -s;
+      s = s + (shiftInfo.coeff(1) - shiftInfo.coeff(0)) / Scalar(2.0);
+      s = shiftInfo.coeff(0) - shiftInfo.coeff(2) / s;
+      exshift += s;
+      for (Index i = 0; i <= iu; ++i) m_matT.coeffRef(i, i) -= s;
+      shiftInfo.setConstant(Scalar(0.964));
     }
   }
 }
diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h
index e97a8f2..a4e76d9 100644
--- a/Eigen/src/Geometry/AlignedBox.h
+++ b/Eigen/src/Geometry/AlignedBox.h
@@ -173,7 +173,7 @@
   }
 
   /** \returns the volume of the bounding box */
-  EIGEN_DEVICE_FUNC inline Scalar volume() const { return isEmpty() ? Scalar(0) : sizes().prod(); }
+  EIGEN_DEVICE_FUNC inline Scalar volume() const { return sizes().prod(); }
 
   /** \returns an expression for the bounding box diagonal vector
    * if the length of the diagonal is needed: diagonal().norm()
diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h
index a8e0502..34399a7 100644
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -98,7 +98,7 @@
 namespace internal {
 
 template <int Arch, typename VectorLhs, typename VectorRhs, typename Scalar = typename VectorLhs::Scalar,
-          bool Vectorizable = bool((evaluator<VectorLhs>::Flags & evaluator<VectorRhs>::Flags) & PacketAccessBit)>
+          bool Vectorizable = bool((VectorLhs::Flags & VectorRhs::Flags) & PacketAccessBit)>
 struct cross3_impl {
   EIGEN_DEVICE_FUNC static inline typename internal::plain_matrix_type<VectorLhs>::type run(const VectorLhs& lhs,
                                                                                             const VectorRhs& rhs) {
diff --git a/Eigen/src/Geometry/arch/Geometry_SIMD.h b/Eigen/src/Geometry/arch/Geometry_SIMD.h
index e8b210e..ce3cfea 100644
--- a/Eigen/src/Geometry/arch/Geometry_SIMD.h
+++ b/Eigen/src/Geometry/arch/Geometry_SIMD.h
@@ -62,19 +62,16 @@
 
 template <typename VectorLhs, typename VectorRhs>
 struct cross3_impl<Architecture::Target, VectorLhs, VectorRhs, float, true> {
-  using DstPlainType = typename plain_matrix_type<VectorLhs>::type;
-  static constexpr int DstAlignment = evaluator<DstPlainType>::Alignment;
-  static constexpr int LhsAlignment = evaluator<VectorLhs>::Alignment;
-  static constexpr int RhsAlignment = evaluator<VectorRhs>::Alignment;
-  static inline DstPlainType run(const VectorLhs& lhs, const VectorRhs& rhs) {
+  enum { ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment };
+  static inline typename plain_matrix_type<VectorLhs>::type run(const VectorLhs& lhs, const VectorRhs& rhs) {
     evaluator<VectorLhs> lhs_eval(lhs);
     evaluator<VectorRhs> rhs_eval(rhs);
-    Packet4f a = lhs_eval.template packet<LhsAlignment, Packet4f>(0);
-    Packet4f b = rhs_eval.template packet<RhsAlignment, Packet4f>(0);
+    Packet4f a = lhs_eval.template packet<traits<VectorLhs>::Alignment, Packet4f>(0);
+    Packet4f b = rhs_eval.template packet<traits<VectorRhs>::Alignment, Packet4f>(0);
     Packet4f mul1 = pmul(vec4f_swizzle1(a, 1, 2, 0, 3), vec4f_swizzle1(b, 2, 0, 1, 3));
     Packet4f mul2 = pmul(vec4f_swizzle1(a, 2, 0, 1, 3), vec4f_swizzle1(b, 1, 2, 0, 3));
-    DstPlainType res;
-    pstoret<float, Packet4f, DstAlignment>(&res.x(), psub(mul1, mul2));
+    typename plain_matrix_type<VectorLhs>::type res;
+    pstoret<float, Packet4f, ResAlignment>(&res.x(), psub(mul1, mul2));
     return res;
   }
 };
diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h b/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h
index a97b905..14ae6ea 100644
--- a/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h
+++ b/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h
@@ -32,19 +32,17 @@
  *
  * \implsparsesolverconcept
  *
- * It performs the following incomplete factorization: \f$ S P A P' S + \sigma I \approx L L' \f$
- * where L is a lower triangular factor, S is a diagonal scaling matrix, P is a
- * fill-in reducing permutation as computed by the ordering method, and \f$ \sigma \f$ is a shift
- * for ensuring the decomposed matrix is positive definite.
+ * It performs the following incomplete factorization: \f$ S P A P' S \approx L L' \f$
+ * where L is a lower triangular factor, S is a diagonal scaling matrix, and P is a
+ * fill-in reducing permutation as computed by the ordering method.
  *
  * \b Shifting \b strategy: Let \f$ B = S P A P' S \f$  be the scaled matrix on which the factorization is carried out,
  * and \f$ \beta \f$ be the minimum value of the diagonal. If \f$ \beta > 0 \f$ then, the factorization is directly
- * performed on the matrix B, and \sigma = 0. Otherwise, the factorization is performed on the shifted matrix \f$ B +
- * \sigma I \f$ for a shifting factor  \f$ \sigma \f$.  We start with \f$ \sigma = \sigma_0 - \beta \f$, where \f$
- * \sigma_0 \f$ is the initial shift value as returned and set by setInitialShift() method. The default value is \f$
- * \sigma_0 = 10^{-3} \f$. If the factorization fails, then the shift in doubled until it succeed or a maximum of ten
- * attempts. If it still fails, as returned by the info() method, then you can either increase the initial shift, or
- * better use another preconditioning technique.
+ * performed on the matrix B. Otherwise, the factorization is performed on the shifted matrix \f$ B + (\sigma+|\beta|) I
+ * \f$ where \f$ \sigma \f$ is the initial shift value as returned and set by the setInitialShift() method. The default
+ * value is \f$ \sigma = 10^{-3} \f$. If the factorization fails, then the shift is doubled until it succeeds or a
+ * maximum of ten attempts is reached. If it still fails, as returned by the info() method, then you can either increase
+ * the initial shift, or better use another preconditioning technique.
  *
  */
 template <typename Scalar, int UpLo_ = Lower, typename OrderingType_ = AMDOrdering<int> >
@@ -178,9 +176,6 @@
     return m_perm;
   }
 
-  /** \returns the final shift parameter from the computation */
-  RealScalar shift() const { return m_shift; }
-
  protected:
   FactorType m_L;             // The lower part stored in CSC
   VectorRx m_scale;           // The vector for scaling the matrix
@@ -189,7 +184,6 @@
   bool m_factorizationIsOk;
   ComputationInfo m_info;
   PermutationType m_perm;
-  RealScalar m_shift;  // The final shift parameter.
 
  private:
   inline void updateList(Ref<const VectorIx> colPtr, Ref<VectorIx> rowIdx, Ref<VectorSx> vals, const Index& col,
@@ -220,20 +214,6 @@
     m_L.template selfadjointView<Lower>() = mat.template selfadjointView<UpLo_>();
   }
 
-  // The algorithm will insert increasingly large shifts on the diagonal until
-  // factorization succeeds. Therefore we have to make sure that there is a
-  // space in the datastructure to store such values, even if the original
-  // matrix has a zero on the diagonal.
-  bool modified = false;
-  for (Index i = 0; i < mat.cols(); ++i) {
-    bool inserted = false;
-    m_L.findOrInsertCoeff(i, i, &inserted);
-    if (inserted) {
-      modified = true;
-    }
-  }
-  if (modified) m_L.makeCompressed();
-
   Index n = m_L.cols();
   Index nnz = m_L.nonZeros();
   Map<VectorSx> vals(m_L.valuePtr(), nnz);           // values
@@ -277,8 +257,8 @@
 
   FactorType L_save = m_L;
 
-  m_shift = RealScalar(0);
-  if (mindiag <= RealScalar(0.)) m_shift = m_initialShift - mindiag;
+  RealScalar shift = 0;
+  if (mindiag <= RealScalar(0.)) shift = m_initialShift - mindiag;
 
   m_info = NumericalIssue;
 
@@ -286,7 +266,7 @@
   int iter = 0;
   do {
     // Apply the shift to the diagonal elements of the matrix
-    for (Index j = 0; j < n; j++) vals[colPtr[j]] += m_shift;
+    for (Index j = 0; j < n; j++) vals[colPtr[j]] += shift;
 
     // jki version of the Cholesky factorization
     Index j = 0;
@@ -330,7 +310,7 @@
         if (++iter >= 10) return;
 
         // increase shift
-        m_shift = numext::maxi(m_initialShift, RealScalar(2) * m_shift);
+        shift = numext::maxi(m_initialShift, RealScalar(2) * shift);
         // restore m_L, col_pattern, and listCol
         vals = Map<const VectorSx>(L_save.valuePtr(), nnz);
         rowIdx = Map<const VectorIx>(L_save.innerIndexPtr(), nnz);
diff --git a/Eigen/src/Jacobi/Jacobi.h b/Eigen/src/Jacobi/Jacobi.h
index 2686a52..f53b8ec 100644
--- a/Eigen/src/Jacobi/Jacobi.h
+++ b/Eigen/src/Jacobi/Jacobi.h
@@ -265,7 +265,7 @@
   internal::apply_rotation_in_the_plane(x, y, j);
 }
 
-/** \jacobi_module
+/** \ingroup Jacobi_Module
  * Applies the rotation in the plane \a j to the columns \a p and \a q of \c *this, i.e., it computes B = B * J
  * with \f$ B = \left ( \begin{array}{cc} \text{*this.col}(p) & \text{*this.col}(q) \end{array} \right ) \f$.
  *
diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h
index 092c29d..f1de6fd 100644
--- a/Eigen/src/QR/ColPivHouseholderQR.h
+++ b/Eigen/src/QR/ColPivHouseholderQR.h
@@ -238,20 +238,6 @@
    */
   typename MatrixType::RealScalar logAbsDeterminant() const;
 
-  /** \returns the sign of the determinant of the matrix of which
-   * *this is the QR decomposition. It has only linear complexity
-   * (that is, O(n) where n is the dimension of the square matrix)
-   * as the QR decomposition has already been computed.
-   *
-   * \note This is only for square matrices.
-   *
-   * \note This method is useful to work around the risk of overflow/underflow that's inherent
-   * to determinant computation.
-   *
-   * \sa determinant(), absDeterminant(), logAbsDeterminant(), MatrixBase::determinant()
-   */
-  typename MatrixType::Scalar signDeterminant() const;
-
   /** \returns the rank of the matrix of which *this is the QR decomposition.
    *
    * \note This method has to determine which pivots should be considered nonzero.
@@ -442,7 +428,7 @@
   eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
   Scalar detQ;
   internal::householder_determinant<HCoeffsType, Scalar, NumTraits<Scalar>::IsComplex>::run(m_hCoeffs, detQ);
-  return isInjective() ? (detQ * Scalar(m_det_p)) * m_qr.diagonal().prod() : Scalar(0);
+  return m_qr.diagonal().prod() * detQ * Scalar(m_det_p);
 }
 
 template <typename MatrixType, typename PermutationIndex>
@@ -450,23 +436,14 @@
   using std::abs;
   eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
   eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
-  return isInjective() ? abs(m_qr.diagonal().prod()) : RealScalar(0);
+  return abs(m_qr.diagonal().prod());
 }
 
 template <typename MatrixType, typename PermutationIndex>
 typename MatrixType::RealScalar ColPivHouseholderQR<MatrixType, PermutationIndex>::logAbsDeterminant() const {
   eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
   eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
-  return isInjective() ? m_qr.diagonal().cwiseAbs().array().log().sum() : -NumTraits<RealScalar>::infinity();
-}
-
-template <typename MatrixType, typename PermutationIndex>
-typename MatrixType::Scalar ColPivHouseholderQR<MatrixType, PermutationIndex>::signDeterminant() const {
-  eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
-  eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
-  Scalar detQ;
-  internal::householder_determinant<HCoeffsType, Scalar, NumTraits<Scalar>::IsComplex>::run(m_hCoeffs, detQ);
-  return isInjective() ? (detQ * Scalar(m_det_p)) * m_qr.diagonal().array().sign().prod() : Scalar(0);
+  return m_qr.diagonal().cwiseAbs().array().log().sum();
 }
 
 /** Performs the QR factorization of the given matrix \a matrix. The result of
diff --git a/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/Eigen/src/QR/CompleteOrthogonalDecomposition.h
index 960ccb1..8566e96 100644
--- a/Eigen/src/QR/CompleteOrthogonalDecomposition.h
+++ b/Eigen/src/QR/CompleteOrthogonalDecomposition.h
@@ -228,21 +228,6 @@
    */
   typename MatrixType::RealScalar logAbsDeterminant() const;
 
-  /** \returns the sign of the determinant of the
-   * matrix of which *this is the complete orthogonal decomposition. It has
-   * only linear complexity (that is, O(n) where n is the dimension of the
-   * square matrix) as the complete orthogonal decomposition has already been
-   * computed.
-   *
-   * \note This is only for square matrices.
-   *
-   * \note This method is useful to work around the risk of overflow/underflow
-   * that's inherent to determinant computation.
-   *
-   * \sa determinant(), absDeterminant(), logAbsDeterminant(), MatrixBase::determinant()
-   */
-  typename MatrixType::Scalar signDeterminant() const;
-
   /** \returns the rank of the matrix of which *this is the complete orthogonal
    * decomposition.
    *
@@ -439,11 +424,6 @@
   return m_cpqr.logAbsDeterminant();
 }
 
-template <typename MatrixType, typename PermutationIndex>
-typename MatrixType::Scalar CompleteOrthogonalDecomposition<MatrixType, PermutationIndex>::signDeterminant() const {
-  return m_cpqr.signDeterminant();
-}
-
 /** Performs the complete orthogonal decomposition of the given matrix \a
  * matrix. The result of the factorization is stored into \c *this, and a
  * reference to \c *this is returned.
diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h
index cae9ae4..d93a5d1 100644
--- a/Eigen/src/QR/FullPivHouseholderQR.h
+++ b/Eigen/src/QR/FullPivHouseholderQR.h
@@ -248,20 +248,6 @@
    */
   typename MatrixType::RealScalar logAbsDeterminant() const;
 
-  /** \returns the sign of the determinant of the matrix of which
-   * *this is the QR decomposition. It has only linear complexity
-   * (that is, O(n) where n is the dimension of the square matrix)
-   * as the QR decomposition has already been computed.
-   *
-   * \note This is only for square matrices.
-   *
-   * \note This method is useful to work around the risk of overflow/underflow that's inherent
-   * to determinant computation.
-   *
-   * \sa determinant(), absDeterminant(), logAbsDeterminant(), MatrixBase::determinant()
-   */
-  typename MatrixType::Scalar signDeterminant() const;
-
   /** \returns the rank of the matrix of which *this is the QR decomposition.
    *
    * \note This method has to determine which pivots should be considered nonzero.
@@ -439,7 +425,7 @@
   eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
   Scalar detQ;
   internal::householder_determinant<HCoeffsType, Scalar, NumTraits<Scalar>::IsComplex>::run(m_hCoeffs, detQ);
-  return isInjective() ? (detQ * Scalar(m_det_p)) * m_qr.diagonal().prod() : Scalar(0);
+  return m_qr.diagonal().prod() * detQ * Scalar(m_det_p);
 }
 
 template <typename MatrixType, typename PermutationIndex>
@@ -447,23 +433,14 @@
   using std::abs;
   eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
   eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
-  return isInjective() ? abs(m_qr.diagonal().prod()) : RealScalar(0);
+  return abs(m_qr.diagonal().prod());
 }
 
 template <typename MatrixType, typename PermutationIndex>
 typename MatrixType::RealScalar FullPivHouseholderQR<MatrixType, PermutationIndex>::logAbsDeterminant() const {
   eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
   eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
-  return isInjective() ? m_qr.diagonal().cwiseAbs().array().log().sum() : -NumTraits<RealScalar>::infinity();
-}
-
-template <typename MatrixType, typename PermutationIndex>
-typename MatrixType::Scalar FullPivHouseholderQR<MatrixType, PermutationIndex>::signDeterminant() const {
-  eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
-  eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
-  Scalar detQ;
-  internal::householder_determinant<HCoeffsType, Scalar, NumTraits<Scalar>::IsComplex>::run(m_hCoeffs, detQ);
-  return isInjective() ? (detQ * Scalar(m_det_p)) * m_qr.diagonal().array().sign().prod() : Scalar(0);
+  return m_qr.diagonal().cwiseAbs().array().log().sum();
 }
 
 /** Performs the QR factorization of the given matrix \a matrix. The result of
diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h
index e297372..9e73672 100644
--- a/Eigen/src/QR/HouseholderQR.h
+++ b/Eigen/src/QR/HouseholderQR.h
@@ -187,8 +187,6 @@
    * \warning a determinant can be very big or small, so for matrices
    * of large enough dimension, there is a risk of overflow/underflow.
    * One way to work around that is to use logAbsDeterminant() instead.
-   * Also, do not rely on the determinant being exactly zero for testing
-   * singularity or rank-deficiency.
    *
    * \sa absDeterminant(), logAbsDeterminant(), MatrixBase::determinant()
    */
@@ -204,8 +202,6 @@
    * \warning a determinant can be very big or small, so for matrices
    * of large enough dimension, there is a risk of overflow/underflow.
    * One way to work around that is to use logAbsDeterminant() instead.
-   * Also, do not rely on the determinant being exactly zero for testing
-   * singularity or rank-deficiency.
    *
    * \sa determinant(), logAbsDeterminant(), MatrixBase::determinant()
    */
@@ -221,30 +217,10 @@
    * \note This method is useful to work around the risk of overflow/underflow that's inherent
    * to determinant computation.
    *
-   * \warning Do not rely on the determinant being exactly zero for testing
-   * singularity or rank-deficiency.
-   *
    * \sa determinant(), absDeterminant(), MatrixBase::determinant()
    */
   typename MatrixType::RealScalar logAbsDeterminant() const;
 
-  /** \returns the sign of the determinant of the matrix of which
-   * *this is the QR decomposition. It has only linear complexity
-   * (that is, O(n) where n is the dimension of the square matrix)
-   * as the QR decomposition has already been computed.
-   *
-   * \note This is only for square matrices.
-   *
-   * \note This method is useful to work around the risk of overflow/underflow that's inherent
-   * to determinant computation.
-   *
-   * \warning Do not rely on the determinant being exactly zero for testing
-   * singularity or rank-deficiency.
-   *
-   * \sa determinant(), absDeterminant(), MatrixBase::determinant()
-   */
-  typename MatrixType::Scalar signDeterminant() const;
-
   inline Index rows() const { return m_qr.rows(); }
   inline Index cols() const { return m_qr.cols(); }
 
@@ -330,15 +306,6 @@
   return m_qr.diagonal().cwiseAbs().array().log().sum();
 }
 
-template <typename MatrixType>
-typename MatrixType::Scalar HouseholderQR<MatrixType>::signDeterminant() const {
-  eigen_assert(m_isInitialized && "HouseholderQR is not initialized.");
-  eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
-  Scalar detQ;
-  internal::householder_determinant<HCoeffsType, Scalar, NumTraits<Scalar>::IsComplex>::run(m_hCoeffs, detQ);
-  return detQ * m_qr.diagonal().array().sign().prod();
-}
-
 namespace internal {
 
 /** \internal */
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h
index 086d750..cb41123 100644
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -52,10 +52,7 @@
 class qr_preconditioner_impl<MatrixType, Options, QRPreconditioner, Case, false> {
  public:
   void allocate(const JacobiSVD<MatrixType, Options>&) {}
-  template <typename Xpr>
-  bool run(JacobiSVD<MatrixType, Options>&, const Xpr&) {
-    return false;
-  }
+  bool run(JacobiSVD<MatrixType, Options>&, const MatrixType&) { return false; }
 };
 
 /*** preconditioner using FullPivHouseholderQR ***/
@@ -78,8 +75,8 @@
     }
     if (svd.m_computeFullU) m_workspace.resize(svd.rows());
   }
-  template <typename Xpr>
-  bool run(SVDType& svd, const Xpr& matrix) {
+
+  bool run(SVDType& svd, const MatrixType& matrix) {
     if (matrix.rows() > matrix.cols()) {
       m_qr.compute(matrix);
       svd.m_workMatrix = m_qr.matrixQR().block(0, 0, matrix.cols(), matrix.cols()).template triangularView<Upper>();
@@ -120,12 +117,14 @@
       internal::destroy_at(&m_qr);
       internal::construct_at(&m_qr, svd.cols(), svd.rows());
     }
+    m_adjoint.resize(svd.cols(), svd.rows());
     if (svd.m_computeFullV) m_workspace.resize(svd.cols());
   }
-  template <typename Xpr>
-  bool run(SVDType& svd, const Xpr& matrix) {
+
+  bool run(SVDType& svd, const MatrixType& matrix) {
     if (matrix.cols() > matrix.rows()) {
-      m_qr.compute(matrix.adjoint());
+      m_adjoint = matrix.adjoint();
+      m_qr.compute(m_adjoint);
       svd.m_workMatrix =
           m_qr.matrixQR().block(0, 0, matrix.rows(), matrix.rows()).template triangularView<Upper>().adjoint();
       if (svd.m_computeFullV) m_qr.matrixQ().evalTo(svd.m_matrixV, m_workspace);
@@ -138,6 +137,7 @@
  private:
   typedef FullPivHouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
   QRType m_qr;
+  TransposeTypeWithSameStorageOrder m_adjoint;
   typename plain_row_type<MatrixType>::type m_workspace;
 };
 
@@ -167,8 +167,8 @@
     else if (svd.m_computeThinU)
       m_workspace.resize(svd.cols());
   }
-  template <typename Xpr>
-  bool run(SVDType& svd, const Xpr& matrix) {
+
+  bool run(SVDType& svd, const MatrixType& matrix) {
     if (matrix.rows() > matrix.cols()) {
       m_qr.compute(matrix);
       svd.m_workMatrix = m_qr.matrixQR().block(0, 0, matrix.cols(), matrix.cols()).template triangularView<Upper>();
@@ -222,11 +222,13 @@
       m_workspace.resize(svd.cols());
     else if (svd.m_computeThinV)
       m_workspace.resize(svd.rows());
+    m_adjoint.resize(svd.cols(), svd.rows());
   }
-  template <typename Xpr>
-  bool run(SVDType& svd, const Xpr& matrix) {
+
+  bool run(SVDType& svd, const MatrixType& matrix) {
     if (matrix.cols() > matrix.rows()) {
-      m_qr.compute(matrix.adjoint());
+      m_adjoint = matrix.adjoint();
+      m_qr.compute(m_adjoint);
 
       svd.m_workMatrix =
           m_qr.matrixQR().block(0, 0, matrix.rows(), matrix.rows()).template triangularView<Upper>().adjoint();
@@ -245,6 +247,7 @@
  private:
   typedef ColPivHouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
   QRType m_qr;
+  TransposeTypeWithSameStorageOrder m_adjoint;
   WorkspaceType m_workspace;
 };
 
@@ -273,8 +276,8 @@
     else if (svd.m_computeThinU)
       m_workspace.resize(svd.cols());
   }
-  template <typename Xpr>
-  bool run(SVDType& svd, const Xpr& matrix) {
+
+  bool run(SVDType& svd, const MatrixType& matrix) {
     if (matrix.rows() > matrix.cols()) {
       m_qr.compute(matrix);
       svd.m_workMatrix = m_qr.matrixQR().block(0, 0, matrix.cols(), matrix.cols()).template triangularView<Upper>();
@@ -327,12 +330,13 @@
       m_workspace.resize(svd.cols());
     else if (svd.m_computeThinV)
       m_workspace.resize(svd.rows());
+    m_adjoint.resize(svd.cols(), svd.rows());
   }
 
-  template <typename Xpr>
-  bool run(SVDType& svd, const Xpr& matrix) {
+  bool run(SVDType& svd, const MatrixType& matrix) {
     if (matrix.cols() > matrix.rows()) {
-      m_qr.compute(matrix.adjoint());
+      m_adjoint = matrix.adjoint();
+      m_qr.compute(m_adjoint);
 
       svd.m_workMatrix =
           m_qr.matrixQR().block(0, 0, matrix.rows(), matrix.rows()).template triangularView<Upper>().adjoint();
@@ -351,6 +355,7 @@
  private:
   typedef HouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
   QRType m_qr;
+  TransposeTypeWithSameStorageOrder m_adjoint;
   WorkspaceType m_workspace;
 };
 
@@ -504,6 +509,7 @@
   typedef MatrixType_ MatrixType;
   typedef typename Base::Scalar Scalar;
   typedef typename Base::RealScalar RealScalar;
+  typedef typename Base::Index Index;
   enum : int {
     Options = Options_,
     QRPreconditioner = internal::get_qr_preconditioner(Options),
@@ -612,18 +618,7 @@
   using Base::rows;
 
  private:
-  void allocate(Index rows_, Index cols_, unsigned int computationOptions) {
-    if (Base::allocate(rows_, cols_, computationOptions)) return;
-    eigen_assert(!(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) &&
-                 !(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) &&
-                 "JacobiSVD: can't compute thin U or thin V with the FullPivHouseholderQR preconditioner. "
-                 "Use the ColPivHouseholderQR preconditioner instead.");
-
-    m_workMatrix.resize(diagSize(), diagSize());
-    if (cols() > rows()) m_qr_precond_morecols.allocate(*this);
-    if (rows() > cols()) m_qr_precond_morerows.allocate(*this);
-  }
-
+  void allocate(Index rows, Index cols, unsigned int computationOptions);
   JacobiSVD& compute_impl(const MatrixType& matrix, unsigned int computationOptions);
 
  protected:
@@ -659,9 +654,25 @@
   internal::qr_preconditioner_impl<MatrixType, Options, QRPreconditioner, internal::PreconditionIfMoreRowsThanCols>
       m_qr_precond_morerows;
   WorkMatrixType m_workMatrix;
+  MatrixType m_scaledMatrix;
 };
 
 template <typename MatrixType, int Options>
+void JacobiSVD<MatrixType, Options>::allocate(Index rows_, Index cols_, unsigned int computationOptions_) {
+  if (Base::allocate(rows_, cols_, computationOptions_)) return;
+
+  eigen_assert(!(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) &&
+               !(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) &&
+               "JacobiSVD: can't compute thin U or thin V with the FullPivHouseholderQR preconditioner. "
+               "Use the ColPivHouseholderQR preconditioner instead.");
+
+  m_workMatrix.resize(diagSize(), diagSize());
+  if (cols() > rows()) m_qr_precond_morecols.allocate(*this);
+  if (rows() > cols()) m_qr_precond_morerows.allocate(*this);
+  if (rows() != cols()) m_scaledMatrix.resize(rows(), cols());
+}
+
+template <typename MatrixType, int Options>
 JacobiSVD<MatrixType, Options>& JacobiSVD<MatrixType, Options>::compute_impl(const MatrixType& matrix,
                                                                              unsigned int computationOptions) {
   using std::abs;
@@ -688,8 +699,9 @@
   /*** step 1. The R-SVD step: we use a QR decomposition to reduce to the case of a square matrix */
 
   if (rows() != cols()) {
-    m_qr_precond_morecols.run(*this, matrix / scale);
-    m_qr_precond_morerows.run(*this, matrix / scale);
+    m_scaledMatrix = matrix / scale;
+    m_qr_precond_morecols.run(*this, m_scaledMatrix);
+    m_qr_precond_morerows.run(*this, m_scaledMatrix);
   } else {
     m_workMatrix =
         matrix.template topLeftCorner<DiagSizeAtCompileTime, DiagSizeAtCompileTime>(diagSize(), diagSize()) / scale;
diff --git a/Eigen/src/SVD/SVDBase.h b/Eigen/src/SVD/SVDBase.h
index d1ad63d..5f04647 100644
--- a/Eigen/src/SVD/SVDBase.h
+++ b/Eigen/src/SVD/SVDBase.h
@@ -125,6 +125,7 @@
   typedef typename MatrixType::Scalar Scalar;
   typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
   typedef typename Eigen::internal::traits<SVDBase>::StorageIndex StorageIndex;
+  typedef Eigen::Index Index;  ///< \deprecated since Eigen 3.3
 
   static constexpr bool ShouldComputeFullU = internal::traits<Derived>::ShouldComputeFullU;
   static constexpr bool ShouldComputeThinU = internal::traits<Derived>::ShouldComputeThinU;
@@ -354,11 +355,11 @@
         m_isInitialized(false),
         m_isAllocated(false),
         m_usePrescribedThreshold(false),
-        m_computeFullU(ShouldComputeFullU),
-        m_computeThinU(ShouldComputeThinU),
-        m_computeFullV(ShouldComputeFullV),
-        m_computeThinV(ShouldComputeThinV),
-        m_computationOptions(internal::traits<Derived>::Options),
+        m_computeFullU(false),
+        m_computeThinU(false),
+        m_computeFullV(false),
+        m_computeThinV(false),
+        m_computationOptions(0),
         m_nonzeroSingularValues(0),
         m_rows(RowsAtCompileTime),
         m_cols(ColsAtCompileTime),
diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky.h b/Eigen/src/SparseCholesky/SimplicialCholesky.h
index f3ce975..423287b 100644
--- a/Eigen/src/SparseCholesky/SimplicialCholesky.h
+++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h
@@ -58,7 +58,6 @@
   enum { UpLo = internal::traits<Derived>::UpLo };
   typedef typename MatrixType::Scalar Scalar;
   typedef typename MatrixType::RealScalar RealScalar;
-  typedef typename internal::traits<Derived>::DiagonalScalar DiagonalScalar;
   typedef typename MatrixType::StorageIndex StorageIndex;
   typedef SparseMatrix<Scalar, ColMajor, StorageIndex> CholMatrixType;
   typedef CholMatrixType const* ConstCholMatrixPtr;
@@ -115,7 +114,7 @@
    *
    * \returns a reference to \c *this.
    */
-  Derived& setShift(const DiagonalScalar& offset, const DiagonalScalar& scale = 1) {
+  Derived& setShift(const RealScalar& offset, const RealScalar& scale = 1) {
     m_shiftOffset = offset;
     m_shiftScale = scale;
     return derived();
@@ -179,18 +178,18 @@
 
  protected:
   /** Computes the sparse Cholesky decomposition of \a matrix */
-  template <bool DoLDLT, bool NonHermitian>
+  template <bool DoLDLT>
   void compute(const MatrixType& matrix) {
     eigen_assert(matrix.rows() == matrix.cols());
     Index size = matrix.cols();
     CholMatrixType tmp(size, size);
     ConstCholMatrixPtr pmat;
-    ordering<NonHermitian>(matrix, pmat, tmp);
+    ordering(matrix, pmat, tmp);
     analyzePattern_preordered(*pmat, DoLDLT);
-    factorize_preordered<DoLDLT, NonHermitian>(*pmat);
+    factorize_preordered<DoLDLT>(*pmat);
   }
 
-  template <bool DoLDLT, bool NonHermitian>
+  template <bool DoLDLT>
   void factorize(const MatrixType& a) {
     eigen_assert(a.rows() == a.cols());
     Index size = a.cols();
@@ -201,33 +200,28 @@
       // If there is no ordering, try to directly use the input matrix without any copy
       internal::simplicial_cholesky_grab_input<CholMatrixType, MatrixType>::run(a, pmat, tmp);
     } else {
-      internal::permute_symm_to_symm<UpLo, Upper, NonHermitian>(a, tmp, m_P.indices().data());
+      tmp.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);
       pmat = &tmp;
     }
 
-    factorize_preordered<DoLDLT, NonHermitian>(*pmat);
+    factorize_preordered<DoLDLT>(*pmat);
   }
 
-  template <bool DoLDLT, bool NonHermitian>
+  template <bool DoLDLT>
   void factorize_preordered(const CholMatrixType& a);
 
-  template <bool DoLDLT, bool NonHermitian>
-  void analyzePattern(const MatrixType& a) {
+  void analyzePattern(const MatrixType& a, bool doLDLT) {
     eigen_assert(a.rows() == a.cols());
     Index size = a.cols();
     CholMatrixType tmp(size, size);
     ConstCholMatrixPtr pmat;
-    ordering<NonHermitian>(a, pmat, tmp);
-    analyzePattern_preordered(*pmat, DoLDLT);
+    ordering(a, pmat, tmp);
+    analyzePattern_preordered(*pmat, doLDLT);
   }
   void analyzePattern_preordered(const CholMatrixType& a, bool doLDLT);
 
-  template <bool NonHermitian>
   void ordering(const MatrixType& a, ConstCholMatrixPtr& pmat, CholMatrixType& ap);
 
-  inline DiagonalScalar getDiag(Scalar x) { return internal::traits<Derived>::getDiag(x); }
-  inline Scalar getSymm(Scalar x) { return internal::traits<Derived>::getSymm(x); }
-
   /** keeps off-diagonal entries; drops diagonal entries */
   struct keep_diag {
     inline bool operator()(const Index& row, const Index& col, const Scalar&) const { return row != col; }
@@ -244,8 +238,8 @@
   PermutationMatrix<Dynamic, Dynamic, StorageIndex> m_P;     // the permutation
   PermutationMatrix<Dynamic, Dynamic, StorageIndex> m_Pinv;  // the inverse permutation
 
-  DiagonalScalar m_shiftOffset;
-  DiagonalScalar m_shiftScale;
+  RealScalar m_shiftOffset;
+  RealScalar m_shiftScale;
 };
 
 template <typename MatrixType_, int UpLo_ = Lower,
@@ -256,12 +250,6 @@
 class SimplicialLDLT;
 template <typename MatrixType_, int UpLo_ = Lower,
           typename Ordering_ = AMDOrdering<typename MatrixType_::StorageIndex> >
-class SimplicialNonHermitianLLT;
-template <typename MatrixType_, int UpLo_ = Lower,
-          typename Ordering_ = AMDOrdering<typename MatrixType_::StorageIndex> >
-class SimplicialNonHermitianLDLT;
-template <typename MatrixType_, int UpLo_ = Lower,
-          typename Ordering_ = AMDOrdering<typename MatrixType_::StorageIndex> >
 class SimplicialCholesky;
 
 namespace internal {
@@ -272,15 +260,12 @@
   typedef Ordering_ OrderingType;
   enum { UpLo = UpLo_ };
   typedef typename MatrixType::Scalar Scalar;
-  typedef typename MatrixType::RealScalar DiagonalScalar;
   typedef typename MatrixType::StorageIndex StorageIndex;
   typedef SparseMatrix<Scalar, ColMajor, StorageIndex> CholMatrixType;
   typedef TriangularView<const CholMatrixType, Eigen::Lower> MatrixL;
   typedef TriangularView<const typename CholMatrixType::AdjointReturnType, Eigen::Upper> MatrixU;
   static inline MatrixL getL(const CholMatrixType& m) { return MatrixL(m); }
   static inline MatrixU getU(const CholMatrixType& m) { return MatrixU(m.adjoint()); }
-  static inline DiagonalScalar getDiag(Scalar x) { return numext::real(x); }
-  static inline Scalar getSymm(Scalar x) { return numext::conj(x); }
 };
 
 template <typename MatrixType_, int UpLo_, typename Ordering_>
@@ -289,49 +274,12 @@
   typedef Ordering_ OrderingType;
   enum { UpLo = UpLo_ };
   typedef typename MatrixType::Scalar Scalar;
-  typedef typename MatrixType::RealScalar DiagonalScalar;
   typedef typename MatrixType::StorageIndex StorageIndex;
   typedef SparseMatrix<Scalar, ColMajor, StorageIndex> CholMatrixType;
   typedef TriangularView<const CholMatrixType, Eigen::UnitLower> MatrixL;
   typedef TriangularView<const typename CholMatrixType::AdjointReturnType, Eigen::UnitUpper> MatrixU;
   static inline MatrixL getL(const CholMatrixType& m) { return MatrixL(m); }
   static inline MatrixU getU(const CholMatrixType& m) { return MatrixU(m.adjoint()); }
-  static inline DiagonalScalar getDiag(Scalar x) { return numext::real(x); }
-  static inline Scalar getSymm(Scalar x) { return numext::conj(x); }
-};
-
-template <typename MatrixType_, int UpLo_, typename Ordering_>
-struct traits<SimplicialNonHermitianLLT<MatrixType_, UpLo_, Ordering_> > {
-  typedef MatrixType_ MatrixType;
-  typedef Ordering_ OrderingType;
-  enum { UpLo = UpLo_ };
-  typedef typename MatrixType::Scalar Scalar;
-  typedef typename MatrixType::Scalar DiagonalScalar;
-  typedef typename MatrixType::StorageIndex StorageIndex;
-  typedef SparseMatrix<Scalar, ColMajor, StorageIndex> CholMatrixType;
-  typedef TriangularView<const CholMatrixType, Eigen::Lower> MatrixL;
-  typedef TriangularView<const typename CholMatrixType::ConstTransposeReturnType, Eigen::Upper> MatrixU;
-  static inline MatrixL getL(const CholMatrixType& m) { return MatrixL(m); }
-  static inline MatrixU getU(const CholMatrixType& m) { return MatrixU(m.transpose()); }
-  static inline DiagonalScalar getDiag(Scalar x) { return x; }
-  static inline Scalar getSymm(Scalar x) { return x; }
-};
-
-template <typename MatrixType_, int UpLo_, typename Ordering_>
-struct traits<SimplicialNonHermitianLDLT<MatrixType_, UpLo_, Ordering_> > {
-  typedef MatrixType_ MatrixType;
-  typedef Ordering_ OrderingType;
-  enum { UpLo = UpLo_ };
-  typedef typename MatrixType::Scalar Scalar;
-  typedef typename MatrixType::Scalar DiagonalScalar;
-  typedef typename MatrixType::StorageIndex StorageIndex;
-  typedef SparseMatrix<Scalar, ColMajor, StorageIndex> CholMatrixType;
-  typedef TriangularView<const CholMatrixType, Eigen::UnitLower> MatrixL;
-  typedef TriangularView<const typename CholMatrixType::ConstTransposeReturnType, Eigen::UnitUpper> MatrixU;
-  static inline MatrixL getL(const CholMatrixType& m) { return MatrixL(m); }
-  static inline MatrixU getU(const CholMatrixType& m) { return MatrixU(m.transpose()); }
-  static inline DiagonalScalar getDiag(Scalar x) { return x; }
-  static inline Scalar getSymm(Scalar x) { return x; }
 };
 
 template <typename MatrixType_, int UpLo_, typename Ordering_>
@@ -339,10 +287,6 @@
   typedef MatrixType_ MatrixType;
   typedef Ordering_ OrderingType;
   enum { UpLo = UpLo_ };
-  typedef typename MatrixType::Scalar Scalar;
-  typedef typename MatrixType::RealScalar DiagonalScalar;
-  static inline DiagonalScalar getDiag(Scalar x) { return numext::real(x); }
-  static inline Scalar getSymm(Scalar x) { return numext::conj(x); }
 };
 
 }  // namespace internal
@@ -402,7 +346,7 @@
 
   /** Computes the sparse Cholesky decomposition of \a matrix */
   SimplicialLLT& compute(const MatrixType& matrix) {
-    Base::template compute<false, false>(matrix);
+    Base::template compute<false>(matrix);
     return *this;
   }
 
@@ -412,7 +356,7 @@
    *
    * \sa factorize()
    */
-  void analyzePattern(const MatrixType& a) { Base::template analyzePattern<false, false>(a); }
+  void analyzePattern(const MatrixType& a) { Base::analyzePattern(a, false); }
 
   /** Performs a numeric decomposition of \a matrix
    *
@@ -420,7 +364,7 @@
    *
    * \sa analyzePattern()
    */
-  void factorize(const MatrixType& a) { Base::template factorize<false, false>(a); }
+  void factorize(const MatrixType& a) { Base::template factorize<false>(a); }
 
   /** \returns the determinant of the underlying matrix from the current factorization */
   Scalar determinant() const {
@@ -490,7 +434,7 @@
 
   /** Computes the sparse Cholesky decomposition of \a matrix */
   SimplicialLDLT& compute(const MatrixType& matrix) {
-    Base::template compute<true, false>(matrix);
+    Base::template compute<true>(matrix);
     return *this;
   }
 
@@ -500,7 +444,7 @@
    *
    * \sa factorize()
    */
-  void analyzePattern(const MatrixType& a) { Base::template analyzePattern<true, false>(a); }
+  void analyzePattern(const MatrixType& a) { Base::analyzePattern(a, true); }
 
   /** Performs a numeric decomposition of \a matrix
    *
@@ -508,177 +452,7 @@
    *
    * \sa analyzePattern()
    */
-  void factorize(const MatrixType& a) { Base::template factorize<true, false>(a); }
-
-  /** \returns the determinant of the underlying matrix from the current factorization */
-  Scalar determinant() const { return Base::m_diag.prod(); }
-};
-
-/** \ingroup SparseCholesky_Module
- * \class SimplicialNonHermitianLLT
- * \brief A direct sparse LLT Cholesky factorizations, for symmetric non-hermitian matrices.
- *
- * This class provides a LL^T Cholesky factorizations of sparse matrices that are
- * symmetric but not hermitian. For real matrices, this is equivalent to the regular LLT factorization.
- * The factorization allows for solving A.X = B where X and B can be either dense or sparse.
- *
- * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization
- * such that the factorized matrix is P A P^-1.
- *
- * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
- * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower
- *               or Upper. Default is Lower.
- * \tparam Ordering_ The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<>
- *
- * \implsparsesolverconcept
- *
- * \sa class SimplicialNonHermitianLDLT, SimplicialLLT, class AMDOrdering, class NaturalOrdering
- */
-template <typename MatrixType_, int UpLo_, typename Ordering_>
-class SimplicialNonHermitianLLT
-    : public SimplicialCholeskyBase<SimplicialNonHermitianLLT<MatrixType_, UpLo_, Ordering_> > {
- public:
-  typedef MatrixType_ MatrixType;
-  enum { UpLo = UpLo_ };
-  typedef SimplicialCholeskyBase<SimplicialNonHermitianLLT> Base;
-  typedef typename MatrixType::Scalar Scalar;
-  typedef typename MatrixType::RealScalar RealScalar;
-  typedef typename MatrixType::StorageIndex StorageIndex;
-  typedef SparseMatrix<Scalar, ColMajor, StorageIndex> CholMatrixType;
-  typedef Matrix<Scalar, Dynamic, 1> VectorType;
-  typedef internal::traits<SimplicialNonHermitianLLT> Traits;
-  typedef typename Traits::MatrixL MatrixL;
-  typedef typename Traits::MatrixU MatrixU;
-
- public:
-  /** Default constructor */
-  SimplicialNonHermitianLLT() : Base() {}
-
-  /** Constructs and performs the LLT factorization of \a matrix */
-  explicit SimplicialNonHermitianLLT(const MatrixType& matrix) : Base(matrix) {}
-
-  /** \returns an expression of the factor L */
-  inline const MatrixL matrixL() const {
-    eigen_assert(Base::m_factorizationIsOk && "Simplicial LLT not factorized");
-    return Traits::getL(Base::m_matrix);
-  }
-
-  /** \returns an expression of the factor U (= L^*) */
-  inline const MatrixU matrixU() const {
-    eigen_assert(Base::m_factorizationIsOk && "Simplicial LLT not factorized");
-    return Traits::getU(Base::m_matrix);
-  }
-
-  /** Computes the sparse Cholesky decomposition of \a matrix */
-  SimplicialNonHermitianLLT& compute(const MatrixType& matrix) {
-    Base::template compute<false, true>(matrix);
-    return *this;
-  }
-
-  /** Performs a symbolic decomposition on the sparcity of \a matrix.
-   *
-   * This function is particularly useful when solving for several problems having the same structure.
-   *
-   * \sa factorize()
-   */
-  void analyzePattern(const MatrixType& a) { Base::template analyzePattern<false, true>(a); }
-
-  /** Performs a numeric decomposition of \a matrix
-   *
-   * The given matrix must has the same sparcity than the matrix on which the symbolic decomposition has been performed.
-   *
-   * \sa analyzePattern()
-   */
-  void factorize(const MatrixType& a) { Base::template factorize<false, true>(a); }
-
-  /** \returns the determinant of the underlying matrix from the current factorization */
-  Scalar determinant() const {
-    Scalar detL = Base::m_matrix.diagonal().prod();
-    return detL * detL;
-  }
-};
-
-/** \ingroup SparseCholesky_Module
- * \class SimplicialNonHermitianLDLT
- * \brief A direct sparse LDLT Cholesky factorizations without square root, for symmetric non-hermitian matrices.
- *
- * This class provides a LDL^T Cholesky factorizations without square root of sparse matrices that are
- * symmetric but not hermitian. For real matrices, this is equivalent to the regular LDLT factorization.
- * The factorization allows for solving A.X = B where X and B can be either dense or sparse.
- *
- * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization
- * such that the factorized matrix is P A P^-1.
- *
- * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
- * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower
- *               or Upper. Default is Lower.
- * \tparam Ordering_ The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<>
- *
- * \implsparsesolverconcept
- *
- * \sa class SimplicialNonHermitianLLT, SimplicialLDLT, class AMDOrdering, class NaturalOrdering
- */
-template <typename MatrixType_, int UpLo_, typename Ordering_>
-class SimplicialNonHermitianLDLT
-    : public SimplicialCholeskyBase<SimplicialNonHermitianLDLT<MatrixType_, UpLo_, Ordering_> > {
- public:
-  typedef MatrixType_ MatrixType;
-  enum { UpLo = UpLo_ };
-  typedef SimplicialCholeskyBase<SimplicialNonHermitianLDLT> Base;
-  typedef typename MatrixType::Scalar Scalar;
-  typedef typename MatrixType::RealScalar RealScalar;
-  typedef typename MatrixType::StorageIndex StorageIndex;
-  typedef SparseMatrix<Scalar, ColMajor, StorageIndex> CholMatrixType;
-  typedef Matrix<Scalar, Dynamic, 1> VectorType;
-  typedef internal::traits<SimplicialNonHermitianLDLT> Traits;
-  typedef typename Traits::MatrixL MatrixL;
-  typedef typename Traits::MatrixU MatrixU;
-
- public:
-  /** Default constructor */
-  SimplicialNonHermitianLDLT() : Base() {}
-
-  /** Constructs and performs the LLT factorization of \a matrix */
-  explicit SimplicialNonHermitianLDLT(const MatrixType& matrix) : Base(matrix) {}
-
-  /** \returns a vector expression of the diagonal D */
-  inline const VectorType vectorD() const {
-    eigen_assert(Base::m_factorizationIsOk && "Simplicial LDLT not factorized");
-    return Base::m_diag;
-  }
-  /** \returns an expression of the factor L */
-  inline const MatrixL matrixL() const {
-    eigen_assert(Base::m_factorizationIsOk && "Simplicial LDLT not factorized");
-    return Traits::getL(Base::m_matrix);
-  }
-
-  /** \returns an expression of the factor U (= L^*) */
-  inline const MatrixU matrixU() const {
-    eigen_assert(Base::m_factorizationIsOk && "Simplicial LDLT not factorized");
-    return Traits::getU(Base::m_matrix);
-  }
-
-  /** Computes the sparse Cholesky decomposition of \a matrix */
-  SimplicialNonHermitianLDLT& compute(const MatrixType& matrix) {
-    Base::template compute<true, true>(matrix);
-    return *this;
-  }
-
-  /** Performs a symbolic decomposition on the sparcity of \a matrix.
-   *
-   * This function is particularly useful when solving for several problems having the same structure.
-   *
-   * \sa factorize()
-   */
-  void analyzePattern(const MatrixType& a) { Base::template analyzePattern<true, true>(a); }
-
-  /** Performs a numeric decomposition of \a matrix
-   *
-   * The given matrix must has the same sparcity than the matrix on which the symbolic decomposition has been performed.
-   *
-   * \sa analyzePattern()
-   */
-  void factorize(const MatrixType& a) { Base::template factorize<true, true>(a); }
+  void factorize(const MatrixType& a) { Base::template factorize<true>(a); }
 
   /** \returns the determinant of the underlying matrix from the current factorization */
   Scalar determinant() const { return Base::m_diag.prod(); }
@@ -701,6 +475,7 @@
   typedef typename MatrixType::StorageIndex StorageIndex;
   typedef SparseMatrix<Scalar, ColMajor, StorageIndex> CholMatrixType;
   typedef Matrix<Scalar, Dynamic, 1> VectorType;
+  typedef internal::traits<SimplicialCholesky> Traits;
   typedef internal::traits<SimplicialLDLT<MatrixType, UpLo> > LDLTTraits;
   typedef internal::traits<SimplicialLLT<MatrixType, UpLo> > LLTTraits;
 
@@ -736,9 +511,9 @@
   /** Computes the sparse Cholesky decomposition of \a matrix */
   SimplicialCholesky& compute(const MatrixType& matrix) {
     if (m_LDLT)
-      Base::template compute<true, false>(matrix);
+      Base::template compute<true>(matrix);
     else
-      Base::template compute<false, false>(matrix);
+      Base::template compute<false>(matrix);
     return *this;
   }
 
@@ -748,12 +523,7 @@
    *
    * \sa factorize()
    */
-  void analyzePattern(const MatrixType& a) {
-    if (m_LDLT)
-      Base::template analyzePattern<true, false>(a);
-    else
-      Base::template analyzePattern<false, false>(a);
-  }
+  void analyzePattern(const MatrixType& a) { Base::analyzePattern(a, m_LDLT); }
 
   /** Performs a numeric decomposition of \a matrix
    *
@@ -763,9 +533,9 @@
    */
   void factorize(const MatrixType& a) {
     if (m_LDLT)
-      Base::template factorize<true, false>(a);
+      Base::template factorize<true>(a);
     else
-      Base::template factorize<false, false>(a);
+      Base::template factorize<false>(a);
   }
 
   /** \internal */
@@ -824,7 +594,6 @@
 };
 
 template <typename Derived>
-template <bool NonHermitian>
 void SimplicialCholeskyBase<Derived>::ordering(const MatrixType& a, ConstCholMatrixPtr& pmat, CholMatrixType& ap) {
   eigen_assert(a.rows() == a.cols());
   const Index size = a.rows();
@@ -833,7 +602,7 @@
   if (!internal::is_same<OrderingType, NaturalOrdering<Index> >::value) {
     {
       CholMatrixType C;
-      internal::permute_symm_to_fullsymm<UpLo, NonHermitian>(a, C, NULL);
+      C = a.template selfadjointView<UpLo>();
 
       OrderingType ordering;
       ordering(C, m_Pinv);
@@ -845,14 +614,14 @@
       m_P.resize(0);
 
     ap.resize(size, size);
-    internal::permute_symm_to_symm<UpLo, Upper, NonHermitian>(a, ap, m_P.indices().data());
+    ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);
   } else {
     m_Pinv.resize(0);
     m_P.resize(0);
     if (int(UpLo) == int(Lower) || MatrixType::IsRowMajor) {
       // we have to transpose the lower part to to the upper one
       ap.resize(size, size);
-      internal::permute_symm_to_symm<UpLo, Upper, NonHermitian>(a, ap, NULL);
+      ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>();
     } else
       internal::simplicial_cholesky_grab_input<CholMatrixType, MatrixType>::run(a, pmat, ap);
   }
diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h b/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h
index 0b13c56..abfbbe6 100644
--- a/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h
+++ b/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h
@@ -67,7 +67,7 @@
 }
 
 template <typename Derived>
-template <bool DoLDLT, bool NonHermitian>
+template <bool DoLDLT>
 void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType& ap) {
   using std::sqrt;
 
@@ -97,7 +97,7 @@
     for (typename CholMatrixType::InnerIterator it(ap, k); it; ++it) {
       StorageIndex i = it.index();
       if (i <= k) {
-        y[i] += getSymm(it.value()); /* scatter A(i,k) into Y (sum duplicates) */
+        y[i] += numext::conj(it.value()); /* scatter A(i,k) into Y (sum duplicates) */
         Index len;
         for (len = 0; tags[i] != k; i = m_parent[i]) {
           pattern[len++] = i; /* L(k,i) is nonzero */
@@ -109,8 +109,8 @@
 
     /* compute numerical values kth row of L (a sparse triangular solve) */
 
-    DiagonalScalar d =
-        getDiag(y[k]) * m_shiftScale + m_shiftOffset;  // get D(k,k), apply the shift function, and clear Y(k)
+    RealScalar d =
+        numext::real(y[k]) * m_shiftScale + m_shiftOffset;  // get D(k,k), apply the shift function, and clear Y(k)
     y[k] = Scalar(0);
     for (; top < size; ++top) {
       Index i = pattern[top]; /* pattern[top:n-1] is pattern of L(:,k) */
@@ -120,14 +120,14 @@
       /* the nonzero entry L(k,i) */
       Scalar l_ki;
       if (DoLDLT)
-        l_ki = yi / getDiag(m_diag[i]);
+        l_ki = yi / numext::real(m_diag[i]);
       else
         yi = l_ki = yi / Lx[Lp[i]];
 
       Index p2 = Lp[i] + m_nonZerosPerCol[i];
       Index p;
-      for (p = Lp[i] + (DoLDLT ? 0 : 1); p < p2; ++p) y[Li[p]] -= getSymm(Lx[p]) * yi;
-      d -= getDiag(l_ki * getSymm(yi));
+      for (p = Lp[i] + (DoLDLT ? 0 : 1); p < p2; ++p) y[Li[p]] -= numext::conj(Lx[p]) * yi;
+      d -= numext::real(l_ki * numext::conj(yi));
       Li[p] = k; /* store L(k,i) in column form of L */
       Lx[p] = l_ki;
       ++m_nonZerosPerCol[i]; /* increment count of nonzeros in col i */
@@ -141,7 +141,7 @@
     } else {
       Index p = Lp[k] + m_nonZerosPerCol[k]++;
       Li[p] = k; /* store L(k,k) = sqrt (d) in column k */
-      if (NonHermitian ? d == RealScalar(0) : numext::real(d) <= RealScalar(0)) {
+      if (d <= RealScalar(0)) {
         ok = false; /* failure, matrix is not positive definite */
         break;
       }
diff --git a/Eigen/src/SparseCore/SparseDot.h b/Eigen/src/SparseCore/SparseDot.h
index f040915..aa876ec 100644
--- a/Eigen/src/SparseCore/SparseDot.h
+++ b/Eigen/src/SparseCore/SparseDot.h
@@ -17,8 +17,7 @@
 
 template <typename Derived>
 template <typename OtherDerived>
-inline typename internal::traits<Derived>::Scalar SparseMatrixBase<Derived>::dot(
-    const MatrixBase<OtherDerived>& other) const {
+typename internal::traits<Derived>::Scalar SparseMatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
   EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived, OtherDerived)
@@ -31,23 +30,17 @@
 
   internal::evaluator<Derived> thisEval(derived());
   typename internal::evaluator<Derived>::InnerIterator i(thisEval, 0);
-  // Two accumulators, which breaks the dependency chain on the accumulator
-  // and allows more instruction-level parallelism in the following loop.
-  Scalar res1(0);
-  Scalar res2(0);
-  for (; i; ++i) {
-    res1 += numext::conj(i.value()) * other.coeff(i.index());
+  Scalar res(0);
+  while (i) {
+    res += numext::conj(i.value()) * other.coeff(i.index());
     ++i;
-    if (i) {
-      res2 += numext::conj(i.value()) * other.coeff(i.index());
-    }
   }
-  return res1 + res2;
+  return res;
 }
 
 template <typename Derived>
 template <typename OtherDerived>
-inline typename internal::traits<Derived>::Scalar SparseMatrixBase<Derived>::dot(
+typename internal::traits<Derived>::Scalar SparseMatrixBase<Derived>::dot(
     const SparseMatrixBase<OtherDerived>& other) const {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h
index 849970a..81b0a11 100644
--- a/Eigen/src/SparseCore/SparseMatrix.h
+++ b/Eigen/src/SparseCore/SparseMatrix.h
@@ -217,18 +217,15 @@
     return m_data.atInRange(m_outerIndex[outer], end, inner);
   }
 
-  /** \returns a non-const reference to the value of the matrix at position \a i, \a j.
+  /** \returns a non-const reference to the value of the matrix at position \a i, \a j
    *
    * If the element does not exist then it is inserted via the insert(Index,Index) function
    * which itself turns the matrix into a non compressed form if that was not the case.
-   * The output parameter `inserted` is set to true.
-   *
-   * Otherwise, if the element does exist, `inserted` will be set to false.
    *
    * This is a O(log(nnz_j)) operation (binary search) plus the cost of insert(Index,Index)
    * function if the element does not already exist.
    */
-  inline Scalar& findOrInsertCoeff(Index row, Index col, bool* inserted) {
+  inline Scalar& coeffRef(Index row, Index col) {
     eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
     const Index outer = IsRowMajor ? row : col;
     const Index inner = IsRowMajor ? col : row;
@@ -243,37 +240,17 @@
         m_innerNonZeros[outer]++;
         m_data.index(end) = StorageIndex(inner);
         m_data.value(end) = Scalar(0);
-        if (inserted != nullptr) {
-          *inserted = true;
-        }
         return m_data.value(end);
       }
     }
-    if ((dst < end) && (m_data.index(dst) == inner)) {
+    if ((dst < end) && (m_data.index(dst) == inner))
       // this coefficient exists, return a refernece to it
-      if (inserted != nullptr) {
-        *inserted = false;
-      }
       return m_data.value(dst);
-    } else {
-      if (inserted != nullptr) {
-        *inserted = true;
-      }
+    else
       // insertion will require reconfiguring the buffer
       return insertAtByOuterInner(outer, inner, dst);
-    }
   }
 
-  /** \returns a non-const reference to the value of the matrix at position \a i, \a j
-   *
-   * If the element does not exist then it is inserted via the insert(Index,Index) function
-   * which itself turns the matrix into a non compressed form if that was not the case.
-   *
-   * This is a O(log(nnz_j)) operation (binary search) plus the cost of insert(Index,Index)
-   * function if the element does not already exist.
-   */
-  inline Scalar& coeffRef(Index row, Index col) { return findOrInsertCoeff(row, col, nullptr); }
-
   /** \returns a reference to a novel non zero coefficient with coordinates \a row x \a col.
    * The non zero coefficient must \b not already exist.
    *
@@ -788,11 +765,8 @@
     Base::operator=(other);
   }
 
-  /** Move constructor */
-  inline SparseMatrix(SparseMatrix&& other) : SparseMatrix() { this->swap(other); }
-
-  template <typename OtherDerived>
-  inline SparseMatrix(SparseCompressedBase<OtherDerived>&& other) : SparseMatrix() {
+  inline SparseMatrix(SparseMatrix&& other)
+      : Base(), m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0) {
     *this = other.derived().markAsRValue();
   }
 
@@ -860,10 +834,7 @@
     return *this;
   }
 
-  inline SparseMatrix& operator=(SparseMatrix&& other) {
-    this->swap(other);
-    return *this;
-  }
+  inline SparseMatrix& operator=(SparseMatrix&& other) { return *this = other.derived().markAsRValue(); }
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN
   template <typename OtherDerived>
@@ -878,12 +849,6 @@
   template <typename OtherDerived>
   EIGEN_DONT_INLINE SparseMatrix& operator=(const SparseMatrixBase<OtherDerived>& other);
 
-  template <typename OtherDerived>
-  inline SparseMatrix& operator=(SparseCompressedBase<OtherDerived>&& other) {
-    *this = other.derived().markAsRValue();
-    return *this;
-  }
-
 #ifndef EIGEN_NO_IO
   friend std::ostream& operator<<(std::ostream& s, const SparseMatrix& m) {
     EIGEN_DBG_SPARSE(
diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h
index 3402bae..129899c 100644
--- a/Eigen/src/SparseCore/SparseSelfAdjointView.h
+++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h
@@ -34,13 +34,13 @@
 template <typename MatrixType, unsigned int Mode>
 struct traits<SparseSelfAdjointView<MatrixType, Mode> > : traits<MatrixType> {};
 
-template <int SrcMode, int DstMode, bool NonHermitian, typename MatrixType, int DestOrder>
+template <int SrcMode, int DstMode, typename MatrixType, int DestOrder>
 void permute_symm_to_symm(
     const MatrixType& mat,
     SparseMatrix<typename MatrixType::Scalar, DestOrder, typename MatrixType::StorageIndex>& _dest,
     const typename MatrixType::StorageIndex* perm = 0);
 
-template <int Mode, bool NonHermitian, typename MatrixType, int DestOrder>
+template <int Mode, typename MatrixType, int DestOrder>
 void permute_symm_to_fullsymm(
     const MatrixType& mat,
     SparseMatrix<typename MatrixType::Scalar, DestOrder, typename MatrixType::StorageIndex>& _dest,
@@ -53,7 +53,7 @@
  public:
   enum {
     Mode = Mode_,
-    TransposeMode = ((int(Mode) & int(Upper)) ? Lower : 0) | ((int(Mode) & int(Lower)) ? Upper : 0),
+    TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0),
     RowsAtCompileTime = internal::traits<SparseSelfAdjointView>::RowsAtCompileTime,
     ColsAtCompileTime = internal::traits<SparseSelfAdjointView>::ColsAtCompileTime
   };
@@ -234,7 +234,7 @@
   template <typename DestScalar, int StorageOrder>
   static void run(SparseMatrix<DestScalar, StorageOrder, StorageIndex>& dst, const SrcXprType& src,
                   const AssignOpType& /*func*/) {
-    internal::permute_symm_to_fullsymm<SrcXprType::Mode, false>(src.matrix(), dst);
+    internal::permute_symm_to_fullsymm<SrcXprType::Mode>(src.matrix(), dst);
   }
 
   // FIXME: the handling of += and -= in sparse matrices should be cleanup so that next two overloads could be reduced
@@ -405,7 +405,7 @@
  ***************************************************************************/
 namespace internal {
 
-template <int Mode, bool NonHermitian, typename MatrixType, int DestOrder>
+template <int Mode, typename MatrixType, int DestOrder>
 void permute_symm_to_fullsymm(
     const MatrixType& mat,
     SparseMatrix<typename MatrixType::Scalar, DestOrder, typename MatrixType::StorageIndex>& _dest,
@@ -476,13 +476,13 @@
         dest.valuePtr()[k] = it.value();
         k = count[ip]++;
         dest.innerIndexPtr()[k] = jp;
-        dest.valuePtr()[k] = (NonHermitian ? it.value() : numext::conj(it.value()));
+        dest.valuePtr()[k] = numext::conj(it.value());
       }
     }
   }
 }
 
-template <int SrcMode_, int DstMode_, bool NonHermitian, typename MatrixType, int DstOrder>
+template <int SrcMode_, int DstMode_, typename MatrixType, int DstOrder>
 void permute_symm_to_symm(const MatrixType& mat,
                           SparseMatrix<typename MatrixType::Scalar, DstOrder, typename MatrixType::StorageIndex>& _dest,
                           const typename MatrixType::StorageIndex* perm) {
@@ -534,7 +534,7 @@
 
       if (!StorageOrderMatch) std::swap(ip, jp);
       if (((int(DstMode) == int(Lower) && ip < jp) || (int(DstMode) == int(Upper) && ip > jp)))
-        dest.valuePtr()[k] = (NonHermitian ? it.value() : numext::conj(it.value()));
+        dest.valuePtr()[k] = numext::conj(it.value());
       else
         dest.valuePtr()[k] = it.value();
     }
@@ -595,14 +595,14 @@
                   const internal::assign_op<Scalar, typename MatrixType::Scalar>&) {
     // internal::permute_symm_to_fullsymm<Mode>(m_matrix,_dest,m_perm.indices().data());
     SparseMatrix<Scalar, (Options & RowMajor) == RowMajor ? ColMajor : RowMajor, DstIndex> tmp;
-    internal::permute_symm_to_fullsymm<Mode, false>(src.matrix(), tmp, src.perm().indices().data());
+    internal::permute_symm_to_fullsymm<Mode>(src.matrix(), tmp, src.perm().indices().data());
     dst = tmp;
   }
 
   template <typename DestType, unsigned int DestMode>
   static void run(SparseSelfAdjointView<DestType, DestMode>& dst, const SrcXprType& src,
                   const internal::assign_op<Scalar, typename MatrixType::Scalar>&) {
-    internal::permute_symm_to_symm<Mode, DestMode, false>(src.matrix(), dst.matrix(), src.perm().indices().data());
+    internal::permute_symm_to_symm<Mode, DestMode>(src.matrix(), dst.matrix(), src.perm().indices().data());
   }
 };
 
diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h
index fac162e..0733718 100644
--- a/Eigen/src/SparseCore/SparseVector.h
+++ b/Eigen/src/SparseCore/SparseVector.h
@@ -304,24 +304,6 @@
     return *this;
   }
 
-  inline SparseVector(SparseVector&& other) : SparseVector() { this->swap(other); }
-
-  template <typename OtherDerived>
-  inline SparseVector(SparseCompressedBase<OtherDerived>&& other) : SparseVector() {
-    *this = other.derived().markAsRValue();
-  }
-
-  inline SparseVector& operator=(SparseVector&& other) {
-    this->swap(other);
-    return *this;
-  }
-
-  template <typename OtherDerived>
-  inline SparseVector& operator=(SparseCompressedBase<OtherDerived>&& other) {
-    *this = other.derived().markAsRValue();
-    return *this;
-  }
-
 #ifndef EIGEN_PARSED_BY_DOXYGEN
   template <typename Lhs, typename Rhs>
   inline SparseVector& operator=(const SparseSparseProduct<Lhs, Rhs>& product) {
diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h
index acb0c5f..3e3352f 100644
--- a/Eigen/src/SparseQR/SparseQR.h
+++ b/Eigen/src/SparseQR/SparseQR.h
@@ -481,7 +481,9 @@
       tdot *= m_hcoeffs(curIdx);
 
       // Then update tval = tval - q * tau
-      tval -= tdot * m_Q.col(curIdx);
+      // FIXME: tval -= tdot * m_Q.col(curIdx) should amount to the same (need to check/add support for efficient "dense
+      // ?= sparse")
+      for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq) tval(itq.row()) -= itq.value() * tdot;
 
       // Detect fill-in for the current column of Q
       if (m_etree(Ridx(i)) == nonzeroCol) {
diff --git a/Eigen/src/plugins/CommonCwiseUnaryOps.inc b/Eigen/src/plugins/CommonCwiseUnaryOps.inc
index 64f3648..f20f2f8 100644
--- a/Eigen/src/plugins/CommonCwiseUnaryOps.inc
+++ b/Eigen/src/plugins/CommonCwiseUnaryOps.inc
@@ -118,7 +118,7 @@
   return CwiseUnaryOp<CustomUnaryOp, const Derived>(derived(), func);
 }
 
-/// \returns a const expression of a custom coefficient-wise unary operator \a func of *this
+/// \returns an expression of a custom coefficient-wise unary operator \a func of *this
 ///
 /// The template parameter \a CustomUnaryOp is the type of the functor
 /// of the custom unary operator.
@@ -137,21 +137,6 @@
   return CwiseUnaryView<CustomViewOp, const Derived>(derived(), func);
 }
 
-/// \returns a non-const expression of a custom coefficient-wise unary view \a func of *this
-///
-/// The template parameter \a CustomUnaryOp is the type of the functor
-/// of the custom unary operator.
-///
-EIGEN_DOC_UNARY_ADDONS(unaryViewExpr, unary function)
-///
-/// \sa unaryExpr, binaryExpr class CwiseUnaryOp
-///
-template <typename CustomViewOp>
-EIGEN_DEVICE_FUNC inline CwiseUnaryView<CustomViewOp, Derived> unaryViewExpr(
-    const CustomViewOp& func = CustomViewOp()) {
-  return CwiseUnaryView<CustomViewOp, Derived>(derived(), func);
-}
-
 /// \returns a non const expression of the real part of \c *this.
 ///
 EIGEN_DOC_UNARY_ADDONS(real, real part function)
diff --git a/Eigen/src/plugins/IndexedViewMethods.inc b/Eigen/src/plugins/IndexedViewMethods.inc
index c3df429..26e7b5f 100644
--- a/Eigen/src/plugins/IndexedViewMethods.inc
+++ b/Eigen/src/plugins/IndexedViewMethods.inc
@@ -9,47 +9,51 @@
 
 #if !defined(EIGEN_PARSED_BY_DOXYGEN)
 
-public:
+protected:
 // define some aliases to ease readability
 
 template <typename Indices>
-using IvcRowType = typename internal::IndexedViewHelperIndicesWrapper<Indices, RowsAtCompileTime>::type;
+using IvcRowType = typename internal::IndexedViewCompatibleType<Indices, RowsAtCompileTime>::type;
 
 template <typename Indices>
-using IvcColType = typename internal::IndexedViewHelperIndicesWrapper<Indices, ColsAtCompileTime>::type;
+using IvcColType = typename internal::IndexedViewCompatibleType<Indices, ColsAtCompileTime>::type;
 
 template <typename Indices>
-using IvcSizeType = typename internal::IndexedViewHelperIndicesWrapper<Indices, SizeAtCompileTime>::type;
+using IvcType = typename internal::IndexedViewCompatibleType<Indices, SizeAtCompileTime>::type;
+
+typedef typename internal::IndexedViewCompatibleType<Index, 1>::type IvcIndex;
 
 template <typename Indices>
 inline IvcRowType<Indices> ivcRow(const Indices& indices) const {
-  return internal::IndexedViewHelperIndicesWrapper<Indices, RowsAtCompileTime>::CreateIndexSequence(indices,
-                                                                                                    derived().rows());
+  return internal::makeIndexedViewCompatible(
+      indices, internal::variable_if_dynamic<Index, RowsAtCompileTime>(derived().rows()), Specialized);
 }
 
 template <typename Indices>
 inline IvcColType<Indices> ivcCol(const Indices& indices) const {
-  return internal::IndexedViewHelperIndicesWrapper<Indices, ColsAtCompileTime>::CreateIndexSequence(indices,
-                                                                                                    derived().cols());
+  return internal::makeIndexedViewCompatible(
+      indices, internal::variable_if_dynamic<Index, ColsAtCompileTime>(derived().cols()), Specialized);
 }
 
 template <typename Indices>
-inline IvcSizeType<Indices> ivcSize(const Indices& indices) const {
-  return internal::IndexedViewHelperIndicesWrapper<Indices, SizeAtCompileTime>::CreateIndexSequence(indices,
-                                                                                                    derived().size());
-  ;
+inline IvcType<Indices> ivcSize(const Indices& indices) const {
+  return internal::makeIndexedViewCompatible(
+      indices, internal::variable_if_dynamic<Index, SizeAtCompileTime>(derived().size()), Specialized);
 }
 
 // this helper class assumes internal::valid_indexed_view_overload<RowIndices, ColIndices>::value == true
-template <typename RowIndices, typename ColIndices, typename EnableIf = void>
+template <typename RowIndices, typename ColIndices,
+          bool UseSymbolic =
+              internal::traits<IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>>::ReturnAsScalar,
+          bool UseBlock =
+              internal::traits<IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>>::ReturnAsBlock,
+          bool UseGeneric = internal::traits<
+              IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>>::ReturnAsIndexedView>
 struct IndexedViewSelector;
 
 // Generic
 template <typename RowIndices, typename ColIndices>
-struct IndexedViewSelector<
-    RowIndices, ColIndices,
-    std::enable_if_t<
-        internal::traits<IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>>::ReturnAsIndexedView>> {
+struct IndexedViewSelector<RowIndices, ColIndices, false, false, true> {
   using ReturnType = IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>;
   using ConstReturnType = IndexedView<const Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>;
 
@@ -64,73 +68,60 @@
 
 // Block
 template <typename RowIndices, typename ColIndices>
-struct IndexedViewSelector<RowIndices, ColIndices,
-                           std::enable_if_t<internal::traits<
-                               IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>>::ReturnAsBlock>> {
-  using ActualRowIndices = IvcRowType<RowIndices>;
-  using ActualColIndices = IvcColType<ColIndices>;
-  using IndexedViewType = IndexedView<Derived, ActualRowIndices, ActualColIndices>;
-  using ConstIndexedViewType = IndexedView<const Derived, ActualRowIndices, ActualColIndices>;
+struct IndexedViewSelector<RowIndices, ColIndices, false, true, false> {
+  using IndexedViewType = IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>;
+  using ConstIndexedViewType = IndexedView<const Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>;
   using ReturnType = typename internal::traits<IndexedViewType>::BlockType;
   using ConstReturnType = typename internal::traits<ConstIndexedViewType>::BlockType;
-  using RowHelper = internal::IndexedViewHelper<ActualRowIndices>;
-  using ColHelper = internal::IndexedViewHelper<ActualColIndices>;
 
   static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) {
-    auto actualRowIndices = derived.ivcRow(rowIndices);
-    auto actualColIndices = derived.ivcCol(colIndices);
-    return ReturnType(derived, RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices),
-                      RowHelper::size(actualRowIndices), ColHelper::size(actualColIndices));
+    IvcRowType<RowIndices> actualRowIndices = derived.ivcRow(rowIndices);
+    IvcColType<ColIndices> actualColIndices = derived.ivcCol(colIndices);
+    return ReturnType(derived, internal::first(actualRowIndices), internal::first(actualColIndices),
+                      internal::index_list_size(actualRowIndices), internal::index_list_size(actualColIndices));
   }
   static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices,
                                     const ColIndices& colIndices) {
-    auto actualRowIndices = derived.ivcRow(rowIndices);
-    auto actualColIndices = derived.ivcCol(colIndices);
-    return ConstReturnType(derived, RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices),
-                           RowHelper::size(actualRowIndices), ColHelper::size(actualColIndices));
+    IvcRowType<RowIndices> actualRowIndices = derived.ivcRow(rowIndices);
+    IvcColType<ColIndices> actualColIndices = derived.ivcCol(colIndices);
+    return ConstReturnType(derived, internal::first(actualRowIndices), internal::first(actualColIndices),
+                           internal::index_list_size(actualRowIndices), internal::index_list_size(actualColIndices));
   }
 };
 
-// Scalar
+// Symbolic
 template <typename RowIndices, typename ColIndices>
-struct IndexedViewSelector<RowIndices, ColIndices,
-                           std::enable_if_t<internal::traits<
-                               IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>>::ReturnAsScalar>> {
+struct IndexedViewSelector<RowIndices, ColIndices, true, false, false> {
   using ReturnType = typename DenseBase<Derived>::Scalar&;
   using ConstReturnType = typename DenseBase<Derived>::CoeffReturnType;
-  using ActualRowIndices = IvcRowType<RowIndices>;
-  using ActualColIndices = IvcColType<ColIndices>;
-  using RowHelper = internal::IndexedViewHelper<ActualRowIndices>;
-  using ColHelper = internal::IndexedViewHelper<ActualColIndices>;
+
   static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) {
-    auto actualRowIndices = derived.ivcRow(rowIndices);
-    auto actualColIndices = derived.ivcCol(colIndices);
-    return derived(RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices));
+    return derived(internal::eval_expr_given_size(rowIndices, derived.rows()),
+                   internal::eval_expr_given_size(colIndices, derived.cols()));
   }
   static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices,
                                     const ColIndices& colIndices) {
-    auto actualRowIndices = derived.ivcRow(rowIndices);
-    auto actualColIndices = derived.ivcCol(colIndices);
-    return derived(RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices));
+    return derived(internal::eval_expr_given_size(rowIndices, derived.rows()),
+                   internal::eval_expr_given_size(colIndices, derived.cols()));
   }
 };
 
 // this helper class assumes internal::is_valid_index_type<Indices>::value == false
-template <typename Indices, typename EnableIf = void>
+template <typename Indices, bool UseSymbolic = symbolic::is_symbolic<Indices>::value,
+          bool UseBlock = !UseSymbolic && internal::get_compile_time_incr<IvcType<Indices>>::value == 1,
+          bool UseGeneric = !UseSymbolic && !UseBlock>
 struct VectorIndexedViewSelector;
 
 // Generic
 template <typename Indices>
-struct VectorIndexedViewSelector<
-    Indices, std::enable_if_t<!internal::is_single_range<IvcSizeType<Indices>>::value &&
-                              internal::IndexedViewHelper<IvcSizeType<Indices>>::IncrAtCompileTime != 1>> {
+struct VectorIndexedViewSelector<Indices, false, false, true> {
   static constexpr bool IsRowMajor = DenseBase<Derived>::IsRowMajor;
-  using ZeroIndex = internal::SingleRange<Index(0)>;
-  using RowMajorReturnType = IndexedView<Derived, ZeroIndex, IvcSizeType<Indices>>;
-  using ConstRowMajorReturnType = IndexedView<const Derived, ZeroIndex, IvcSizeType<Indices>>;
 
-  using ColMajorReturnType = IndexedView<Derived, IvcSizeType<Indices>, ZeroIndex>;
-  using ConstColMajorReturnType = IndexedView<const Derived, IvcSizeType<Indices>, ZeroIndex>;
+  using RowMajorReturnType = IndexedView<Derived, IvcIndex, IvcType<Indices>>;
+  using ConstRowMajorReturnType = IndexedView<const Derived, IvcIndex, IvcType<Indices>>;
+
+  using ColMajorReturnType = IndexedView<Derived, IvcType<Indices>, IvcIndex>;
+  using ConstColMajorReturnType = IndexedView<const Derived, IvcType<Indices>, IvcIndex>;
 
   using ReturnType = typename internal::conditional<IsRowMajor, RowMajorReturnType, ColMajorReturnType>::type;
   using ConstReturnType =
@@ -138,53 +129,49 @@
 
   template <bool UseRowMajor = IsRowMajor, std::enable_if_t<UseRowMajor, bool> = true>
   static inline RowMajorReturnType run(Derived& derived, const Indices& indices) {
-    return RowMajorReturnType(derived, ZeroIndex(0), derived.ivcCol(indices));
+    return RowMajorReturnType(derived, IvcIndex(0), derived.ivcCol(indices));
   }
   template <bool UseRowMajor = IsRowMajor, std::enable_if_t<UseRowMajor, bool> = true>
   static inline ConstRowMajorReturnType run(const Derived& derived, const Indices& indices) {
-    return ConstRowMajorReturnType(derived, ZeroIndex(0), derived.ivcCol(indices));
+    return ConstRowMajorReturnType(derived, IvcIndex(0), derived.ivcCol(indices));
   }
   template <bool UseRowMajor = IsRowMajor, std::enable_if_t<!UseRowMajor, bool> = true>
   static inline ColMajorReturnType run(Derived& derived, const Indices& indices) {
-    return ColMajorReturnType(derived, derived.ivcRow(indices), ZeroIndex(0));
+    return ColMajorReturnType(derived, derived.ivcRow(indices), IvcIndex(0));
   }
   template <bool UseRowMajor = IsRowMajor, std::enable_if_t<!UseRowMajor, bool> = true>
   static inline ConstColMajorReturnType run(const Derived& derived, const Indices& indices) {
-    return ConstColMajorReturnType(derived, derived.ivcRow(indices), ZeroIndex(0));
+    return ConstColMajorReturnType(derived, derived.ivcRow(indices), IvcIndex(0));
   }
 };
 
 // Block
 template <typename Indices>
-struct VectorIndexedViewSelector<
-    Indices, std::enable_if_t<!internal::is_single_range<IvcSizeType<Indices>>::value &&
-                              internal::IndexedViewHelper<IvcSizeType<Indices>>::IncrAtCompileTime == 1>> {
-  using Helper = internal::IndexedViewHelper<IvcSizeType<Indices>>;
-  using ReturnType = VectorBlock<Derived, Helper::SizeAtCompileTime>;
-  using ConstReturnType = VectorBlock<const Derived, Helper::SizeAtCompileTime>;
+struct VectorIndexedViewSelector<Indices, false, true, false> {
+  using ReturnType = VectorBlock<Derived, internal::array_size<Indices>::value>;
+  using ConstReturnType = VectorBlock<const Derived, internal::array_size<Indices>::value>;
+
   static inline ReturnType run(Derived& derived, const Indices& indices) {
-    auto actualIndices = derived.ivcSize(indices);
-    return ReturnType(derived, Helper::first(actualIndices), Helper::size(actualIndices));
+    IvcType<Indices> actualIndices = derived.ivcSize(indices);
+    return ReturnType(derived, internal::first(actualIndices), internal::index_list_size(actualIndices));
   }
   static inline ConstReturnType run(const Derived& derived, const Indices& indices) {
-    auto actualIndices = derived.ivcSize(indices);
-    return ConstReturnType(derived, Helper::first(actualIndices), Helper::size(actualIndices));
+    IvcType<Indices> actualIndices = derived.ivcSize(indices);
+    return ConstReturnType(derived, internal::first(actualIndices), internal::index_list_size(actualIndices));
   }
 };
 
 // Symbolic
 template <typename Indices>
-struct VectorIndexedViewSelector<Indices, std::enable_if_t<internal::is_single_range<IvcSizeType<Indices>>::value>> {
+struct VectorIndexedViewSelector<Indices, true, false, false> {
   using ReturnType = typename DenseBase<Derived>::Scalar&;
   using ConstReturnType = typename DenseBase<Derived>::CoeffReturnType;
-  using Helper = internal::IndexedViewHelper<IvcSizeType<Indices>>;
-  static inline ReturnType run(Derived& derived, const Indices& indices) {
-    auto actualIndices = derived.ivcSize(indices);
-    return derived(Helper::first(actualIndices));
+
+  static inline ReturnType run(Derived& derived, const Indices& id) {
+    return derived(internal::eval_expr_given_size(id, derived.size()));
   }
-  static inline ConstReturnType run(const Derived& derived, const Indices& indices) {
-    auto actualIndices = derived.ivcSize(indices);
-    return derived(Helper::first(actualIndices));
+  static inline ConstReturnType run(const Derived& derived, const Indices& id) {
+    return derived(internal::eval_expr_given_size(id, derived.size()));
   }
 };
 
diff --git a/Eigen/src/plugins/MatrixCwiseUnaryOps.inc b/Eigen/src/plugins/MatrixCwiseUnaryOps.inc
index 325b0fb..b23f4a5 100644
--- a/Eigen/src/plugins/MatrixCwiseUnaryOps.inc
+++ b/Eigen/src/plugins/MatrixCwiseUnaryOps.inc
@@ -17,7 +17,6 @@
 typedef CwiseUnaryOp<internal::scalar_carg_op<Scalar>, const Derived> CwiseCArgReturnType;
 typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> CwiseSqrtReturnType;
 typedef CwiseUnaryOp<internal::scalar_cbrt_op<Scalar>, const Derived> CwiseCbrtReturnType;
-typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> CwiseSquareReturnType;
 typedef CwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> CwiseSignReturnType;
 typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> CwiseInverseReturnType;
 
@@ -67,15 +66,7 @@
 ///
 /// \sa cwiseSqrt(), cwiseSquare(), cwisePow()
 ///
-EIGEN_DEVICE_FUNC inline const CwiseCbrtReturnType cwiseCbrt() const { return CwiseCbrtReturnType(derived()); }
-
-/// \returns an expression of the coefficient-wise square of *this.
-///
-EIGEN_DOC_UNARY_ADDONS(cwiseSquare, square)
-///
-/// \sa cwisePow(), cwiseSqrt(), cwiseCbrt()
-///
-EIGEN_DEVICE_FUNC inline const CwiseSquareReturnType cwiseSquare() const { return CwiseSquareReturnType(derived()); }
+EIGEN_DEVICE_FUNC inline const CwiseCbrtReturnType cwiseCbrt() const { return CwiseCbrtReturnType(derived()); }
 
 /// \returns an expression of the coefficient-wise signum of *this.
 ///
diff --git a/blas/BandTriangularSolver.h b/blas/BandTriangularSolver.h
index f9bfdc1..014af24 100644
--- a/blas/BandTriangularSolver.h
+++ b/blas/BandTriangularSolver.h
@@ -10,7 +10,6 @@
 #ifndef EIGEN_BAND_TRIANGULARSOLVER_H
 #define EIGEN_BAND_TRIANGULARSOLVER_H
 
-namespace Eigen {
 namespace internal {
 
 /* \internal
@@ -78,7 +77,6 @@
   }
 };
 
-}  // namespace internal
-}  // namespace Eigen
+}  // end namespace internal
 
 #endif  // EIGEN_BAND_TRIANGULARSOLVER_H
diff --git a/blas/GeneralRank1Update.h b/blas/GeneralRank1Update.h
index e6c3cab..dd363e5 100644
--- a/blas/GeneralRank1Update.h
+++ b/blas/GeneralRank1Update.h
@@ -10,7 +10,6 @@
 #ifndef EIGEN_GENERAL_RANK1UPDATE_H
 #define EIGEN_GENERAL_RANK1UPDATE_H
 
-namespace Eigen {
 namespace internal {
 
 /* Optimized matrix += alpha * uv' */
@@ -36,7 +35,6 @@
   }
 };
 
-}  // namespace internal
-}  // namespace Eigen
+}  // end namespace internal
 
 #endif  // EIGEN_GENERAL_RANK1UPDATE_H
diff --git a/blas/PackedSelfadjointProduct.h b/blas/PackedSelfadjointProduct.h
index 5109960..655da51 100644
--- a/blas/PackedSelfadjointProduct.h
+++ b/blas/PackedSelfadjointProduct.h
@@ -10,7 +10,6 @@
 #ifndef EIGEN_SELFADJOINT_PACKED_PRODUCT_H
 #define EIGEN_SELFADJOINT_PACKED_PRODUCT_H
 
-namespace Eigen {
 namespace internal {
 
 /* Optimized matrix += alpha * uv'
@@ -46,7 +45,6 @@
   }
 };
 
-}  // namespace internal
-}  // namespace Eigen
+}  // end namespace internal
 
 #endif  // EIGEN_SELFADJOINT_PACKED_PRODUCT_H
diff --git a/blas/PackedTriangularMatrixVector.h b/blas/PackedTriangularMatrixVector.h
index 4e8e085..bb830cb 100644
--- a/blas/PackedTriangularMatrixVector.h
+++ b/blas/PackedTriangularMatrixVector.h
@@ -10,7 +10,6 @@
 #ifndef EIGEN_PACKED_TRIANGULAR_MATRIX_VECTOR_H
 #define EIGEN_PACKED_TRIANGULAR_MATRIX_VECTOR_H
 
-namespace Eigen {
 namespace internal {
 
 template <typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,
@@ -76,7 +75,6 @@
   };
 };
 
-}  // namespace internal
-}  // namespace Eigen
+}  // end namespace internal
 
 #endif  // EIGEN_PACKED_TRIANGULAR_MATRIX_VECTOR_H
diff --git a/blas/PackedTriangularSolverVector.h b/blas/PackedTriangularSolverVector.h
index 92964fb..6a1a8c1 100644
--- a/blas/PackedTriangularSolverVector.h
+++ b/blas/PackedTriangularSolverVector.h
@@ -10,7 +10,6 @@
 #ifndef EIGEN_PACKED_TRIANGULAR_SOLVER_VECTOR_H
 #define EIGEN_PACKED_TRIANGULAR_SOLVER_VECTOR_H
 
-namespace Eigen {
 namespace internal {
 
 template <typename LhsScalar, typename RhsScalar, typename Index, int Side, int Mode, bool Conjugate, int StorageOrder>
@@ -70,7 +69,6 @@
   }
 };
 
-}  // namespace internal
-}  // namespace Eigen
+}  // end namespace internal
 
 #endif  // EIGEN_PACKED_TRIANGULAR_SOLVER_VECTOR_H
diff --git a/blas/Rank2Update.h b/blas/Rank2Update.h
index 9cb96ee..e5046cf 100644
--- a/blas/Rank2Update.h
+++ b/blas/Rank2Update.h
@@ -10,7 +10,6 @@
 #ifndef EIGEN_RANK2UPDATE_H
 #define EIGEN_RANK2UPDATE_H
 
-namespace Eigen {
 namespace internal {
 
 /* Optimized selfadjoint matrix += alpha * uv' + conj(alpha)*vu'
@@ -50,7 +49,6 @@
   }
 };
 
-}  // namespace internal
-}  // namespace Eigen
+}  // end namespace internal
 
 #endif  // EIGEN_RANK2UPDATE_H
diff --git a/blas/common.h b/blas/common.h
index 9e46b6e..2456273 100644
--- a/blas/common.h
+++ b/blas/common.h
@@ -29,13 +29,6 @@
 
 #include "blas.h"
 
-#include "BandTriangularSolver.h"
-#include "GeneralRank1Update.h"
-#include "PackedSelfadjointProduct.h"
-#include "PackedTriangularMatrixVector.h"
-#include "PackedTriangularSolverVector.h"
-#include "Rank2Update.h"
-
 #define NOTR 0
 #define TR 1
 #define ADJ 2
@@ -66,58 +59,58 @@
 
 inline bool check_uplo(const char* uplo) { return UPLO(*uplo) != 0xff; }
 
+namespace Eigen {
+#include "BandTriangularSolver.h"
+#include "GeneralRank1Update.h"
+#include "PackedSelfadjointProduct.h"
+#include "PackedTriangularMatrixVector.h"
+#include "PackedTriangularSolverVector.h"
+#include "Rank2Update.h"
+}  // namespace Eigen
+
+using namespace Eigen;
+
 typedef SCALAR Scalar;
-typedef Eigen::NumTraits<Scalar>::Real RealScalar;
+typedef NumTraits<Scalar>::Real RealScalar;
 typedef std::complex<RealScalar> Complex;
 
 enum { IsComplex = Eigen::NumTraits<SCALAR>::IsComplex, Conj = IsComplex };
 
-typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor> PlainMatrixType;
-typedef Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>, 0, Eigen::OuterStride<> >
-    MatrixType;
-typedef Eigen::Map<const Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>, 0,
-                   Eigen::OuterStride<> >
-    ConstMatrixType;
-typedef Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, 1>, 0, Eigen::InnerStride<Eigen::Dynamic> > StridedVectorType;
-typedef Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, 1> > CompactVectorType;
+typedef Matrix<Scalar, Dynamic, Dynamic, ColMajor> PlainMatrixType;
+typedef Map<Matrix<Scalar, Dynamic, Dynamic, ColMajor>, 0, OuterStride<> > MatrixType;
+typedef Map<const Matrix<Scalar, Dynamic, Dynamic, ColMajor>, 0, OuterStride<> > ConstMatrixType;
+typedef Map<Matrix<Scalar, Dynamic, 1>, 0, InnerStride<Dynamic> > StridedVectorType;
+typedef Map<Matrix<Scalar, Dynamic, 1> > CompactVectorType;
 
 template <typename T>
-Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>, 0, Eigen::OuterStride<> > matrix(
-    T* data, int rows, int cols, int stride) {
-  return Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>, 0, Eigen::OuterStride<> >(
-      data, rows, cols, Eigen::OuterStride<>(stride));
+Map<Matrix<T, Dynamic, Dynamic, ColMajor>, 0, OuterStride<> > matrix(T* data, int rows, int cols, int stride) {
+  return Map<Matrix<T, Dynamic, Dynamic, ColMajor>, 0, OuterStride<> >(data, rows, cols, OuterStride<>(stride));
 }
 
 template <typename T>
-Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>, 0, Eigen::OuterStride<> > matrix(
-    const T* data, int rows, int cols, int stride) {
-  return Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>, 0, Eigen::OuterStride<> >(
-      data, rows, cols, Eigen::OuterStride<>(stride));
+Map<const Matrix<T, Dynamic, Dynamic, ColMajor>, 0, OuterStride<> > matrix(const T* data, int rows, int cols,
+                                                                           int stride) {
+  return Map<const Matrix<T, Dynamic, Dynamic, ColMajor>, 0, OuterStride<> >(data, rows, cols, OuterStride<>(stride));
 }
 
 template <typename T>
-Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 1>, 0, Eigen::InnerStride<Eigen::Dynamic> > make_vector(T* data, int size,
-                                                                                                    int incr) {
-  return Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 1>, 0, Eigen::InnerStride<Eigen::Dynamic> >(
-      data, size, Eigen::InnerStride<Eigen::Dynamic>(incr));
+Map<Matrix<T, Dynamic, 1>, 0, InnerStride<Dynamic> > make_vector(T* data, int size, int incr) {
+  return Map<Matrix<T, Dynamic, 1>, 0, InnerStride<Dynamic> >(data, size, InnerStride<Dynamic>(incr));
 }
 
 template <typename T>
-Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>, 0, Eigen::InnerStride<Eigen::Dynamic> > make_vector(const T* data,
-                                                                                                          int size,
-                                                                                                          int incr) {
-  return Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>, 0, Eigen::InnerStride<Eigen::Dynamic> >(
-      data, size, Eigen::InnerStride<Eigen::Dynamic>(incr));
+Map<const Matrix<T, Dynamic, 1>, 0, InnerStride<Dynamic> > make_vector(const T* data, int size, int incr) {
+  return Map<const Matrix<T, Dynamic, 1>, 0, InnerStride<Dynamic> >(data, size, InnerStride<Dynamic>(incr));
 }
 
 template <typename T>
-Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 1> > make_vector(T* data, int size) {
-  return Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 1> >(data, size);
+Map<Matrix<T, Dynamic, 1> > make_vector(T* data, int size) {
+  return Map<Matrix<T, Dynamic, 1> >(data, size);
 }
 
 template <typename T>
-Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1> > make_vector(const T* data, int size) {
-  return Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1> >(data, size);
+Map<const Matrix<T, Dynamic, 1> > make_vector(const T* data, int size) {
+  return Map<const Matrix<T, Dynamic, 1> >(data, size);
 }
 
 template <typename T>
diff --git a/blas/double.cpp b/blas/double.cpp
index 28a2563..4298665 100644
--- a/blas/double.cpp
+++ b/blas/double.cpp
@@ -19,7 +19,7 @@
 #include "level2_real_impl.h"
 #include "level3_impl.h"
 
-extern "C" double EIGEN_BLAS_FUNC_NAME(sdot)(int* n, float* x, int* incx, float* y, int* incy) {
+double EIGEN_BLAS_FUNC_NAME(sdot)(int* n, float* x, int* incx, float* y, int* incy) {
   if (*n <= 0) return 0;
 
   if (*incx == 1 && *incy == 1)
diff --git a/blas/level1_cplx_impl.h b/blas/level1_cplx_impl.h
index 3181a50..be88b92 100644
--- a/blas/level1_cplx_impl.h
+++ b/blas/level1_cplx_impl.h
@@ -11,7 +11,7 @@
 
 struct scalar_norm1_op {
   typedef RealScalar result_type;
-  inline RealScalar operator()(const Scalar &a) const { return Eigen::numext::norm1(a); }
+  inline RealScalar operator()(const Scalar &a) const { return numext::norm1(a); }
 };
 namespace Eigen {
 namespace internal {
@@ -40,7 +40,7 @@
   if (*n <= 0) return 0;
   Scalar *x = reinterpret_cast<Scalar *>(px);
 
-  Eigen::DenseIndex ret;
+  DenseIndex ret;
   if (*incx == 1)
     make_vector(x, *n).unaryExpr<scalar_norm1_op>().maxCoeff(&ret);
   else
@@ -52,7 +52,7 @@
   if (*n <= 0) return 0;
   Scalar *x = reinterpret_cast<Scalar *>(px);
 
-  Eigen::DenseIndex ret;
+  DenseIndex ret;
   if (*incx == 1)
     make_vector(x, *n).unaryExpr<scalar_norm1_op>().minCoeff(&ret);
   else
@@ -132,16 +132,16 @@
   StridedVectorType vx(make_vector(x, *n, std::abs(*incx)));
   StridedVectorType vy(make_vector(y, *n, std::abs(*incy)));
 
-  Eigen::Reverse<StridedVectorType> rvx(vx);
-  Eigen::Reverse<StridedVectorType> rvy(vy);
+  Reverse<StridedVectorType> rvx(vx);
+  Reverse<StridedVectorType> rvy(vy);
 
   // TODO implement mixed real-scalar rotations
   if (*incx < 0 && *incy > 0)
-    Eigen::internal::apply_rotation_in_the_plane(rvx, vy, Eigen::JacobiRotation<Scalar>(c, s));
+    internal::apply_rotation_in_the_plane(rvx, vy, JacobiRotation<Scalar>(c, s));
   else if (*incx > 0 && *incy < 0)
-    Eigen::internal::apply_rotation_in_the_plane(vx, rvy, Eigen::JacobiRotation<Scalar>(c, s));
+    internal::apply_rotation_in_the_plane(vx, rvy, JacobiRotation<Scalar>(c, s));
   else
-    Eigen::internal::apply_rotation_in_the_plane(vx, vy, Eigen::JacobiRotation<Scalar>(c, s));
+    internal::apply_rotation_in_the_plane(vx, vy, JacobiRotation<Scalar>(c, s));
 }
 
 EIGEN_BLAS_FUNC(EIGEN_CAT(REAL_SCALAR_SUFFIX, scal))(int *n, RealScalar *palpha, RealScalar *px, int *incx) {
diff --git a/blas/level1_impl.h b/blas/level1_impl.h
index a65af92..2422d10 100644
--- a/blas/level1_impl.h
+++ b/blas/level1_impl.h
@@ -88,10 +88,10 @@
     a = b;
   } else {
     scale = abs(a) + abs(b);
-    norm = scale * sqrt((Eigen::numext::abs2(a / scale)) + (Eigen::numext::abs2(b / scale)));
+    norm = scale * sqrt((numext::abs2(a / scale)) + (numext::abs2(b / scale)));
     alpha = a / abs(a);
     *c = abs(a) / norm;
-    *s = alpha * Eigen::numext::conj(b) / norm;
+    *s = alpha * numext::conj(b) / norm;
     a = alpha * norm;
   }
 #endif
diff --git a/blas/level1_real_impl.h b/blas/level1_real_impl.h
index 202f432..cd9c189 100644
--- a/blas/level1_real_impl.h
+++ b/blas/level1_real_impl.h
@@ -28,7 +28,7 @@
   if (*n <= 0) return 0;
   Scalar *x = reinterpret_cast<Scalar *>(px);
 
-  Eigen::DenseIndex ret;
+  DenseIndex ret;
   if (*incx == 1)
     make_vector(x, *n).cwiseAbs().maxCoeff(&ret);
   else
@@ -40,7 +40,7 @@
   if (*n <= 0) return 0;
   Scalar *x = reinterpret_cast<Scalar *>(px);
 
-  Eigen::DenseIndex ret;
+  DenseIndex ret;
   if (*incx == 1)
     make_vector(x, *n).cwiseAbs().minCoeff(&ret);
   else
@@ -97,15 +97,15 @@
   StridedVectorType vx(make_vector(x, *n, std::abs(*incx)));
   StridedVectorType vy(make_vector(y, *n, std::abs(*incy)));
 
-  Eigen::Reverse<StridedVectorType> rvx(vx);
-  Eigen::Reverse<StridedVectorType> rvy(vy);
+  Reverse<StridedVectorType> rvx(vx);
+  Reverse<StridedVectorType> rvy(vy);
 
   if (*incx < 0 && *incy > 0)
-    Eigen::internal::apply_rotation_in_the_plane(rvx, vy, Eigen::JacobiRotation<Scalar>(c, s));
+    internal::apply_rotation_in_the_plane(rvx, vy, JacobiRotation<Scalar>(c, s));
   else if (*incx > 0 && *incy < 0)
-    Eigen::internal::apply_rotation_in_the_plane(vx, rvy, Eigen::JacobiRotation<Scalar>(c, s));
+    internal::apply_rotation_in_the_plane(vx, rvy, JacobiRotation<Scalar>(c, s));
   else
-    Eigen::internal::apply_rotation_in_the_plane(vx, vy, Eigen::JacobiRotation<Scalar>(c, s));
+    internal::apply_rotation_in_the_plane(vx, vy, JacobiRotation<Scalar>(c, s));
 }
 
 /*
diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h
index d1ce492..f04dda1 100644
--- a/blas/level2_cplx_impl.h
+++ b/blas/level2_cplx_impl.h
@@ -22,11 +22,9 @@
   typedef void (*functype)(int, const Scalar *, int, const Scalar *, Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
-      (Eigen::internal::selfadjoint_matrix_vector_product<Scalar, int, Eigen::ColMajor, Eigen::Upper, false,
-                                                          false>::run),
+      (internal::selfadjoint_matrix_vector_product<Scalar, int, ColMajor, Upper, false, false>::run),
       // array index: LO
-      (Eigen::internal::selfadjoint_matrix_vector_product<Scalar, int, Eigen::ColMajor, Eigen::Lower, false,
-                                                          false>::run),
+      (internal::selfadjoint_matrix_vector_product<Scalar, int, ColMajor, Lower, false, false>::run),
   };
 
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
@@ -109,9 +107,9 @@
   typedef void (*functype)(int, Scalar *, const Scalar *, RealScalar);
   static const functype func[2] = {
       // array index: UP
-      (Eigen::internal::selfadjoint_packed_rank1_update<Scalar, int, Eigen::ColMajor, Eigen::Upper, false, Conj>::run),
+      (internal::selfadjoint_packed_rank1_update<Scalar, int, ColMajor, Upper, false, Conj>::run),
       // array index: LO
-      (Eigen::internal::selfadjoint_packed_rank1_update<Scalar, int, Eigen::ColMajor, Eigen::Lower, false, Conj>::run),
+      (internal::selfadjoint_packed_rank1_update<Scalar, int, ColMajor, Lower, false, Conj>::run),
   };
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
@@ -151,9 +149,9 @@
   typedef void (*functype)(int, Scalar *, const Scalar *, const Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
-      (Eigen::internal::packed_rank2_update_selector<Scalar, int, Eigen::Upper>::run),
+      (internal::packed_rank2_update_selector<Scalar, int, Upper>::run),
       // array index: LO
-      (Eigen::internal::packed_rank2_update_selector<Scalar, int, Eigen::Lower>::run),
+      (internal::packed_rank2_update_selector<Scalar, int, Lower>::run),
   };
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
@@ -197,9 +195,9 @@
   typedef void (*functype)(int, Scalar *, int, const Scalar *, const Scalar *, const Scalar &);
   static const functype func[2] = {
       // array index: UP
-      (Eigen::selfadjoint_rank1_update<Scalar, int, Eigen::ColMajor, Eigen::Upper, false, Conj>::run),
+      (selfadjoint_rank1_update<Scalar, int, ColMajor, Upper, false, Conj>::run),
       // array index: LO
-      (Eigen::selfadjoint_rank1_update<Scalar, int, Eigen::ColMajor, Eigen::Lower, false, Conj>::run),
+      (selfadjoint_rank1_update<Scalar, int, ColMajor, Lower, false, Conj>::run),
   };
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
@@ -244,9 +242,9 @@
   typedef void (*functype)(int, Scalar *, int, const Scalar *, const Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
-      (Eigen::internal::rank2_update_selector<Scalar, int, Eigen::Upper>::run),
+      (internal::rank2_update_selector<Scalar, int, Upper>::run),
       // array index: LO
-      (Eigen::internal::rank2_update_selector<Scalar, int, Eigen::Lower>::run),
+      (internal::rank2_update_selector<Scalar, int, Lower>::run),
   };
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
@@ -315,8 +313,7 @@
   Scalar *x_cpy = get_compact_vector(x, *m, *incx);
   Scalar *y_cpy = get_compact_vector(y, *n, *incy);
 
-  Eigen::internal::general_rank1_update<Scalar, int, Eigen::ColMajor, false, false>::run(*m, *n, a, *lda, x_cpy, y_cpy,
-                                                                                         alpha);
+  internal::general_rank1_update<Scalar, int, ColMajor, false, false>::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha);
 
   if (x_cpy != x) delete[] x_cpy;
   if (y_cpy != y) delete[] y_cpy;
@@ -354,8 +351,7 @@
   Scalar *x_cpy = get_compact_vector(x, *m, *incx);
   Scalar *y_cpy = get_compact_vector(y, *n, *incy);
 
-  Eigen::internal::general_rank1_update<Scalar, int, Eigen::ColMajor, false, Conj>::run(*m, *n, a, *lda, x_cpy, y_cpy,
-                                                                                        alpha);
+  internal::general_rank1_update<Scalar, int, ColMajor, false, Conj>::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha);
 
   if (x_cpy != x) delete[] x_cpy;
   if (y_cpy != y) delete[] y_cpy;
diff --git a/blas/level2_impl.h b/blas/level2_impl.h
index ca9f48f..5721ee6 100644
--- a/blas/level2_impl.h
+++ b/blas/level2_impl.h
@@ -13,13 +13,12 @@
 struct general_matrix_vector_product_wrapper {
   static void run(Index rows, Index cols, const Scalar *lhs, Index lhsStride, const Scalar *rhs, Index rhsIncr,
                   Scalar *res, Index resIncr, Scalar alpha) {
-    typedef Eigen::internal::const_blas_data_mapper<Scalar, Index, StorageOrder> LhsMapper;
-    typedef Eigen::internal::const_blas_data_mapper<Scalar, Index, Eigen::RowMajor> RhsMapper;
+    typedef internal::const_blas_data_mapper<Scalar, Index, StorageOrder> LhsMapper;
+    typedef internal::const_blas_data_mapper<Scalar, Index, RowMajor> RhsMapper;
 
-    Eigen::internal::general_matrix_vector_product<Index, Scalar, LhsMapper, StorageOrder, ConjugateLhs, Scalar,
-                                                   RhsMapper, ConjugateRhs>::run(rows, cols, LhsMapper(lhs, lhsStride),
-                                                                                 RhsMapper(rhs, rhsIncr), res, resIncr,
-                                                                                 alpha);
+    internal::general_matrix_vector_product<Index, Scalar, LhsMapper, StorageOrder, ConjugateLhs, Scalar, RhsMapper,
+                                            ConjugateRhs>::run(rows, cols, LhsMapper(lhs, lhsStride),
+                                                               RhsMapper(rhs, rhsIncr), res, resIncr, alpha);
   }
 };
 
@@ -27,13 +26,12 @@
 (const char *opa, const int *m, const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda,
  const RealScalar *pb, const int *incb, const RealScalar *pbeta, RealScalar *pc, const int *incc) {
   typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar);
-  static const functype func[4] = {
-      // array index: NOTR
-      (general_matrix_vector_product_wrapper<int, Scalar, Eigen::ColMajor, false, false>::run),
-      // array index: TR
-      (general_matrix_vector_product_wrapper<int, Scalar, Eigen::RowMajor, false, false>::run),
-      // array index: ADJ
-      (general_matrix_vector_product_wrapper<int, Scalar, Eigen::RowMajor, Conj, false>::run), 0};
+  static const functype func[4] = {// array index: NOTR
+                                   (general_matrix_vector_product_wrapper<int, Scalar, ColMajor, false, false>::run),
+                                   // array index: TR
+                                   (general_matrix_vector_product_wrapper<int, Scalar, RowMajor, false, false>::run),
+                                   // array index: ADJ
+                                   (general_matrix_vector_product_wrapper<int, Scalar, RowMajor, Conj, false>::run), 0};
 
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
   const Scalar *b = reinterpret_cast<const Scalar *>(pb);
@@ -86,43 +84,31 @@
 (const char *uplo, const char *opa, const char *diag, const int *n, const RealScalar *pa, const int *lda,
  RealScalar *pb, const int *incb) {
   typedef void (*functype)(int, const Scalar *, int, Scalar *);
-  using Eigen::ColMajor;
-  using Eigen::Lower;
-  using Eigen::OnTheLeft;
-  using Eigen::RowMajor;
-  using Eigen::UnitDiag;
-  using Eigen::Upper;
   static const functype func[16] = {
       // array index: NOTR  | (UP << 2) | (NUNIT << 3)
-      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, false, ColMajor>::run),
+      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, false, ColMajor>::run),
       // array index: TR    | (UP << 2) | (NUNIT << 3)
-      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, false, RowMajor>::run),
+      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, false, RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (NUNIT << 3)
-      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, Conj, RowMajor>::run), 0,
+      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, Conj, RowMajor>::run), 0,
       // array index: NOTR  | (LO << 2) | (NUNIT << 3)
-      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, false, ColMajor>::run),
+      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, false, ColMajor>::run),
       // array index: TR    | (LO << 2) | (NUNIT << 3)
-      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, false, RowMajor>::run),
+      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, false, RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (NUNIT << 3)
-      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, Conj, RowMajor>::run), 0,
+      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, Conj, RowMajor>::run), 0,
       // array index: NOTR  | (UP << 2) | (UNIT  << 3)
-      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, false,
-                                                ColMajor>::run),
+      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, false, ColMajor>::run),
       // array index: TR    | (UP << 2) | (UNIT  << 3)
-      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, false,
-                                                RowMajor>::run),
+      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, false, RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (UNIT  << 3)
-      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, Conj, RowMajor>::run),
-      0,
+      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, Conj, RowMajor>::run), 0,
       // array index: NOTR  | (LO << 2) | (UNIT  << 3)
-      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, false,
-                                                ColMajor>::run),
+      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, false, ColMajor>::run),
       // array index: TR    | (LO << 2) | (UNIT  << 3)
-      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, false,
-                                                RowMajor>::run),
+      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, false, RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (UNIT  << 3)
-      (Eigen::internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, Conj, RowMajor>::run),
-      0};
+      (internal::triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, Conj, RowMajor>::run), 0};
 
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
   Scalar *b = reinterpret_cast<Scalar *>(pb);
@@ -154,46 +140,32 @@
 (const char *uplo, const char *opa, const char *diag, const int *n, const RealScalar *pa, const int *lda,
  RealScalar *pb, const int *incb) {
   typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, const Scalar &);
-  using Eigen::ColMajor;
-  using Eigen::Lower;
-  using Eigen::OnTheLeft;
-  using Eigen::RowMajor;
-  using Eigen::UnitDiag;
-  using Eigen::Upper;
   static const functype func[16] = {
       // array index: NOTR  | (UP << 2) | (NUNIT << 3)
-      (Eigen::internal::triangular_matrix_vector_product<int, Upper | 0, Scalar, false, Scalar, false, ColMajor>::run),
+      (internal::triangular_matrix_vector_product<int, Upper | 0, Scalar, false, Scalar, false, ColMajor>::run),
       // array index: TR    | (UP << 2) | (NUNIT << 3)
-      (Eigen::internal::triangular_matrix_vector_product<int, Lower | 0, Scalar, false, Scalar, false, RowMajor>::run),
+      (internal::triangular_matrix_vector_product<int, Lower | 0, Scalar, false, Scalar, false, RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (NUNIT << 3)
-      (Eigen::internal::triangular_matrix_vector_product<int, Lower | 0, Scalar, Conj, Scalar, false, RowMajor>::run),
-      0,
+      (internal::triangular_matrix_vector_product<int, Lower | 0, Scalar, Conj, Scalar, false, RowMajor>::run), 0,
       // array index: NOTR  | (LO << 2) | (NUNIT << 3)
-      (Eigen::internal::triangular_matrix_vector_product<int, Lower | 0, Scalar, false, Scalar, false, ColMajor>::run),
+      (internal::triangular_matrix_vector_product<int, Lower | 0, Scalar, false, Scalar, false, ColMajor>::run),
       // array index: TR    | (LO << 2) | (NUNIT << 3)
-      (Eigen::internal::triangular_matrix_vector_product<int, Upper | 0, Scalar, false, Scalar, false, RowMajor>::run),
+      (internal::triangular_matrix_vector_product<int, Upper | 0, Scalar, false, Scalar, false, RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (NUNIT << 3)
-      (Eigen::internal::triangular_matrix_vector_product<int, Upper | 0, Scalar, Conj, Scalar, false, RowMajor>::run),
-      0,
+      (internal::triangular_matrix_vector_product<int, Upper | 0, Scalar, Conj, Scalar, false, RowMajor>::run), 0,
       // array index: NOTR  | (UP << 2) | (UNIT  << 3)
-      (Eigen::internal::triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, false, Scalar, false,
-                                                         ColMajor>::run),
+      (internal::triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, false, Scalar, false, ColMajor>::run),
       // array index: TR    | (UP << 2) | (UNIT  << 3)
-      (Eigen::internal::triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, false, Scalar, false,
-                                                         RowMajor>::run),
+      (internal::triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, false, Scalar, false, RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (UNIT  << 3)
-      (Eigen::internal::triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, Conj, Scalar, false,
-                                                         RowMajor>::run),
+      (internal::triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, Conj, Scalar, false, RowMajor>::run),
       0,
       // array index: NOTR  | (LO << 2) | (UNIT  << 3)
-      (Eigen::internal::triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, false, Scalar, false,
-                                                         ColMajor>::run),
+      (internal::triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, false, Scalar, false, ColMajor>::run),
       // array index: TR    | (LO << 2) | (UNIT  << 3)
-      (Eigen::internal::triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, false, Scalar, false,
-                                                         RowMajor>::run),
+      (internal::triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, false, Scalar, false, RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (UNIT  << 3)
-      (Eigen::internal::triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, Conj, Scalar, false,
-                                                         RowMajor>::run),
+      (internal::triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, Conj, Scalar, false, RowMajor>::run),
       0};
 
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
@@ -217,7 +189,7 @@
   if (*n == 0) return;
 
   Scalar *actual_b = get_compact_vector(b, *n, *incb);
-  Eigen::Matrix<Scalar, Eigen::Dynamic, 1> res(*n);
+  Matrix<Scalar, Dynamic, 1> res(*n);
   res.setZero();
 
   int code = OP(*opa) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3);
@@ -373,40 +345,34 @@
 EIGEN_BLAS_FUNC(tbsv)
 (char *uplo, char *op, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx) {
   typedef void (*functype)(int, int, const Scalar *, int, Scalar *);
-  using Eigen::ColMajor;
-  using Eigen::Lower;
-  using Eigen::OnTheLeft;
-  using Eigen::RowMajor;
-  using Eigen::UnitDiag;
-  using Eigen::Upper;
   static const functype func[16] = {
       // array index: NOTR  | (UP << 2) | (NUNIT << 3)
-      (Eigen::internal::band_solve_triangular_selector<int, Upper | 0, Scalar, false, Scalar, ColMajor>::run),
+      (internal::band_solve_triangular_selector<int, Upper | 0, Scalar, false, Scalar, ColMajor>::run),
       // array index: TR    | (UP << 2) | (NUNIT << 3)
-      (Eigen::internal::band_solve_triangular_selector<int, Lower | 0, Scalar, false, Scalar, RowMajor>::run),
+      (internal::band_solve_triangular_selector<int, Lower | 0, Scalar, false, Scalar, RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (NUNIT << 3)
-      (Eigen::internal::band_solve_triangular_selector<int, Lower | 0, Scalar, Conj, Scalar, RowMajor>::run),
+      (internal::band_solve_triangular_selector<int, Lower | 0, Scalar, Conj, Scalar, RowMajor>::run),
       0,
       // array index: NOTR  | (LO << 2) | (NUNIT << 3)
-      (Eigen::internal::band_solve_triangular_selector<int, Lower | 0, Scalar, false, Scalar, ColMajor>::run),
+      (internal::band_solve_triangular_selector<int, Lower | 0, Scalar, false, Scalar, ColMajor>::run),
       // array index: TR    | (LO << 2) | (NUNIT << 3)
-      (Eigen::internal::band_solve_triangular_selector<int, Upper | 0, Scalar, false, Scalar, RowMajor>::run),
+      (internal::band_solve_triangular_selector<int, Upper | 0, Scalar, false, Scalar, RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (NUNIT << 3)
-      (Eigen::internal::band_solve_triangular_selector<int, Upper | 0, Scalar, Conj, Scalar, RowMajor>::run),
+      (internal::band_solve_triangular_selector<int, Upper | 0, Scalar, Conj, Scalar, RowMajor>::run),
       0,
       // array index: NOTR  | (UP << 2) | (UNIT  << 3)
-      (Eigen::internal::band_solve_triangular_selector<int, Upper | UnitDiag, Scalar, false, Scalar, ColMajor>::run),
+      (internal::band_solve_triangular_selector<int, Upper | UnitDiag, Scalar, false, Scalar, ColMajor>::run),
       // array index: TR    | (UP << 2) | (UNIT  << 3)
-      (Eigen::internal::band_solve_triangular_selector<int, Lower | UnitDiag, Scalar, false, Scalar, RowMajor>::run),
+      (internal::band_solve_triangular_selector<int, Lower | UnitDiag, Scalar, false, Scalar, RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (UNIT  << 3)
-      (Eigen::internal::band_solve_triangular_selector<int, Lower | UnitDiag, Scalar, Conj, Scalar, RowMajor>::run),
+      (internal::band_solve_triangular_selector<int, Lower | UnitDiag, Scalar, Conj, Scalar, RowMajor>::run),
       0,
       // array index: NOTR  | (LO << 2) | (UNIT  << 3)
-      (Eigen::internal::band_solve_triangular_selector<int, Lower | UnitDiag, Scalar, false, Scalar, ColMajor>::run),
+      (internal::band_solve_triangular_selector<int, Lower | UnitDiag, Scalar, false, Scalar, ColMajor>::run),
       // array index: TR    | (LO << 2) | (UNIT  << 3)
-      (Eigen::internal::band_solve_triangular_selector<int, Upper | UnitDiag, Scalar, false, Scalar, RowMajor>::run),
+      (internal::band_solve_triangular_selector<int, Upper | UnitDiag, Scalar, false, Scalar, RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (UNIT  << 3)
-      (Eigen::internal::band_solve_triangular_selector<int, Upper | UnitDiag, Scalar, Conj, Scalar, RowMajor>::run),
+      (internal::band_solve_triangular_selector<int, Upper | UnitDiag, Scalar, Conj, Scalar, RowMajor>::run),
       0,
   };
 
@@ -454,52 +420,40 @@
  */
 EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) {
   typedef void (*functype)(int, const Scalar *, const Scalar *, Scalar *, Scalar);
-  using Eigen::ColMajor;
-  using Eigen::Lower;
-  using Eigen::OnTheLeft;
-  using Eigen::RowMajor;
-  using Eigen::UnitDiag;
-  using Eigen::Upper;
   static const functype func[16] = {
       // array index: NOTR  | (UP << 2) | (NUNIT << 3)
-      (Eigen::internal::packed_triangular_matrix_vector_product<int, Upper | 0, Scalar, false, Scalar, false,
-                                                                ColMajor>::run),
+      (internal::packed_triangular_matrix_vector_product<int, Upper | 0, Scalar, false, Scalar, false, ColMajor>::run),
       // array index: TR    | (UP << 2) | (NUNIT << 3)
-      (Eigen::internal::packed_triangular_matrix_vector_product<int, Lower | 0, Scalar, false, Scalar, false,
-                                                                RowMajor>::run),
+      (internal::packed_triangular_matrix_vector_product<int, Lower | 0, Scalar, false, Scalar, false, RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (NUNIT << 3)
-      (Eigen::internal::packed_triangular_matrix_vector_product<int, Lower | 0, Scalar, Conj, Scalar, false,
-                                                                RowMajor>::run),
+      (internal::packed_triangular_matrix_vector_product<int, Lower | 0, Scalar, Conj, Scalar, false, RowMajor>::run),
       0,
       // array index: NOTR  | (LO << 2) | (NUNIT << 3)
-      (Eigen::internal::packed_triangular_matrix_vector_product<int, Lower | 0, Scalar, false, Scalar, false,
-                                                                ColMajor>::run),
+      (internal::packed_triangular_matrix_vector_product<int, Lower | 0, Scalar, false, Scalar, false, ColMajor>::run),
       // array index: TR    | (LO << 2) | (NUNIT << 3)
-      (Eigen::internal::packed_triangular_matrix_vector_product<int, Upper | 0, Scalar, false, Scalar, false,
-                                                                RowMajor>::run),
+      (internal::packed_triangular_matrix_vector_product<int, Upper | 0, Scalar, false, Scalar, false, RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (NUNIT << 3)
-      (Eigen::internal::packed_triangular_matrix_vector_product<int, Upper | 0, Scalar, Conj, Scalar, false,
-                                                                RowMajor>::run),
+      (internal::packed_triangular_matrix_vector_product<int, Upper | 0, Scalar, Conj, Scalar, false, RowMajor>::run),
       0,
       // array index: NOTR  | (UP << 2) | (UNIT  << 3)
-      (Eigen::internal::packed_triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, false, Scalar, false,
-                                                                ColMajor>::run),
+      (internal::packed_triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, false, Scalar, false,
+                                                         ColMajor>::run),
       // array index: TR    | (UP << 2) | (UNIT  << 3)
-      (Eigen::internal::packed_triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, false, Scalar, false,
-                                                                RowMajor>::run),
+      (internal::packed_triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, false, Scalar, false,
+                                                         RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (UNIT  << 3)
-      (Eigen::internal::packed_triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, Conj, Scalar, false,
-                                                                RowMajor>::run),
+      (internal::packed_triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, Conj, Scalar, false,
+                                                         RowMajor>::run),
       0,
       // array index: NOTR  | (LO << 2) | (UNIT  << 3)
-      (Eigen::internal::packed_triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, false, Scalar, false,
-                                                                ColMajor>::run),
+      (internal::packed_triangular_matrix_vector_product<int, Lower | UnitDiag, Scalar, false, Scalar, false,
+                                                         ColMajor>::run),
       // array index: TR    | (LO << 2) | (UNIT  << 3)
-      (Eigen::internal::packed_triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, false, Scalar, false,
-                                                                RowMajor>::run),
+      (internal::packed_triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, false, Scalar, false,
+                                                         RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (UNIT  << 3)
-      (Eigen::internal::packed_triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, Conj, Scalar, false,
-                                                                RowMajor>::run),
+      (internal::packed_triangular_matrix_vector_product<int, Upper | UnitDiag, Scalar, Conj, Scalar, false,
+                                                         RowMajor>::run),
       0};
 
   Scalar *ap = reinterpret_cast<Scalar *>(pap);
@@ -521,7 +475,7 @@
   if (*n == 0) return;
 
   Scalar *actual_x = get_compact_vector(x, *n, *incx);
-  Eigen::Matrix<Scalar, Eigen::Dynamic, 1> res(*n);
+  Matrix<Scalar, Dynamic, 1> res(*n);
   res.setZero();
 
   int code = OP(*opa) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3);
@@ -545,50 +499,36 @@
  */
 EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) {
   typedef void (*functype)(int, const Scalar *, Scalar *);
-  using Eigen::ColMajor;
-  using Eigen::Lower;
-  using Eigen::OnTheLeft;
-  using Eigen::RowMajor;
-  using Eigen::UnitDiag;
-  using Eigen::Upper;
   static const functype func[16] = {
       // array index: NOTR  | (UP << 2) | (NUNIT << 3)
-      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, false,
-                                                       ColMajor>::run),
+      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, false, ColMajor>::run),
       // array index: TR    | (UP << 2) | (NUNIT << 3)
-      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, false,
-                                                       RowMajor>::run),
+      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, false, RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (NUNIT << 3)
-      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, Conj, RowMajor>::run),
-      0,
+      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, Conj, RowMajor>::run), 0,
       // array index: NOTR  | (LO << 2) | (NUNIT << 3)
-      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, false,
-                                                       ColMajor>::run),
+      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | 0, false, ColMajor>::run),
       // array index: TR    | (LO << 2) | (NUNIT << 3)
-      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, false,
-                                                       RowMajor>::run),
+      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, false, RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (NUNIT << 3)
-      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, Conj, RowMajor>::run),
-      0,
+      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | 0, Conj, RowMajor>::run), 0,
       // array index: NOTR  | (UP << 2) | (UNIT  << 3)
-      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, false,
-                                                       ColMajor>::run),
+      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, false,
+                                                ColMajor>::run),
       // array index: TR    | (UP << 2) | (UNIT  << 3)
-      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, false,
-                                                       RowMajor>::run),
+      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, false,
+                                                RowMajor>::run),
       // array index: ADJ   | (UP << 2) | (UNIT  << 3)
-      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, Conj,
-                                                       RowMajor>::run),
+      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, Conj, RowMajor>::run),
       0,
       // array index: NOTR  | (LO << 2) | (UNIT  << 3)
-      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, false,
-                                                       ColMajor>::run),
+      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Lower | UnitDiag, false,
+                                                ColMajor>::run),
       // array index: TR    | (LO << 2) | (UNIT  << 3)
-      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, false,
-                                                       RowMajor>::run),
+      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, false,
+                                                RowMajor>::run),
       // array index: ADJ   | (LO << 2) | (UNIT  << 3)
-      (Eigen::internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, Conj,
-                                                       RowMajor>::run),
+      (internal::packed_triangular_solve_vector<Scalar, Scalar, int, OnTheLeft, Upper | UnitDiag, Conj, RowMajor>::run),
       0};
 
   Scalar *ap = reinterpret_cast<Scalar *>(pap);
diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h
index 415944c..5653767 100644
--- a/blas/level2_real_impl.h
+++ b/blas/level2_real_impl.h
@@ -14,14 +14,11 @@
 (const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda, const RealScalar *px,
  const int *incx, const RealScalar *pbeta, RealScalar *py, const int *incy) {
   typedef void (*functype)(int, const Scalar *, int, const Scalar *, Scalar *, Scalar);
-  using Eigen::ColMajor;
-  using Eigen::Lower;
-  using Eigen::Upper;
   static const functype func[2] = {
       // array index: UP
-      (Eigen::internal::selfadjoint_matrix_vector_product<Scalar, int, ColMajor, Upper, false, false>::run),
+      (internal::selfadjoint_matrix_vector_product<Scalar, int, ColMajor, Upper, false, false>::run),
       // array index: LO
-      (Eigen::internal::selfadjoint_matrix_vector_product<Scalar, int, ColMajor, Lower, false, false>::run),
+      (internal::selfadjoint_matrix_vector_product<Scalar, int, ColMajor, Lower, false, false>::run),
   };
 
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
@@ -70,14 +67,11 @@
 (const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, RealScalar *pc,
  const int *ldc) {
   typedef void (*functype)(int, Scalar *, int, const Scalar *, const Scalar *, const Scalar &);
-  using Eigen::ColMajor;
-  using Eigen::Lower;
-  using Eigen::Upper;
   static const functype func[2] = {
       // array index: UP
-      (Eigen::selfadjoint_rank1_update<Scalar, int, ColMajor, Upper, false, Conj>::run),
+      (selfadjoint_rank1_update<Scalar, int, ColMajor, Upper, false, Conj>::run),
       // array index: LO
-      (Eigen::selfadjoint_rank1_update<Scalar, int, ColMajor, Lower, false, Conj>::run),
+      (selfadjoint_rank1_update<Scalar, int, ColMajor, Lower, false, Conj>::run),
   };
 
   const Scalar *x = reinterpret_cast<const Scalar *>(px);
@@ -115,9 +109,9 @@
   typedef void (*functype)(int, Scalar *, int, const Scalar *, const Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
-      (Eigen::internal::rank2_update_selector<Scalar, int, Eigen::Upper>::run),
+      (internal::rank2_update_selector<Scalar, int, Upper>::run),
       // array index: LO
-      (Eigen::internal::rank2_update_selector<Scalar, int, Eigen::Lower>::run),
+      (internal::rank2_update_selector<Scalar, int, Lower>::run),
   };
 
   const Scalar *x = reinterpret_cast<const Scalar *>(px);
@@ -196,9 +190,9 @@
   typedef void (*functype)(int, Scalar *, const Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
-      (Eigen::internal::selfadjoint_packed_rank1_update<Scalar, int, Eigen::ColMajor, Eigen::Upper, false, false>::run),
+      (internal::selfadjoint_packed_rank1_update<Scalar, int, ColMajor, Upper, false, false>::run),
       // array index: LO
-      (Eigen::internal::selfadjoint_packed_rank1_update<Scalar, int, Eigen::ColMajor, Eigen::Lower, false, false>::run),
+      (internal::selfadjoint_packed_rank1_update<Scalar, int, ColMajor, Lower, false, false>::run),
   };
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
@@ -238,9 +232,9 @@
   typedef void (*functype)(int, Scalar *, const Scalar *, const Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
-      (Eigen::internal::packed_rank2_update_selector<Scalar, int, Eigen::Upper>::run),
+      (internal::packed_rank2_update_selector<Scalar, int, Upper>::run),
       // array index: LO
-      (Eigen::internal::packed_rank2_update_selector<Scalar, int, Eigen::Lower>::run),
+      (internal::packed_rank2_update_selector<Scalar, int, Lower>::run),
   };
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
@@ -305,8 +299,7 @@
   Scalar *x_cpy = get_compact_vector(x, *m, *incx);
   Scalar *y_cpy = get_compact_vector(y, *n, *incy);
 
-  Eigen::internal::general_rank1_update<Scalar, int, Eigen::ColMajor, false, false>::run(*m, *n, a, *lda, x_cpy, y_cpy,
-                                                                                         alpha);
+  internal::general_rank1_update<Scalar, int, ColMajor, false, false>::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha);
 
   if (x_cpy != x) delete[] x_cpy;
   if (y_cpy != y) delete[] y_cpy;
diff --git a/blas/level3_impl.h b/blas/level3_impl.h
index 66a7d46..a6ddf26 100644
--- a/blas/level3_impl.h
+++ b/blas/level3_impl.h
@@ -15,43 +15,39 @@
  const int *ldc) {
   //   std::cerr << "in gemm " << *opa << " " << *opb << " " << *m << " " << *n << " " << *k << " " << *lda << " " <<
   //   *ldb << " " << *ldc << " " << *palpha << " " << *pbeta << "\n";
-  using Eigen::ColMajor;
-  using Eigen::DenseIndex;
-  using Eigen::Dynamic;
-  using Eigen::RowMajor;
   typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex,
                            Scalar *, DenseIndex, DenseIndex, Scalar, Eigen::internal::level3_blocking<Scalar, Scalar> &,
                            Eigen::internal::GemmParallelInfo<DenseIndex> *);
   static const functype func[12] = {
       // array index: NOTR  | (NOTR << 2)
-      (Eigen::internal::general_matrix_matrix_product<DenseIndex, Scalar, ColMajor, false, Scalar, ColMajor, false,
-                                                      ColMajor, 1>::run),
+      (internal::general_matrix_matrix_product<DenseIndex, Scalar, ColMajor, false, Scalar, ColMajor, false, ColMajor,
+                                               1>::run),
       // array index: TR    | (NOTR << 2)
-      (Eigen::internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, false, Scalar, ColMajor, false,
-                                                      ColMajor, 1>::run),
+      (internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, false, Scalar, ColMajor, false, ColMajor,
+                                               1>::run),
       // array index: ADJ   | (NOTR << 2)
-      (Eigen::internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor, false,
-                                                      ColMajor, 1>::run),
+      (internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor, false, ColMajor,
+                                               1>::run),
       0,
       // array index: NOTR  | (TR   << 2)
-      (Eigen::internal::general_matrix_matrix_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor, false,
-                                                      ColMajor, 1>::run),
+      (internal::general_matrix_matrix_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor, false, ColMajor,
+                                               1>::run),
       // array index: TR    | (TR   << 2)
-      (Eigen::internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, false, Scalar, RowMajor, false,
-                                                      ColMajor, 1>::run),
+      (internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, false, Scalar, RowMajor, false, ColMajor,
+                                               1>::run),
       // array index: ADJ   | (TR   << 2)
-      (Eigen::internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, RowMajor, false,
-                                                      ColMajor, 1>::run),
+      (internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, RowMajor, false, ColMajor,
+                                               1>::run),
       0,
       // array index: NOTR  | (ADJ  << 2)
-      (Eigen::internal::general_matrix_matrix_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor, Conj,
-                                                      ColMajor, 1>::run),
+      (internal::general_matrix_matrix_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor, Conj, ColMajor,
+                                               1>::run),
       // array index: TR    | (ADJ  << 2)
-      (Eigen::internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, false, Scalar, RowMajor, Conj,
-                                                      ColMajor, 1>::run),
+      (internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, false, Scalar, RowMajor, Conj, ColMajor,
+                                               1>::run),
       // array index: ADJ   | (ADJ  << 2)
-      (Eigen::internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, RowMajor, Conj,
-                                                      ColMajor, 1>::run),
+      (internal::general_matrix_matrix_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, RowMajor, Conj, ColMajor,
+                                               1>::run),
       0};
 
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
@@ -90,8 +86,7 @@
 
   if (*k == 0) return;
 
-  Eigen::internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*m, *n, *k, 1,
-                                                                                                     true);
+  internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*m, *n, *k, 1, true);
 
   int code = OP(*opa) | (OP(*opb) << 2);
   func[code](*m, *n, *k, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking, 0);
@@ -102,97 +97,76 @@
  const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb) {
   //   std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " "
   //   << *palpha << " " << *lda << " " << *ldb<< "\n";
-  using Eigen::ColMajor;
-  using Eigen::DenseIndex;
-  using Eigen::Dynamic;
-  using Eigen::Lower;
-  using Eigen::OnTheLeft;
-  using Eigen::OnTheRight;
-  using Eigen::RowMajor;
-  using Eigen::UnitDiag;
-  using Eigen::Upper;
   typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, DenseIndex,
                            Eigen::internal::level3_blocking<Scalar, Scalar> &);
   static const functype func[32] = {
       // array index: NOTR  | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | 0, false, ColMajor, ColMajor,
-                                                1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | 0, false, ColMajor, ColMajor, 1>::run),
       // array index: TR    | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | 0, false, RowMajor, ColMajor,
-                                                1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | 0, false, RowMajor, ColMajor, 1>::run),
       // array index: ADJ   | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | 0, Conj, RowMajor, ColMajor,
-                                                1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | 0, Conj, RowMajor, ColMajor, 1>::run),
       0,
       // array index: NOTR  | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | 0, false, ColMajor, ColMajor,
-                                                1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | 0, false, ColMajor, ColMajor, 1>::run),
       // array index: TR    | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | 0, false, RowMajor, ColMajor,
-                                                1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | 0, false, RowMajor, ColMajor, 1>::run),
       // array index: ADJ   | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | 0, Conj, RowMajor, ColMajor,
-                                                1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | 0, Conj, RowMajor, ColMajor, 1>::run),
       0,
       // array index: NOTR  | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | 0, false, ColMajor, ColMajor,
-                                                1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | 0, false, ColMajor, ColMajor, 1>::run),
       // array index: TR    | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | 0, false, RowMajor, ColMajor,
-                                                1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | 0, false, RowMajor, ColMajor, 1>::run),
       // array index: ADJ   | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | 0, Conj, RowMajor, ColMajor,
-                                                1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | 0, Conj, RowMajor, ColMajor, 1>::run),
       0,
       // array index: NOTR  | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | 0, false, ColMajor, ColMajor,
-                                                1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | 0, false, ColMajor, ColMajor, 1>::run),
       // array index: TR    | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | 0, false, RowMajor, ColMajor,
-                                                1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | 0, false, RowMajor, ColMajor, 1>::run),
       // array index: ADJ   | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | 0, Conj, RowMajor, ColMajor,
-                                                1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | 0, Conj, RowMajor, ColMajor, 1>::run),
       0,
       // array index: NOTR  | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | UnitDiag, false, ColMajor,
-                                                ColMajor, 1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | UnitDiag, false, ColMajor, ColMajor,
+                                         1>::run),
       // array index: TR    | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | UnitDiag, false, RowMajor,
-                                                ColMajor, 1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | UnitDiag, false, RowMajor, ColMajor,
+                                         1>::run),
       // array index: ADJ   | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | UnitDiag, Conj, RowMajor,
-                                                ColMajor, 1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | UnitDiag, Conj, RowMajor, ColMajor,
+                                         1>::run),
       0,
       // array index: NOTR  | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | UnitDiag, false, ColMajor,
-                                                ColMajor, 1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | UnitDiag, false, ColMajor, ColMajor,
+                                         1>::run),
       // array index: TR    | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | UnitDiag, false, RowMajor,
-                                                ColMajor, 1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | UnitDiag, false, RowMajor, ColMajor,
+                                         1>::run),
       // array index: ADJ   | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | UnitDiag, Conj, RowMajor,
-                                                ColMajor, 1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | UnitDiag, Conj, RowMajor, ColMajor,
+                                         1>::run),
       0,
       // array index: NOTR  | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | UnitDiag, false, ColMajor,
-                                                ColMajor, 1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Lower | UnitDiag, false, ColMajor, ColMajor,
+                                         1>::run),
       // array index: TR    | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | UnitDiag, false, RowMajor,
-                                                ColMajor, 1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | UnitDiag, false, RowMajor, ColMajor,
+                                         1>::run),
       // array index: ADJ   | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | UnitDiag, Conj, RowMajor,
-                                                ColMajor, 1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | UnitDiag, Conj, RowMajor, ColMajor,
+                                         1>::run),
       0,
       // array index: NOTR  | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | UnitDiag, false, ColMajor,
-                                                ColMajor, 1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Lower | UnitDiag, false, ColMajor, ColMajor,
+                                         1>::run),
       // array index: TR    | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | UnitDiag, false, RowMajor,
-                                                ColMajor, 1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | UnitDiag, false, RowMajor, ColMajor,
+                                         1>::run),
       // array index: ADJ   | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)
-      (Eigen::internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | UnitDiag, Conj, RowMajor,
-                                                ColMajor, 1>::run),
+      (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheRight, Upper | UnitDiag, Conj, RowMajor, ColMajor,
+                                         1>::run),
       0};
 
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
@@ -223,12 +197,12 @@
   int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4);
 
   if (SIDE(*side) == LEFT) {
-    Eigen::internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic, 4> blocking(*m, *n, *m, 1,
-                                                                                                          false);
+    internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic, 4> blocking(*m, *n, *m, 1,
+                                                                                                   false);
     func[code](*m, *n, a, *lda, b, 1, *ldb, blocking);
   } else {
-    Eigen::internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic, 4> blocking(*m, *n, *n, 1,
-                                                                                                          false);
+    internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic, 4> blocking(*m, *n, *n, 1,
+                                                                                                   false);
     func[code](*n, *m, a, *lda, b, 1, *ldb, blocking);
   }
 
@@ -242,96 +216,89 @@
  const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb) {
   //   std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " "
   //   << *lda << " " << *ldb << " " << *palpha << "\n";
-  using Eigen::ColMajor;
-  using Eigen::DenseIndex;
-  using Eigen::Dynamic;
-  using Eigen::Lower;
-  using Eigen::RowMajor;
-  using Eigen::UnitDiag;
-  using Eigen::Upper;
   typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex,
                            Scalar *, DenseIndex, DenseIndex, const Scalar &,
-                           Eigen::internal::level3_blocking<Scalar, Scalar> &);
+                           internal::level3_blocking<Scalar, Scalar> &);
   static const functype func[32] = {
       // array index: NOTR  | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, true, ColMajor, false, ColMajor,
-                                                         false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, true, ColMajor, false, ColMajor, false,
+                                                  ColMajor, 1>::run),
       // array index: TR    | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, true, RowMajor, false, ColMajor,
-                                                         false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, true, RowMajor, false, ColMajor, false,
+                                                  ColMajor, 1>::run),
       // array index: ADJ   | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, true, RowMajor, Conj, ColMajor,
-                                                         false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, true, RowMajor, Conj, ColMajor, false,
+                                                  ColMajor, 1>::run),
       0,
       // array index: NOTR  | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, false, ColMajor, false,
-                                                         ColMajor, false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, false, ColMajor, false, ColMajor,
+                                                  false, ColMajor, 1>::run),
       // array index: TR    | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, false, ColMajor, false,
-                                                         RowMajor, false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, false, ColMajor, false, RowMajor,
+                                                  false, ColMajor, 1>::run),
       // array index: ADJ   | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, false, ColMajor, false,
-                                                         RowMajor, Conj, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, false, ColMajor, false, RowMajor, Conj,
+                                                  ColMajor, 1>::run),
       0,
       // array index: NOTR  | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, true, ColMajor, false, ColMajor,
-                                                         false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, true, ColMajor, false, ColMajor, false,
+                                                  ColMajor, 1>::run),
       // array index: TR    | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, true, RowMajor, false, ColMajor,
-                                                         false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, true, RowMajor, false, ColMajor, false,
+                                                  ColMajor, 1>::run),
       // array index: ADJ   | (LEFT  << 2) | (LO << 3) | (NUNIT << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, true, RowMajor, Conj, ColMajor,
-                                                         false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, true, RowMajor, Conj, ColMajor, false,
+                                                  ColMajor, 1>::run),
       0,
       // array index: NOTR  | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, false, ColMajor, false,
-                                                         ColMajor, false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | 0, false, ColMajor, false, ColMajor,
+                                                  false, ColMajor, 1>::run),
       // array index: TR    | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, false, ColMajor, false,
-                                                         RowMajor, false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, false, ColMajor, false, RowMajor,
+                                                  false, ColMajor, 1>::run),
       // array index: ADJ   | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, false, ColMajor, false,
-                                                         RowMajor, Conj, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | 0, false, ColMajor, false, RowMajor, Conj,
+                                                  ColMajor, 1>::run),
       0,
       // array index: NOTR  | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, true, ColMajor, false,
-                                                         ColMajor, false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, true, ColMajor, false, ColMajor,
+                                                  false, ColMajor, 1>::run),
       // array index: TR    | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, true, RowMajor, false,
-                                                         ColMajor, false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, true, RowMajor, false, ColMajor,
+                                                  false, ColMajor, 1>::run),
       // array index: ADJ   | (LEFT  << 2) | (UP << 3) | (UNIT  << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, true, RowMajor, Conj,
-                                                         ColMajor, false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, true, RowMajor, Conj, ColMajor,
+                                                  false, ColMajor, 1>::run),
       0,
       // array index: NOTR  | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, false, ColMajor, false,
-                                                         ColMajor, false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, false, ColMajor, false,
+                                                  ColMajor, false, ColMajor, 1>::run),
       // array index: TR    | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, false, ColMajor, false,
-                                                         RowMajor, false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, false, ColMajor, false,
+                                                  RowMajor, false, ColMajor, 1>::run),
       // array index: ADJ   | (RIGHT << 2) | (UP << 3) | (UNIT  << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, false, ColMajor, false,
-                                                         RowMajor, Conj, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, false, ColMajor, false,
+                                                  RowMajor, Conj, ColMajor, 1>::run),
       0,
       // array index: NOTR  | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, true, ColMajor, false,
-                                                         ColMajor, false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, true, ColMajor, false, ColMajor,
+                                                  false, ColMajor, 1>::run),
       // array index: TR    | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, true, RowMajor, false,
-                                                         ColMajor, false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, true, RowMajor, false, ColMajor,
+                                                  false, ColMajor, 1>::run),
       // array index: ADJ   | (LEFT  << 2) | (LO << 3) | (UNIT  << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, true, RowMajor, Conj,
-                                                         ColMajor, false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, true, RowMajor, Conj, ColMajor,
+                                                  false, ColMajor, 1>::run),
       0,
       // array index: NOTR  | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, false, ColMajor, false,
-                                                         ColMajor, false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Lower | UnitDiag, false, ColMajor, false,
+                                                  ColMajor, false, ColMajor, 1>::run),
       // array index: TR    | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, false, ColMajor, false,
-                                                         RowMajor, false, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, false, ColMajor, false,
+                                                  RowMajor, false, ColMajor, 1>::run),
       // array index: ADJ   | (RIGHT << 2) | (LO << 3) | (UNIT  << 4)
-      (Eigen::internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, false, ColMajor, false,
-                                                         RowMajor, Conj, ColMajor, 1>::run),
+      (internal::product_triangular_matrix_matrix<Scalar, DenseIndex, Upper | UnitDiag, false, ColMajor, false,
+                                                  RowMajor, Conj, ColMajor, 1>::run),
       0};
 
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
@@ -362,16 +329,16 @@
   if (*m == 0 || *n == 0) return;
 
   // FIXME find a way to avoid this copy
-  Eigen::Matrix<Scalar, Dynamic, Dynamic, ColMajor> tmp = matrix(b, *m, *n, *ldb);
+  Matrix<Scalar, Dynamic, Dynamic, ColMajor> tmp = matrix(b, *m, *n, *ldb);
   matrix(b, *m, *n, *ldb).setZero();
 
   if (SIDE(*side) == LEFT) {
-    Eigen::internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic, 4> blocking(*m, *n, *m, 1,
-                                                                                                          false);
+    internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic, 4> blocking(*m, *n, *m, 1,
+                                                                                                   false);
     func[code](*m, *n, *m, a, *lda, tmp.data(), tmp.outerStride(), b, 1, *ldb, alpha, blocking);
   } else {
-    Eigen::internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic, 4> blocking(*m, *n, *n, 1,
-                                                                                                          false);
+    internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic, 4> blocking(*m, *n, *n, 1,
+                                                                                                   false);
     func[code](*m, *n, *n, tmp.data(), tmp.outerStride(), a, *lda, b, 1, *ldb, alpha, blocking);
   }
 }
@@ -416,15 +383,9 @@
   if (*m == 0 || *n == 0) return;
 
   int size = (SIDE(*side) == LEFT) ? (*m) : (*n);
-  using Eigen::ColMajor;
-  using Eigen::DenseIndex;
-  using Eigen::Dynamic;
-  using Eigen::Lower;
-  using Eigen::RowMajor;
-  using Eigen::Upper;
 #if ISCOMPLEX
   // FIXME add support for symmetric complex matrix
-  Eigen::Matrix<Scalar, Dynamic, Dynamic, ColMajor> matA(size, size);
+  Matrix<Scalar, Dynamic, Dynamic, ColMajor> matA(size, size);
   if (UPLO(*uplo) == UP) {
     matA.triangularView<Upper>() = matrix(a, size, size, *lda);
     matA.triangularView<Lower>() = matrix(a, size, size, *lda).transpose();
@@ -437,29 +398,24 @@
   else if (SIDE(*side) == RIGHT)
     matrix(c, *m, *n, *ldc) += alpha * matrix(b, *m, *n, *ldb) * matA;
 #else
-  Eigen::internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*m, *n, size, 1,
-                                                                                                     false);
+  internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*m, *n, size, 1, false);
 
   if (SIDE(*side) == LEFT)
     if (UPLO(*uplo) == UP)
-      Eigen::internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor, true, false, ColMajor, false, false,
-                                                  ColMajor, 1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha,
-                                                                    blocking);
+      internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor, true, false, ColMajor, false, false, ColMajor,
+                                           1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking);
     else if (UPLO(*uplo) == LO)
-      Eigen::internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, true, false, ColMajor, false, false,
-                                                  ColMajor, 1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha,
-                                                                    blocking);
+      internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, true, false, ColMajor, false, false, ColMajor,
+                                           1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking);
     else
       return;
   else if (SIDE(*side) == RIGHT)
     if (UPLO(*uplo) == UP)
-      Eigen::internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, false, false, RowMajor, true, false,
-                                                  ColMajor, 1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha,
-                                                                    blocking);
+      internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, false, false, RowMajor, true, false, ColMajor,
+                                           1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);
     else if (UPLO(*uplo) == LO)
-      Eigen::internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, false, false, ColMajor, true, false,
-                                                  ColMajor, 1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha,
-                                                                    blocking);
+      internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, false, false, ColMajor, true, false, ColMajor,
+                                           1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);
     else
       return;
   else
@@ -474,35 +430,29 @@
  const int *lda, const RealScalar *pbeta, RealScalar *pc, const int *ldc) {
   //   std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " "
   //   << *pbeta << " " << *ldc << "\n";
-  using Eigen::ColMajor;
-  using Eigen::DenseIndex;
-  using Eigen::Dynamic;
-  using Eigen::Lower;
-  using Eigen::RowMajor;
-  using Eigen::Upper;
 #if !ISCOMPLEX
   typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *,
-                           DenseIndex, DenseIndex, const Scalar &, Eigen::internal::level3_blocking<Scalar, Scalar> &);
+                           DenseIndex, DenseIndex, const Scalar &, internal::level3_blocking<Scalar, Scalar> &);
   static const functype func[8] = {
       // array index: NOTR  | (UP << 2)
-      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor,
-                                                                 Conj, ColMajor, 1, Upper>::run),
+      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor, Conj,
+                                                          ColMajor, 1, Upper>::run),
       // array index: TR    | (UP << 2)
-      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, false, Scalar, ColMajor,
-                                                                 Conj, ColMajor, 1, Upper>::run),
+      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, false, Scalar, ColMajor, Conj,
+                                                          ColMajor, 1, Upper>::run),
       // array index: ADJ   | (UP << 2)
-      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor,
-                                                                 false, ColMajor, 1, Upper>::run),
+      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor, false,
+                                                          ColMajor, 1, Upper>::run),
       0,
       // array index: NOTR  | (LO << 2)
-      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor,
-                                                                 Conj, ColMajor, 1, Lower>::run),
+      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor, Conj,
+                                                          ColMajor, 1, Lower>::run),
       // array index: TR    | (LO << 2)
-      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, false, Scalar, ColMajor,
-                                                                 Conj, ColMajor, 1, Lower>::run),
+      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, false, Scalar, ColMajor, Conj,
+                                                          ColMajor, 1, Lower>::run),
       // array index: ADJ   | (LO << 2)
-      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor,
-                                                                 false, ColMajor, 1, Lower>::run),
+      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor, false,
+                                                          ColMajor, 1, Lower>::run),
       0};
 #endif
 
@@ -558,8 +508,7 @@
           alpha * matrix(a, *k, *n, *lda).transpose() * matrix(a, *k, *n, *lda);
   }
 #else
-  Eigen::internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*n, *n, *k, 1,
-                                                                                                     false);
+  internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*n, *n, *k, 1, false);
 
   int code = OP(*op) | (UPLO(*uplo) << 2);
   func[code](*n, *k, a, *lda, a, *lda, c, 1, *ldc, alpha, blocking);
@@ -597,8 +546,6 @@
     info = 12;
   if (info) return xerbla_(SCALAR_SUFFIX_UP "SYR2K", &info);
 
-  using Eigen::Lower;
-  using Eigen::Upper;
   if (beta != Scalar(1)) {
     if (UPLO(*uplo) == UP)
       if (beta == Scalar(0))
@@ -674,25 +621,16 @@
 
   if (*m == 0 || *n == 0) return;
 
-  using Eigen::ColMajor;
-  using Eigen::DenseIndex;
-  using Eigen::Dynamic;
-  using Eigen::RowMajor;
-  using Eigen::Upper;
-
   int size = (SIDE(*side) == LEFT) ? (*m) : (*n);
-  Eigen::internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*m, *n, size, 1,
-                                                                                                     false);
+  internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*m, *n, size, 1, false);
 
   if (SIDE(*side) == LEFT) {
     if (UPLO(*uplo) == UP)
-      Eigen::internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor, true, Conj, ColMajor, false, false,
-                                                  ColMajor, 1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha,
-                                                                    blocking);
+      internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor, true, Conj, ColMajor, false, false, ColMajor,
+                                           1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking);
     else if (UPLO(*uplo) == LO)
-      Eigen::internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, true, false, ColMajor, false, false,
-                                                  ColMajor, 1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha,
-                                                                    blocking);
+      internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, true, false, ColMajor, false, false, ColMajor,
+                                           1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking);
     else
       return;
   } else if (SIDE(*side) == RIGHT) {
@@ -704,9 +642,8 @@
 RowMajor,true,Conj,  ColMajor, 1>
 ::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);*/
     else if (UPLO(*uplo) == LO)
-      Eigen::internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, false, false, ColMajor, true, false,
-                                                  ColMajor, 1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha,
-                                                                    blocking);
+      internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, false, false, ColMajor, true, false, ColMajor,
+                                           1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);
     else
       return;
   } else {
@@ -721,32 +658,25 @@
  const int *lda, const RealScalar *pbeta, RealScalar *pc, const int *ldc) {
   //   std::cerr << "in herk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " "
   //   << *pbeta << " " << *ldc << "\n";
-  using Eigen::ColMajor;
-  using Eigen::DenseIndex;
-  using Eigen::Dynamic;
-  using Eigen::Lower;
-  using Eigen::RowMajor;
-  using Eigen::StrictlyLower;
-  using Eigen::StrictlyUpper;
-  using Eigen::Upper;
+
   typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *,
                            DenseIndex, DenseIndex, const Scalar &, Eigen::internal::level3_blocking<Scalar, Scalar> &);
   static const functype func[8] = {
       // array index: NOTR  | (UP << 2)
-      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor,
-                                                                 Conj, ColMajor, 1, Upper>::run),
+      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor, Conj,
+                                                          ColMajor, 1, Upper>::run),
       0,
       // array index: ADJ   | (UP << 2)
-      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor,
-                                                                 false, ColMajor, 1, Upper>::run),
+      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor, false,
+                                                          ColMajor, 1, Upper>::run),
       0,
       // array index: NOTR  | (LO << 2)
-      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor,
-                                                                 Conj, ColMajor, 1, Lower>::run),
+      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor, Conj,
+                                                          ColMajor, 1, Lower>::run),
       0,
       // array index: ADJ   | (LO << 2)
-      (Eigen::internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor,
-                                                                 false, ColMajor, 1, Lower>::run),
+      (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, RowMajor, Conj, Scalar, ColMajor, false,
+                                                          ColMajor, 1, Lower>::run),
       0};
 
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
@@ -792,8 +722,7 @@
   }
 
   if (*k > 0 && alpha != RealScalar(0)) {
-    Eigen::internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*n, *n, *k, 1,
-                                                                                                       false);
+    internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*n, *n, *k, 1, false);
     func[code](*n, *k, a, *lda, a, *lda, c, 1, *ldc, alpha, blocking);
     matrix(c, *n, *n, *ldc).diagonal().imag().setZero();
   }
@@ -830,10 +759,6 @@
     info = 12;
   if (info) return xerbla_(SCALAR_SUFFIX_UP "HER2K", &info);
 
-  using Eigen::Lower;
-  using Eigen::StrictlyLower;
-  using Eigen::StrictlyUpper;
-  using Eigen::Upper;
   if (beta != RealScalar(1)) {
     if (UPLO(*uplo) == UP)
       if (beta == Scalar(0))
@@ -858,20 +783,20 @@
     if (UPLO(*uplo) == UP) {
       matrix(c, *n, *n, *ldc).triangularView<Upper>() +=
           alpha * matrix(a, *n, *k, *lda) * matrix(b, *n, *k, *ldb).adjoint() +
-          Eigen::numext::conj(alpha) * matrix(b, *n, *k, *ldb) * matrix(a, *n, *k, *lda).adjoint();
+          numext::conj(alpha) * matrix(b, *n, *k, *ldb) * matrix(a, *n, *k, *lda).adjoint();
     } else if (UPLO(*uplo) == LO)
       matrix(c, *n, *n, *ldc).triangularView<Lower>() +=
           alpha * matrix(a, *n, *k, *lda) * matrix(b, *n, *k, *ldb).adjoint() +
-          Eigen::numext::conj(alpha) * matrix(b, *n, *k, *ldb) * matrix(a, *n, *k, *lda).adjoint();
+          numext::conj(alpha) * matrix(b, *n, *k, *ldb) * matrix(a, *n, *k, *lda).adjoint();
   } else if (OP(*op) == ADJ) {
     if (UPLO(*uplo) == UP)
       matrix(c, *n, *n, *ldc).triangularView<Upper>() +=
           alpha * matrix(a, *k, *n, *lda).adjoint() * matrix(b, *k, *n, *ldb) +
-          Eigen::numext::conj(alpha) * matrix(b, *k, *n, *ldb).adjoint() * matrix(a, *k, *n, *lda);
+          numext::conj(alpha) * matrix(b, *k, *n, *ldb).adjoint() * matrix(a, *k, *n, *lda);
     else if (UPLO(*uplo) == LO)
       matrix(c, *n, *n, *ldc).triangularView<Lower>() +=
           alpha * matrix(a, *k, *n, *lda).adjoint() * matrix(b, *k, *n, *ldb) +
-          Eigen::numext::conj(alpha) * matrix(b, *k, *n, *ldb).adjoint() * matrix(a, *k, *n, *lda);
+          numext::conj(alpha) * matrix(b, *k, *n, *ldb).adjoint() * matrix(a, *k, *n, *lda);
   }
 }
 
diff --git a/debug/msvc/eigen.natvis b/debug/msvc/eigen.natvis
index 22cf346..da89857 100644
--- a/debug/msvc/eigen.natvis
+++ b/debug/msvc/eigen.natvis
@@ -1,235 +1,235 @@
-<?xml version="1.0" encoding="utf-8"?>
-
-<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
-
-  <!-- Fixed x Fixed Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,*,*,*,*,*&gt;">      
-      <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>
-      <DisplayString>[{$T2}, {$T3}] (fixed matrix)</DisplayString>
-      <Expand>
-        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
-          <Rank>2</Rank>
-          <Size>$i==0 ? $T2 : $T3</Size>
-          <ValuePointer>m_storage.m_data.array</ValuePointer>
-        </ArrayItems>
-        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
-          <Direction>Backward</Direction>
-          <Rank>2</Rank>
-          <Size>$i==0 ? $T2 : $T3</Size>
-          <ValuePointer>m_storage.m_data.array</ValuePointer>
-        </ArrayItems>
-      </Expand>
-  </Type>
-  
-  <!-- 2 x 2 Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,2,2,*,*,*&gt;">      
-      <AlternativeType Name="Eigen::Array&lt;*,2,2,*,*,*&gt;"/>
-      <DisplayString>[2, 2] (fixed matrix)</DisplayString>
-      <Expand>
-        <Synthetic Name="[row 0]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 0]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 1]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 1]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]})</DisplayString>
-        </Synthetic>        
-      </Expand>
-  </Type>
-  
-  <!-- 3 x 3 Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,3,3,*,*,*&gt;">      
-      <AlternativeType Name="Eigen::Array&lt;*,3,3,*,*,*&gt;"/>
-      <DisplayString>[3, 3] (fixed matrix)</DisplayString>
-      <Expand>
-        <Synthetic Name="[row 0]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 0]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 1]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 1]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 2]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 2]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]})</DisplayString>
-        </Synthetic>        
-      </Expand>
-  </Type>
-  
-  <!-- 4 x 4 Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,4,4,*,*,*&gt;">      
-      <AlternativeType Name="Eigen::Array&lt;*,4,4,*,*,*&gt;"/>
-      <DisplayString>[4, 4] (fixed matrix)</DisplayString>
-      <Expand>
-        <Synthetic Name="[row 0]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 0]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 1]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 1]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 2]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 2]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 3]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 3]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]})</DisplayString>
-        </Synthetic>        
-      </Expand>
-  </Type>  
-  
-  <!-- Dynamic x Dynamic Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,-1,-1,*,*,*&gt;">      
-      <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>
-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
-      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix)</DisplayString>
-      <Expand>
-        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
-          <Rank>2</Rank>
-          <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
-          <Direction>Backward</Direction>
-          <Rank>2</Rank>
-          <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-      </Expand>
-  </Type>
-  
-  <!-- Fixed x Dynamic Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,*,-1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Array&lt;*,*,-1,*,*,*&gt;"/>
-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
-      <DisplayString Condition="m_storage.m_data != 0">[{$T2}, {m_storage.m_cols}] (dynamic column matrix)</DisplayString>
-      <Expand>
-        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
-          <Rank>2</Rank>
-          <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
-          <Direction>Backward</Direction>
-          <Rank>2</Rank>
-          <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-      </Expand>
-  </Type>
-  
-  <!-- Dynamic x Fixed Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,-1,*,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Array&lt;*,-1,*,*,*,*&gt;"/>
-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
-      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {$T2}] (dynamic row matrix)</DisplayString>
-      <Expand>
-        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
-          <Rank>2</Rank>
-          <Size>$i==0 ? m_storage.m_rows : $T2</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
-          <Direction>Backward</Direction>
-          <Rank>2</Rank>
-          <Size>$i==0 ? m_storage.m_rows : $T2</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-      </Expand>
-  </Type>
-  
-  <!-- Dynamic Column Vector -->
-  <Type Name="Eigen::Matrix&lt;*,1,-1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Array&lt;*,1,-1,*,*,*&gt;"/>
-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
-      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_cols}] (dynamic column vector)</DisplayString>
-      <Expand>
-        <Item Name="[size]">m_storage.m_cols</Item>
-        <ArrayItems>
-          <Size>m_storage.m_cols</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-      </Expand>
-  </Type>
-  
-  <!-- Dynamic Row Vector -->
-  <Type Name="Eigen::Matrix&lt;*,-1,1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Array&lt;*,-1,1,*,*,*&gt;"/>
-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
-      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}] (dynamic row vector)</DisplayString>
-      <Expand>
-        <Item Name="[size]">m_storage.m_rows</Item>
-        <ArrayItems>
-          <Size>m_storage.m_rows</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-      </Expand>
-  </Type>
-  
-  <!-- Fixed Vector -->
-  <Type Name="Eigen::Matrix&lt;*,1,1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Array&lt;*,1,1,*,*,*&gt;"/>
-      <DisplayString>[1] ({m_storage.m_data.array[0]})</DisplayString>
-      <Expand>
-        <Item Name="[x]">m_storage.m_data.array[0]</Item>
-      </Expand>
-  </Type>
-  
-  <Type Name="Eigen::Matrix&lt;*,2,1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Matrix&lt;*,1,2,*,*,*&gt;"/>
-      <AlternativeType Name="Eigen::Array&lt;*,2,1,*,*,*&gt;"/>
-      <AlternativeType Name="Eigen::Array&lt;*,1,2,*,*,*&gt;"/>
-      <DisplayString>[2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
-      <Expand>
-        <Item Name="[x]">m_storage.m_data.array[0]</Item>
-        <Item Name="[y]">m_storage.m_data.array[1]</Item>
-      </Expand>
-  </Type>
-  
-  <Type Name="Eigen::Matrix&lt;*,3,1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Matrix&lt;*,1,3,*,*,*&gt;"/>
-      <AlternativeType Name="Eigen::Array&lt;*,3,1,*,*,*&gt;"/>
-      <AlternativeType Name="Eigen::Array&lt;*,1,3,*,*,*&gt;"/>
-      <DisplayString>[3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
-      <Expand>
-        <Item Name="[x]">m_storage.m_data.array[0]</Item>
-        <Item Name="[y]">m_storage.m_data.array[1]</Item>
-        <Item Name="[z]">m_storage.m_data.array[2]</Item>
-      </Expand>
-  </Type>
-  
-    <Type Name="Eigen::Matrix&lt;*,4,1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Matrix&lt;*,1,4,*,*,*&gt;"/>
-      <AlternativeType Name="Eigen::Array&lt;*,4,1,*,*,*&gt;"/>
-      <AlternativeType Name="Eigen::Array&lt;*,1,4,*,*,*&gt;"/>
-      <DisplayString>[4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
-      <Expand>
-        <Item Name="[x]">m_storage.m_data.array[0]</Item>
-        <Item Name="[y]">m_storage.m_data.array[1]</Item>
-        <Item Name="[z]">m_storage.m_data.array[2]</Item>
-        <Item Name="[w]">m_storage.m_data.array[3]</Item>
-      </Expand>
-  </Type>
-
-</AutoVisualizer>
+<?xml version="1.0" encoding="utf-8"?>

+

+<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">

+

+  <!-- Fixed x Fixed Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,*,*,*,*,*&gt;">      

+      <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>

+      <DisplayString>[{$T2}, {$T3}] (fixed matrix)</DisplayString>

+      <Expand>

+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->

+          <Rank>2</Rank>

+          <Size>$i==0 ? $T2 : $T3</Size>

+          <ValuePointer>m_storage.m_data.array</ValuePointer>

+        </ArrayItems>

+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->

+          <Direction>Backward</Direction>

+          <Rank>2</Rank>

+          <Size>$i==0 ? $T2 : $T3</Size>

+          <ValuePointer>m_storage.m_data.array</ValuePointer>

+        </ArrayItems>

+      </Expand>

+  </Type>

+  

+  <!-- 2 x 2 Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,2,2,*,*,*&gt;">      

+      <AlternativeType Name="Eigen::Array&lt;*,2,2,*,*,*&gt;"/>

+      <DisplayString>[2, 2] (fixed matrix)</DisplayString>

+      <Expand>

+        <Synthetic Name="[row 0]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 0]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 1]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 1]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]})</DisplayString>

+        </Synthetic>        

+      </Expand>

+  </Type>

+  

+  <!-- 3 x 3 Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,3,3,*,*,*&gt;">      

+      <AlternativeType Name="Eigen::Array&lt;*,3,3,*,*,*&gt;"/>

+      <DisplayString>[3, 3] (fixed matrix)</DisplayString>

+      <Expand>

+        <Synthetic Name="[row 0]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 0]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 1]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 1]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 2]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 2]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]})</DisplayString>

+        </Synthetic>        

+      </Expand>

+  </Type>

+  

+  <!-- 4 x 4 Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,4,4,*,*,*&gt;">      

+      <AlternativeType Name="Eigen::Array&lt;*,4,4,*,*,*&gt;"/>

+      <DisplayString>[4, 4] (fixed matrix)</DisplayString>

+      <Expand>

+        <Synthetic Name="[row 0]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 0]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 1]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 1]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 2]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 2]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 3]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 3]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]})</DisplayString>

+        </Synthetic>        

+      </Expand>

+  </Type>  

+  

+  <!-- Dynamic x Dynamic Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,-1,-1,*,*,*&gt;">      

+      <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>

+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix)</DisplayString>

+      <Expand>

+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->

+          <Rank>2</Rank>

+          <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->

+          <Direction>Backward</Direction>

+          <Rank>2</Rank>

+          <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+      </Expand>

+  </Type>

+  

+  <!-- Fixed x Dynamic Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,*,-1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Array&lt;*,*,-1,*,*,*&gt;"/>

+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

+      <DisplayString Condition="m_storage.m_data != 0">[{$T2}, {m_storage.m_cols}] (dynamic column matrix)</DisplayString>

+      <Expand>

+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->

+          <Rank>2</Rank>

+          <Size>$i==0 ? $T2 : m_storage.m_cols</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->

+          <Direction>Backward</Direction>

+          <Rank>2</Rank>

+          <Size>$i==0 ? $T2 : m_storage.m_cols</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+      </Expand>

+  </Type>

+  

+  <!-- Dynamic x Fixed Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,-1,*,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Array&lt;*,-1,*,*,*,*&gt;"/>

+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {$T2}] (dynamic row matrix)</DisplayString>

+      <Expand>

+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->

+          <Rank>2</Rank>

+          <Size>$i==0 ? m_storage.m_rows : $T2</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->

+          <Direction>Backward</Direction>

+          <Rank>2</Rank>

+          <Size>$i==0 ? m_storage.m_rows : $T2</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+      </Expand>

+  </Type>

+  

+  <!-- Dynamic Column Vector -->

+  <Type Name="Eigen::Matrix&lt;*,1,-1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Array&lt;*,1,-1,*,*,*&gt;"/>

+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_cols}] (dynamic column vector)</DisplayString>

+      <Expand>

+        <Item Name="[size]">m_storage.m_cols</Item>

+        <ArrayItems>

+          <Size>m_storage.m_cols</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+      </Expand>

+  </Type>

+  

+  <!-- Dynamic Row Vector -->

+  <Type Name="Eigen::Matrix&lt;*,-1,1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Array&lt;*,-1,1,*,*,*&gt;"/>

+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}] (dynamic row vector)</DisplayString>

+      <Expand>

+        <Item Name="[size]">m_storage.m_rows</Item>

+        <ArrayItems>

+          <Size>m_storage.m_rows</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+      </Expand>

+  </Type>

+  

+  <!-- Fixed Vector -->

+  <Type Name="Eigen::Matrix&lt;*,1,1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Array&lt;*,1,1,*,*,*&gt;"/>

+      <DisplayString>[1] ({m_storage.m_data.array[0]})</DisplayString>

+      <Expand>

+        <Item Name="[x]">m_storage.m_data.array[0]</Item>

+      </Expand>

+  </Type>

+  

+  <Type Name="Eigen::Matrix&lt;*,2,1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Matrix&lt;*,1,2,*,*,*&gt;"/>

+      <AlternativeType Name="Eigen::Array&lt;*,2,1,*,*,*&gt;"/>

+      <AlternativeType Name="Eigen::Array&lt;*,1,2,*,*,*&gt;"/>

+      <DisplayString>[2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>

+      <Expand>

+        <Item Name="[x]">m_storage.m_data.array[0]</Item>

+        <Item Name="[y]">m_storage.m_data.array[1]</Item>

+      </Expand>

+  </Type>

+  

+  <Type Name="Eigen::Matrix&lt;*,3,1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Matrix&lt;*,1,3,*,*,*&gt;"/>

+      <AlternativeType Name="Eigen::Array&lt;*,3,1,*,*,*&gt;"/>

+      <AlternativeType Name="Eigen::Array&lt;*,1,3,*,*,*&gt;"/>

+      <DisplayString>[3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>

+      <Expand>

+        <Item Name="[x]">m_storage.m_data.array[0]</Item>

+        <Item Name="[y]">m_storage.m_data.array[1]</Item>

+        <Item Name="[z]">m_storage.m_data.array[2]</Item>

+      </Expand>

+  </Type>

+  

+    <Type Name="Eigen::Matrix&lt;*,4,1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Matrix&lt;*,1,4,*,*,*&gt;"/>

+      <AlternativeType Name="Eigen::Array&lt;*,4,1,*,*,*&gt;"/>

+      <AlternativeType Name="Eigen::Array&lt;*,1,4,*,*,*&gt;"/>

+      <DisplayString>[4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>

+      <Expand>

+        <Item Name="[x]">m_storage.m_data.array[0]</Item>

+        <Item Name="[y]">m_storage.m_data.array[1]</Item>

+        <Item Name="[z]">m_storage.m_data.array[2]</Item>

+        <Item Name="[w]">m_storage.m_data.array[3]</Item>

+      </Expand>

+  </Type>

+

+</AutoVisualizer>

diff --git a/debug/msvc/eigen_autoexp_part.dat b/debug/msvc/eigen_autoexp_part.dat
index 273c10d..35ef580 100644
--- a/debug/msvc/eigen_autoexp_part.dat
+++ b/debug/msvc/eigen_autoexp_part.dat
@@ -1,295 +1,295 @@
-; ***************************************************************
-; * Eigen Visualizer
-; *
-; * Author: Hauke Heibel <hauke.heibel@gmail.com>
-; *
-; * Support the enhanced debugging of the following Eigen
-; * types (*: any, +:fixed dimension) :
-; *
-; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*>
-; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*>
-; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*>
-; * - Eigen::Matrix<*,-1,-1,*,*,*>
-; * - Eigen::Matrix<*,+,-1,*,*,*>
-; * - Eigen::Matrix<*,-1,+,*,*,*>
-; * - Eigen::Matrix<*,+,+,*,*,*>
-; *
-; * Matrices are displayed properly independently of the memory
-; * alignment (RowMajor vs. ColMajor).
-; *
-; * This file is distributed WITHOUT ANY WARRANTY. Please ensure
-; * that your original autoexp.dat file is copied to a safe 
-; * place before proceeding with its modification.
-; ***************************************************************
-
-[Visualizer]
-
-; Fixed size 4-vectors
-Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{
-   children
-   (
-      #(
-        [internals]: [$c,!],
-         x : ($c.m_storage.m_data.array)[0],
-         y : ($c.m_storage.m_data.array)[1],
-         z : ($c.m_storage.m_data.array)[2],
-         w : ($c.m_storage.m_data.array)[3]
-      )
-   )
-
-   preview
-   (
-      #(
-        "[",
-        4,
-        "](",
-        #array(expr: $e.m_storage.m_data.array[$i], size: 4),
-        ")"
-      )
-   )
-}
-
-; Fixed size 3-vectors
-Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{
-   children
-   (
-      #(
-        [internals]: [$c,!],
-         x : ($c.m_storage.m_data.array)[0],
-         y : ($c.m_storage.m_data.array)[1],
-         z : ($c.m_storage.m_data.array)[2]
-      )
-   )
-
-   preview
-   (
-      #(
-        "[",
-        3,
-        "](",
-        #array(expr: $e.m_storage.m_data.array[$i], size: 3),
-        ")"
-      )
-   )
-}
-
-; Fixed size 2-vectors
-Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{
-   children
-   (
-      #(
-        [internals]: [$c,!],
-         x : ($c.m_storage.m_data.array)[0],
-         y : ($c.m_storage.m_data.array)[1]
-      )
-   )
-
-   preview
-   (
-      #(
-        "[",
-        2,
-        "](",
-        #array(expr: $e.m_storage.m_data.array[$i], size: 2),
-        ")"
-      )
-   )
-}
-
-; Fixed size 1-vectors
-Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{
-   children
-   (
-      #(
-        [internals]: [$c,!],
-         x : ($c.m_storage.m_data.array)[0]
-      )
-   )
-
-   preview
-   (
-      #(
-        "[",
-        1,
-        "](",
-        #array(expr: $e.m_storage.m_data.array[$i], size: 1),
-        ")"
-      )
-   )
-}
-
-; Dynamic matrices (ColMajor and RowMajor support)
-Eigen::Matrix<*,-1,-1,*,*,*>{
-  children
-   (
-      #(
-         [internals]: [$c,!],
-         rows: $c.m_storage.m_rows,
-         cols: $c.m_storage.m_cols,
-         ; Check for RowMajorBit
-         #if ($c.Flags & 0x1) (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 
-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
-             )
-         ) #else (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[$i],
-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
-             )
-         )
-      )
-   )
-
-   preview
-   (
-     #(
-         "[",
-           $c.m_storage.m_rows,
-         ",",
-           $c.m_storage.m_cols,
-         "](",
-           #array(
-            expr :    [($c.m_storage.m_data)[$i],g],
-            size :    $c.m_storage.m_rows*$c.m_storage.m_cols
-           ),
-         ")"
-      )
-   )
-}
-
-; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support)
-Eigen::Matrix<*,*,-1,*,*,*>{
-  children
-   (
-      #(
-         [internals]: [$c,!],
-         rows: $c.RowsAtCompileTime,
-         cols: $c.m_storage.m_cols,
-         ; Check for RowMajorBit
-         #if ($c.Flags & 0x1) (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)],
-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
-             )
-         ) #else (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[$i],
-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
-             )
-         )
-      )
-   )
-
-   preview
-   (
-     #(
-         "[",
-           $c.RowsAtCompileTime,
-         ",",
-           $c.m_storage.m_cols,
-         "](",
-           #array(
-            expr :    [($c.m_storage.m_data)[$i],g],
-            size :    $c.RowsAtCompileTime*$c.m_storage.m_cols
-           ),
-         ")"
-      )
-   )
-}
-
-; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support)
-Eigen::Matrix<*,-1,*,*,*,*>{
-  children
-   (
-      #(
-         [internals]: [$c,!],
-         rows: $c.m_storage.m_rows,
-         cols: $c.ColsAtCompileTime,
-         ; Check for RowMajorBit
-         #if ($c.Flags & 0x1) (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 
-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
-             )
-         ) #else (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[$i],
-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
-             )
-         )
-      )
-   )
-
-   preview
-   (
-     #(
-         "[",
-           $c.m_storage.m_rows,
-         ",",
-           $c.ColsAtCompileTime,
-         "](",
-           #array(
-            expr :    [($c.m_storage.m_data)[$i],g],
-            size :    $c.m_storage.m_rows*$c.ColsAtCompileTime
-           ),
-         ")"
-      )
-   )
-}
-
-; Fixed size matrix (ColMajor and RowMajor support)
-Eigen::Matrix<*,*,*,*,*,*>{
-  children
-   (
-      #(
-         [internals]: [$c,!],
-         rows: $c.RowsAtCompileTime,
-         cols: $c.ColsAtCompileTime,
-         ; Check for RowMajorBit
-         #if ($c.Flags & 0x1) (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], 
-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
-             )
-         ) #else (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data.array)[$i],
-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
-             )
-         )
-      )
-   )
-
-   preview
-   (
-     #(
-         "[",
-           $c.RowsAtCompileTime,
-         ",",
-           $c.ColsAtCompileTime,
-         "](",
-           #array(
-            expr :    [($c.m_storage.m_data.array)[$i],g],
-            size :    $c.RowsAtCompileTime*$c.ColsAtCompileTime
-           ),
-         ")"
-      )
-   )
-}
+; ***************************************************************

+; * Eigen Visualizer

+; *

+; * Author: Hauke Heibel <hauke.heibel@gmail.com>

+; *

+; * Support the enhanced debugging of the following Eigen

+; * types (*: any, +:fixed dimension) :

+; *

+; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*>

+; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*>

+; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*>

+; * - Eigen::Matrix<*,-1,-1,*,*,*>

+; * - Eigen::Matrix<*,+,-1,*,*,*>

+; * - Eigen::Matrix<*,-1,+,*,*,*>

+; * - Eigen::Matrix<*,+,+,*,*,*>

+; *

+; * Matrices are displayed properly independently of the memory

+; * alignment (RowMajor vs. ColMajor).

+; *

+; * This file is distributed WITHOUT ANY WARRANTY. Please ensure

+; * that your original autoexp.dat file is copied to a safe 

+; * place before proceeding with its modification.

+; ***************************************************************

+

+[Visualizer]

+

+; Fixed size 4-vectors

+Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{

+   children

+   (

+      #(

+        [internals]: [$c,!],

+         x : ($c.m_storage.m_data.array)[0],

+         y : ($c.m_storage.m_data.array)[1],

+         z : ($c.m_storage.m_data.array)[2],

+         w : ($c.m_storage.m_data.array)[3]

+      )

+   )

+

+   preview

+   (

+      #(

+        "[",

+        4,

+        "](",

+        #array(expr: $e.m_storage.m_data.array[$i], size: 4),

+        ")"

+      )

+   )

+}

+

+; Fixed size 3-vectors

+Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{

+   children

+   (

+      #(

+        [internals]: [$c,!],

+         x : ($c.m_storage.m_data.array)[0],

+         y : ($c.m_storage.m_data.array)[1],

+         z : ($c.m_storage.m_data.array)[2]

+      )

+   )

+

+   preview

+   (

+      #(

+        "[",

+        3,

+        "](",

+        #array(expr: $e.m_storage.m_data.array[$i], size: 3),

+        ")"

+      )

+   )

+}

+

+; Fixed size 2-vectors

+Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{

+   children

+   (

+      #(

+        [internals]: [$c,!],

+         x : ($c.m_storage.m_data.array)[0],

+         y : ($c.m_storage.m_data.array)[1]

+      )

+   )

+

+   preview

+   (

+      #(

+        "[",

+        2,

+        "](",

+        #array(expr: $e.m_storage.m_data.array[$i], size: 2),

+        ")"

+      )

+   )

+}

+

+; Fixed size 1-vectors

+Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{

+   children

+   (

+      #(

+        [internals]: [$c,!],

+         x : ($c.m_storage.m_data.array)[0]

+      )

+   )

+

+   preview

+   (

+      #(

+        "[",

+        1,

+        "](",

+        #array(expr: $e.m_storage.m_data.array[$i], size: 1),

+        ")"

+      )

+   )

+}

+

+; Dynamic matrices (ColMajor and RowMajor support)

+Eigen::Matrix<*,-1,-1,*,*,*>{

+  children

+   (

+      #(

+         [internals]: [$c,!],

+         rows: $c.m_storage.m_rows,

+         cols: $c.m_storage.m_cols,

+         ; Check for RowMajorBit

+         #if ($c.Flags & 0x1) (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 

+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols

+             )

+         ) #else (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data)[$i],

+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols

+             )

+         )

+      )

+   )

+

+   preview

+   (

+     #(

+         "[",

+           $c.m_storage.m_rows,

+         ",",

+           $c.m_storage.m_cols,

+         "](",

+           #array(

+            expr :    [($c.m_storage.m_data)[$i],g],

+            size :    $c.m_storage.m_rows*$c.m_storage.m_cols

+           ),

+         ")"

+      )

+   )

+}

+

+; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support)

+Eigen::Matrix<*,*,-1,*,*,*>{

+  children

+   (

+      #(

+         [internals]: [$c,!],

+         rows: $c.RowsAtCompileTime,

+         cols: $c.m_storage.m_cols,

+         ; Check for RowMajorBit

+         #if ($c.Flags & 0x1) (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)],

+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols

+             )

+         ) #else (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data)[$i],

+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols

+             )

+         )

+      )

+   )

+

+   preview

+   (

+     #(

+         "[",

+           $c.RowsAtCompileTime,

+         ",",

+           $c.m_storage.m_cols,

+         "](",

+           #array(

+            expr :    [($c.m_storage.m_data)[$i],g],

+            size :    $c.RowsAtCompileTime*$c.m_storage.m_cols

+           ),

+         ")"

+      )

+   )

+}

+

+; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support)

+Eigen::Matrix<*,-1,*,*,*,*>{

+  children

+   (

+      #(

+         [internals]: [$c,!],

+         rows: $c.m_storage.m_rows,

+         cols: $c.ColsAtCompileTime,

+         ; Check for RowMajorBit

+         #if ($c.Flags & 0x1) (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 

+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime

+             )

+         ) #else (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data)[$i],

+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime

+             )

+         )

+      )

+   )

+

+   preview

+   (

+     #(

+         "[",

+           $c.m_storage.m_rows,

+         ",",

+           $c.ColsAtCompileTime,

+         "](",

+           #array(

+            expr :    [($c.m_storage.m_data)[$i],g],

+            size :    $c.m_storage.m_rows*$c.ColsAtCompileTime

+           ),

+         ")"

+      )

+   )

+}

+

+; Fixed size matrix (ColMajor and RowMajor support)

+Eigen::Matrix<*,*,*,*,*,*>{

+  children

+   (

+      #(

+         [internals]: [$c,!],

+         rows: $c.RowsAtCompileTime,

+         cols: $c.ColsAtCompileTime,

+         ; Check for RowMajorBit

+         #if ($c.Flags & 0x1) (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], 

+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime

+             )

+         ) #else (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data.array)[$i],

+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime

+             )

+         )

+      )

+   )

+

+   preview

+   (

+     #(

+         "[",

+           $c.RowsAtCompileTime,

+         ",",

+           $c.ColsAtCompileTime,

+         "](",

+           #array(

+            expr :    [($c.m_storage.m_data.array)[$i],g],

+            size :    $c.RowsAtCompileTime*$c.ColsAtCompileTime

+           ),

+         ")"

+      )

+   )

+}

diff --git a/doc/TutorialSlicingIndexing.dox b/doc/TutorialSlicingIndexing.dox
index 6ebaa2d..7f89554 100644
--- a/doc/TutorialSlicingIndexing.dox
+++ b/doc/TutorialSlicingIndexing.dox
@@ -86,12 +86,12 @@
   <td></td>
 </tr>
 <tr>
-  <td>First \c n odd rows of A</td>
+  <td>First \c n odd rows A</td>
   <td>\code A(seqN(1,n,2), all) \endcode</td>
   <td></td>
 </tr>
 <tr>
-  <td>The second-last column</td>
+  <td>The last past one column</td>
   <td>\code A(all, last-1) \endcode</td>
   <td>\code A.col(A.cols()-2) \endcode</td>
 </tr>
@@ -158,7 +158,7 @@
 \endcode
 
 We can revisit the <i>even columns of A</i> example as follows:
-\code A(all, seq(fix<0>,last,fix<2>))
+\code A(all, seq(0,last,fix<2>))
 \endcode
 
 
diff --git a/failtest/cwiseunaryview_on_const_type_actually_const.cpp b/failtest/cwiseunaryview_on_const_type_actually_const.cpp
index fd3c1d6..7ecf542 100644
--- a/failtest/cwiseunaryview_on_const_type_actually_const.cpp
+++ b/failtest/cwiseunaryview_on_const_type_actually_const.cpp
@@ -10,7 +10,7 @@
 
 void foo() {
   MatrixXf m;
-  CwiseUnaryView<internal::scalar_real_ref_op<float>, CV_QUALIFIER MatrixXf>(m).coeffRef(0, 0) = 1.0f;
+  CwiseUnaryView<internal::scalar_real_ref_op<double>, CV_QUALIFIER MatrixXf>(m).coeffRef(0, 0) = 1.0f;
 }
 
 int main() {}
diff --git a/lapack/cholesky.inc b/lapack/cholesky.inc
index a93a511..dea5bf6 100644
--- a/lapack/cholesky.inc
+++ b/lapack/cholesky.inc
@@ -28,9 +28,9 @@
   MatrixType A(a, *n, *n, *lda);
   int ret;
   if (UPLO(*uplo) == UP)
-    ret = int(Eigen::internal::llt_inplace<Scalar, Eigen::Upper>::blocked(A));
+    ret = int(internal::llt_inplace<Scalar, Upper>::blocked(A));
   else
-    ret = int(Eigen::internal::llt_inplace<Scalar, Eigen::Lower>::blocked(A));
+    ret = int(internal::llt_inplace<Scalar, Lower>::blocked(A));
 
   if (ret >= 0) *info = ret + 1;
 }
@@ -61,10 +61,10 @@
   MatrixType B(b, *n, *nrhs, *ldb);
 
   if (UPLO(*uplo) == UP) {
-    A.triangularView<Eigen::Upper>().adjoint().solveInPlace(B);
-    A.triangularView<Eigen::Upper>().solveInPlace(B);
+    A.triangularView<Upper>().adjoint().solveInPlace(B);
+    A.triangularView<Upper>().solveInPlace(B);
   } else {
-    A.triangularView<Eigen::Lower>().solveInPlace(B);
-    A.triangularView<Eigen::Lower>().adjoint().solveInPlace(B);
+    A.triangularView<Lower>().solveInPlace(B);
+    A.triangularView<Lower>().adjoint().solveInPlace(B);
   }
 }
diff --git a/lapack/eigenvalues.inc b/lapack/eigenvalues.inc
index 211a7ff..6f168de 100644
--- a/lapack/eigenvalues.inc
+++ b/lapack/eigenvalues.inc
@@ -47,10 +47,9 @@
     mat = matrix(a, *n, *n, *lda);
 
   bool computeVectors = *jobz == 'V' || *jobz == 'v';
-  Eigen::SelfAdjointEigenSolver<PlainMatrixType> eig(
-      mat, computeVectors ? Eigen::ComputeEigenvectors : Eigen::EigenvaluesOnly);
+  SelfAdjointEigenSolver<PlainMatrixType> eig(mat, computeVectors ? ComputeEigenvectors : EigenvaluesOnly);
 
-  if (eig.info() == Eigen::NoConvergence) {
+  if (eig.info() == NoConvergence) {
     make_vector(w, *n).setZero();
     if (computeVectors) matrix(a, *n, *n, *lda).setIdentity();
     //*info = 1;
diff --git a/lapack/lu.inc b/lapack/lu.inc
index 2ddaf95..d30c8ce 100644
--- a/lapack/lu.inc
+++ b/lapack/lu.inc
@@ -62,8 +62,6 @@
   MatrixType lu(a, *n, *n, *lda);
   MatrixType B(b, *n, *nrhs, *ldb);
 
-  using Eigen::UnitLower;
-  using Eigen::Upper;
   for (int i = 0; i < *n; ++i) ipiv[i]--;
   if (OP(*trans) == NOTR) {
     B = PivotsType(ipiv, *n) * B;
diff --git a/lapack/svd.inc b/lapack/svd.inc
index 262c5c6..8e45310 100644
--- a/lapack/svd.inc
+++ b/lapack/svd.inc
@@ -56,12 +56,12 @@
   PlainMatrixType mat(*m, *n);
   mat = matrix(a, *m, *n, *lda);
 
-  int option = *jobz == 'A'   ? Eigen::ComputeFullU | Eigen::ComputeFullV
-               : *jobz == 'S' ? Eigen::ComputeThinU | Eigen::ComputeThinV
-               : *jobz == 'O' ? Eigen::ComputeThinU | Eigen::ComputeThinV
+  int option = *jobz == 'A'   ? ComputeFullU | ComputeFullV
+               : *jobz == 'S' ? ComputeThinU | ComputeThinV
+               : *jobz == 'O' ? ComputeThinU | ComputeThinV
                               : 0;
 
-  Eigen::BDCSVD<PlainMatrixType> svd(mat, option);
+  BDCSVD<PlainMatrixType> svd(mat, option);
 
   make_vector(s, diag_size) = svd.singularValues().head(diag_size);
 
@@ -119,14 +119,14 @@
   PlainMatrixType mat(*m, *n);
   mat = matrix(a, *m, *n, *lda);
 
-  int option = (*jobu == 'A'                   ? Eigen::ComputeFullU
-                : *jobu == 'S' || *jobu == 'O' ? Eigen::ComputeThinU
+  int option = (*jobu == 'A'                   ? ComputeFullU
+                : *jobu == 'S' || *jobu == 'O' ? ComputeThinU
                                                : 0) |
-               (*jobv == 'A'                   ? Eigen::ComputeFullV
-                : *jobv == 'S' || *jobv == 'O' ? Eigen::ComputeThinV
+               (*jobv == 'A'                   ? ComputeFullV
+                : *jobv == 'S' || *jobv == 'O' ? ComputeThinV
                                                : 0);
 
-  Eigen::JacobiSVD<PlainMatrixType> svd(mat, option);
+  JacobiSVD<PlainMatrixType> svd(mat, option);
 
   make_vector(s, diag_size) = svd.singularValues().head(diag_size);
   {
diff --git a/test/AnnoyingScalar.h b/test/AnnoyingScalar.h
index 00a20c7..637fdbf 100644
--- a/test/AnnoyingScalar.h
+++ b/test/AnnoyingScalar.h
@@ -184,6 +184,19 @@
   return *x.v;
 }
 
+template <>
+struct random_impl<AnnoyingScalar> {
+  using Impl = random_impl<float>;
+  static EIGEN_DEVICE_FUNC inline AnnoyingScalar run(const AnnoyingScalar& x, const AnnoyingScalar& y) {
+    float result = Impl::run(*x.v, *y.v);
+    return AnnoyingScalar(result);
+  }
+  static EIGEN_DEVICE_FUNC inline AnnoyingScalar run() {
+    float result = Impl::run();
+    return AnnoyingScalar(result);
+  }
+};
+
 }  // namespace internal
 }  // namespace Eigen
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 4692584..4c7c3a4 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -218,10 +218,9 @@
 ei_add_test(smallvectors)
 ei_add_test(mapped_matrix)
 ei_add_test(mapstride)
-ei_add_test(unaryview)
+ei_add_test(unaryviewstride)
 ei_add_test(mapstaticmethods)
 ei_add_test(array_cwise)
-ei_add_test(matrix_cwise)
 ei_add_test(array_for_matrix)
 ei_add_test(array_replicate)
 ei_add_test(array_reverse)
diff --git a/test/MovableScalar.h b/test/MovableScalar.h
index c8bf546..56a873e 100644
--- a/test/MovableScalar.h
+++ b/test/MovableScalar.h
@@ -26,10 +26,24 @@
   operator Scalar() const { return this->size() > 0 ? this->back() : Scalar(); }
 };
 
-template <typename Scalar>
-struct NumTraits<MovableScalar<Scalar>> : GenericNumTraits<Scalar> {
-  enum { RequireInitialization = 1 };
+template <>
+struct NumTraits<MovableScalar<float>> : GenericNumTraits<float> {};
+
+namespace internal {
+template <typename T>
+struct random_impl<MovableScalar<T>> {
+  using MoveableT = MovableScalar<T>;
+  using Impl = random_impl<T>;
+  static EIGEN_DEVICE_FUNC inline MoveableT run(const MoveableT& x, const MoveableT& y) {
+    T result = Impl::run(x, y);
+    return MoveableT(result);
+  }
+  static EIGEN_DEVICE_FUNC inline MoveableT run() {
+    T result = Impl::run();
+    return MoveableT(result);
+  }
 };
+}  // namespace internal
 
 }  // namespace Eigen
 
diff --git a/test/SafeScalar.h b/test/SafeScalar.h
index 33a54c5..4f4da56 100644
--- a/test/SafeScalar.h
+++ b/test/SafeScalar.h
@@ -4,30 +4,43 @@
 class SafeScalar {
  public:
   SafeScalar() : initialized_(false) {}
+  SafeScalar(const SafeScalar& other) { *this = other; }
+  SafeScalar& operator=(const SafeScalar& other) {
+    val_ = T(other);
+    initialized_ = true;
+    return *this;
+  }
 
-  SafeScalar(const T& val) : val_(val), initialized_(true) {}
-
-  template <typename Source>
-  explicit SafeScalar(const Source& val) : SafeScalar(T(val)) {}
+  SafeScalar(T val) : val_(val), initialized_(true) {}
+  SafeScalar& operator=(T val) {
+    val_ = val;
+    initialized_ = true;
+  }
 
   operator T() const {
     VERIFY(initialized_ && "Uninitialized access.");
     return val_;
   }
 
-  template <typename Target>
-  explicit operator Target() const {
-    return Target(this->operator T());
-  }
-
  private:
   T val_;
   bool initialized_;
 };
 
 namespace Eigen {
+namespace internal {
 template <typename T>
-struct NumTraits<SafeScalar<T>> : GenericNumTraits<T> {
-  enum { RequireInitialization = 1 };
+struct random_impl<SafeScalar<T>> {
+  using SafeT = SafeScalar<T>;
+  using Impl = random_impl<T>;
+  static EIGEN_DEVICE_FUNC inline SafeT run(const SafeT& x, const SafeT& y) {
+    T result = Impl::run(x, y);
+    return SafeT(result);
+  }
+  static EIGEN_DEVICE_FUNC inline SafeT run() {
+    T result = Impl::run();
+    return SafeT(result);
+  }
 };
-}  // namespace Eigen
\ No newline at end of file
+}  // namespace internal
+}  // namespace Eigen
diff --git a/test/geo_alignedbox.cpp b/test/geo_alignedbox.cpp
index da49c08..f335b34 100644
--- a/test/geo_alignedbox.cpp
+++ b/test/geo_alignedbox.cpp
@@ -51,8 +51,6 @@
   kill_extra_precision(p0);
   kill_extra_precision(p1);
 
-  VERIFY(numext::equal_strict(b0.volume(), Scalar(0)));
-
   b0.extend(p0);
   b0.extend(p1);
   VERIFY(b0.contains(p0 * s1 + (Scalar(1) - s1) * p1));
@@ -425,8 +423,6 @@
 
   BoxType b0(dim);
 
-  VERIFY(numext::equal_strict(b0.volume(), Scalar(0)));
-
   b0.extend(p0);
   b0.extend(p1);
 
diff --git a/test/incomplete_cholesky.cpp b/test/incomplete_cholesky.cpp
index fccc207..2b03ba1 100644
--- a/test/incomplete_cholesky.cpp
+++ b/test/incomplete_cholesky.cpp
@@ -54,28 +54,10 @@
   }
 }
 
-void test_non_spd() {
-  Eigen::SparseMatrix<double> A(2, 2);
-  A.insert(0, 0) = 0;
-  A.insert(1, 1) = 3;
-
-  Eigen::IncompleteCholesky<double> solver(A);
-
-  // Recover original matrix.
-  Eigen::MatrixXd M = solver.permutationP().transpose() *
-                      (solver.scalingS().asDiagonal().inverse() *
-                       (solver.matrixL() * solver.matrixL().transpose() -
-                        solver.shift() * Eigen::MatrixXd::Identity(A.rows(), A.cols())) *
-                       solver.scalingS().asDiagonal().inverse()) *
-                      solver.permutationP();
-  VERIFY_IS_APPROX(A.toDense(), M);
-}
-
 EIGEN_DECLARE_TEST(incomplete_cholesky) {
   CALL_SUBTEST_1((test_incomplete_cholesky_T<double, int>()));
   CALL_SUBTEST_2((test_incomplete_cholesky_T<std::complex<double>, int>()));
   CALL_SUBTEST_3((test_incomplete_cholesky_T<double, long int>()));
 
-  CALL_SUBTEST_4((bug1150<0>()));
-  CALL_SUBTEST_4(test_non_spd());
+  CALL_SUBTEST_1((bug1150<0>()));
 }
diff --git a/test/indexed_view.cpp b/test/indexed_view.cpp
index f165e8b..4040448 100644
--- a/test/indexed_view.cpp
+++ b/test/indexed_view.cpp
@@ -498,352 +498,12 @@
     // A(1, seq(0,2,1)).cwiseAbs().colwise().replicate(2).eval();
     STATIC_CHECK(((internal::evaluator<decltype(A(1, seq(0, 2, 1)))>::Flags & RowMajorBit) == RowMajorBit));
   }
-
-  // Direct access.
-  {
-    int rows = 3;
-    int row_start = internal::random<int>(0, rows - 1);
-    int row_inc = internal::random<int>(1, rows - row_start);
-    int row_size = internal::random<int>(1, (rows - row_start) / row_inc);
-    auto row_seq = seqN(row_start, row_size, row_inc);
-
-    int cols = 3;
-    int col_start = internal::random<int>(0, cols - 1);
-    int col_inc = internal::random<int>(1, cols - col_start);
-    int col_size = internal::random<int>(1, (cols - col_start) / col_inc);
-    auto col_seq = seqN(col_start, col_size, col_inc);
-
-    MatrixXd m1 = MatrixXd::Random(rows, cols);
-    MatrixXd m2 = MatrixXd::Random(cols, rows);
-    VERIFY_IS_APPROX(m1(row_seq, indexing::all) * m2, m1(row_seq, indexing::all).eval() * m2);
-    VERIFY_IS_APPROX(m1 * m2(indexing::all, col_seq), m1 * m2(indexing::all, col_seq).eval());
-    VERIFY_IS_APPROX(m1(row_seq, col_seq) * m2(col_seq, row_seq),
-                     m1(row_seq, col_seq).eval() * m2(col_seq, row_seq).eval());
-
-    VectorXd v1 = VectorXd::Random(cols);
-    VERIFY_IS_APPROX(m1(row_seq, col_seq) * v1(col_seq), m1(row_seq, col_seq).eval() * v1(col_seq).eval());
-    VERIFY_IS_APPROX(v1(col_seq).transpose() * m2(col_seq, row_seq),
-                     v1(col_seq).transpose().eval() * m2(col_seq, row_seq).eval());
-  }
-}
-
-void check_tutorial_examples() {
-  constexpr int kRows = 11;
-  constexpr int kCols = 21;
-  Matrix<double, kRows, kCols> A = Matrix<double, kRows, kCols>::Random();
-  Vector<double, kRows> v = Vector<double, kRows>::Random();
-
-  {
-    auto slice = A(seqN(fix<0>, fix<5>, fix<2>), seqN(fix<2>, fix<7>, fix<1>));
-    EIGEN_UNUSED_VARIABLE(slice);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), 5);
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), 7);
-  }
-  {
-    auto slice = A(seqN(fix<0>, fix<5>, fix<2>), indexing::all);
-    EIGEN_UNUSED_VARIABLE(slice);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), 5);
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), kCols);
-  }
-
-  // Examples from slicing tutorial.
-  // Bottom-left corner.
-  {
-    Index i = 3;
-    Index n = 5;
-    auto slice = A(seq(i, indexing::last), seqN(0, n));
-    auto block = A.bottomLeftCorner(A.rows() - i, n);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), Dynamic);
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), Dynamic);
-    VERIFY_IS_EQUAL(slice, block);
-  }
-  {
-    auto i = fix<3>;
-    auto n = fix<5>;
-    auto slice = A(seq(i, indexing::last), seqN(fix<0>, n));
-    auto block = A.bottomLeftCorner(fix<kRows> - i, n);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), A.RowsAtCompileTime - i);
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), n);
-    VERIFY_IS_EQUAL(slice, block);
-  }
-
-  // Block starting at i,j of size m,n.
-  {
-    Index i = 4;
-    Index j = 2;
-    Index m = 3;
-    Index n = 5;
-    auto slice = A(seqN(i, m), seqN(j, n));
-    auto block = A.block(i, j, m, n);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-  {
-    auto i = fix<4>;
-    auto j = fix<2>;
-    auto m = fix<3>;
-    auto n = fix<5>;
-    auto slice = A(seqN(i, m), seqN(j, n));
-    auto block = A.block(i, j, m, n);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-
-  // Block starting at i0,j0 and ending at i1,j1.
-  {
-    Index i0 = 4;
-    Index i1 = 7;
-    Index j0 = 3;
-    Index j1 = 5;
-    auto slice = A(seq(i0, i1), seq(j0, j1));
-    auto block = A.block(i0, j0, i1 - i0 + 1, j1 - j0 + 1);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-  {
-    auto i0 = fix<4>;
-    auto i1 = fix<7>;
-    auto j0 = fix<3>;
-    auto j1 = fix<5>;
-    auto slice = A(seq(i0, i1), seq(j0, j1));
-    auto block = A.block(i0, j0, i1 - i0 + fix<1>, j1 - j0 + fix<1>);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-
-  // Even columns of A.
-  {
-    auto slice = A(all, seq(0, last, 2));
-    auto block =
-        Eigen::Map<Eigen::Matrix<double, kRows, Dynamic>, 0, OuterStride<2 * kRows>>(A.data(), kRows, (kCols + 1) / 2);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-  {
-    auto slice = A(all, seq(fix<0>, last, fix<2>));
-    auto block = Eigen::Map<Eigen::Matrix<double, kRows, (kCols + 1) / 2>, 0, OuterStride<2 * kRows>>(A.data());
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-
-  // First n odd rows of A.
-  {
-    Index n = 3;
-    auto slice = A(seqN(1, n, 2), all);
-    auto block = Eigen::Map<Eigen::Matrix<double, Dynamic, kCols>, 0, Stride<kRows, 2>>(A.data() + 1, n, kCols);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-  {
-    auto n = fix<3>;
-    auto slice = A(seqN(fix<1>, n, fix<2>), all);
-    auto block = Eigen::Map<Eigen::Matrix<double, 3, kCols>, 0, Stride<kRows, 2>>(A.data() + 1);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-
-  // The second-last column.
-  {
-    auto slice = A(all, last - 1);
-    auto block = A.col(A.cols() - 2);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-  {
-    auto slice = A(all, last - fix<1>);
-    auto block = A.col(fix<kCols> - fix<2>);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-
-  // The middle row.
-  {
-    auto slice = A(last / 2, all);
-    auto block = A.row((A.rows() - 1) / 2);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-  {
-    auto slice = A(last / fix<2>, all);
-    auto block = A.row(fix<(kRows - 1) / 2>);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-
-  // Last elements of v starting at i.
-  {
-    Index i = 7;
-    auto slice = v(seq(i, last));
-    auto block = v.tail(v.size() - i);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-  {
-    auto i = fix<7>;
-    auto slice = v(seq(i, last));
-    auto block = v.tail(fix<kRows> - i);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-
-  // Last n elements of v.
-  {
-    Index n = 6;
-    auto slice = v(seq(last + 1 - n, last));
-    auto block = v.tail(n);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-  {
-    auto n = fix<6>;
-    auto slice = v(seq(last + fix<1> - n, last));
-    auto block = v.tail(n);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-
-  // Last n elements of v.
-  {
-    Index n = 6;
-    auto slice = v(lastN(n));
-    auto block = v.tail(n);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-  {
-    auto n = fix<6>;
-    auto slice = v(lastN(n));
-    auto block = v.tail(n);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-
-  // Bottom-right corner of A of size m times n.
-  {
-    Index m = 3;
-    Index n = 6;
-    auto slice = A(lastN(m), lastN(n));
-    auto block = A.bottomRightCorner(m, n);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-  {
-    auto m = fix<3>;
-    auto n = fix<6>;
-    auto slice = A(lastN(m), lastN(n));
-    auto block = A.bottomRightCorner(m, n);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-
-  // Last n columns with a stride of 3.
-  {
-    Index n = 4;
-    constexpr Index stride = 3;
-    auto slice = A(all, lastN(n, stride));
-    auto block = Eigen::Map<Eigen::Matrix<double, kRows, Dynamic>, 0, OuterStride<stride * kRows>>(
-        A.data() + (kCols - 1 - (n - 1) * stride) * kRows, A.rows(), n);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-  {
-    constexpr auto n = fix<4>;
-    constexpr auto stride = fix<3>;
-    auto slice = A(all, lastN(n, stride));
-    auto block = Eigen::Map<Eigen::Matrix<double, kRows, n>, 0, OuterStride<stride * kRows>>(
-        A.data() + (kCols - 1 - (n - 1) * stride) * kRows, A.rows(), n);
-    VERIFY_IS_EQUAL(int(slice.RowsAtCompileTime), int(block.RowsAtCompileTime));
-    VERIFY_IS_EQUAL(int(slice.ColsAtCompileTime), int(block.ColsAtCompileTime));
-    VERIFY_IS_EQUAL(slice, block);
-  }
-
-  // Compile time size and increment.
-  {
-    auto slice1 = v(seq(last - fix<7>, last - fix<2>));
-    auto slice2 = v(seqN(last - 7, fix<6>));
-    VERIFY_IS_EQUAL(slice1, slice2);
-    VERIFY_IS_EQUAL(int(slice1.SizeAtCompileTime), 6);
-    VERIFY_IS_EQUAL(int(slice2.SizeAtCompileTime), 6);
-    auto slice3 = A(all, seq(fix<0>, last, fix<2>));
-    VERIFY_IS_EQUAL(int(slice3.RowsAtCompileTime), kRows);
-    VERIFY_IS_EQUAL(int(slice3.ColsAtCompileTime), (kCols + 1) / 2);
-  }
-
-  // Reverse order.
-  {
-    auto slice = A(all, seq(20, 10, fix<-2>));
-    auto block = Eigen::Map<Eigen::Matrix<double, kRows, Dynamic>, 0, OuterStride<-2 * kRows>>(
-        A.data() + 20 * kRows, A.rows(), (20 - 10 + 2) / 2);
-    VERIFY_IS_EQUAL(slice, block);
-  }
-  {
-    Index n = 10;
-    auto slice1 = A(seqN(last, n, fix<-1>), all);
-    auto slice2 = A(lastN(n).reverse(), all);
-    VERIFY_IS_EQUAL(slice1, slice2);
-  }
-
-  // Array of indices.
-  {
-    std::vector<int> ind{4, 2, 5, 5, 3};
-    auto slice1 = A(all, ind);
-    for (int i = 0; i < ind.size(); ++i) {
-      VERIFY_IS_EQUAL(slice1.col(i), A.col(ind[i]));
-    }
-
-    auto slice2 = A(all, {4, 2, 5, 5, 3});
-    VERIFY_IS_EQUAL(slice1, slice2);
-
-    Eigen::ArrayXi indarray(5);
-    indarray << 4, 2, 5, 5, 3;
-    auto slice3 = A(all, indarray);
-    VERIFY_IS_EQUAL(slice1, slice3);
-  }
-
-  // Custom index list.
-  {
-    struct pad {
-      Index size() const { return out_size; }
-      Index operator[](Index i) const { return std::max<Index>(0, i - (out_size - in_size)); }
-      Index in_size, out_size;
-    };
-
-    auto slice = A(pad{3, 5}, pad{3, 5});
-    Eigen::MatrixXd B = slice;
-    VERIFY_IS_EQUAL(B.block(2, 2, 3, 3), A.block(0, 0, 3, 3));
-  }
 }
 
 EIGEN_DECLARE_TEST(indexed_view) {
-  for (int i = 0; i < g_repeat; i++) {
-    CALL_SUBTEST_1(check_indexed_view());
-  }
-  CALL_SUBTEST_1(check_tutorial_examples());
+  //   for(int i = 0; i < g_repeat; i++) {
+  CALL_SUBTEST_1(check_indexed_view());
+  //   }
 
   // static checks of some internals:
   STATIC_CHECK((internal::is_valid_index_type<int>::value));
diff --git a/test/matrix_cwise.cpp b/test/matrix_cwise.cpp
deleted file mode 100644
index 56cd2d6..0000000
--- a/test/matrix_cwise.cpp
+++ /dev/null
@@ -1,302 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#include <vector>
-#include "main.h"
-
-template <typename MatrixType, typename NewScalar>
-struct matrix_of {
-  using type = MatrixType;
-};
-
-template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols, typename NewScalar>
-struct matrix_of<Eigen::Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>, NewScalar> {
-  using type = Eigen::Matrix<NewScalar, Rows, Cols, Options, MaxRows, MaxCols>;
-};
-
-// Unary function reference.
-template <typename MatrixType, typename Func,
-          typename OutMatrixType = typename matrix_of<
-              MatrixType, typename Eigen::internal::result_of<Func(typename MatrixType::Scalar)>::type>::type>
-OutMatrixType cwise_ref(const MatrixType& m, Func f = Func()) {
-  OutMatrixType out(m.rows(), m.cols());
-  for (Eigen::Index r = 0; r < m.rows(); ++r) {
-    for (Eigen::Index c = 0; c < m.cols(); ++c) {
-      out(r, c) = f(m(r, c));
-    }
-  }
-  return out;
-}
-
-// Binary function reference.
-template <typename MatrixType, typename Func,
-          typename OutMatrixType = typename matrix_of<
-              MatrixType, typename Eigen::internal::result_of<Func(typename MatrixType::Scalar,
-                                                                   typename MatrixType::Scalar)>::type>::type>
-OutMatrixType cwise_ref(const MatrixType& m1, const MatrixType& m2, Func f = Func()) {
-  OutMatrixType out(m1.rows(), m1.cols());
-  for (Eigen::Index r = 0; r < m1.rows(); ++r) {
-    for (Eigen::Index c = 0; c < m1.cols(); ++c) {
-      out(r, c) = f(m1(r, c), m2(r, c));
-    }
-  }
-  return out;
-}
-
-template <typename MatrixType>
-void test_cwise_real(const MatrixType& m) {
-  using Scalar = typename MatrixType::Scalar;
-  Index rows = m.rows();
-  Index cols = m.cols();
-  MatrixType m1 = MatrixType::Random(rows, cols);
-  MatrixType m2, m3, m4;
-
-  // Supported unary ops.
-  VERIFY_IS_CWISE_APPROX(m1.cwiseAbs(), cwise_ref(m1, [](const Scalar& x) { return Eigen::numext::abs(x); }));
-  VERIFY_IS_CWISE_APPROX(m1.cwiseSign(), cwise_ref(m1, [](const Scalar& x) { return Eigen::numext::sign(x); }));
-  VERIFY_IS_CWISE_APPROX(m1.cwiseCbrt(), cwise_ref(m1, [](const Scalar& x) { return Eigen::numext::cbrt(x); }));
-  // For integers, avoid division by zero.
-  m2 = m1;
-  if (Eigen::NumTraits<Scalar>::IsInteger) {
-    m2 = m1.unaryExpr([](const Scalar& x) { return Eigen::numext::equal_strict(x, Scalar(0)) ? Scalar(1) : x; });
-  }
-  VERIFY_IS_CWISE_APPROX(m2.cwiseInverse(), cwise_ref(m2, [](const Scalar& x) { return Scalar(Scalar(1) / x); }));
-  VERIFY_IS_CWISE_APPROX(m1.cwiseArg(), cwise_ref(m1, [](const Scalar& x) { return Eigen::numext::arg(x); }));
-  // Only take sqrt of positive values.
-  m2 = m1.cwiseAbs();
-  VERIFY_IS_CWISE_APPROX(m2.cwiseSqrt(), cwise_ref(m2, [](const Scalar& x) { return Eigen::numext::sqrt(x); }));
-  // Only find Square/Abs2 of +/- sqrt values so we don't overflow.
-  m2 = m2.cwiseSqrt().array() * m1.cwiseSign().array();
-  VERIFY_IS_CWISE_APPROX(m2.cwiseAbs2(), cwise_ref(m2, [](const Scalar& x) { return Eigen::numext::abs2(x); }));
-  VERIFY_IS_CWISE_APPROX(m2.cwiseSquare(), cwise_ref(m2, [](const Scalar& x) { return Scalar(x * x); }));
-  VERIFY_IS_CWISE_APPROX(m2.cwisePow(Scalar(2)),
-                         cwise_ref(m2, [](const Scalar& x) { return Eigen::numext::pow(x, Scalar(2)); }));
-
-  // Supported binary ops.
-  m1.setRandom(rows, cols);
-  m2.setRandom(rows, cols);
-  VERIFY_IS_CWISE_EQUAL(m1.cwiseMin(m2),
-                        cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Eigen::numext::mini(x, y); }));
-  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMin<PropagateFast>(m2),
-                        cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Eigen::numext::mini(x, y); }));
-  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMin<PropagateNaN>(m2),
-                        cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Eigen::numext::mini(x, y); }));
-  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMin<PropagateNumbers>(m2),
-                        cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Eigen::numext::mini(x, y); }));
-  VERIFY_IS_CWISE_EQUAL(m1.cwiseMax(m2),
-                        cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Eigen::numext::maxi(x, y); }));
-  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMax<PropagateFast>(m2),
-                        cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Eigen::numext::maxi(x, y); }));
-  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMax<PropagateNaN>(m2),
-                        cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Eigen::numext::maxi(x, y); }));
-  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMax<PropagateNumbers>(m2),
-                        cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Eigen::numext::maxi(x, y); }));
-  // Scalar comparison.
-  Scalar mean = Eigen::NumTraits<Scalar>::highest() / Scalar(2) + Eigen::NumTraits<Scalar>::lowest() / Scalar(2);
-  m4.setConstant(rows, cols, mean);
-  VERIFY_IS_CWISE_EQUAL(m1.cwiseMin(mean),
-                        cwise_ref(m1, m4, [](const Scalar& x, const Scalar& y) { return Eigen::numext::mini(x, y); }));
-  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMin<PropagateFast>(mean),
-                        cwise_ref(m1, m4, [](const Scalar& x, const Scalar& y) { return Eigen::numext::mini(x, y); }));
-  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMin<PropagateNaN>(mean),
-                        cwise_ref(m1, m4, [](const Scalar& x, const Scalar& y) { return Eigen::numext::mini(x, y); }));
-  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMin<PropagateNumbers>(mean),
-                        cwise_ref(m1, m4, [](const Scalar& x, const Scalar& y) { return Eigen::numext::mini(x, y); }));
-  VERIFY_IS_CWISE_EQUAL(m1.cwiseMax(mean),
-                        cwise_ref(m1, m4, [](const Scalar& x, const Scalar& y) { return Eigen::numext::maxi(x, y); }));
-  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMax<PropagateFast>(mean),
-                        cwise_ref(m1, m4, [](const Scalar& x, const Scalar& y) { return Eigen::numext::maxi(x, y); }));
-  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMax<PropagateNaN>(mean),
-                        cwise_ref(m1, m4, [](const Scalar& x, const Scalar& y) { return Eigen::numext::maxi(x, y); }));
-  VERIFY_IS_CWISE_EQUAL(m1.template cwiseMax<PropagateNumbers>(mean),
-                        cwise_ref(m1, m4, [](const Scalar& x, const Scalar& y) { return Eigen::numext::maxi(x, y); }));
-  // For products, avoid integer overflow by limiting the input < sqrt(max).
-  m3 = m1;
-  m4 = m2;
-  if (Eigen::NumTraits<Scalar>::IsInteger) {
-    const Scalar kMax = Eigen::numext::sqrt(Eigen::NumTraits<Scalar>::highest());
-    m3 = m1 - ((m1 / kMax) * kMax);
-    m4 = m2 - ((m2 / kMax) * kMax);
-  }
-  VERIFY_IS_CWISE_APPROX(m3.cwiseProduct(m4),
-                         cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) { return Scalar(x * y); }));
-  // For quotients involving integers, avoid division by zero.
-  m4 = m2;
-  if (Eigen::NumTraits<Scalar>::IsInteger) {
-    m4 = m2.unaryExpr([](const Scalar& x) { return Eigen::numext::equal_strict(x, Scalar(0)) ? Scalar(1) : x; });
-  }
-  VERIFY_IS_CWISE_APPROX(m1.cwiseQuotient(m4),
-                         cwise_ref(m1, m4, [](const Scalar& x, const Scalar& y) { return Scalar(x / y); }));
-  // For equality comparisons, limit range to increase number of equalities.
-  if (Eigen::NumTraits<Scalar>::IsInteger) {
-    const Scalar kMax = Scalar(10);
-    m3 = m1 - ((m1 / kMax) * kMax);
-    m4 = m2 - ((m2 / kMax) * kMax);
-    mean = Eigen::NumTraits<Scalar>::IsSigned ? Scalar(0) : kMax / Scalar(2);
-  } else {
-    const Scalar kShift = Scalar(10);
-    m3 = (m1 * kShift).array().floor() / kShift;
-    m4 = (m2 * kShift).array().floor() / kShift;
-    mean = Scalar(0);
-  }
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return Eigen::numext::equal_strict(x, y);
-                        }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseNotEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return !Eigen::numext::equal_strict(x, y);
-                        }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseLess(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) { return x < y; }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseGreater(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) { return x > y; }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseLessOrEqual(m4),
-                        cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) { return x <= y; }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseGreaterOrEqual(m4),
-                        cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) { return x >= y; }));
-  // Typed-Equality.
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return Eigen::numext::equal_strict(x, y) ? Scalar(1) : Scalar(0);
-                        }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedNotEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return !Eigen::numext::equal_strict(x, y) ? Scalar(1) : Scalar(0);
-                        }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedLess(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return x < y ? Scalar(1) : Scalar(0);
-                        }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedGreater(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return x > y ? Scalar(1) : Scalar(0);
-                        }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedLessOrEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return x <= y ? Scalar(1) : Scalar(0);
-                        }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedGreaterOrEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return x >= y ? Scalar(1) : Scalar(0);
-                        }));
-  // Scalar.
-  m4.setConstant(rows, cols, mean);
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return Eigen::numext::equal_strict(x, y);
-                        }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseNotEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return !Eigen::numext::equal_strict(x, y);
-                        }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseLess(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) { return x < y; }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseGreater(mean),
-                        cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) { return x > y; }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseLessOrEqual(mean),
-                        cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) { return x <= y; }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseGreaterOrEqual(mean),
-                        cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) { return x >= y; }));
-  // Typed.
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return Eigen::numext::equal_strict(x, y) ? Scalar(1) : Scalar(0);
-                        }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedNotEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return !Eigen::numext::equal_strict(x, y) ? Scalar(1) : Scalar(0);
-                        }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedLess(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return x < y ? Scalar(1) : Scalar(0);
-                        }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedGreater(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return x > y ? Scalar(1) : Scalar(0);
-                        }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedLessOrEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return x <= y ? Scalar(1) : Scalar(0);
-                        }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedGreaterOrEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return x >= y ? Scalar(1) : Scalar(0);
-                        }));
-}
-
-template <typename MatrixType>
-void test_cwise_complex(const MatrixType& m) {
-  using Scalar = typename MatrixType::Scalar;
-  using RealScalar = typename NumTraits<Scalar>::Real;
-  Index rows = m.rows();
-  Index cols = m.cols();
-  MatrixType m1 = MatrixType::Random(rows, cols);
-  MatrixType m2, m3, m4;
-
-  // Supported unary ops.
-  VERIFY_IS_CWISE_APPROX(m1.cwiseAbs(), cwise_ref(m1, [](const Scalar& x) { return Eigen::numext::abs(x); }));
-  VERIFY_IS_CWISE_APPROX(m1.cwiseSqrt(), cwise_ref(m1, [](const Scalar& x) { return Eigen::numext::sqrt(x); }));
-  VERIFY_IS_CWISE_APPROX(m1.cwiseInverse(), cwise_ref(m1, [](const Scalar& x) { return Scalar(Scalar(1) / x); }));
-  VERIFY_IS_CWISE_APPROX(m1.cwiseArg(), cwise_ref(m1, [](const Scalar& x) { return Eigen::numext::arg(x); }));
-  VERIFY_IS_CWISE_APPROX(m1.cwiseCArg(), cwise_ref(m1, [](const Scalar& x) { return Scalar(Eigen::numext::arg(x)); }));
-  // Only find Square/Abs2 of +/- sqrt values so we don't overflow.
-  m2 = m1.cwiseSqrt().array() * m1.cwiseSign().array();
-  VERIFY_IS_CWISE_APPROX(m2.cwiseAbs2(), cwise_ref(m2, [](const Scalar& x) { return Eigen::numext::abs2(x); }));
-  VERIFY_IS_CWISE_APPROX(m2.cwiseSquare(), cwise_ref(m2, [](const Scalar& x) { return Scalar(x * x); }));
-  VERIFY_IS_CWISE_APPROX(m2.cwisePow(Scalar(2)),
-                         cwise_ref(m2, [](const Scalar& x) { return Eigen::numext::pow(x, Scalar(2)); }));
-
-  // Supported binary ops.
-  m1.setRandom(rows, cols);
-  m2.setRandom(rows, cols);
-  VERIFY_IS_CWISE_APPROX(m1.cwiseProduct(m2),
-                         cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Scalar(x * y); }));
-  VERIFY_IS_CWISE_APPROX(m1.cwiseQuotient(m2),
-                         cwise_ref(m1, m2, [](const Scalar& x, const Scalar& y) { return Scalar(x / y); }));
-  // For equality comparisons, limit range to increase number of equalities.
-  {
-    const RealScalar kShift = RealScalar(10);
-    m3 = m1;
-    m4 = m2;
-    m3.real() = (m1.real() * kShift).array().floor() / kShift;
-    m3.imag() = (m1.imag() * kShift).array().floor() / kShift;
-    m4.real() = (m2.real() * kShift).array().floor() / kShift;
-    m4.imag() = (m2.imag() * kShift).array().floor() / kShift;
-  }
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return Eigen::numext::equal_strict(x, y);
-                        }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseNotEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return !Eigen::numext::equal_strict(x, y);
-                        }));
-  // Typed-Equality.
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return Eigen::numext::equal_strict(x, y) ? Scalar(1) : Scalar(0);
-                        }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedNotEqual(m4), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return !Eigen::numext::equal_strict(x, y) ? Scalar(1) : Scalar(0);
-                        }));
-  // Scalar.
-  Scalar mean = Scalar(0);
-  m4.setConstant(rows, cols, mean);
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return Eigen::numext::equal_strict(x, y);
-                        }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseNotEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return !Eigen::numext::equal_strict(x, y);
-                        }));
-  // Typed.
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return Eigen::numext::equal_strict(x, y) ? Scalar(1) : Scalar(0);
-                        }));
-  VERIFY_IS_CWISE_EQUAL(m3.cwiseTypedNotEqual(mean), cwise_ref(m3, m4, [](const Scalar& x, const Scalar& y) {
-                          return !Eigen::numext::equal_strict(x, y) ? Scalar(1) : Scalar(0);
-                        }));
-}
-
-EIGEN_DECLARE_TEST(matrix_cwise) {
-  for (int i = 0; i < g_repeat; i++) {
-    CALL_SUBTEST_1(test_cwise_real(Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
-    CALL_SUBTEST_1(test_cwise_real(Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
-    CALL_SUBTEST_1(test_cwise_real(Eigen::Matrix<Eigen::half, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
-    CALL_SUBTEST_1(test_cwise_real(Eigen::Matrix<Eigen::bfloat16, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
-    CALL_SUBTEST_2(test_cwise_complex(Eigen::Matrix<std::complex<float>, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
-    CALL_SUBTEST_2(test_cwise_complex(Eigen::Matrix<std::complex<double>, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
-    CALL_SUBTEST_3(test_cwise_real(Eigen::Matrix<int8_t, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
-    CALL_SUBTEST_3(test_cwise_real(Eigen::Matrix<int16_t, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
-    CALL_SUBTEST_3(test_cwise_real(Eigen::Matrix<int32_t, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
-    CALL_SUBTEST_3(test_cwise_real(Eigen::Matrix<int64_t, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
-    CALL_SUBTEST_4(test_cwise_real(Eigen::Matrix<uint8_t, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
-    CALL_SUBTEST_4(test_cwise_real(Eigen::Matrix<uint16_t, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
-    CALL_SUBTEST_4(test_cwise_real(Eigen::Matrix<uint32_t, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
-    CALL_SUBTEST_4(test_cwise_real(Eigen::Matrix<uint64_t, Eigen::Dynamic, Eigen::Dynamic>(20, 20)));
-  }
-}
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index db8c9b5..bf2970c 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -277,7 +277,6 @@
 
 template <typename Scalar, typename Packet>
 void packetmath_boolean_mask_ops() {
-  using RealScalar = typename NumTraits<Scalar>::Real;
   const int PacketSize = internal::unpacket_traits<Packet>::size;
   const int size = 2 * PacketSize;
   EIGEN_ALIGN_MAX Scalar data1[size];
@@ -290,7 +289,7 @@
   CHECK_CWISE1(internal::ptrue, internal::ptrue);
   CHECK_CWISE2_IF(true, internal::pandnot, internal::pandnot);
   for (int i = 0; i < PacketSize; ++i) {
-    data1[i] = Scalar(RealScalar(i));
+    data1[i] = Scalar(i);
     data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
   }
 
@@ -1333,138 +1332,6 @@
   VERIFY(test::areApprox(ref, pval, PacketSize) && "conj_helper pmadd");
 }
 
-template <typename Scalar, typename Packet, bool HasExp = internal::packet_traits<Scalar>::HasExp>
-struct exp_complex_test_impl {
-  typedef typename Scalar::value_type RealScalar;
-
-  static Scalar pexp1(const Scalar& x) {
-    Packet px = internal::pset1<Packet>(x);
-    Packet py = internal::pexp(px);
-    return internal::pfirst(py);
-  }
-
-  static Scalar cis(const RealScalar& x) { return Scalar(numext::cos(x), numext::sin(x)); }
-
-  // Verify equality with signed zero.
-  static bool is_exactly_equal(RealScalar a, RealScalar b) {
-    // NaNs are always unsigned, and always compare not equal directly.
-    if ((numext::isnan)(a)) {
-      return (numext::isnan)(b);
-    }
-
-    RealScalar zero(0);
-#ifdef EIGEN_ARCH_ARM
-    // ARM automatically flushes denormals to zero.
-    // Preserve sign by multiplying by +0.
-    if (numext::abs(a) < (std::numeric_limits<RealScalar>::min)()) {
-      a = a * zero;
-    }
-    if (numext::abs(b) < (std::numeric_limits<RealScalar>::min)()) {
-      b = b * zero;
-    }
-#endif
-
-    // Signed zero.
-    if (a == zero) {
-      // Signs are either 0 or NaN, so verify that their comparisons to zero are equal.
-      return (a == b) && ((numext::signbit(a) == zero) == (numext::signbit(b) == zero));
-    }
-    // Allow _some_ tolerance.
-    return verifyIsApprox(a, b);
-  }
-
-  // Verify equality with signed zero.
-  static bool is_exactly_equal(const Scalar& a, const Scalar& b) {
-    bool result = is_exactly_equal(numext::real_ref(a), numext::real_ref(b)) &&
-                  is_exactly_equal(numext::imag_ref(a), numext::imag_ref(b));
-    if (!result) {
-      std::cout << a << " != " << b << std::endl;
-    }
-    return result;
-  }
-
-  static bool is_sign_exp_unspecified(const Scalar& z) {
-    const RealScalar inf = std::numeric_limits<RealScalar>::infinity();
-    // If z is (-∞,±∞), the result is (±0,±0) (signs are unspecified)
-    if (numext::real_ref(z) == -inf && (numext::isinf)(numext::imag_ref(z))) {
-      return true;
-    }
-    // If z is (+∞,±∞), the result is (±∞,NaN) and FE_INVALID is raised (the sign of the real part is unspecified)
-    if (numext::real_ref(z) == +inf && (numext::isinf)(numext::imag_ref(z))) {
-      return true;
-    }
-    // If z is (-∞,NaN), the result is (±0,±0) (signs are unspecified)
-    if (numext::real_ref(z) == -inf && (numext::isnan)(numext::imag_ref(z))) {
-      return true;
-    }
-    // If z is (+∞,NaN), the result is (±∞,NaN) (the sign of the real part is unspecified)
-    if (numext::real_ref(z) == +inf && (numext::isnan)(numext::imag_ref(z))) {
-      return true;
-    }
-    return false;
-  }
-
-  static void run(Scalar* data1, Scalar* data2, Scalar* ref, int size) {
-    const int PacketSize = internal::unpacket_traits<Packet>::size;
-
-    for (int i = 0; i < size; ++i) {
-      data1[i] = Scalar(internal::random<RealScalar>(), internal::random<RealScalar>());
-    }
-    CHECK_CWISE1_N(std::exp, internal::pexp, size);
-
-    // Test all corner cases (and more).
-    const RealScalar edges[] = {RealScalar(0),
-                                RealScalar(1),
-                                RealScalar(2),
-                                RealScalar(EIGEN_PI / 2),
-                                RealScalar(EIGEN_PI),
-                                RealScalar(3 * EIGEN_PI / 2),
-                                RealScalar(2 * EIGEN_PI),
-                                numext::log(NumTraits<RealScalar>::highest()) - 1,
-                                NumTraits<RealScalar>::highest(),
-                                std::numeric_limits<RealScalar>::infinity(),
-                                std::numeric_limits<RealScalar>::quiet_NaN(),
-                                -RealScalar(0),
-                                -RealScalar(1),
-                                -RealScalar(2),
-                                -RealScalar(EIGEN_PI / 2),
-                                -RealScalar(EIGEN_PI),
-                                -RealScalar(3 * EIGEN_PI / 2),
-                                -RealScalar(2 * EIGEN_PI),
-                                -numext::log(NumTraits<RealScalar>::highest()) + 1,
-                                -NumTraits<RealScalar>::highest(),
-                                -std::numeric_limits<RealScalar>::infinity(),
-                                -std::numeric_limits<RealScalar>::quiet_NaN()};
-
-    for (RealScalar x : edges) {
-      for (RealScalar y : edges) {
-        Scalar z = Scalar(x, y);
-        Scalar w = pexp1(z);
-        if (is_sign_exp_unspecified(z)) {
-          Scalar abs_w = Scalar(numext::abs(numext::real_ref(w)), numext::abs(numext::imag_ref(w)));
-          Scalar expected = numext::exp(z);
-          Scalar abs_expected =
-              Scalar(numext::abs(numext::real_ref(expected)), numext::abs(numext::imag_ref(expected)));
-          VERIFY(is_exactly_equal(abs_w, abs_expected));
-        } else {
-          VERIFY(is_exactly_equal(w, numext::exp(z)));
-        }
-      }
-    }
-  }
-};
-
-template <typename Scalar, typename Packet>
-struct exp_complex_test_impl<Scalar, Packet, false> {
-  typedef typename Scalar::value_type RealScalar;
-  static void run(Scalar*, Scalar*, Scalar*, int){};
-};
-
-template <typename Scalar, typename Packet>
-void exp_complex_test(Scalar* data1, Scalar* data2, Scalar* ref, int size) {
-  exp_complex_test_impl<Scalar, Packet>::run(data1, data2, ref, size);
-}
-
 template <typename Scalar, typename Packet>
 void packetmath_complex() {
   typedef internal::packet_traits<Scalar> PacketTraits;
@@ -1578,9 +1445,8 @@
     data1[1] = Scalar(-inf, nan);
     data1[2] = Scalar(nan, inf);
     data1[3] = Scalar(nan, -inf);
-    CHECK_CWISE1_IM1ULP_N(numext::log, internal::plog, 4);
+    CHECK_CWISE1_IM1ULP_N(std::log, internal::plog, 4);
   }
-  exp_complex_test<Scalar, Packet>(data1, data2, ref, size);
 }
 
 template <typename Scalar, typename Packet>
diff --git a/test/qr.cpp b/test/qr.cpp
index f7f6990..de470ca 100644
--- a/test/qr.cpp
+++ b/test/qr.cpp
@@ -82,7 +82,6 @@
   m1 = m3 * m1 * m3.adjoint();
   qr.compute(m1);
   VERIFY_IS_APPROX(log(absdet), qr.logAbsDeterminant());
-  VERIFY_IS_APPROX(numext::sign(det), qr.signDeterminant());
   // This test is tricky if the determinant becomes too small.
   // Since we generate random numbers with magnitude range [0,1], the average determinant is 0.5^size
   RealScalar tol =
@@ -103,7 +102,7 @@
   VERIFY_RAISES_ASSERT(qr.householderQ())
   VERIFY_RAISES_ASSERT(qr.determinant())
   VERIFY_RAISES_ASSERT(qr.absDeterminant())
-  VERIFY_RAISES_ASSERT(qr.signDeterminant())
+  VERIFY_RAISES_ASSERT(qr.logAbsDeterminant())
 }
 
 EIGEN_DECLARE_TEST(qr) {
diff --git a/test/qr_colpivoting.cpp b/test/qr_colpivoting.cpp
index c821304..4f8711f 100644
--- a/test/qr_colpivoting.cpp
+++ b/test/qr_colpivoting.cpp
@@ -21,7 +21,6 @@
   Index rank = internal::random<Index>(1, (std::min)(rows, cols) - 1);
 
   typedef typename MatrixType::Scalar Scalar;
-  typedef typename MatrixType::RealScalar RealScalar;
   typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> MatrixQType;
   MatrixType matrix;
   createRandomPIMatrixOfRank(rank, rows, cols, matrix);
@@ -57,23 +56,6 @@
 
   MatrixType pinv = cod.pseudoInverse();
   VERIFY_IS_APPROX(cod_solution, pinv * rhs);
-
-  // now construct a (square) matrix with prescribed determinant
-  Index size = internal::random<Index>(2, 20);
-  matrix.setZero(size, size);
-  for (int i = 0; i < size; i++) {
-    matrix(i, i) = internal::random<Scalar>();
-  }
-  Scalar det = matrix.diagonal().prod();
-  RealScalar absdet = numext::abs(det);
-  CompleteOrthogonalDecomposition<MatrixType> cod2(matrix);
-  cod2.compute(matrix);
-  q = cod2.householderQ();
-  matrix = q * matrix * q.adjoint();
-  VERIFY_IS_APPROX(det, cod2.determinant());
-  VERIFY_IS_APPROX(absdet, cod2.absDeterminant());
-  VERIFY_IS_APPROX(numext::log(absdet), cod2.logAbsDeterminant());
-  VERIFY_IS_APPROX(numext::sign(det), cod2.signDeterminant());
 }
 
 template <typename MatrixType, int Cols2>
@@ -283,7 +265,6 @@
   VERIFY_IS_APPROX(det, qr.determinant());
   VERIFY_IS_APPROX(absdet, qr.absDeterminant());
   VERIFY_IS_APPROX(log(absdet), qr.logAbsDeterminant());
-  VERIFY_IS_APPROX(numext::sign(det), qr.signDeterminant());
 }
 
 template <typename MatrixType>
@@ -304,7 +285,6 @@
   VERIFY_RAISES_ASSERT(qr.determinant())
   VERIFY_RAISES_ASSERT(qr.absDeterminant())
   VERIFY_RAISES_ASSERT(qr.logAbsDeterminant())
-  VERIFY_RAISES_ASSERT(qr.signDeterminant())
 }
 
 template <typename MatrixType>
@@ -325,7 +305,6 @@
   VERIFY_RAISES_ASSERT(cod.determinant())
   VERIFY_RAISES_ASSERT(cod.absDeterminant())
   VERIFY_RAISES_ASSERT(cod.logAbsDeterminant())
-  VERIFY_RAISES_ASSERT(cod.signDeterminant())
 }
 
 EIGEN_DECLARE_TEST(qr_colpivoting) {
diff --git a/test/qr_fullpivoting.cpp b/test/qr_fullpivoting.cpp
index 2b6ecc5..71f3a51 100644
--- a/test/qr_fullpivoting.cpp
+++ b/test/qr_fullpivoting.cpp
@@ -105,7 +105,6 @@
   VERIFY_IS_APPROX(det, qr.determinant());
   VERIFY_IS_APPROX(absdet, qr.absDeterminant());
   VERIFY_IS_APPROX(log(absdet), qr.logAbsDeterminant());
-  VERIFY_IS_APPROX(numext::sign(det), qr.signDeterminant());
 }
 
 template <typename MatrixType>
@@ -126,7 +125,6 @@
   VERIFY_RAISES_ASSERT(qr.determinant())
   VERIFY_RAISES_ASSERT(qr.absDeterminant())
   VERIFY_RAISES_ASSERT(qr.logAbsDeterminant())
-  VERIFY_RAISES_ASSERT(qr.signDeterminant())
 }
 
 EIGEN_DECLARE_TEST(qr_fullpivoting) {
diff --git a/test/rand.cpp b/test/rand.cpp
index 4131f38..b5cf801 100644
--- a/test/rand.cpp
+++ b/test/rand.cpp
@@ -9,10 +9,6 @@
 
 #include <cstdlib>
 #include "main.h"
-#include "SafeScalar.h"
-
-// SafeScalar<T> is used to simulate custom Scalar types, which use a more generalized approach to generate random
-// numbers
 
 // For GCC-6, if this function is inlined then there seems to be an optimization
 // bug that triggers a failure.  This failure goes away if you access `r` in
@@ -29,28 +25,15 @@
 
 template <typename Scalar>
 void check_all_in_range(Scalar x, Scalar y) {
-  constexpr int repeats = 32;
-  uint64_t count = static_cast<uint64_t>(y) - static_cast<uint64_t>(x) + 1;
-  ArrayX<bool> mask(count);
-  // ensure that `count` does not overflow the return type of `mask.size()`
-  VERIFY(count == static_cast<uint64_t>(mask.size()));
-  mask.setConstant(false);
-  for (uint64_t k = 0; k < count; k++)
-    for (int repeat = 0; repeat < repeats; repeat++) {
-      Scalar r = check_in_range(x, y);
-      Index i = static_cast<Index>(r) - static_cast<Index>(x);
-      mask(i) = true;
-    }
+  Array<int, 1, Dynamic> mask(y - x + 1);
+  mask.fill(0);
+  int64_t n = (y - x + 1) * 32;
+  for (int64_t k = 0; k < n; ++k) {
+    mask(check_in_range(x, y) - x)++;
+  }
   for (Index i = 0; i < mask.size(); ++i)
-    if (mask(i) == false) std::cout << "WARNING: value " << x + i << " not reached." << std::endl;
-  VERIFY(mask.cwiseEqual(true).all());
-}
-
-template <typename Scalar>
-void check_all_in_range() {
-  const Scalar x = NumTraits<Scalar>::lowest();
-  const Scalar y = NumTraits<Scalar>::highest();
-  check_all_in_range(x, y);
+    if (mask(i) == 0) std::cout << "WARNING: value " << x + i << " not reached." << std::endl;
+  VERIFY((mask > 0).all());
 }
 
 template <typename Scalar, typename EnableIf = void>
@@ -83,108 +66,72 @@
   double bin_width_;
 };
 
-// helper class to avoid extending std:: namespace
-template <typename T>
-struct get_range_type : internal::make_unsigned<T> {};
-template <typename T>
-struct get_range_type<SafeScalar<T>> : internal::make_unsigned<T> {};
-
 template <typename Scalar>
 class HistogramHelper<Scalar, std::enable_if_t<Eigen::NumTraits<Scalar>::IsInteger>> {
  public:
-  using RangeType = typename get_range_type<Scalar>::type;
+  using RangeType = typename Eigen::internal::make_unsigned<Scalar>::type;
   HistogramHelper(int nbins)
       : HistogramHelper(Eigen::NumTraits<Scalar>::lowest(), Eigen::NumTraits<Scalar>::highest(), nbins) {}
   HistogramHelper(Scalar lower, Scalar upper, int nbins)
       : lower_{lower}, upper_{upper}, num_bins_{nbins}, bin_width_{bin_width(lower, upper, nbins)} {}
 
-  int bin(Scalar v) { return static_cast<int>(RangeType(RangeType(v) - RangeType(lower_)) / bin_width_); }
+  int bin(Scalar v) { return static_cast<int>(RangeType(v - lower_) / bin_width_); }
 
   double uniform_bin_probability(int bin) {
-    // The full range upper - lower + 1 might overflow the RangeType by one.
-    // So instead, we know we have (nbins - 1) bins of width bin_width_,
-    // and the last bin of width:
-    RangeType last_bin_width =
-        RangeType(upper_) - (RangeType(lower_) + RangeType(num_bins_ - 1) * bin_width_) + RangeType(1);
-    double last_bin_ratio = static_cast<double>(last_bin_width) / static_cast<double>(bin_width_);
-    // Total probability = (nbins - 1) * p + last_bin_ratio * p = 1.0
-    // p = 1.0 / (nbins - 1 + last_bin_ratio)
-    double p = 1.0 / (last_bin_ratio + num_bins_ - 1);
+    // Avoid overflow in computing range.
+    double range = static_cast<double>(RangeType(upper_ - lower_)) + 1.0;
     if (bin < num_bins_ - 1) {
-      return p;
+      return static_cast<double>(bin_width_) / range;
     }
-    return last_bin_ratio * p;
+    return static_cast<double>(RangeType(upper_) - RangeType((lower_ + bin * bin_width_)) + 1) / range;
   }
 
  private:
-  static constexpr RangeType bin_width(Scalar lower, Scalar upper, int nbins) {
+  static constexpr Scalar bin_width(Scalar lower, Scalar upper, int nbins) {
     // Avoid overflow in computing the full range.
-    // floor( (upper - lower + 1) / nbins) )
-    //    = floor( (upper- nbins - lower + 1 + nbins) / nbins) )
-    return RangeType(RangeType(upper - nbins) - RangeType(lower) + 1) / nbins + 1;
+    return RangeType(upper - nbins - lower + 1) / nbins + 1;
   }
 
   Scalar lower_;
   Scalar upper_;
   int num_bins_;
-  RangeType bin_width_;
+  Scalar bin_width_;
 };
 
 template <typename Scalar>
 void check_histogram(Scalar x, Scalar y, int bins) {
-  constexpr int repeats = 10000;
-  double count = double(bins) * double(repeats);
   Eigen::VectorXd hist = Eigen::VectorXd::Zero(bins);
   HistogramHelper<Scalar> hist_helper(x, y, bins);
-  for (int k = 0; k < bins; k++)
-    for (int repeat = 0; repeat < repeats; repeat++) {
-      Scalar r = check_in_range(x, y);
-      int bin = hist_helper.bin(r);
-      hist(bin)++;
-    }
-  //  Normalize bins by probability.
-  hist /= count;
+  int64_t n = static_cast<int64_t>(bins) * 10000;  // Approx 10000 per bin.
+  for (int64_t k = 0; k < n; ++k) {
+    Scalar r = check_in_range(x, y);
+    int bin = hist_helper.bin(r);
+    hist(bin)++;
+  }
+  // Normalize bins by probability.
   for (int i = 0; i < bins; ++i) {
-    hist(i) = hist(i) / hist_helper.uniform_bin_probability(i);
+    hist(i) = hist(i) / n / hist_helper.uniform_bin_probability(i);
   }
   VERIFY(((hist.array() - 1.0).abs() < 0.05).all());
 }
 
 template <typename Scalar>
 void check_histogram(int bins) {
-  constexpr int repeats = 10000;
-  double count = double(bins) * double(repeats);
   Eigen::VectorXd hist = Eigen::VectorXd::Zero(bins);
   HistogramHelper<Scalar> hist_helper(bins);
-  for (int k = 0; k < bins; k++)
-    for (int repeat = 0; repeat < repeats; repeat++) {
-      Scalar r = Eigen::internal::random<Scalar>();
-      int bin = hist_helper.bin(r);
-      hist(bin)++;
-    }
-  //  Normalize bins by probability.
-  hist /= count;
+  int64_t n = static_cast<int64_t>(bins) * 10000;  // Approx 10000 per bin.
+  for (int64_t k = 0; k < n; ++k) {
+    Scalar r = Eigen::internal::random<Scalar>();
+    int bin = hist_helper.bin(r);
+    hist(bin)++;
+  }
+  // Normalize bins by probability.
   for (int i = 0; i < bins; ++i) {
-    hist(i) = hist(i) / hist_helper.uniform_bin_probability(i);
+    hist(i) = hist(i) / n / hist_helper.uniform_bin_probability(i);
   }
   VERIFY(((hist.array() - 1.0).abs() < 0.05).all());
 }
 
-template <>
-void check_histogram<bool>(int) {
-  constexpr int bins = 2;
-  constexpr int repeats = 10000;
-  double count = double(bins) * double(repeats);
-  double true_count = 0.0;
-  for (int k = 0; k < bins; k++)
-    for (int repeat = 0; repeat < repeats; repeat++) {
-      bool r = Eigen::internal::random<bool>();
-      if (r) true_count += 1.0;
-    }
-  double p = true_count / count;
-  VERIFY(numext::abs(p - 0.5) < 0.05);
-}
-
 EIGEN_DECLARE_TEST(rand) {
   int64_t int64_ref = NumTraits<int64_t>::highest() / 10;
   // the minimum guarantees that these conversions are safe
@@ -235,16 +182,14 @@
   CALL_SUBTEST_7(check_all_in_range<int8_t>(-11 - int8t_offset, -11));
   CALL_SUBTEST_7(check_all_in_range<int8_t>(-126, -126 + int8t_offset));
   CALL_SUBTEST_7(check_all_in_range<int8_t>(126 - int8t_offset, 126));
-  CALL_SUBTEST_7(check_all_in_range<int8_t>());
-  CALL_SUBTEST_7(check_all_in_range<uint8_t>());
+  CALL_SUBTEST_7(check_all_in_range<int8_t>(-126, 126));
 
   CALL_SUBTEST_8(check_all_in_range<int16_t>(11, 11));
   CALL_SUBTEST_8(check_all_in_range<int16_t>(11, 11 + int16t_offset));
   CALL_SUBTEST_8(check_all_in_range<int16_t>(-5, 5));
   CALL_SUBTEST_8(check_all_in_range<int16_t>(-11 - int16t_offset, -11));
   CALL_SUBTEST_8(check_all_in_range<int16_t>(-24345, -24345 + int16t_offset));
-  CALL_SUBTEST_8(check_all_in_range<int16_t>());
-  CALL_SUBTEST_8(check_all_in_range<uint16_t>());
+  CALL_SUBTEST_8(check_all_in_range<int16_t>(24345, 24345 + int16t_offset));
 
   CALL_SUBTEST_9(check_all_in_range<int32_t>(11, 11));
   CALL_SUBTEST_9(check_all_in_range<int32_t>(11, 11 + g_repeat));
@@ -269,7 +214,6 @@
   CALL_SUBTEST_11(check_histogram<int32_t>(-RAND_MAX + 10,
                                            -int64_t(RAND_MAX) + 10 + bins * (2 * int64_t(RAND_MAX) / bins) - 1, bins));
 
-  CALL_SUBTEST_12(check_histogram<bool>(/*bins=*/2));
   CALL_SUBTEST_12(check_histogram<uint8_t>(/*bins=*/16));
   CALL_SUBTEST_12(check_histogram<uint16_t>(/*bins=*/1024));
   CALL_SUBTEST_12(check_histogram<uint32_t>(/*bins=*/1024));
@@ -285,16 +229,10 @@
   CALL_SUBTEST_14(check_histogram<long double>(-10.0L, 10.0L, /*bins=*/1024));
   CALL_SUBTEST_14(check_histogram<half>(half(-10.0f), half(10.0f), /*bins=*/512));
   CALL_SUBTEST_14(check_histogram<bfloat16>(bfloat16(-10.0f), bfloat16(10.0f), /*bins=*/64));
-  CALL_SUBTEST_14(check_histogram<SafeScalar<float>>(-10.0f, 10.0f, /*bins=*/1024));
-  CALL_SUBTEST_14(check_histogram<SafeScalar<half>>(half(-10.0f), half(10.0f), /*bins=*/512));
-  CALL_SUBTEST_14(check_histogram<SafeScalar<bfloat16>>(bfloat16(-10.0f), bfloat16(10.0f), /*bins=*/64));
 
   CALL_SUBTEST_15(check_histogram<float>(/*bins=*/1024));
   CALL_SUBTEST_15(check_histogram<double>(/*bins=*/1024));
   CALL_SUBTEST_15(check_histogram<long double>(/*bins=*/1024));
   CALL_SUBTEST_15(check_histogram<half>(/*bins=*/512));
   CALL_SUBTEST_15(check_histogram<bfloat16>(/*bins=*/64));
-  CALL_SUBTEST_15(check_histogram<SafeScalar<float>>(/*bins=*/1024));
-  CALL_SUBTEST_15(check_histogram<SafeScalar<half>>(/*bins=*/512));
-  CALL_SUBTEST_15(check_histogram<SafeScalar<bfloat16>>(/*bins=*/64));
 }
diff --git a/test/schur_real.cpp b/test/schur_real.cpp
index 4a9dd89..cd0be92 100644
--- a/test/schur_real.cpp
+++ b/test/schur_real.cpp
@@ -97,13 +97,6 @@
   }
 }
 
-void test_bug2633() {
-  Eigen::MatrixXd A(4, 4);
-  A << 0, 0, 0, -2, 1, 0, 0, -0, 0, 1, 0, 2, 0, 0, 2, -0;
-  RealSchur<Eigen::MatrixXd> schur(A);
-  VERIFY(schur.info() == Eigen::Success);
-}
-
 EIGEN_DECLARE_TEST(schur_real) {
   CALL_SUBTEST_1((schur<Matrix4f>()));
   CALL_SUBTEST_2((schur<MatrixXd>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE / 4))));
@@ -112,6 +105,4 @@
 
   // Test problem size constructors
   CALL_SUBTEST_5(RealSchur<MatrixXf>(10));
-
-  CALL_SUBTEST_6((test_bug2633()));
 }
diff --git a/test/simplicial_cholesky.cpp b/test/simplicial_cholesky.cpp
index ed93218..ca67496 100644
--- a/test/simplicial_cholesky.cpp
+++ b/test/simplicial_cholesky.cpp
@@ -20,12 +20,6 @@
   SimplicialLDLT<SparseMatrixType, Upper> ldlt_colmajor_upper_amd;
   SimplicialLDLT<SparseMatrixType, Lower, NaturalOrdering<I_> > ldlt_colmajor_lower_nat;
   SimplicialLDLT<SparseMatrixType, Upper, NaturalOrdering<I_> > ldlt_colmajor_upper_nat;
-  SimplicialNonHermitianLLT<SparseMatrixType, Lower> nhllt_colmajor_lower_amd;
-  SimplicialNonHermitianLLT<SparseMatrixType, Upper> nhllt_colmajor_upper_amd;
-  SimplicialNonHermitianLDLT<SparseMatrixType, Lower> nhldlt_colmajor_lower_amd;
-  SimplicialNonHermitianLDLT<SparseMatrixType, Upper> nhldlt_colmajor_upper_amd;
-  SimplicialNonHermitianLDLT<SparseMatrixType, Lower, NaturalOrdering<I_> > nhldlt_colmajor_lower_nat;
-  SimplicialNonHermitianLDLT<SparseMatrixType, Upper, NaturalOrdering<I_> > nhldlt_colmajor_upper_nat;
 
   check_sparse_spd_solving(chol_colmajor_lower_amd);
   check_sparse_spd_solving(chol_colmajor_upper_amd);
@@ -33,10 +27,6 @@
   check_sparse_spd_solving(llt_colmajor_upper_amd);
   check_sparse_spd_solving(ldlt_colmajor_lower_amd);
   check_sparse_spd_solving(ldlt_colmajor_upper_amd);
-  check_sparse_nonhermitian_solving(nhllt_colmajor_lower_amd);
-  check_sparse_nonhermitian_solving(nhllt_colmajor_upper_amd);
-  check_sparse_nonhermitian_solving(nhldlt_colmajor_lower_amd);
-  check_sparse_nonhermitian_solving(nhldlt_colmajor_upper_amd);
 
   check_sparse_spd_determinant(chol_colmajor_lower_amd);
   check_sparse_spd_determinant(chol_colmajor_upper_amd);
@@ -44,15 +34,9 @@
   check_sparse_spd_determinant(llt_colmajor_upper_amd);
   check_sparse_spd_determinant(ldlt_colmajor_lower_amd);
   check_sparse_spd_determinant(ldlt_colmajor_upper_amd);
-  check_sparse_nonhermitian_determinant(nhllt_colmajor_lower_amd);
-  check_sparse_nonhermitian_determinant(nhllt_colmajor_upper_amd);
-  check_sparse_nonhermitian_determinant(nhldlt_colmajor_lower_amd);
-  check_sparse_nonhermitian_determinant(nhldlt_colmajor_upper_amd);
 
   check_sparse_spd_solving(ldlt_colmajor_lower_nat, (std::min)(300, EIGEN_TEST_MAX_SIZE), 1000);
   check_sparse_spd_solving(ldlt_colmajor_upper_nat, (std::min)(300, EIGEN_TEST_MAX_SIZE), 1000);
-  check_sparse_nonhermitian_solving(nhldlt_colmajor_lower_nat, (std::min)(300, EIGEN_TEST_MAX_SIZE), 1000);
-  check_sparse_nonhermitian_solving(nhldlt_colmajor_upper_nat, (std::min)(300, EIGEN_TEST_MAX_SIZE), 1000);
 }
 
 EIGEN_DECLARE_TEST(simplicial_cholesky) {
diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp
index a9c6f4c..364aac0 100644
--- a/test/sparse_basic.cpp
+++ b/test/sparse_basic.cpp
@@ -39,7 +39,7 @@
   typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix;
   typedef Matrix<Scalar, Dynamic, 1> DenseVector;
   typedef Matrix<Scalar, Dynamic, Dynamic, SparseMatrixType::IsRowMajor ? RowMajor : ColMajor> CompatibleDenseMatrix;
-  Scalar eps = Scalar(1e-6);
+  Scalar eps = 1e-6;
 
   Scalar s1 = internal::random<Scalar>();
   {
@@ -948,27 +948,6 @@
     SparseMatrixType m2(rows, 0);
     m2.reserve(ArrayXi::Constant(m2.outerSize(), 1));
   }
-
-  // test move
-  {
-    using TransposedType = SparseMatrix<Scalar, SparseMatrixType::IsRowMajor ? ColMajor : RowMajor,
-                                        typename SparseMatrixType::StorageIndex>;
-    DenseMatrix refMat1 = DenseMatrix::Random(rows, cols);
-    SparseMatrixType m1(rows, cols);
-    initSparse<Scalar>(density, refMat1, m1);
-    // test move ctor
-    SparseMatrixType m2(std::move(m1));
-    VERIFY_IS_APPROX(m2, refMat1);
-    // test move assignment
-    m1 = std::move(m2);
-    VERIFY_IS_APPROX(m1, refMat1);
-    // test move ctor (SparseMatrixBase)
-    TransposedType m3(std::move(m1.transpose()));
-    VERIFY_IS_APPROX(m3, refMat1.transpose());
-    // test move assignment (SparseMatrixBase)
-    m2 = std::move(m3.transpose());
-    VERIFY_IS_APPROX(m2, refMat1);
-  }
 }
 
 template <typename SparseMatrixType>
@@ -1015,7 +994,7 @@
   g_dense_op_sparse_count = 0;  // Suppresses compiler warning.
   for (int i = 0; i < g_repeat; i++) {
     int r = Eigen::internal::random<int>(1, 200), c = Eigen::internal::random<int>(1, 200);
-    if (Eigen::internal::random<int>(0, 3) == 0) {
+    if (Eigen::internal::random<int>(0, 4) == 0) {
       r = c;  // check square matrices in 25% of tries
     }
     EIGEN_UNUSED_VARIABLE(r + c);
@@ -1032,7 +1011,7 @@
 
     r = Eigen::internal::random<int>(1, 100);
     c = Eigen::internal::random<int>(1, 100);
-    if (Eigen::internal::random<int>(0, 3) == 0) {
+    if (Eigen::internal::random<int>(0, 4) == 0) {
       r = c;  // check square matrices in 25% of tries
     }
 
diff --git a/test/sparse_solver.h b/test/sparse_solver.h
index 50cb463..033df83 100644
--- a/test/sparse_solver.h
+++ b/test/sparse_solver.h
@@ -484,96 +484,6 @@
   }
 }
 
-template <typename Solver, typename DenseMat>
-int generate_sparse_nonhermitian_problem(Solver&, typename Solver::MatrixType& A, typename Solver::MatrixType& halfA,
-                                         DenseMat& dA, int maxSize = 300) {
-  typedef typename Solver::MatrixType Mat;
-  typedef typename Mat::Scalar Scalar;
-  typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix;
-
-  int size = internal::random<int>(1, maxSize);
-  double density = (std::max)(8. / static_cast<double>(size * size), 0.01);
-
-  Mat M(size, size);
-  DenseMatrix dM(size, size);
-
-  initSparse<Scalar>(density, dM, M, ForceNonZeroDiag);
-
-  A = M * M.transpose();
-  dA = dM * dM.transpose();
-
-  halfA.resize(size, size);
-  if (Solver::UpLo == (Lower | Upper))
-    halfA = A;
-  else
-    halfA = A.template triangularView<Solver::UpLo>();
-
-  return size;
-}
-
-template <typename Solver>
-void check_sparse_nonhermitian_solving(Solver& solver, int maxSize = (std::min)(300, EIGEN_TEST_MAX_SIZE),
-                                       int maxRealWorldSize = 100000) {
-  typedef typename Solver::MatrixType Mat;
-  typedef typename Mat::Scalar Scalar;
-  typedef typename Mat::StorageIndex StorageIndex;
-  typedef SparseMatrix<Scalar, ColMajor, StorageIndex> SpMat;
-  typedef SparseVector<Scalar, 0, StorageIndex> SpVec;
-  typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix;
-  typedef Matrix<Scalar, Dynamic, 1> DenseVector;
-
-  // generate the problem
-  Mat A, halfA;
-  DenseMatrix dA;
-  for (int i = 0; i < g_repeat; i++) {
-    int size = generate_sparse_nonhermitian_problem(solver, A, halfA, dA, maxSize);
-
-    // generate the right hand sides
-    int rhsCols = internal::random<int>(1, 16);
-    double density = (std::max)(8. / static_cast<double>(size * rhsCols), 0.1);
-    SpMat B(size, rhsCols);
-    DenseVector b = DenseVector::Random(size);
-    DenseMatrix dB(size, rhsCols);
-    initSparse<Scalar>(density, dB, B, ForceNonZeroDiag);
-    SpVec c = B.col(0);
-    DenseVector dc = dB.col(0);
-
-    CALL_SUBTEST(check_sparse_solving(solver, A, b, dA, b));
-    CALL_SUBTEST(check_sparse_solving(solver, halfA, b, dA, b));
-    CALL_SUBTEST(check_sparse_solving(solver, A, dB, dA, dB));
-    CALL_SUBTEST(check_sparse_solving(solver, halfA, dB, dA, dB));
-    CALL_SUBTEST(check_sparse_solving(solver, A, B, dA, dB));
-    CALL_SUBTEST(check_sparse_solving(solver, halfA, B, dA, dB));
-    CALL_SUBTEST(check_sparse_solving(solver, A, c, dA, dc));
-    CALL_SUBTEST(check_sparse_solving(solver, halfA, c, dA, dc));
-
-    // check only once
-    if (i == 0) {
-      b = DenseVector::Zero(size);
-      check_sparse_solving(solver, A, b, dA, b);
-    }
-  }
-
-  EIGEN_UNUSED_VARIABLE(maxRealWorldSize);
-}
-
-template <typename Solver>
-void check_sparse_nonhermitian_determinant(Solver& solver) {
-  typedef typename Solver::MatrixType Mat;
-  typedef typename Mat::Scalar Scalar;
-  typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix;
-
-  // generate the problem
-  Mat A, halfA;
-  DenseMatrix dA;
-  generate_sparse_nonhermitian_problem(solver, A, halfA, dA, 30);
-
-  for (int i = 0; i < g_repeat; i++) {
-    check_sparse_determinant(solver, A, dA);
-    check_sparse_determinant(solver, halfA, dA);
-  }
-}
-
 template <typename Solver>
 void check_sparse_zero_matrix(Solver& solver) {
   typedef typename Solver::MatrixType Mat;
diff --git a/test/sparse_vector.cpp b/test/sparse_vector.cpp
index 8d47fb0..83ad324 100644
--- a/test/sparse_vector.cpp
+++ b/test/sparse_vector.cpp
@@ -108,33 +108,6 @@
   VERIFY_IS_APPROX(refV3 = v1.transpose(), v1.toDense());
   VERIFY_IS_APPROX(DenseVector(v1), v1.toDense());
 
-  // test move
-  {
-    SparseVectorType v3(std::move(v1));
-    VERIFY_IS_APPROX(v3, refV1);
-    v1 = v3;
-  }
-
-  {
-    SparseVectorType v3;
-    v3 = std::move(v1);
-    VERIFY_IS_APPROX(v3, refV1);
-    v1 = v3;
-  }
-
-  {
-    SparseVectorType v3(std::move(mv1));
-    VERIFY_IS_APPROX(v3, refV1);
-    mv1 = v3;
-  }
-
-  {
-    SparseVectorType v3;
-    v3 = std::move(mv1);
-    VERIFY_IS_APPROX(v3, refV1);
-    mv1 = v3;
-  }
-
   // test conservative resize
   {
     std::vector<StorageIndex> inc;
diff --git a/test/unaryview.cpp b/test/unaryview.cpp
deleted file mode 100644
index 58e95d6..0000000
--- a/test/unaryview.cpp
+++ /dev/null
@@ -1,109 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2021 Andrew Johnson <andrew.johnson@arjohnsonau.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#include "main.h"
-
-template <int OuterStride, int InnerStride, typename VectorType>
-void unaryview_stride(const VectorType& m) {
-  typedef typename VectorType::Scalar Scalar;
-  Index rows = m.rows();
-  Index cols = m.cols();
-  VectorType vec = VectorType::Random(rows, cols);
-
-  struct view_op {
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const Scalar& v) const { return v; }
-  };
-
-  CwiseUnaryView<view_op, VectorType, Stride<OuterStride, InnerStride>> vec_view(vec);
-  VERIFY(vec_view.outerStride() == (OuterStride == 0 ? 0 : OuterStride));
-  VERIFY(vec_view.innerStride() == (InnerStride == 0 ? 1 : InnerStride));
-}
-
-void test_mutable_unaryview() {
-  struct Vec3 {
-    double x;
-    double y;
-    double z;
-  };
-
-  Eigen::Vector<Vec3, 3> m;
-  auto x_view = m.unaryViewExpr([](Vec3& v) -> double& { return v.x; });
-  auto y_view = m.unaryViewExpr([](Vec3& v) -> double& { return v.y; });
-  auto z_view = m.unaryViewExpr([](Vec3& v) -> double& { return v.z; });
-
-  x_view.setConstant(1);
-  y_view.setConstant(2);
-  z_view.setConstant(3);
-
-  for (int i = 0; i < m.size(); ++i) {
-    VERIFY_IS_EQUAL(m(i).x, 1);
-    VERIFY_IS_EQUAL(m(i).y, 2);
-    VERIFY_IS_EQUAL(m(i).z, 3);
-  }
-}
-
-void test_unaryview_solve() {
-  // Random upper-triangular system.
-  Eigen::MatrixXd A = Eigen::MatrixXd::Random(5, 5);
-  A.triangularView<Eigen::Lower>().setZero();
-  A.diagonal().setRandom();
-  Eigen::VectorXd b = Eigen::VectorXd::Random(5);
-
-  struct trivial_view_op {
-    double& operator()(double& x) const { return x; }
-    const double& operator()(const double& x) const { return x; }
-  };
-
-  // Non-const view:
-  {
-    auto b_view = b.unaryViewExpr(trivial_view_op());
-    b_view(0) = 1;  // Allows modification.
-    Eigen::VectorXd x = A.triangularView<Eigen::Upper>().solve(b_view);
-    VERIFY_IS_APPROX(A * x, b);
-  }
-
-  // Const view:
-  {
-    const auto b_view = b.unaryViewExpr(trivial_view_op());
-    Eigen::VectorXd x = A.triangularView<Eigen::Upper>().solve(b_view);
-    VERIFY_IS_APPROX(A * x, b);
-  }
-
-  // Non-const view of const matrix:
-  {
-    const Eigen::VectorXd const_b = b;
-    auto b_view = const_b.unaryViewExpr(trivial_view_op());
-    Eigen::VectorXd x = A.triangularView<Eigen::Upper>().solve(b_view);
-    VERIFY_IS_APPROX(A * x, b);
-  }
-
-  // Const view of const matrix:
-  {
-    const Eigen::VectorXd const_b = b;
-    const auto b_view = const_b.unaryViewExpr(trivial_view_op());
-    Eigen::VectorXd x = A.triangularView<Eigen::Upper>().solve(b_view);
-    VERIFY_IS_APPROX(A * x, b);
-  }
-
-  // Eigen::MatrixXd out =
-  //       mat_in.real()
-  //             .triangularView<Eigen::Upper>()
-  //             .solve(mat_in.unaryViewExpr([&](const auto& x){ return std::real(x); }));
-}
-
-EIGEN_DECLARE_TEST(unaryviewstride) {
-  CALL_SUBTEST_1((unaryview_stride<1, 2>(MatrixXf())));
-  CALL_SUBTEST_1((unaryview_stride<0, 0>(MatrixXf())));
-  CALL_SUBTEST_2((unaryview_stride<1, 2>(VectorXf())));
-  CALL_SUBTEST_2((unaryview_stride<0, 0>(VectorXf())));
-  CALL_SUBTEST_3((unaryview_stride<1, 2>(RowVectorXf())));
-  CALL_SUBTEST_3((unaryview_stride<0, 0>(RowVectorXf())));
-  CALL_SUBTEST_4(test_mutable_unaryview());
-  CALL_SUBTEST_4(test_unaryview_solve());
-}
diff --git a/test/unaryviewstride.cpp b/test/unaryviewstride.cpp
new file mode 100644
index 0000000..490a5b7
--- /dev/null
+++ b/test/unaryviewstride.cpp
@@ -0,0 +1,35 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2021 Andrew Johnson <andrew.johnson@arjohnsonau.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+template <int OuterStride, int InnerStride, typename VectorType>
+void unaryview_stride(const VectorType& m) {
+  typedef typename VectorType::Scalar Scalar;
+  Index rows = m.rows();
+  Index cols = m.cols();
+  VectorType vec = VectorType::Random(rows, cols);
+
+  struct view_op {
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const Scalar& v) const { return v; }
+  };
+
+  CwiseUnaryView<view_op, VectorType, Stride<OuterStride, InnerStride>> vec_view(vec);
+  VERIFY(vec_view.outerStride() == (OuterStride == 0 ? 0 : OuterStride));
+  VERIFY(vec_view.innerStride() == (InnerStride == 0 ? 1 : InnerStride));
+}
+
+EIGEN_DECLARE_TEST(unaryviewstride) {
+  CALL_SUBTEST_1((unaryview_stride<1, 2>(MatrixXf())));
+  CALL_SUBTEST_1((unaryview_stride<0, 0>(MatrixXf())));
+  CALL_SUBTEST_2((unaryview_stride<1, 2>(VectorXf())));
+  CALL_SUBTEST_2((unaryview_stride<0, 0>(VectorXf())));
+  CALL_SUBTEST_3((unaryview_stride<1, 2>(RowVectorXf())));
+  CALL_SUBTEST_3((unaryview_stride<0, 0>(RowVectorXf())));
+}
diff --git a/unsupported/Eigen/AutoDiff b/unsupported/Eigen/AutoDiff
index 0480c69..45078bc 100644
--- a/unsupported/Eigen/AutoDiff
+++ b/unsupported/Eigen/AutoDiff
@@ -33,7 +33,6 @@
 #include "../../Eigen/src/Core/util/DisableStupidWarnings.h"
 
 // IWYU pragma: begin_exports
-#include "src/AutoDiff/CoherentPadOp.h"
 #include "src/AutoDiff/AutoDiffScalar.h"
 // #include "src/AutoDiff/AutoDiffVector.h"
 #include "src/AutoDiff/AutoDiffJacobian.h"
diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
index 9417469..3b9eff7 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
@@ -303,16 +303,12 @@
 
   /** Normal Dimension */
   EIGEN_DEVICE_FUNC void resize(const array<Index, NumIndices>& dimensions) {
-#ifndef EIGEN_NO_DEBUG
+    int i;
     Index size = Index(1);
-    for (int i = 0; i < NumIndices; i++) {
+    for (i = 0; i < NumIndices; i++) {
       internal::check_rows_cols_for_overflow<Dynamic, Dynamic, Dynamic>::run(size, dimensions[i]);
       size *= dimensions[i];
     }
-#else
-    Index size = internal::array_prod(dimensions);
-#endif
-
 #ifdef EIGEN_INITIALIZE_COEFFS
     bool size_changed = size != this->size();
     m_storage.resize(size, dimensions);
@@ -322,6 +318,15 @@
 #endif
   }
 
+  // Why this overload, DSizes is derived from array ??? //
+  EIGEN_DEVICE_FUNC void resize(const DSizes<Index, NumIndices>& dimensions) {
+    array<Index, NumIndices> dims;
+    for (int i = 0; i < NumIndices; ++i) {
+      dims[i] = dimensions[i];
+    }
+    resize(dims);
+  }
+
   EIGEN_DEVICE_FUNC void resize() {
     EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
     // Nothing to do: rank 0 tensors have fixed size
@@ -342,6 +347,7 @@
     resize(internal::customIndices2Array<Index, NumIndices>(dimensions));
   }
 
+#ifndef EIGEN_EMULATE_CXX11_META_H
   template <typename std::ptrdiff_t... Indices>
   EIGEN_DEVICE_FUNC void resize(const Sizes<Indices...>& dimensions) {
     array<Index, NumIndices> dims;
@@ -350,6 +356,16 @@
     }
     resize(dims);
   }
+#else
+  template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5>
+  EIGEN_DEVICE_FUNC void resize(const Sizes<V1, V2, V3, V4, V5>& dimensions) {
+    array<Index, NumIndices> dims;
+    for (int i = 0; i < NumIndices; ++i) {
+      dims[i] = static_cast<Index>(dimensions[i]);
+    }
+    resize(dims);
+  }
+#endif
 
 #ifdef EIGEN_TENSOR_PLUGIN
 #include EIGEN_TENSOR_PLUGIN
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index f88793e..f9f07d4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -999,9 +999,8 @@
     }
 
     // Returns a formatted tensor ready for printing to a stream
-    template<typename Format>
-    inline const TensorWithFormat<Derived,DerivedTraits::Layout,DerivedTraits::NumDimensions, Format> format(const Format& fmt) const {
-      return TensorWithFormat<Derived,DerivedTraits::Layout,DerivedTraits::NumDimensions, Format>(derived(), fmt);
+    inline const TensorWithFormat<Derived,DerivedTraits::Layout,DerivedTraits::NumDimensions> format(const TensorIOFormat& fmt) const {
+      return TensorWithFormat<Derived,DerivedTraits::Layout,DerivedTraits::NumDimensions>(derived(), fmt);
     }
 
     #ifdef EIGEN_READONLY_TENSORBASE_PLUGIN
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index 9ef4bbc..6c91d93 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -55,10 +55,12 @@
 struct is_input_scalar<Sizes<>> {
   static const bool value = true;
 };
+#ifndef EIGEN_EMULATE_CXX11_META_H
 template <typename std::ptrdiff_t... Indices>
 struct is_input_scalar<Sizes<Indices...>> {
-  static constexpr bool value = (Sizes<Indices...>::total_size == 1);
+  static const bool value = (Sizes<Indices...>::total_size == 1);
 };
+#endif
 
 }  // end namespace internal
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
index 780e896..ef553e0 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
@@ -1057,7 +1057,7 @@
     __syncthreads();
   }  // end loop over k
 
-#undef add_vals
+  #undef add_vals
 
   __syncthreads();
   Index horiz_base = (threadIdx.y / 4) * 8 + base_n;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
index 26984b6..0493fe9 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
@@ -898,8 +898,8 @@
         // num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " maxX: " << maxX << " shared_mem: "
         // << shared_mem << " in stream " << m_device.stream() << endl;
 
-        const array<Index, 1> indices{m_indices[0]};
-        const array<Index, 1> kernel_dims{m_kernelImpl.dimensions()[0]};
+        const array<Index, 1> indices(m_indices[0]);
+        const array<Index, 1> kernel_dims(m_kernelImpl.dimensions()[0]);
         internal::IndexMapper<Index, InputDims, 1, Layout> indexMapper(m_inputImpl.dimensions(), kernel_dims, indices);
         switch (kernel_size) {
           case 4: {
@@ -965,8 +965,8 @@
         // " num_blocks.z: " << num_blocks.z << " maxX: " << maxX << " maxY: " << maxY << " maxP: " << maxP << "
         // shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
 
-        const array<Index, 2> indices{m_indices[idxX], m_indices[idxY]};
-        const array<Index, 2> kernel_dims{m_kernelImpl.dimensions()[idxX], m_kernelImpl.dimensions()[idxY]};
+        const array<Index, 2> indices(m_indices[idxX], m_indices[idxY]);
+        const array<Index, 2> kernel_dims(m_kernelImpl.dimensions()[idxX], m_kernelImpl.dimensions()[idxY]);
         internal::IndexMapper<Index, InputDims, 2, Layout> indexMapper(m_inputImpl.dimensions(), kernel_dims, indices);
         switch (kernel_size_x) {
           case 4: {
@@ -1059,9 +1059,9 @@
         // block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y <<
         // " num_blocks.z: " << num_blocks.z  << " shared_mem: " << shared_mem << " in stream " << m_device.stream() <<
         // endl;
-        const array<Index, 3> indices{m_indices[idxX], m_indices[idxY], m_indices[idxZ]};
-        const array<Index, 3> kernel_dims{m_kernelImpl.dimensions()[idxX], m_kernelImpl.dimensions()[idxY],
-                                          m_kernelImpl.dimensions()[idxZ]};
+        const array<Index, 3> indices(m_indices[idxX], m_indices[idxY], m_indices[idxZ]);
+        const array<Index, 3> kernel_dims(m_kernelImpl.dimensions()[idxX], m_kernelImpl.dimensions()[idxY],
+                                          m_kernelImpl.dimensions()[idxZ]);
         internal::IndexMapper<Index, InputDims, 3, Layout> indexMapper(m_inputImpl.dimensions(), kernel_dims, indices);
 
         LAUNCH_GPU_KERNEL((EigenConvolutionKernel3D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims>),
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
index ae8f25f..4043e5e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
@@ -81,6 +81,7 @@
 }  // end namespace internal
 
 // Fixed size
+#ifndef EIGEN_EMULATE_CXX11_META_H
 template <typename std::ptrdiff_t... Indices>
 struct Sizes {
   typedef internal::numeric_list<std::ptrdiff_t, Indices...> Base;
@@ -132,6 +133,87 @@
 }
 }  // namespace internal
 
+#else
+
+template <std::ptrdiff_t n>
+struct non_zero_size {
+  typedef internal::type2val<std::ptrdiff_t, n> type;
+};
+template <>
+struct non_zero_size<0> {
+  typedef internal::null_type type;
+};
+
+template <std::ptrdiff_t V1 = 0, std::ptrdiff_t V2 = 0, std::ptrdiff_t V3 = 0, std::ptrdiff_t V4 = 0,
+          std::ptrdiff_t V5 = 0>
+struct Sizes {
+  typedef typename internal::make_type_list<typename non_zero_size<V1>::type, typename non_zero_size<V2>::type,
+                                            typename non_zero_size<V3>::type, typename non_zero_size<V4>::type,
+                                            typename non_zero_size<V5>::type>::type Base;
+  static const std::ptrdiff_t count = Base::count;
+  static const std::ptrdiff_t total_size = internal::arg_prod<Base>::value;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t rank() const { return count; }
+
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t TotalSize() { return internal::arg_prod<Base>::value; }
+
+  Sizes() {}
+  template <typename DenseIndex>
+  explicit Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
+    // todo: add assertion
+  }
+
+  template <typename T>
+  Sizes& operator=(const T& /*other*/) {
+    // add assertion failure if the size of other is different
+    return *this;
+  }
+
+  template <typename... DenseIndex>
+  Sizes(DenseIndex... /*indices*/) {}
+  explicit Sizes(std::initializer_list<std::ptrdiff_t>) {
+    // todo: add assertion
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index operator[](const Index index) const {
+    switch (index) {
+      case 0:
+        return internal::get<0, Base>::value;
+      case 1:
+        return internal::get<1, Base>::value;
+      case 2:
+        return internal::get<2, Base>::value;
+      case 3:
+        return internal::get<3, Base>::value;
+      case 4:
+        return internal::get<4, Base>::value;
+      default:
+        eigen_assert(false && "index overflow");
+        return static_cast<Index>(-1);
+    }
+  }
+
+  template <typename DenseIndex>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const {
+    return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(
+        indices, *reinterpret_cast<const Base*>(this));
+  }
+  template <typename DenseIndex>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const {
+    return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(
+        indices, *reinterpret_cast<const Base*>(this));
+  }
+};
+
+namespace internal {
+template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) {
+  return Sizes<V1, V2, V3, V4, V5>::total_size;
+}
+}  // namespace internal
+
+#endif
+
 // Boilerplate
 namespace internal {
 template <typename Index, std::ptrdiff_t NumIndices, std::ptrdiff_t n, bool RowMajor>
@@ -207,12 +289,21 @@
     }
   }
 
+#ifndef EIGEN_EMULATE_CXX11_META_H
   template <typename std::ptrdiff_t... Indices>
   EIGEN_DEVICE_FUNC DSizes(const Sizes<Indices...>& a) {
     for (int i = 0; i < NumDims; ++i) {
       (*this)[i] = a[i];
     }
   }
+#else
+  template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5>
+  EIGEN_DEVICE_FUNC DSizes(const Sizes<V1, V2, V3, V4, V5>& a) {
+    for (int i = 0; i < NumDims; ++i) {
+      (*this)[i] = a[i];
+    }
+  }
+#endif
 
   template <typename... IndexTypes>
       EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit DSizes(DenseIndex firstDimension, DenseIndex secondDimension,
@@ -283,6 +374,7 @@
 struct array_size<DSizes<DenseIndex, NumDims> > {
   static const ptrdiff_t value = NumDims;
 };
+#ifndef EIGEN_EMULATE_CXX11_META_H
 template <typename std::ptrdiff_t... Indices>
 struct array_size<const Sizes<Indices...> > {
   static const std::ptrdiff_t value = Sizes<Indices...>::count;
@@ -300,6 +392,22 @@
   eigen_assert(false && "should never be called");
   return -1;
 }
+#else
+template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5>
+struct array_size<const Sizes<V1, V2, V3, V4, V5> > {