Update Eigen to commit: 66f7f51b7e069d0a03a21157fa60b24aece69aeb

CHANGELOG
=========
66f7f51b7 - Disable fno-check-new on clang.
151f6127d - Fix Warray-bounds warning for fixed-size assignments
1d8b82b07 - Fix power builds for no VSX and no POWER8.
eb3f9f443 - refactor AssignmentEvaluator
9c211430b - Fix TensorRef details
22cd7307d - Remove assumption of std::complex for complex scalar types.
6b4881ad4 - Eliminate type-punning UB in Eigen::half.
420d891de - Add missing mathjax/latex configuration.
becefd59e - Returns condition number of zero if matrix is not invertible.
809d266b4 - Fix numerical issues with BiCGSTAB.
ef475f277 - Add missing graphviz to doc build.
a0591cbc9 - Fix doxygen-generated pages
715deac18 - Add EIGEN_CI_CTEST_ARGS to allow for custom timeout.
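
Note on "refactor AssignmentEvaluator" (eb3f9f443): the compile-time traits in
copy_using_evaluator_traits now use typed static constexpr members instead of
anonymous enums, and the sizes are combined from both the Src and Dst
expressions. A minimal sketch of the idiom only; the two structs below are
simplified stand-ins made up for illustration, not code from the patch:

  // Before: anonymous enum, every trait silently becomes an enumerator.
  struct traits_old {
    enum { SizeAtCompileTime = 4, MightVectorize = 1 };
  };

  // After: typed constants, booleans stay booleans, integer traits stay int.
  struct traits_new {
    static constexpr int SizeAtCompileTime = 4;
    static constexpr bool MightVectorize = true;
  };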

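Similarly, for "Remove assumption of std::complex for complex scalar types."
(22cd7307d): internal helpers such as abs2_impl now go through numext::real()
and numext::imag() instead of calling the x.real()/x.imag() members, and
complex_sqrt/complex_rsqrt/complex_log are templated on the complex type
itself. A rough, self-contained sketch of why that generalizes; the helper
squared_modulus is made up for illustration and is not part of Eigen or of
this patch:

  #include <Eigen/Core>
  #include <complex>

  template <typename Scalar>
  typename Eigen::NumTraits<Scalar>::Real squared_modulus(const Scalar& x) {
    // Same expression the patched abs2_impl uses; works for plain reals
    // (imag() == 0), std::complex, and custom complex-like scalars that
    // plug into Eigen's real/imag dispatch.
    return Eigen::numext::real(x) * Eigen::numext::real(x) +
           Eigen::numext::imag(x) * Eigen::numext::imag(x);
  }

  // squared_modulus(3.0)                        == 9.0
  // squared_modulus(std::complex<double>(3, 4)) == 25.0
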
PiperOrigin-RevId: 729330781
Change-Id: I16aadc1cb1ddf4816604ce120e933f9660da17f8
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 8954841..5a2a3ac 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -27,125 +27,115 @@
 
 // copy_using_evaluator_traits is based on assign_traits
 
-template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = -1>
+template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = Dynamic>
 struct copy_using_evaluator_traits {
-  typedef typename DstEvaluator::XprType Dst;
-  typedef typename Dst::Scalar DstScalar;
+  using Src = typename SrcEvaluator::XprType;
+  using Dst = typename DstEvaluator::XprType;
+  using DstScalar = typename Dst::Scalar;
 
-  enum { DstFlags = DstEvaluator::Flags, SrcFlags = SrcEvaluator::Flags };
+  static constexpr int DstFlags = DstEvaluator::Flags;
+  static constexpr int SrcFlags = SrcEvaluator::Flags;
 
  public:
-  enum {
-    DstAlignment = DstEvaluator::Alignment,
-    SrcAlignment = SrcEvaluator::Alignment,
-    DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
-    JointAlignment = plain_enum_min(DstAlignment, SrcAlignment)
-  };
-
- private:
-  enum {
-    InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
-                : int(DstFlags) & RowMajorBit   ? int(Dst::ColsAtCompileTime)
-                                                : int(Dst::RowsAtCompileTime),
-    InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
-                   : int(DstFlags) & RowMajorBit   ? int(Dst::MaxColsAtCompileTime)
-                                                   : int(Dst::MaxRowsAtCompileTime),
-    RestrictedInnerSize = min_size_prefer_fixed(InnerSize, MaxPacketSize),
-    RestrictedLinearSize = min_size_prefer_fixed(Dst::SizeAtCompileTime, MaxPacketSize),
-    OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
-    MaxSizeAtCompileTime = Dst::SizeAtCompileTime
-  };
+  static constexpr int DstAlignment = DstEvaluator::Alignment;
+  static constexpr int SrcAlignment = SrcEvaluator::Alignment;
+  static constexpr int JointAlignment = plain_enum_min(DstAlignment, SrcAlignment);
+  static constexpr bool DstHasDirectAccess = bool(DstFlags & DirectAccessBit);
+  static constexpr bool SrcIsRowMajor = bool(SrcFlags & RowMajorBit);
+  static constexpr bool DstIsRowMajor = bool(DstFlags & RowMajorBit);
+  static constexpr bool IsVectorAtCompileTime = Dst::IsVectorAtCompileTime;
+  static constexpr int RowsAtCompileTime = size_prefer_fixed(Src::RowsAtCompileTime, Dst::RowsAtCompileTime);
+  static constexpr int ColsAtCompileTime = size_prefer_fixed(Src::ColsAtCompileTime, Dst::ColsAtCompileTime);
+  static constexpr int SizeAtCompileTime = size_at_compile_time(RowsAtCompileTime, ColsAtCompileTime);
+  static constexpr int MaxRowsAtCompileTime =
+      min_size_prefer_fixed(Src::MaxRowsAtCompileTime, Dst::MaxRowsAtCompileTime);
+  static constexpr int MaxColsAtCompileTime =
+      min_size_prefer_fixed(Src::MaxColsAtCompileTime, Dst::MaxColsAtCompileTime);
+  static constexpr int MaxSizeAtCompileTime =
+      min_size_prefer_fixed(Src::MaxSizeAtCompileTime, Dst::MaxSizeAtCompileTime);
+  static constexpr int InnerSizeAtCompileTime = IsVectorAtCompileTime ? SizeAtCompileTime
+                                                : DstIsRowMajor       ? ColsAtCompileTime
+                                                                      : RowsAtCompileTime;
+  static constexpr int MaxInnerSizeAtCompileTime = IsVectorAtCompileTime ? MaxSizeAtCompileTime
+                                                   : DstIsRowMajor       ? MaxColsAtCompileTime
+                                                                         : MaxRowsAtCompileTime;
+  static constexpr int RestrictedInnerSize = min_size_prefer_fixed(MaxInnerSizeAtCompileTime, MaxPacketSize);
+  static constexpr int RestrictedLinearSize = min_size_prefer_fixed(MaxSizeAtCompileTime, MaxPacketSize);
+  static constexpr int OuterStride = outer_stride_at_compile_time<Dst>::ret;
 
   // TODO distinguish between linear traversal and inner-traversals
-  typedef typename find_best_packet<DstScalar, RestrictedLinearSize>::type LinearPacketType;
-  typedef typename find_best_packet<DstScalar, RestrictedInnerSize>::type InnerPacketType;
+  using LinearPacketType = typename find_best_packet<DstScalar, RestrictedLinearSize>::type;
+  using InnerPacketType = typename find_best_packet<DstScalar, RestrictedInnerSize>::type;
 
-  enum {
-    LinearPacketSize = unpacket_traits<LinearPacketType>::size,
-    InnerPacketSize = unpacket_traits<InnerPacketType>::size
-  };
+  static constexpr int LinearPacketSize = unpacket_traits<LinearPacketType>::size;
+  static constexpr int InnerPacketSize = unpacket_traits<InnerPacketType>::size;
 
  public:
-  enum {
-    LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
-    InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
-  };
+  static constexpr int LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment;
+  static constexpr int InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment;
 
  private:
-  enum {
-    DstIsRowMajor = DstFlags & RowMajorBit,
-    SrcIsRowMajor = SrcFlags & RowMajorBit,
-    StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
-    MightVectorize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) &&
-                     bool(functor_traits<AssignFunc>::PacketAccess),
-    MayInnerVectorize = MightVectorize && int(InnerSize) != Dynamic && int(InnerSize) % int(InnerPacketSize) == 0 &&
-                        int(OuterStride) != Dynamic && int(OuterStride) % int(InnerPacketSize) == 0 &&
-                        (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment) >= int(InnerRequiredAlignment)),
-    MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
-    MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess) &&
-                         (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment) >= int(LinearRequiredAlignment)) ||
-                          MaxSizeAtCompileTime == Dynamic),
-    /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
-       so it's only good for large enough sizes. */
-    MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess) &&
-                        (int(InnerMaxSize) == Dynamic ||
-                         int(InnerMaxSize) >= (EIGEN_UNALIGNED_VECTORIZE ? InnerPacketSize : (3 * InnerPacketSize)))
-    /* slice vectorization can be slow, so we only want it if the slices are big, which is
-       indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
-       in a fixed-size matrix
-       However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
-  };
+  static constexpr bool StorageOrdersAgree = DstIsRowMajor == SrcIsRowMajor;
+  static constexpr bool MightVectorize = StorageOrdersAgree && bool(DstFlags & SrcFlags & ActualPacketAccessBit) &&
+                                         bool(functor_traits<AssignFunc>::PacketAccess);
+  static constexpr bool MayInnerVectorize = MightVectorize && (InnerSizeAtCompileTime != Dynamic) &&
+                                            (InnerSizeAtCompileTime % InnerPacketSize == 0) &&
+                                            (OuterStride != Dynamic) && (OuterStride % InnerPacketSize == 0) &&
+                                            (EIGEN_UNALIGNED_VECTORIZE || JointAlignment >= InnerRequiredAlignment);
+  static constexpr bool MayLinearize = StorageOrdersAgree && (DstFlags & SrcFlags & LinearAccessBit);
+  static constexpr bool MayLinearVectorize =
+      MightVectorize && MayLinearize && DstHasDirectAccess &&
+      (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment) || MaxSizeAtCompileTime == Dynamic) &&
+      (MaxSizeAtCompileTime == Dynamic || MaxSizeAtCompileTime >= LinearPacketSize);
+  /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
+     so it's only good for large enough sizes. */
+  static constexpr int InnerSizeThreshold = (EIGEN_UNALIGNED_VECTORIZE ? 1 : 3) * InnerPacketSize;
+  static constexpr bool MaySliceVectorize =
+      MightVectorize && DstHasDirectAccess &&
+      (MaxInnerSizeAtCompileTime == Dynamic || MaxInnerSizeAtCompileTime >= InnerSizeThreshold);
+  /* slice vectorization can be slow, so we only want it if the slices are big, which is
+     indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
+     in a fixed-size matrix
+     However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
 
  public:
-  enum {
-    Traversal = int(Dst::SizeAtCompileTime) == 0
-                    ? int(AllAtOnceTraversal)  // If compile-size is zero, traversing will fail at compile-time.
-                : (int(MayLinearVectorize) && (LinearPacketSize > InnerPacketSize)) ? int(LinearVectorizedTraversal)
-                : int(MayInnerVectorize)                                            ? int(InnerVectorizedTraversal)
-                : int(MayLinearVectorize)                                           ? int(LinearVectorizedTraversal)
-                : int(MaySliceVectorize)                                            ? int(SliceVectorizedTraversal)
-                : int(MayLinearize)                                                 ? int(LinearTraversal)
-                                                                                    : int(DefaultTraversal),
-    Vectorized = int(Traversal) == InnerVectorizedTraversal || int(Traversal) == LinearVectorizedTraversal ||
-                 int(Traversal) == SliceVectorizedTraversal
-  };
+  static constexpr int Traversal = SizeAtCompileTime == 0 ? AllAtOnceTraversal
+                                   : (MayLinearVectorize && (LinearPacketSize > InnerPacketSize))
+                                       ? LinearVectorizedTraversal
+                                   : MayInnerVectorize  ? InnerVectorizedTraversal
+                                   : MayLinearVectorize ? LinearVectorizedTraversal
+                                   : MaySliceVectorize  ? SliceVectorizedTraversal
+                                   : MayLinearize       ? LinearTraversal
+                                                        : DefaultTraversal;
+  static constexpr bool Vectorized = Traversal == InnerVectorizedTraversal || Traversal == LinearVectorizedTraversal ||
+                                     Traversal == SliceVectorizedTraversal;
 
-  typedef std::conditional_t<int(Traversal) == LinearVectorizedTraversal, LinearPacketType, InnerPacketType> PacketType;
+  using PacketType = std::conditional_t<Traversal == LinearVectorizedTraversal, LinearPacketType, InnerPacketType>;
 
  private:
-  enum {
-    ActualPacketSize = int(Traversal) == LinearVectorizedTraversal ? LinearPacketSize
-                       : Vectorized                                ? InnerPacketSize
-                                                                   : 1,
-    UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
-    MayUnrollCompletely =
-        int(Dst::SizeAtCompileTime) != Dynamic &&
-        int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost)) <=
-            int(UnrollingLimit),
-    MayUnrollInner =
-        int(InnerSize) != Dynamic &&
-        int(InnerSize) * (int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
-  };
+  static constexpr int ActualPacketSize = Vectorized ? unpacket_traits<PacketType>::size : 1;
+  static constexpr int UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize;
+  static constexpr int CoeffReadCost = int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost);
+  static constexpr bool MayUnrollCompletely =
+      (SizeAtCompileTime != Dynamic) && (SizeAtCompileTime * CoeffReadCost <= UnrollingLimit);
+  static constexpr bool MayUnrollInner =
+      (InnerSizeAtCompileTime != Dynamic) && (InnerSizeAtCompileTime * CoeffReadCost <= UnrollingLimit);
 
  public:
-  enum {
-    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
-                    ? (int(MayUnrollCompletely) ? int(CompleteUnrolling)
-                       : int(MayUnrollInner)    ? int(InnerUnrolling)
-                                                : int(NoUnrolling))
-                : int(Traversal) == int(LinearVectorizedTraversal)
-                    ? (bool(MayUnrollCompletely) &&
-                               (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment) >= int(LinearRequiredAlignment)))
-                           ? int(CompleteUnrolling)
-                           : int(NoUnrolling))
-                : int(Traversal) == int(LinearTraversal)
-                    ? (bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling))
+  static constexpr int Unrolling =
+      (Traversal == InnerVectorizedTraversal || Traversal == DefaultTraversal)
+          ? (MayUnrollCompletely ? CompleteUnrolling
+             : MayUnrollInner    ? InnerUnrolling
+                                 : NoUnrolling)
+      : Traversal == LinearVectorizedTraversal
+          ? (MayUnrollCompletely && (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment))
+                 ? CompleteUnrolling
+                 : NoUnrolling)
+      : Traversal == LinearTraversal ? (MayUnrollCompletely ? CompleteUnrolling : NoUnrolling)
 #if EIGEN_UNALIGNED_VECTORIZE
-                : int(Traversal) == int(SliceVectorizedTraversal)
-                    ? (bool(MayUnrollInner) ? int(InnerUnrolling) : int(NoUnrolling))
+      : Traversal == SliceVectorizedTraversal ? (MayUnrollInner ? InnerUnrolling : NoUnrolling)
 #endif
-                    : int(NoUnrolling)
-  };
+                                              : NoUnrolling;
 
 #ifdef EIGEN_DEBUG_ASSIGN
   static void debug() {
@@ -162,8 +152,8 @@
     EIGEN_DEBUG_VAR(LinearRequiredAlignment)
     EIGEN_DEBUG_VAR(InnerRequiredAlignment)
     EIGEN_DEBUG_VAR(JointAlignment)
-    EIGEN_DEBUG_VAR(InnerSize)
-    EIGEN_DEBUG_VAR(InnerMaxSize)
+    EIGEN_DEBUG_VAR(InnerSizeAtCompileTime)
+    EIGEN_DEBUG_VAR(MaxInnerSizeAtCompileTime)
     EIGEN_DEBUG_VAR(LinearPacketSize)
     EIGEN_DEBUG_VAR(InnerPacketSize)
     EIGEN_DEBUG_VAR(ActualPacketSize)
@@ -196,17 +186,14 @@
 *** Default traversal ***
 ************************/
 
-template <typename Kernel, int Index, int Stop>
+template <typename Kernel, int Index_, int Stop>
 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling {
-  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
-  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
-  typedef typename DstEvaluatorType::XprType DstXprType;
-
-  enum { outer = Index / DstXprType::InnerSizeAtCompileTime, inner = Index % DstXprType::InnerSizeAtCompileTime };
+  static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime;
+  static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime;
 
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
-    kernel.assignCoeffByOuterInner(outer, inner);
-    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index + 1, Stop>::run(kernel);
+    kernel.assignCoeffByOuterInner(Outer, Inner);
+    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index_ + 1, Stop>::run(kernel);
   }
 };
 
@@ -232,11 +219,11 @@
 *** Linear traversal ***
 ***********************/
 
-template <typename Kernel, int Index, int Stop>
+template <typename Kernel, int Index_, int Stop>
 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling {
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
-    kernel.assignCoeff(Index);
-    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index + 1, Stop>::run(kernel);
+    kernel.assignCoeff(Index_);
+    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index_ + 1, Stop>::run(kernel);
   }
 };
 
@@ -249,23 +236,17 @@
 *** Inner vectorization ***
 **************************/
 
-template <typename Kernel, int Index, int Stop>
+template <typename Kernel, int Index_, int Stop>
 struct copy_using_evaluator_innervec_CompleteUnrolling {
-  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
-  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
-  typedef typename DstEvaluatorType::XprType DstXprType;
-  typedef typename Kernel::PacketType PacketType;
-
-  enum {
-    outer = Index / DstXprType::InnerSizeAtCompileTime,
-    inner = Index % DstXprType::InnerSizeAtCompileTime,
-    SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
-    DstAlignment = Kernel::AssignmentTraits::DstAlignment
-  };
+  using PacketType = typename Kernel::PacketType;
+  static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime;
+  static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime;
+  static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
+  static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
+  static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
 
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
-    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
-    enum { NextIndex = Index + unpacket_traits<PacketType>::size };
+    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(Outer, Inner);
     copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
   }
 };
@@ -277,10 +258,11 @@
 
 template <typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
 struct copy_using_evaluator_innervec_InnerUnrolling {
-  typedef typename Kernel::PacketType PacketType;
+  using PacketType = typename Kernel::PacketType;
+  static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
+
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) {
     kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
-    enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
     copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel,
                                                                                                            outer);
   }
@@ -308,9 +290,10 @@
 // Zero-sized assignment is a no-op.
 template <typename Kernel, int Unrolling>
 struct dense_assignment_loop<Kernel, AllAtOnceTraversal, Unrolling> {
+  static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
+
   EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE EIGEN_CONSTEXPR run(Kernel& /*kernel*/) {
-    EIGEN_STATIC_ASSERT(int(Kernel::DstEvaluatorType::XprType::SizeAtCompileTime) == 0,
-                        EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
+    EIGEN_STATIC_ASSERT(SizeAtCompileTime == 0, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
   }
 };
 
@@ -331,21 +314,21 @@
 
 template <typename Kernel>
 struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling> {
+  static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
+
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
-    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
-    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, SizeAtCompileTime>::run(kernel);
   }
 };
 
 template <typename Kernel>
 struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling> {
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
-    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+  static constexpr int InnerSizeAtCompileTime = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
 
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
     const Index outerSize = kernel.outerSize();
     for (Index outer = 0; outer < outerSize; ++outer)
-      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel,
-                                                                                                               outer);
+      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, InnerSizeAtCompileTime>::run(kernel, outer);
   }
 };
 
@@ -380,18 +363,15 @@
   }
 };
 
-template <typename Kernel, int Index, int Stop>
+template <typename Kernel, int Index_, int Stop>
 struct copy_using_evaluator_linearvec_CompleteUnrolling {
-  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
-  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
-  typedef typename DstEvaluatorType::XprType DstXprType;
-  typedef typename Kernel::PacketType PacketType;
-
-  enum { SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, DstAlignment = Kernel::AssignmentTraits::DstAlignment };
+  using PacketType = typename Kernel::PacketType;
+  static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
+  static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
+  static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
 
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
-    kernel.template assignPacket<DstAlignment, SrcAlignment, PacketType>(Index);
-    enum { NextIndex = Index + unpacket_traits<PacketType>::size };
+    kernel.template assignPacket<DstAlignment, SrcAlignment, PacketType>(Index_);
     copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
   }
 };
@@ -403,26 +383,24 @@
 
 template <typename Kernel>
 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling> {
+  using Scalar = typename Kernel::Scalar;
+  using PacketType = typename Kernel::PacketType;
+  static constexpr int PacketSize = unpacket_traits<PacketType>::size;
+  static constexpr int RequestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment;
+  static constexpr bool DstIsAligned = Kernel::AssignmentTraits::DstAlignment >= RequestedAlignment;
+  static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
+  static constexpr int DstAlignment =
+      packet_traits<Scalar>::AlignedOnScalar ? RequestedAlignment : Kernel::AssignmentTraits::DstAlignment;
+
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
     const Index size = kernel.size();
-    typedef typename Kernel::Scalar Scalar;
-    typedef typename Kernel::PacketType PacketType;
-    enum {
-      requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
-      packetSize = unpacket_traits<PacketType>::size,
-      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment) >= int(requestedAlignment),
-      dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
-                                                            : int(Kernel::AssignmentTraits::DstAlignment),
-      srcAlignment = Kernel::AssignmentTraits::JointAlignment
-    };
-    const Index alignedStart =
-        dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
-    const Index alignedEnd = alignedStart + ((size - alignedStart) / packetSize) * packetSize;
+    const Index alignedStart = DstIsAligned ? 0 : first_aligned<RequestedAlignment>(kernel.dstDataPtr(), size);
+    const Index alignedEnd = alignedStart + numext::round_down(size - alignedStart, PacketSize);
 
-    unaligned_dense_assignment_loop<dstIsAligned != 0>::run(kernel, 0, alignedStart);
+    unaligned_dense_assignment_loop<DstIsAligned>::run(kernel, 0, alignedStart);
 
-    for (Index index = alignedStart; index < alignedEnd; index += packetSize)
-      kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
+    for (Index index = alignedStart; index < alignedEnd; index += PacketSize)
+      kernel.template assignPacket<DstAlignment, SrcAlignment, PacketType>(index);
 
     unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
   }
@@ -430,18 +408,14 @@
 
 template <typename Kernel>
 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling> {
+  using PacketType = typename Kernel::PacketType;
+  static constexpr int PacketSize = unpacket_traits<PacketType>::size;
+  static constexpr int Size = Kernel::AssignmentTraits::SizeAtCompileTime;
+  static constexpr int AlignedSize = numext::round_down(Size, PacketSize);
+
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
-    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
-    typedef typename Kernel::PacketType PacketType;
-
-    enum {
-      size = DstXprType::SizeAtCompileTime,
-      packetSize = unpacket_traits<PacketType>::size,
-      alignedSize = (int(size) / packetSize) * packetSize
-    };
-
-    copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
-    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
+    copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, 0, AlignedSize>::run(kernel);
+    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, AlignedSize, Size>::run(kernel);
   }
 };
 
@@ -451,35 +425,40 @@
 
 template <typename Kernel>
 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling> {
-  typedef typename Kernel::PacketType PacketType;
-  enum { SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, DstAlignment = Kernel::AssignmentTraits::DstAlignment };
+  using PacketType = typename Kernel::PacketType;
+  static constexpr int PacketSize = unpacket_traits<PacketType>::size;
+  static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
+  static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
+
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
     const Index innerSize = kernel.innerSize();
     const Index outerSize = kernel.outerSize();
-    const Index packetSize = unpacket_traits<PacketType>::size;
     for (Index outer = 0; outer < outerSize; ++outer)
-      for (Index inner = 0; inner < innerSize; inner += packetSize)
+      for (Index inner = 0; inner < innerSize; inner += PacketSize)
         kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
   }
 };
 
 template <typename Kernel>
 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling> {
+  static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
+
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
-    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
-    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, SizeAtCompileTime>::run(kernel);
   }
 };
 
 template <typename Kernel>
 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling> {
+  static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
+  static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
+  static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
+
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
-    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
-    typedef typename Kernel::AssignmentTraits Traits;
     const Index outerSize = kernel.outerSize();
     for (Index outer = 0; outer < outerSize; ++outer)
-      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime, Traits::SrcAlignment,
-                                                   Traits::DstAlignment>::run(kernel, outer);
+      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, InnerSize, SrcAlignment, DstAlignment>::run(kernel,
+                                                                                                          outer);
   }
 };
 
@@ -498,8 +477,8 @@
 template <typename Kernel>
 struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling> {
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
-    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
-    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, Kernel::AssignmentTraits::SizeAtCompileTime>::run(
+        kernel);
   }
 };
 
@@ -509,42 +488,40 @@
 
 template <typename Kernel>
 struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling> {
+  using Scalar = typename Kernel::Scalar;
+  using PacketType = typename Kernel::PacketType;
+  static constexpr int PacketSize = unpacket_traits<PacketType>::size;
+  static constexpr int RequestedAlignment = Kernel::AssignmentTraits::InnerRequiredAlignment;
+  static constexpr bool Alignable =
+      packet_traits<Scalar>::AlignedOnScalar || Kernel::AssignmentTraits::DstAlignment >= sizeof(Scalar);
+  static constexpr bool DstIsAligned = Kernel::AssignmentTraits::DstAlignment >= RequestedAlignment;
+  static constexpr int DstAlignment = Alignable ? RequestedAlignment : Kernel::AssignmentTraits::DstAlignment;
+
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
-    typedef typename Kernel::Scalar Scalar;
-    typedef typename Kernel::PacketType PacketType;
-    enum {
-      packetSize = unpacket_traits<PacketType>::size,
-      requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
-      alignable =
-          packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment) >= sizeof(Scalar),
-      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment) >= int(requestedAlignment),
-      dstAlignment = alignable ? int(requestedAlignment) : int(Kernel::AssignmentTraits::DstAlignment)
-    };
     const Scalar* dst_ptr = kernel.dstDataPtr();
-    if ((!bool(dstIsAligned)) && (std::uintptr_t(dst_ptr) % sizeof(Scalar)) > 0) {
+    if ((!DstIsAligned) && (std::uintptr_t(dst_ptr) % sizeof(Scalar)) > 0) {
       // the pointer is not aligned-on scalar, so alignment is not possible
       return dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>::run(kernel);
     }
-    const Index packetAlignedMask = packetSize - 1;
     const Index innerSize = kernel.innerSize();
     const Index outerSize = kernel.outerSize();
-    const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
+    const Index alignedStep = Alignable ? (PacketSize - kernel.outerStride() % PacketSize) % PacketSize : 0;
     Index alignedStart =
-        ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
+        ((!Alignable) || DstIsAligned) ? 0 : internal::first_aligned<RequestedAlignment>(dst_ptr, innerSize);
 
     for (Index outer = 0; outer < outerSize; ++outer) {
-      const Index alignedEnd = alignedStart + ((innerSize - alignedStart) & ~packetAlignedMask);
+      const Index alignedEnd = alignedStart + numext::round_down(innerSize - alignedStart, PacketSize);
       // do the non-vectorizable part of the assignment
       for (Index inner = 0; inner < alignedStart; ++inner) kernel.assignCoeffByOuterInner(outer, inner);
 
       // do the vectorizable part of the assignment
-      for (Index inner = alignedStart; inner < alignedEnd; inner += packetSize)
-        kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
+      for (Index inner = alignedStart; inner < alignedEnd; inner += PacketSize)
+        kernel.template assignPacketByOuterInner<DstAlignment, Unaligned, PacketType>(outer, inner);
 
       // do the non-vectorizable part of the assignment
       for (Index inner = alignedEnd; inner < innerSize; ++inner) kernel.assignCoeffByOuterInner(outer, inner);
 
-      alignedStart = numext::mini((alignedStart + alignedStep) % packetSize, innerSize);
+      alignedStart = numext::mini((alignedStart + alignedStep) % PacketSize, innerSize);
     }
   }
 };
@@ -552,20 +529,15 @@
 #if EIGEN_UNALIGNED_VECTORIZE
 template <typename Kernel>
 struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling> {
+  using PacketType = typename Kernel::PacketType;
+  static constexpr int PacketSize = unpacket_traits<PacketType>::size;
+  static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
+  static constexpr int VectorizableSize = numext::round_down(InnerSize, PacketSize);
+
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
-    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
-    typedef typename Kernel::PacketType PacketType;
-
-    enum {
-      innerSize = DstXprType::InnerSizeAtCompileTime,
-      packetSize = unpacket_traits<PacketType>::size,
-      vectorizableSize = (int(innerSize) / int(packetSize)) * int(packetSize),
-      size = DstXprType::SizeAtCompileTime
-    };
-
     for (Index outer = 0; outer < kernel.outerSize(); ++outer) {
-      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
-      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, innerSize>::run(kernel, outer);
+      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, VectorizableSize, 0, 0>::run(kernel, outer);
+      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, VectorizableSize, InnerSize>::run(kernel, outer);
     }
   }
 };
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index c081499..528aed2 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -182,6 +182,35 @@
   typedef typename NumTraits<Scalar>::Real& type;
 };
 
+}  // namespace internal
+
+namespace numext {
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x) {
+  return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline internal::add_const_on_value_type_t<EIGEN_MATHFUNC_RETVAL(real_ref, Scalar)> real_ref(
+    const Scalar& x) {
+  return internal::real_ref_impl<Scalar>::run(x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x) {
+  return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x) {
+  return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x);
+}
+
+}  // namespace numext
+
+namespace internal {
+
 /****************************************************************************
  * Implementation of conj                                                 *
  ****************************************************************************/
@@ -221,7 +250,9 @@
 struct abs2_impl_default<Scalar, true>  // IsComplex
 {
   typedef typename NumTraits<Scalar>::Real RealScalar;
-  EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { return x.real() * x.real() + x.imag() * x.imag(); }
+  EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) {
+    return numext::real(x) * numext::real(x) + numext::imag(x) * numext::imag(x);
+  }
 };
 
 template <typename Scalar>
@@ -250,16 +281,14 @@
 };
 
 // Complex sqrt defined in MathFunctionsImpl.h.
-template <typename T>
-EIGEN_DEVICE_FUNC std::complex<T> complex_sqrt(const std::complex<T>& a_x);
+template <typename ComplexT>
+EIGEN_DEVICE_FUNC ComplexT complex_sqrt(const ComplexT& a_x);
 
 // Custom implementation is faster than `std::sqrt`, works on
 // GPU, and correctly handles special cases (unlike MSVC).
 template <typename T>
 struct sqrt_impl<std::complex<T>> {
-  EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& x) {
-    return complex_sqrt<T>(x);
-  }
+  EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& x) { return complex_sqrt(x); }
 };
 
 template <typename Scalar>
@@ -272,13 +301,13 @@
 struct rsqrt_impl;
 
 // Complex rsqrt defined in MathFunctionsImpl.h.
-template <typename T>
-EIGEN_DEVICE_FUNC std::complex<T> complex_rsqrt(const std::complex<T>& a_x);
+template <typename ComplexT>
+EIGEN_DEVICE_FUNC ComplexT complex_rsqrt(const ComplexT& a_x);
 
 template <typename T>
 struct rsqrt_impl<std::complex<T>> {
   EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& x) {
-    return complex_rsqrt<T>(x);
+    return complex_rsqrt(x);
   }
 };
 
@@ -299,7 +328,7 @@
   typedef typename NumTraits<Scalar>::Real RealScalar;
   EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) {
     EIGEN_USING_STD(abs);
-    return abs(x.real()) + abs(x.imag());
+    return abs(numext::real(x)) + abs(numext::imag(x));
   }
 };
 
@@ -469,8 +498,8 @@
  ****************************************************************************/
 
 // Complex log defined in MathFunctionsImpl.h.
-template <typename T>
-EIGEN_DEVICE_FUNC std::complex<T> complex_log(const std::complex<T>& z);
+template <typename ComplexT>
+EIGEN_DEVICE_FUNC ComplexT complex_log(const ComplexT& z);
 
 template <typename Scalar>
 struct log_impl {
@@ -846,7 +875,7 @@
     real_type aa = abs(a);
     if (aa == real_type(0)) return Scalar(0);
     aa = real_type(1) / aa;
-    return Scalar(a.real() * aa, a.imag() * aa);
+    return Scalar(numext::real(a) * aa, numext::imag(a) * aa);
   }
 };
 
@@ -1043,27 +1072,6 @@
 #endif
 
 template <typename Scalar>
-EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x) {
-  return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);
-}
-
-template <typename Scalar>
-EIGEN_DEVICE_FUNC inline internal::add_const_on_value_type_t<EIGEN_MATHFUNC_RETVAL(real_ref, Scalar)> real_ref(
-    const Scalar& x) {
-  return internal::real_ref_impl<Scalar>::run(x);
-}
-
-template <typename Scalar>
-EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x) {
-  return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x);
-}
-
-template <typename Scalar>
-EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x) {
-  return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x);
-}
-
-template <typename Scalar>
 EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x) {
   return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x);
 }
diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h
index 10ddabd..8e2705b 100644
--- a/Eigen/src/Core/MathFunctionsImpl.h
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -171,8 +171,8 @@
 
 // Generic complex sqrt implementation that correctly handles corner cases
 // according to https://en.cppreference.com/w/cpp/numeric/complex/sqrt
-template <typename T>
-EIGEN_DEVICE_FUNC std::complex<T> complex_sqrt(const std::complex<T>& z) {
+template <typename ComplexT>
+EIGEN_DEVICE_FUNC ComplexT complex_sqrt(const ComplexT& z) {
   // Computes the principal sqrt of the input.
   //
   // For a complex square root of the number x + i*y. We want to find real
@@ -194,21 +194,21 @@
   //   if x == 0: u = w, v = sign(y) * w
   //   if x > 0:  u = w, v = y / (2 * w)
   //   if x < 0:  u = |y| / (2 * w), v = sign(y) * w
-
+  using T = typename NumTraits<ComplexT>::Real;
   const T x = numext::real(z);
   const T y = numext::imag(z);
   const T zero = T(0);
   const T w = numext::sqrt(T(0.5) * (numext::abs(x) + numext::hypot(x, y)));
 
-  return (numext::isinf)(y)           ? std::complex<T>(NumTraits<T>::infinity(), y)
-         : numext::is_exactly_zero(x) ? std::complex<T>(w, y < zero ? -w : w)
-         : x > zero                   ? std::complex<T>(w, y / (2 * w))
-                                      : std::complex<T>(numext::abs(y) / (2 * w), y < zero ? -w : w);
+  return (numext::isinf)(y)           ? ComplexT(NumTraits<T>::infinity(), y)
+         : numext::is_exactly_zero(x) ? ComplexT(w, y < zero ? -w : w)
+         : x > zero                   ? ComplexT(w, y / (2 * w))
+                                      : ComplexT(numext::abs(y) / (2 * w), y < zero ? -w : w);
 }
 
 // Generic complex rsqrt implementation.
-template <typename T>
-EIGEN_DEVICE_FUNC std::complex<T> complex_rsqrt(const std::complex<T>& z) {
+template <typename ComplexT>
+EIGEN_DEVICE_FUNC ComplexT complex_rsqrt(const ComplexT& z) {
   // Computes the principal reciprocal sqrt of the input.
   //
   // For a complex reciprocal square root of the number z = x + i*y. We want to
@@ -230,7 +230,7 @@
   //   if x == 0: u = w / |z|, v = -sign(y) * w / |z|
   //   if x > 0:  u = w / |z|, v = -y / (2 * w * |z|)
   //   if x < 0:  u = |y| / (2 * w * |z|), v = -sign(y) * w / |z|
-
+  using T = typename NumTraits<ComplexT>::Real;
   const T x = numext::real(z);
   const T y = numext::imag(z);
   const T zero = T(0);
@@ -239,20 +239,21 @@
   const T w = numext::sqrt(T(0.5) * (numext::abs(x) + abs_z));
   const T woz = w / abs_z;
   // Corner cases consistent with 1/sqrt(z) on gcc/clang.
-  return numext::is_exactly_zero(abs_z) ? std::complex<T>(NumTraits<T>::infinity(), NumTraits<T>::quiet_NaN())
-         : ((numext::isinf)(x) || (numext::isinf)(y)) ? std::complex<T>(zero, zero)
-         : numext::is_exactly_zero(x)                 ? std::complex<T>(woz, y < zero ? woz : -woz)
-         : x > zero                                   ? std::complex<T>(woz, -y / (2 * w * abs_z))
-                    : std::complex<T>(numext::abs(y) / (2 * w * abs_z), y < zero ? woz : -woz);
+  return numext::is_exactly_zero(abs_z)               ? ComplexT(NumTraits<T>::infinity(), NumTraits<T>::quiet_NaN())
+         : ((numext::isinf)(x) || (numext::isinf)(y)) ? ComplexT(zero, zero)
+         : numext::is_exactly_zero(x)                 ? ComplexT(woz, y < zero ? woz : -woz)
+         : x > zero                                   ? ComplexT(woz, -y / (2 * w * abs_z))
+                    : ComplexT(numext::abs(y) / (2 * w * abs_z), y < zero ? woz : -woz);
 }
 
-template <typename T>
-EIGEN_DEVICE_FUNC std::complex<T> complex_log(const std::complex<T>& z) {
+template <typename ComplexT>
+EIGEN_DEVICE_FUNC ComplexT complex_log(const ComplexT& z) {
   // Computes complex log.
+  using T = typename NumTraits<ComplexT>::Real;
   T a = numext::abs(z);
   EIGEN_USING_STD(atan2);
   T b = atan2(z.imag(), z.real());
-  return std::complex<T>(numext::log(a), b);
+  return ComplexT(numext::log(a), b);
 }
 
 }  // end namespace internal
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 5466a57..8d5c47e 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -112,7 +112,7 @@
                              ConstTransposeReturnType>
       AdjointReturnType;
   /** \internal Return type of eigenvalues() */
-  typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor>
+  typedef Matrix<internal::make_complex_t<Scalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor>
       EigenvaluesReturnType;
   /** \internal the return type of identity */
   typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>, PlainObject> IdentityReturnType;
@@ -468,7 +468,7 @@
   EIGEN_MATRIX_FUNCTION(MatrixSquareRootReturnValue, sqrt, square root)
   EIGEN_MATRIX_FUNCTION(MatrixLogarithmReturnValue, log, logarithm)
   EIGEN_MATRIX_FUNCTION_1(MatrixPowerReturnValue, pow, power to \c p, const RealScalar& p)
-  EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const std::complex<RealScalar>& p)
+  EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const internal::make_complex_t<Scalar>& p)
 
  protected:
   EIGEN_DEFAULT_COPY_CONSTRUCTOR(MatrixBase)
diff --git a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
index df5a0ef..038e233 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
@@ -1,877 +1,877 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-//
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_PACKET_MATH_FP16_AVX512_H
-#define EIGEN_PACKET_MATH_FP16_AVX512_H
-
-// IWYU pragma: private
-#include "../../InternalHeaderCheck.h"
-
-namespace Eigen {
-
-namespace internal {
-
-typedef __m512h Packet32h;
-typedef eigen_packet_wrapper<__m256i, 1> Packet16h;
-typedef eigen_packet_wrapper<__m128i, 2> Packet8h;
-
-template <>
-struct is_arithmetic<Packet8h> {
-  enum { value = true };
-};
-
-template <>
-struct packet_traits<half> : default_packet_traits {
-  typedef Packet32h type;
-  typedef Packet16h half;
-  enum {
-    Vectorizable = 1,
-    AlignedOnScalar = 1,
-    size = 32,
-
-    HasCmp = 1,
-    HasAdd = 1,
-    HasSub = 1,
-    HasMul = 1,
-    HasDiv = 1,
-    HasNegate = 1,
-    HasAbs = 1,
-    HasAbs2 = 0,
-    HasMin = 1,
-    HasMax = 1,
-    HasConj = 1,
-    HasSetLinear = 0,
-    HasLog = 1,
-    HasLog1p = 1,
-    HasExp = 1,
-    HasExpm1 = 1,
-    HasSqrt = 1,
-    HasRsqrt = 1,
-    // These ones should be implemented in future
-    HasBessel = 0,
-    HasNdtri = 0,
-    HasSin = EIGEN_FAST_MATH,
-    HasCos = EIGEN_FAST_MATH,
-    HasTanh = EIGEN_FAST_MATH,
-    HasErf = 0,  // EIGEN_FAST_MATH,
-    HasBlend = 0
-  };
-};
-
-template <>
-struct unpacket_traits<Packet32h> {
-  typedef Eigen::half type;
-  typedef Packet16h half;
-  enum {
-    size = 32,
-    alignment = Aligned64,
-    vectorizable = true,
-    masked_load_available = false,
-    masked_store_available = false
-  };
-};
-
-template <>
-struct unpacket_traits<Packet16h> {
-  typedef Eigen::half type;
-  typedef Packet8h half;
-  enum {
-    size = 16,
-    alignment = Aligned32,
-    vectorizable = true,
-    masked_load_available = false,
-    masked_store_available = false
-  };
-};
-
-template <>
-struct unpacket_traits<Packet8h> {
-  typedef Eigen::half type;
-  typedef Packet8h half;
-  enum {
-    size = 8,
-    alignment = Aligned16,
-    vectorizable = true,
-    masked_load_available = false,
-    masked_store_available = false
-  };
-};
-
-// Memory functions
-
-// pset1
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pset1<Packet32h>(const Eigen::half& from) {
-  // half/half_raw is bit compatible
-  return _mm512_set1_ph(numext::bit_cast<_Float16>(from));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pzero(const Packet32h& /*a*/) {
-  return _mm512_setzero_ph();
-}
-
-// pset1frombits
-template <>
-EIGEN_STRONG_INLINE Packet32h pset1frombits<Packet32h>(unsigned short from) {
-  return _mm512_castsi512_ph(_mm512_set1_epi16(from));
-}
-
-// pfirst
-
-template <>
-EIGEN_STRONG_INLINE Eigen::half pfirst<Packet32h>(const Packet32h& from) {
-#ifdef EIGEN_VECTORIZE_AVX512DQ
-  return half_impl::raw_uint16_to_half(
-      static_cast<unsigned short>(_mm256_extract_epi16(_mm512_extracti32x8_epi32(_mm512_castph_si512(from), 0), 0)));
-#else
-  Eigen::half dest[32];
-  _mm512_storeu_ph(dest, from);
-  return dest[0];
-#endif
-}
-
-// pload
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pload<Packet32h>(const Eigen::half* from) {
-  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ph(from);
-}
-
-// ploadu
-
-template <>
-EIGEN_STRONG_INLINE Packet32h ploadu<Packet32h>(const Eigen::half* from) {
-  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ph(from);
-}
-
-// pstore
-
-template <>
-EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet32h& from) {
-  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ph(to, from);
-}
-
-// pstoreu
-
-template <>
-EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet32h& from) {
-  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ph(to, from);
-}
-
-// ploaddup
-template <>
-EIGEN_STRONG_INLINE Packet32h ploaddup<Packet32h>(const Eigen::half* from) {
-  __m512h a = _mm512_castph256_ph512(_mm256_loadu_ph(from));
-  return _mm512_permutexvar_ph(_mm512_set_epi16(15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6,
-                                                5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0),
-                               a);
-}
-
-// ploadquad
-template <>
-EIGEN_STRONG_INLINE Packet32h ploadquad<Packet32h>(const Eigen::half* from) {
-  __m512h a = _mm512_castph128_ph512(_mm_loadu_ph(from));
-  return _mm512_permutexvar_ph(
-      _mm512_set_epi16(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0),
-      a);
-}
-
-// pabs
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pabs<Packet32h>(const Packet32h& a) {
-  return _mm512_abs_ph(a);
-}
-
-// psignbit
-
-template <>
-EIGEN_STRONG_INLINE Packet32h psignbit<Packet32h>(const Packet32h& a) {
-  return _mm512_castsi512_ph(_mm512_srai_epi16(_mm512_castph_si512(a), 15));
-}
-
-// pmin
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmin<Packet32h>(const Packet32h& a, const Packet32h& b) {
-  return _mm512_min_ph(a, b);
-}
-
-// pmax
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmax<Packet32h>(const Packet32h& a, const Packet32h& b) {
-  return _mm512_max_ph(a, b);
-}
-
-// plset
-template <>
-EIGEN_STRONG_INLINE Packet32h plset<Packet32h>(const half& a) {
-  return _mm512_add_ph(pset1<Packet32h>(a), _mm512_set_ph(31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
-                                                          16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
-}
-
-// por
-
-template <>
-EIGEN_STRONG_INLINE Packet32h por(const Packet32h& a, const Packet32h& b) {
-  return _mm512_castsi512_ph(_mm512_or_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
-}
-
-// pxor
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pxor(const Packet32h& a, const Packet32h& b) {
-  return _mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
-}
-
-// pand
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pand(const Packet32h& a, const Packet32h& b) {
-  return _mm512_castsi512_ph(_mm512_and_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
-}
-
-// pandnot
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pandnot(const Packet32h& a, const Packet32h& b) {
-  return _mm512_castsi512_ph(_mm512_andnot_si512(_mm512_castph_si512(b), _mm512_castph_si512(a)));
-}
-
-// pselect
-
-template <>
-EIGEN_DEVICE_FUNC inline Packet32h pselect(const Packet32h& mask, const Packet32h& a, const Packet32h& b) {
-  __mmask32 mask32 = _mm512_cmp_epi16_mask(_mm512_castph_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ);
-  return _mm512_mask_blend_ph(mask32, a, b);
-}
-
-// pcmp_eq
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pcmp_eq(const Packet32h& a, const Packet32h& b) {
-  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_EQ_OQ);
-  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, static_cast<short>(0xffffu)));
-}
-
-// pcmp_le
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pcmp_le(const Packet32h& a, const Packet32h& b) {
-  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LE_OQ);
-  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, static_cast<short>(0xffffu)));
-}
-
-// pcmp_lt
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pcmp_lt(const Packet32h& a, const Packet32h& b) {
-  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LT_OQ);
-  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, static_cast<short>(0xffffu)));
-}
-
-// pcmp_lt_or_nan
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pcmp_lt_or_nan(const Packet32h& a, const Packet32h& b) {
-  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_NGE_UQ);
-  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi16(0), mask, static_cast<short>(0xffffu)));
-}
-
-// padd
-
-template <>
-EIGEN_STRONG_INLINE Packet32h padd<Packet32h>(const Packet32h& a, const Packet32h& b) {
-  return _mm512_add_ph(a, b);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) {
-  return _mm256_castph_si256(_mm256_add_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {
-  return _mm_castph_si128(_mm_add_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
-}
-
-// psub
-
-template <>
-EIGEN_STRONG_INLINE Packet32h psub<Packet32h>(const Packet32h& a, const Packet32h& b) {
-  return _mm512_sub_ph(a, b);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h psub<Packet16h>(const Packet16h& a, const Packet16h& b) {
-  return _mm256_castph_si256(_mm256_sub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h psub<Packet8h>(const Packet8h& a, const Packet8h& b) {
-  return _mm_castph_si128(_mm_sub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
-}
-
-// pmul
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmul<Packet32h>(const Packet32h& a, const Packet32h& b) {
-  return _mm512_mul_ph(a, b);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) {
-  return _mm256_castph_si256(_mm256_mul_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) {
-  return _mm_castph_si128(_mm_mul_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
-}
-
-// pdiv
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pdiv<Packet32h>(const Packet32h& a, const Packet32h& b) {
-  return _mm512_div_ph(a, b);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pdiv<Packet16h>(const Packet16h& a, const Packet16h& b) {
-  return _mm256_castph_si256(_mm256_div_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const Packet8h& b) {
-  return _mm_castph_si128(_mm_div_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
-}
-
-// pround
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pround<Packet32h>(const Packet32h& a) {
-  // Work-around for default std::round rounding mode.
-
-  // Mask for the sign bit
-  const Packet32h signMask = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x8000u));
-  // The largest half-preicision float less than 0.5
-  const Packet32h prev0dot5 = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x37FFu));
-
-  return _mm512_roundscale_ph(padd(por(pand(a, signMask), prev0dot5), a), _MM_FROUND_TO_ZERO);
-}
-
-// print
-
-template <>
-EIGEN_STRONG_INLINE Packet32h print<Packet32h>(const Packet32h& a) {
-  return _mm512_roundscale_ph(a, _MM_FROUND_CUR_DIRECTION);
-}
-
-// pceil
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pceil<Packet32h>(const Packet32h& a) {
-  return _mm512_roundscale_ph(a, _MM_FROUND_TO_POS_INF);
-}
-
-// pfloor
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pfloor<Packet32h>(const Packet32h& a) {
-  return _mm512_roundscale_ph(a, _MM_FROUND_TO_NEG_INF);
-}
-
-// ptrunc
-
-template <>
-EIGEN_STRONG_INLINE Packet32h ptrunc<Packet32h>(const Packet32h& a) {
-  return _mm512_roundscale_ph(a, _MM_FROUND_TO_ZERO);
-}
-
-// predux
-template <>
-EIGEN_STRONG_INLINE half predux<Packet32h>(const Packet32h& a) {
-  return (half)_mm512_reduce_add_ph(a);
-}
-
-template <>
-EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& a) {
-  return (half)_mm256_reduce_add_ph(_mm256_castsi256_ph(a));
-}
-
-template <>
-EIGEN_STRONG_INLINE half predux<Packet8h>(const Packet8h& a) {
-  return (half)_mm_reduce_add_ph(_mm_castsi128_ph(a));
-}
-
-// predux_half_dowto4
-template <>
-EIGEN_STRONG_INLINE Packet16h predux_half_dowto4<Packet32h>(const Packet32h& a) {
-#ifdef EIGEN_VECTORIZE_AVX512DQ
-  __m256i lowHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 0));
-  __m256i highHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 1));
-
-  return Packet16h(padd<Packet16h>(lowHalf, highHalf));
-#else
-  Eigen::half data[32];
-  _mm512_storeu_ph(data, a);
-
-  __m256i lowHalf = _mm256_castph_si256(_mm256_loadu_ph(data));
-  __m256i highHalf = _mm256_castph_si256(_mm256_loadu_ph(data + 16));
-
-  return Packet16h(padd<Packet16h>(lowHalf, highHalf));
-#endif
-}
-
-// predux_max
-
-// predux_min
-
-// predux_mul
-
-#ifdef EIGEN_VECTORIZE_FMA
-
-// pmadd
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
-  return _mm512_fmadd_ph(a, b, c);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
-  return _mm256_castph_si256(_mm256_fmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
-  return _mm_castph_si128(_mm_fmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
-}
-
-// pmsub
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
-  return _mm512_fmsub_ph(a, b, c);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
-  return _mm256_castph_si256(_mm256_fmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
-  return _mm_castph_si128(_mm_fmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
-}
-
-// pnmadd
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pnmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
-  return _mm512_fnmadd_ph(a, b, c);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pnmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
-  return _mm256_castph_si256(_mm256_fnmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pnmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
-  return _mm_castph_si128(_mm_fnmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));

-}

-

-// pnmsub

-

-template <>

-EIGEN_STRONG_INLINE Packet32h pnmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {

-  return _mm512_fnmsub_ph(a, b, c);

-}

-

-template <>

-EIGEN_STRONG_INLINE Packet16h pnmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {

-  return _mm256_castph_si256(_mm256_fnmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));

-}

-

-template <>

-EIGEN_STRONG_INLINE Packet8h pnmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {

-  return _mm_castph_si128(_mm_fnmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));

-}

-

-#endif

-

-// pnegate

-

-template <>

-EIGEN_STRONG_INLINE Packet32h pnegate<Packet32h>(const Packet32h& a) {

-  return psub(pzero(a), a);

-}

-

-// pconj

-

-template <>

-EIGEN_STRONG_INLINE Packet32h pconj<Packet32h>(const Packet32h& a) {

-  return a;

-}

-

-// psqrt

-

-template <>

-EIGEN_STRONG_INLINE Packet32h psqrt<Packet32h>(const Packet32h& a) {

-  return _mm512_sqrt_ph(a);

-}

-

-// prsqrt

-

-template <>

-EIGEN_STRONG_INLINE Packet32h prsqrt<Packet32h>(const Packet32h& a) {

-  return _mm512_rsqrt_ph(a);

-}

-

-// preciprocal

-

-template <>

-EIGEN_STRONG_INLINE Packet32h preciprocal<Packet32h>(const Packet32h& a) {

-  return _mm512_rcp_ph(a);

-}

-

-// ptranspose

-

-EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 32>& a) {

-  __m512i t[32];

-

-  EIGEN_UNROLL_LOOP

-  for (int i = 0; i < 16; i++) {

-    t[2 * i] = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));

-    t[2 * i + 1] =

-        _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));

-  }

-

-  __m512i p[32];

-

-  EIGEN_UNROLL_LOOP

-  for (int i = 0; i < 8; i++) {

-    p[4 * i] = _mm512_unpacklo_epi32(t[4 * i], t[4 * i + 2]);

-    p[4 * i + 1] = _mm512_unpackhi_epi32(t[4 * i], t[4 * i + 2]);

-    p[4 * i + 2] = _mm512_unpacklo_epi32(t[4 * i + 1], t[4 * i + 3]);

-    p[4 * i + 3] = _mm512_unpackhi_epi32(t[4 * i + 1], t[4 * i + 3]);

-  }

-

-  __m512i q[32];

-

-  EIGEN_UNROLL_LOOP

-  for (int i = 0; i < 4; i++) {

-    q[8 * i] = _mm512_unpacklo_epi64(p[8 * i], p[8 * i + 4]);

-    q[8 * i + 1] = _mm512_unpackhi_epi64(p[8 * i], p[8 * i + 4]);

-    q[8 * i + 2] = _mm512_unpacklo_epi64(p[8 * i + 1], p[8 * i + 5]);

-    q[8 * i + 3] = _mm512_unpackhi_epi64(p[8 * i + 1], p[8 * i + 5]);

-    q[8 * i + 4] = _mm512_unpacklo_epi64(p[8 * i + 2], p[8 * i + 6]);

-    q[8 * i + 5] = _mm512_unpackhi_epi64(p[8 * i + 2], p[8 * i + 6]);

-    q[8 * i + 6] = _mm512_unpacklo_epi64(p[8 * i + 3], p[8 * i + 7]);

-    q[8 * i + 7] = _mm512_unpackhi_epi64(p[8 * i + 3], p[8 * i + 7]);

-  }

-

-  __m512i f[32];

-

-#define PACKET32H_TRANSPOSE_HELPER(X, Y)                                                            \

-  do {                                                                                              \

-    f[Y * 8] = _mm512_inserti32x4(f[Y * 8], _mm512_extracti32x4_epi32(q[X * 8], Y), X);             \

-    f[Y * 8 + 1] = _mm512_inserti32x4(f[Y * 8 + 1], _mm512_extracti32x4_epi32(q[X * 8 + 1], Y), X); \

-    f[Y * 8 + 2] = _mm512_inserti32x4(f[Y * 8 + 2], _mm512_extracti32x4_epi32(q[X * 8 + 2], Y), X); \

-    f[Y * 8 + 3] = _mm512_inserti32x4(f[Y * 8 + 3], _mm512_extracti32x4_epi32(q[X * 8 + 3], Y), X); \

-    f[Y * 8 + 4] = _mm512_inserti32x4(f[Y * 8 + 4], _mm512_extracti32x4_epi32(q[X * 8 + 4], Y), X); \

-    f[Y * 8 + 5] = _mm512_inserti32x4(f[Y * 8 + 5], _mm512_extracti32x4_epi32(q[X * 8 + 5], Y), X); \

-    f[Y * 8 + 6] = _mm512_inserti32x4(f[Y * 8 + 6], _mm512_extracti32x4_epi32(q[X * 8 + 6], Y), X); \

-    f[Y * 8 + 7] = _mm512_inserti32x4(f[Y * 8 + 7], _mm512_extracti32x4_epi32(q[X * 8 + 7], Y), X); \

-  } while (false);

-

-  PACKET32H_TRANSPOSE_HELPER(0, 0);

-  PACKET32H_TRANSPOSE_HELPER(1, 1);

-  PACKET32H_TRANSPOSE_HELPER(2, 2);

-  PACKET32H_TRANSPOSE_HELPER(3, 3);

-

-  PACKET32H_TRANSPOSE_HELPER(1, 0);

-  PACKET32H_TRANSPOSE_HELPER(2, 0);

-  PACKET32H_TRANSPOSE_HELPER(3, 0);

-  PACKET32H_TRANSPOSE_HELPER(2, 1);

-  PACKET32H_TRANSPOSE_HELPER(3, 1);

-  PACKET32H_TRANSPOSE_HELPER(3, 2);

-

-  PACKET32H_TRANSPOSE_HELPER(0, 1);

-  PACKET32H_TRANSPOSE_HELPER(0, 2);

-  PACKET32H_TRANSPOSE_HELPER(0, 3);

-  PACKET32H_TRANSPOSE_HELPER(1, 2);

-  PACKET32H_TRANSPOSE_HELPER(1, 3);

-  PACKET32H_TRANSPOSE_HELPER(2, 3);

-

-#undef PACKET32H_TRANSPOSE_HELPER

-

-  EIGEN_UNROLL_LOOP

-  for (int i = 0; i < 32; i++) {

-    a.packet[i] = _mm512_castsi512_ph(f[i]);

-  }

-}

-

-EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 4>& a) {

-  __m512i p0, p1, p2, p3, t0, t1, t2, t3, a0, a1, a2, a3;

-  t0 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));

-  t1 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));

-  t2 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));

-  t3 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));

-

-  p0 = _mm512_unpacklo_epi32(t0, t2);

-  p1 = _mm512_unpackhi_epi32(t0, t2);

-  p2 = _mm512_unpacklo_epi32(t1, t3);

-  p3 = _mm512_unpackhi_epi32(t1, t3);

-

-  a0 = p0;

-  a1 = p1;

-  a2 = p2;

-  a3 = p3;

-

-  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p1, 0), 1);

-  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p0, 1), 0);

-

-  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p2, 0), 2);

-  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p0, 2), 0);

-

-  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p3, 0), 3);

-  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p0, 3), 0);

-

-  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p2, 1), 2);

-  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p1, 2), 1);

-

-  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p3, 2), 3);

-  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p2, 3), 2);

-

-  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p3, 1), 3);

-  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p1, 3), 1);

-

-  a.packet[0] = _mm512_castsi512_ph(a0);

-  a.packet[1] = _mm512_castsi512_ph(a1);

-  a.packet[2] = _mm512_castsi512_ph(a2);

-  a.packet[3] = _mm512_castsi512_ph(a3);

-}

-

-// preverse

-

-template <>

-EIGEN_STRONG_INLINE Packet32h preverse(const Packet32h& a) {

-  return _mm512_permutexvar_ph(_mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,

-                                                20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),

-                               a);

-}

-

-// pscatter

-

-template <>

-EIGEN_STRONG_INLINE void pscatter<half, Packet32h>(half* to, const Packet32h& from, Index stride) {

-  EIGEN_ALIGN64 half aux[32];

-  pstore(aux, from);

-

-  EIGEN_UNROLL_LOOP

-  for (int i = 0; i < 32; i++) {

-    to[stride * i] = aux[i];

-  }

-}

-

-// pgather

-

-template <>

-EIGEN_STRONG_INLINE Packet32h pgather<Eigen::half, Packet32h>(const Eigen::half* from, Index stride) {

-  return _mm512_castsi512_ph(_mm512_set_epi16(

-      from[31 * stride].x, from[30 * stride].x, from[29 * stride].x, from[28 * stride].x, from[27 * stride].x,

-      from[26 * stride].x, from[25 * stride].x, from[24 * stride].x, from[23 * stride].x, from[22 * stride].x,

-      from[21 * stride].x, from[20 * stride].x, from[19 * stride].x, from[18 * stride].x, from[17 * stride].x,

-      from[16 * stride].x, from[15 * stride].x, from[14 * stride].x, from[13 * stride].x, from[12 * stride].x,

-      from[11 * stride].x, from[10 * stride].x, from[9 * stride].x, from[8 * stride].x, from[7 * stride].x,

-      from[6 * stride].x, from[5 * stride].x, from[4 * stride].x, from[3 * stride].x, from[2 * stride].x,

-      from[1 * stride].x, from[0 * stride].x));

-}

-

-template <>

-EIGEN_STRONG_INLINE Packet16h pcos<Packet16h>(const Packet16h&);

-template <>

-EIGEN_STRONG_INLINE Packet16h psin<Packet16h>(const Packet16h&);

-template <>

-EIGEN_STRONG_INLINE Packet16h plog<Packet16h>(const Packet16h&);

-template <>

-EIGEN_STRONG_INLINE Packet16h plog2<Packet16h>(const Packet16h&);

-template <>

-EIGEN_STRONG_INLINE Packet16h plog1p<Packet16h>(const Packet16h&);

-template <>

-EIGEN_STRONG_INLINE Packet16h pexp<Packet16h>(const Packet16h&);

-template <>

-EIGEN_STRONG_INLINE Packet16h pexpm1<Packet16h>(const Packet16h&);

-template <>

-EIGEN_STRONG_INLINE Packet16h ptanh<Packet16h>(const Packet16h&);

-template <>

-EIGEN_STRONG_INLINE Packet16h pfrexp<Packet16h>(const Packet16h&, Packet16h&);

-template <>

-EIGEN_STRONG_INLINE Packet16h pldexp<Packet16h>(const Packet16h&, const Packet16h&);

-

-EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h& a, const Packet16h& b) {

-  __m512d result = _mm512_undefined_pd();

-  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(a), 0);

-  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(b), 1);

-  return _mm512_castpd_ph(result);

-}

-

-EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h& x, Packet16h& a, Packet16h& b) {

-  a = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 0));

-  b = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 1));

-}

-

-// psin

-template <>

-EIGEN_STRONG_INLINE Packet32h psin<Packet32h>(const Packet32h& a) {

-  Packet16h low;

-  Packet16h high;

-  extract2Packet16h(a, low, high);

-

-  Packet16h lowOut = psin(low);

-  Packet16h highOut = psin(high);

-

-  return combine2Packet16h(lowOut, highOut);

-}

-

-// pcos

-template <>

-EIGEN_STRONG_INLINE Packet32h pcos<Packet32h>(const Packet32h& a) {

-  Packet16h low;

-  Packet16h high;

-  extract2Packet16h(a, low, high);

-

-  Packet16h lowOut = pcos(low);

-  Packet16h highOut = pcos(high);

-

-  return combine2Packet16h(lowOut, highOut);

-}

-

-// plog

-template <>

-EIGEN_STRONG_INLINE Packet32h plog<Packet32h>(const Packet32h& a) {

-  Packet16h low;

-  Packet16h high;

-  extract2Packet16h(a, low, high);

-

-  Packet16h lowOut = plog(low);

-  Packet16h highOut = plog(high);

-

-  return combine2Packet16h(lowOut, highOut);

-}

-

-// plog2

-template <>

-EIGEN_STRONG_INLINE Packet32h plog2<Packet32h>(const Packet32h& a) {

-  Packet16h low;

-  Packet16h high;

-  extract2Packet16h(a, low, high);

-

-  Packet16h lowOut = plog2(low);

-  Packet16h highOut = plog2(high);

-

-  return combine2Packet16h(lowOut, highOut);

-}

-

-// plog1p

-template <>

-EIGEN_STRONG_INLINE Packet32h plog1p<Packet32h>(const Packet32h& a) {

-  Packet16h low;

-  Packet16h high;

-  extract2Packet16h(a, low, high);

-

-  Packet16h lowOut = plog1p(low);

-  Packet16h highOut = plog1p(high);

-

-  return combine2Packet16h(lowOut, highOut);

-}

-

-// pexp

-template <>

-EIGEN_STRONG_INLINE Packet32h pexp<Packet32h>(const Packet32h& a) {

-  Packet16h low;

-  Packet16h high;

-  extract2Packet16h(a, low, high);

-

-  Packet16h lowOut = pexp(low);

-  Packet16h highOut = pexp(high);

-

-  return combine2Packet16h(lowOut, highOut);

-}

-

-// pexpm1

-template <>

-EIGEN_STRONG_INLINE Packet32h pexpm1<Packet32h>(const Packet32h& a) {

-  Packet16h low;

-  Packet16h high;

-  extract2Packet16h(a, low, high);

-

-  Packet16h lowOut = pexpm1(low);

-  Packet16h highOut = pexpm1(high);

-

-  return combine2Packet16h(lowOut, highOut);

-}

-

-// ptanh

-template <>

-EIGEN_STRONG_INLINE Packet32h ptanh<Packet32h>(const Packet32h& a) {

-  Packet16h low;

-  Packet16h high;

-  extract2Packet16h(a, low, high);

-

-  Packet16h lowOut = ptanh(low);

-  Packet16h highOut = ptanh(high);

-

-  return combine2Packet16h(lowOut, highOut);

-}

-

-// pfrexp

-template <>

-EIGEN_STRONG_INLINE Packet32h pfrexp<Packet32h>(const Packet32h& a, Packet32h& exponent) {

-  Packet16h low;

-  Packet16h high;

-  extract2Packet16h(a, low, high);

-

-  Packet16h exp1 = _mm256_undefined_si256();

-  Packet16h exp2 = _mm256_undefined_si256();

-

-  Packet16h lowOut = pfrexp(low, exp1);

-  Packet16h highOut = pfrexp(high, exp2);

-

-  exponent = combine2Packet16h(exp1, exp2);

-

-  return combine2Packet16h(lowOut, highOut);

-}

-

-// pldexp

-template <>

-EIGEN_STRONG_INLINE Packet32h pldexp<Packet32h>(const Packet32h& a, const Packet32h& exponent) {

-  Packet16h low;

-  Packet16h high;

-  extract2Packet16h(a, low, high);

-

-  Packet16h exp1;

-  Packet16h exp2;

-  extract2Packet16h(exponent, exp1, exp2);

-

-  Packet16h lowOut = pldexp(low, exp1);

-  Packet16h highOut = pldexp(high, exp2);

-

-  return combine2Packet16h(lowOut, highOut);

-}

-

-}  // end namespace internal

-}  // end namespace Eigen

-

-#endif  // EIGEN_PACKET_MATH_FP16_AVX512_H

+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+//
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PACKET_MATH_FP16_AVX512_H
+#define EIGEN_PACKET_MATH_FP16_AVX512_H
+
+// IWYU pragma: private
+#include "../../InternalHeaderCheck.h"
+
+namespace Eigen {
+
+namespace internal {
+
+typedef __m512h Packet32h;
+typedef eigen_packet_wrapper<__m256i, 1> Packet16h;
+typedef eigen_packet_wrapper<__m128i, 2> Packet8h;
+
+template <>
+struct is_arithmetic<Packet8h> {
+  enum { value = true };
+};
+
+template <>
+struct packet_traits<half> : default_packet_traits {
+  typedef Packet32h type;
+  typedef Packet16h half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 32,
+
+    HasCmp = 1,
+    HasAdd = 1,
+    HasSub = 1,
+    HasMul = 1,
+    HasDiv = 1,
+    HasNegate = 1,
+    HasAbs = 1,
+    HasAbs2 = 0,
+    HasMin = 1,
+    HasMax = 1,
+    HasConj = 1,
+    HasSetLinear = 0,
+    HasLog = 1,
+    HasLog1p = 1,
+    HasExp = 1,
+    HasExpm1 = 1,
+    HasSqrt = 1,
+    HasRsqrt = 1,
+    // These ones should be implemented in future
+    HasBessel = 0,
+    HasNdtri = 0,
+    HasSin = EIGEN_FAST_MATH,
+    HasCos = EIGEN_FAST_MATH,
+    HasTanh = EIGEN_FAST_MATH,
+    HasErf = 0,  // EIGEN_FAST_MATH,
+    HasBlend = 0
+  };
+};
+
+template <>
+struct unpacket_traits<Packet32h> {
+  typedef Eigen::half type;
+  typedef Packet16h half;
+  enum {
+    size = 32,
+    alignment = Aligned64,
+    vectorizable = true,
+    masked_load_available = false,
+    masked_store_available = false
+  };
+};
+
+template <>
+struct unpacket_traits<Packet16h> {
+  typedef Eigen::half type;
+  typedef Packet8h half;
+  enum {
+    size = 16,
+    alignment = Aligned32,
+    vectorizable = true,
+    masked_load_available = false,
+    masked_store_available = false
+  };
+};
+
+template <>
+struct unpacket_traits<Packet8h> {
+  typedef Eigen::half type;
+  typedef Packet8h half;
+  enum {
+    size = 8,
+    alignment = Aligned16,
+    vectorizable = true,
+    masked_load_available = false,
+    masked_store_available = false
+  };
+};
+
+// Memory functions
+
+// pset1
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pset1<Packet32h>(const Eigen::half& from) {
+  // half/half_raw is bit compatible
+  return _mm512_set1_ph(numext::bit_cast<_Float16>(from));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pzero(const Packet32h& /*a*/) {
+  return _mm512_setzero_ph();
+}
+
+// pset1frombits
+template <>
+EIGEN_STRONG_INLINE Packet32h pset1frombits<Packet32h>(unsigned short from) {
+  return _mm512_castsi512_ph(_mm512_set1_epi16(from));
+}
+
+// pfirst
+
+template <>
+EIGEN_STRONG_INLINE Eigen::half pfirst<Packet32h>(const Packet32h& from) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+  return half_impl::raw_uint16_to_half(
+      static_cast<unsigned short>(_mm256_extract_epi16(_mm512_extracti32x8_epi32(_mm512_castph_si512(from), 0), 0)));
+#else
+  Eigen::half dest[32];
+  _mm512_storeu_ph(dest, from);
+  return dest[0];
+#endif
+}
+
+// pload
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pload<Packet32h>(const Eigen::half* from) {
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ph(from);
+}
+
+// ploadu
+
+template <>
+EIGEN_STRONG_INLINE Packet32h ploadu<Packet32h>(const Eigen::half* from) {
+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ph(from);
+}
+
+// pstore
+
+template <>
+EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet32h& from) {
+  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ph(to, from);
+}
+
+// pstoreu
+
+template <>
+EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet32h& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ph(to, from);
+}
+
+// ploaddup
+template <>
+EIGEN_STRONG_INLINE Packet32h ploaddup<Packet32h>(const Eigen::half* from) {
+  __m512h a = _mm512_castph256_ph512(_mm256_loadu_ph(from));
+  return _mm512_permutexvar_ph(_mm512_set_epi16(15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6,
+                                                5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0),
+                               a);
+}
+
+// ploadquad
+template <>
+EIGEN_STRONG_INLINE Packet32h ploadquad<Packet32h>(const Eigen::half* from) {
+  __m512h a = _mm512_castph128_ph512(_mm_loadu_ph(from));
+  return _mm512_permutexvar_ph(
+      _mm512_set_epi16(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0),
+      a);
+}
+
+// pabs
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pabs<Packet32h>(const Packet32h& a) {
+  return _mm512_abs_ph(a);
+}
+
+// psignbit
+
+template <>
+EIGEN_STRONG_INLINE Packet32h psignbit<Packet32h>(const Packet32h& a) {
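+  // Arithmetic right shift by 15 replicates each element's sign bit across all 16 bits,
+  // producing all-ones for negative values and all-zeros otherwise.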
+  return _mm512_castsi512_ph(_mm512_srai_epi16(_mm512_castph_si512(a), 15));
+}
+
+// pmin
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pmin<Packet32h>(const Packet32h& a, const Packet32h& b) {
+  return _mm512_min_ph(a, b);
+}
+
+// pmax
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pmax<Packet32h>(const Packet32h& a, const Packet32h& b) {
+  return _mm512_max_ph(a, b);
+}
+
+// plset
+template <>
+EIGEN_STRONG_INLINE Packet32h plset<Packet32h>(const half& a) {
+  return _mm512_add_ph(pset1<Packet32h>(a), _mm512_set_ph(31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+                                                          16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
+}
+
+// por
+
+template <>
+EIGEN_STRONG_INLINE Packet32h por(const Packet32h& a, const Packet32h& b) {
+  return _mm512_castsi512_ph(_mm512_or_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
+}
+
+// pxor
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pxor(const Packet32h& a, const Packet32h& b) {
+  return _mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
+}
+
+// pand
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pand(const Packet32h& a, const Packet32h& b) {
+  return _mm512_castsi512_ph(_mm512_and_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
+}
+
+// pandnot
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pandnot(const Packet32h& a, const Packet32h& b) {
+  return _mm512_castsi512_ph(_mm512_andnot_si512(_mm512_castph_si512(b), _mm512_castph_si512(a)));
+}
+
+// pselect
+
+template <>
+EIGEN_DEVICE_FUNC inline Packet32h pselect(const Packet32h& mask, const Packet32h& a, const Packet32h& b) {
+  __mmask32 mask32 = _mm512_cmp_epi16_mask(_mm512_castph_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ);
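+  // mask32 has a bit set where the mask element is all-zero, so the blend keeps a for
+  // "true" (non-zero) mask lanes and b where the mask is zero.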
+  return _mm512_mask_blend_ph(mask32, a, b);
+}
+
+// pcmp_eq
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pcmp_eq(const Packet32h& a, const Packet32h& b) {
+  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_EQ_OQ);
+  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, static_cast<short>(0xffffu)));
+}
+
+// pcmp_le
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pcmp_le(const Packet32h& a, const Packet32h& b) {
+  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LE_OQ);
+  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, static_cast<short>(0xffffu)));
+}
+
+// pcmp_lt
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pcmp_lt(const Packet32h& a, const Packet32h& b) {
+  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LT_OQ);
+  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, static_cast<short>(0xffffu)));
+}
+
+// pcmp_lt_or_nan
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pcmp_lt_or_nan(const Packet32h& a, const Packet32h& b) {
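+  // _CMP_NGE_UQ is true when a < b or when either operand is NaN.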
+  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_NGE_UQ);
+  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi16(0), mask, static_cast<short>(0xffffu)));
+}
+
+// padd
+
+template <>
+EIGEN_STRONG_INLINE Packet32h padd<Packet32h>(const Packet32h& a, const Packet32h& b) {
+  return _mm512_add_ph(a, b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) {
+  return _mm256_castph_si256(_mm256_add_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {
+  return _mm_castph_si128(_mm_add_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
+}
+
+// psub
+
+template <>
+EIGEN_STRONG_INLINE Packet32h psub<Packet32h>(const Packet32h& a, const Packet32h& b) {
+  return _mm512_sub_ph(a, b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h psub<Packet16h>(const Packet16h& a, const Packet16h& b) {
+  return _mm256_castph_si256(_mm256_sub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h psub<Packet8h>(const Packet8h& a, const Packet8h& b) {
+  return _mm_castph_si128(_mm_sub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
+}
+
+// pmul
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pmul<Packet32h>(const Packet32h& a, const Packet32h& b) {
+  return _mm512_mul_ph(a, b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) {
+  return _mm256_castph_si256(_mm256_mul_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) {
+  return _mm_castph_si128(_mm_mul_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
+}
+
+// pdiv
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pdiv<Packet32h>(const Packet32h& a, const Packet32h& b) {
+  return _mm512_div_ph(a, b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pdiv<Packet16h>(const Packet16h& a, const Packet16h& b) {
+  return _mm256_castph_si256(_mm256_div_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const Packet8h& b) {
+  return _mm_castph_si128(_mm_div_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
+}
+
+// pround
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pround<Packet32h>(const Packet32h& a) {
+  // Work-around for default std::round rounding mode.
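+  // Copy the sign of a onto a bias just below 0.5, add it to a, then truncate toward zero;
+  // this rounds halfway cases away from zero, matching std::round.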
+
+  // Mask for the sign bit
+  const Packet32h signMask = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x8000u));
+  // The largest half-precision float less than 0.5
+  const Packet32h prev0dot5 = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x37FFu));
+
+  return _mm512_roundscale_ph(padd(por(pand(a, signMask), prev0dot5), a), _MM_FROUND_TO_ZERO);
+}
+
+// print
+
+template <>
+EIGEN_STRONG_INLINE Packet32h print<Packet32h>(const Packet32h& a) {
+  return _mm512_roundscale_ph(a, _MM_FROUND_CUR_DIRECTION);
+}
+
+// pceil
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pceil<Packet32h>(const Packet32h& a) {
+  return _mm512_roundscale_ph(a, _MM_FROUND_TO_POS_INF);
+}
+
+// pfloor
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pfloor<Packet32h>(const Packet32h& a) {
+  return _mm512_roundscale_ph(a, _MM_FROUND_TO_NEG_INF);
+}
+
+// ptrunc
+
+template <>
+EIGEN_STRONG_INLINE Packet32h ptrunc<Packet32h>(const Packet32h& a) {
+  return _mm512_roundscale_ph(a, _MM_FROUND_TO_ZERO);
+}
+
+// predux
+template <>
+EIGEN_STRONG_INLINE half predux<Packet32h>(const Packet32h& a) {
+  return (half)_mm512_reduce_add_ph(a);
+}
+
+template <>
+EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& a) {
+  return (half)_mm256_reduce_add_ph(_mm256_castsi256_ph(a));
+}
+
+template <>
+EIGEN_STRONG_INLINE half predux<Packet8h>(const Packet8h& a) {
+  return (half)_mm_reduce_add_ph(_mm_castsi128_ph(a));
+}
+
+// predux_half_dowto4
+template <>
+EIGEN_STRONG_INLINE Packet16h predux_half_dowto4<Packet32h>(const Packet32h& a) {
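+  // Reduce to a Packet16h by adding the low and high 16-element halves lane-wise.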
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+  __m256i lowHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 0));
+  __m256i highHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 1));
+
+  return Packet16h(padd<Packet16h>(lowHalf, highHalf));
+#else
+  Eigen::half data[32];
+  _mm512_storeu_ph(data, a);
+
+  __m256i lowHalf = _mm256_castph_si256(_mm256_loadu_ph(data));
+  __m256i highHalf = _mm256_castph_si256(_mm256_loadu_ph(data + 16));
+
+  return Packet16h(padd<Packet16h>(lowHalf, highHalf));
+#endif
+}
+
+// predux_max
+
+// predux_min
+
+// predux_mul
+
+#ifdef EIGEN_VECTORIZE_FMA
+
+// pmadd
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
+  return _mm512_fmadd_ph(a, b, c);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
+  return _mm256_castph_si256(_mm256_fmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
+  return _mm_castph_si128(_mm_fmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
+}
+
+// pmsub
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
+  return _mm512_fmsub_ph(a, b, c);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
+  return _mm256_castph_si256(_mm256_fmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
+  return _mm_castph_si128(_mm_fmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
+}
+
+// pnmadd
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pnmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
+  return _mm512_fnmadd_ph(a, b, c);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pnmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
+  return _mm256_castph_si256(_mm256_fnmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pnmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
+  return _mm_castph_si128(_mm_fnmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
+}
+
+// pnmsub
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pnmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
+  return _mm512_fnmsub_ph(a, b, c);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pnmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
+  return _mm256_castph_si256(_mm256_fnmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pnmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
+  return _mm_castph_si128(_mm_fnmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
+}
+
+#endif
+
+// pnegate
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pnegate<Packet32h>(const Packet32h& a) {
+  return psub(pzero(a), a);
+}
+
+// pconj
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pconj<Packet32h>(const Packet32h& a) {
+  return a;
+}
+
+// psqrt
+
+template <>
+EIGEN_STRONG_INLINE Packet32h psqrt<Packet32h>(const Packet32h& a) {
+  return _mm512_sqrt_ph(a);
+}
+
+// prsqrt
+
+template <>
+EIGEN_STRONG_INLINE Packet32h prsqrt<Packet32h>(const Packet32h& a) {
+  return _mm512_rsqrt_ph(a);
+}
+
+// preciprocal
+
+template <>
+EIGEN_STRONG_INLINE Packet32h preciprocal<Packet32h>(const Packet32h& a) {
+  return _mm512_rcp_ph(a);
+}
+
+// ptranspose
+
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 32>& a) {
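+  // In-register 32x32 transpose: interleave at 16-, 32- and 64-bit granularity,
+  // then redistribute the 128-bit lanes with extract/insert.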
+  __m512i t[32];
+
+  EIGEN_UNROLL_LOOP
+  for (int i = 0; i < 16; i++) {
+    t[2 * i] = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));
+    t[2 * i + 1] =
+        _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));
+  }
+
+  __m512i p[32];
+
+  EIGEN_UNROLL_LOOP
+  for (int i = 0; i < 8; i++) {
+    p[4 * i] = _mm512_unpacklo_epi32(t[4 * i], t[4 * i + 2]);
+    p[4 * i + 1] = _mm512_unpackhi_epi32(t[4 * i], t[4 * i + 2]);
+    p[4 * i + 2] = _mm512_unpacklo_epi32(t[4 * i + 1], t[4 * i + 3]);
+    p[4 * i + 3] = _mm512_unpackhi_epi32(t[4 * i + 1], t[4 * i + 3]);
+  }
+
+  __m512i q[32];
+
+  EIGEN_UNROLL_LOOP
+  for (int i = 0; i < 4; i++) {
+    q[8 * i] = _mm512_unpacklo_epi64(p[8 * i], p[8 * i + 4]);
+    q[8 * i + 1] = _mm512_unpackhi_epi64(p[8 * i], p[8 * i + 4]);
+    q[8 * i + 2] = _mm512_unpacklo_epi64(p[8 * i + 1], p[8 * i + 5]);
+    q[8 * i + 3] = _mm512_unpackhi_epi64(p[8 * i + 1], p[8 * i + 5]);
+    q[8 * i + 4] = _mm512_unpacklo_epi64(p[8 * i + 2], p[8 * i + 6]);
+    q[8 * i + 5] = _mm512_unpackhi_epi64(p[8 * i + 2], p[8 * i + 6]);
+    q[8 * i + 6] = _mm512_unpacklo_epi64(p[8 * i + 3], p[8 * i + 7]);
+    q[8 * i + 7] = _mm512_unpackhi_epi64(p[8 * i + 3], p[8 * i + 7]);
+  }
+
+  __m512i f[32];
+
+#define PACKET32H_TRANSPOSE_HELPER(X, Y)                                                            \
+  do {                                                                                              \
+    f[Y * 8] = _mm512_inserti32x4(f[Y * 8], _mm512_extracti32x4_epi32(q[X * 8], Y), X);             \
+    f[Y * 8 + 1] = _mm512_inserti32x4(f[Y * 8 + 1], _mm512_extracti32x4_epi32(q[X * 8 + 1], Y), X); \
+    f[Y * 8 + 2] = _mm512_inserti32x4(f[Y * 8 + 2], _mm512_extracti32x4_epi32(q[X * 8 + 2], Y), X); \
+    f[Y * 8 + 3] = _mm512_inserti32x4(f[Y * 8 + 3], _mm512_extracti32x4_epi32(q[X * 8 + 3], Y), X); \
+    f[Y * 8 + 4] = _mm512_inserti32x4(f[Y * 8 + 4], _mm512_extracti32x4_epi32(q[X * 8 + 4], Y), X); \
+    f[Y * 8 + 5] = _mm512_inserti32x4(f[Y * 8 + 5], _mm512_extracti32x4_epi32(q[X * 8 + 5], Y), X); \
+    f[Y * 8 + 6] = _mm512_inserti32x4(f[Y * 8 + 6], _mm512_extracti32x4_epi32(q[X * 8 + 6], Y), X); \
+    f[Y * 8 + 7] = _mm512_inserti32x4(f[Y * 8 + 7], _mm512_extracti32x4_epi32(q[X * 8 + 7], Y), X); \
+  } while (false);
+
+  PACKET32H_TRANSPOSE_HELPER(0, 0);
+  PACKET32H_TRANSPOSE_HELPER(1, 1);
+  PACKET32H_TRANSPOSE_HELPER(2, 2);
+  PACKET32H_TRANSPOSE_HELPER(3, 3);
+
+  PACKET32H_TRANSPOSE_HELPER(1, 0);
+  PACKET32H_TRANSPOSE_HELPER(2, 0);
+  PACKET32H_TRANSPOSE_HELPER(3, 0);
+  PACKET32H_TRANSPOSE_HELPER(2, 1);
+  PACKET32H_TRANSPOSE_HELPER(3, 1);
+  PACKET32H_TRANSPOSE_HELPER(3, 2);
+
+  PACKET32H_TRANSPOSE_HELPER(0, 1);
+  PACKET32H_TRANSPOSE_HELPER(0, 2);
+  PACKET32H_TRANSPOSE_HELPER(0, 3);
+  PACKET32H_TRANSPOSE_HELPER(1, 2);
+  PACKET32H_TRANSPOSE_HELPER(1, 3);
+  PACKET32H_TRANSPOSE_HELPER(2, 3);
+
+#undef PACKET32H_TRANSPOSE_HELPER
+
+  EIGEN_UNROLL_LOOP
+  for (int i = 0; i < 32; i++) {
+    a.packet[i] = _mm512_castsi512_ph(f[i]);
+  }
+}
+
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 4>& a) {
+  __m512i p0, p1, p2, p3, t0, t1, t2, t3, a0, a1, a2, a3;
+  t0 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));
+  t1 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));
+  t2 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));
+  t3 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));
+
+  p0 = _mm512_unpacklo_epi32(t0, t2);
+  p1 = _mm512_unpackhi_epi32(t0, t2);
+  p2 = _mm512_unpacklo_epi32(t1, t3);
+  p3 = _mm512_unpackhi_epi32(t1, t3);
+
+  a0 = p0;
+  a1 = p1;
+  a2 = p2;
+  a3 = p3;
+
+  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p1, 0), 1);
+  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p0, 1), 0);
+
+  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p2, 0), 2);
+  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p0, 2), 0);
+
+  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p3, 0), 3);
+  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p0, 3), 0);
+
+  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p2, 1), 2);
+  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p1, 2), 1);
+
+  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p3, 2), 3);
+  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p2, 3), 2);
+
+  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p3, 1), 3);
+  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p1, 3), 1);
+
+  a.packet[0] = _mm512_castsi512_ph(a0);
+  a.packet[1] = _mm512_castsi512_ph(a1);
+  a.packet[2] = _mm512_castsi512_ph(a2);
+  a.packet[3] = _mm512_castsi512_ph(a3);
+}
+
+// preverse
+
+template <>
+EIGEN_STRONG_INLINE Packet32h preverse(const Packet32h& a) {
+  return _mm512_permutexvar_ph(_mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+                                                20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),
+                               a);
+}
+
+// pscatter
+
+template <>
+EIGEN_STRONG_INLINE void pscatter<half, Packet32h>(half* to, const Packet32h& from, Index stride) {
+  EIGEN_ALIGN64 half aux[32];
+  pstore(aux, from);
+
+  EIGEN_UNROLL_LOOP
+  for (int i = 0; i < 32; i++) {
+    to[stride * i] = aux[i];
+  }
+}
+
+// pgather
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pgather<Eigen::half, Packet32h>(const Eigen::half* from, Index stride) {
+  return _mm512_castsi512_ph(_mm512_set_epi16(
+      from[31 * stride].x, from[30 * stride].x, from[29 * stride].x, from[28 * stride].x, from[27 * stride].x,
+      from[26 * stride].x, from[25 * stride].x, from[24 * stride].x, from[23 * stride].x, from[22 * stride].x,
+      from[21 * stride].x, from[20 * stride].x, from[19 * stride].x, from[18 * stride].x, from[17 * stride].x,
+      from[16 * stride].x, from[15 * stride].x, from[14 * stride].x, from[13 * stride].x, from[12 * stride].x,
+      from[11 * stride].x, from[10 * stride].x, from[9 * stride].x, from[8 * stride].x, from[7 * stride].x,
+      from[6 * stride].x, from[5 * stride].x, from[4 * stride].x, from[3 * stride].x, from[2 * stride].x,
+      from[1 * stride].x, from[0 * stride].x));
+}
+
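+// Forward declarations of the Packet16h math functions; the Packet32h versions below are
+// implemented by splitting into two Packet16h halves.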
+template <>
+EIGEN_STRONG_INLINE Packet16h pcos<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h psin<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h plog<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h plog2<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h plog1p<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h pexp<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h pexpm1<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h ptanh<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h pfrexp<Packet16h>(const Packet16h&, Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h pldexp<Packet16h>(const Packet16h&, const Packet16h&);
+
+EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h& a, const Packet16h& b) {
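+  // View the two 256-bit halves as double lanes so _mm512_insertf64x4 can pack them into
+  // a single 512-bit register.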
+  __m512d result = _mm512_undefined_pd();
+  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(a), 0);
+  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(b), 1);
+  return _mm512_castpd_ph(result);
+}
+
+EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h& x, Packet16h& a, Packet16h& b) {
+  a = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 0));
+  b = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 1));
+}
+
+// psin
+template <>
+EIGEN_STRONG_INLINE Packet32h psin<Packet32h>(const Packet32h& a) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h lowOut = psin(low);
+  Packet16h highOut = psin(high);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+// pcos
+template <>
+EIGEN_STRONG_INLINE Packet32h pcos<Packet32h>(const Packet32h& a) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h lowOut = pcos(low);
+  Packet16h highOut = pcos(high);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+// plog
+template <>
+EIGEN_STRONG_INLINE Packet32h plog<Packet32h>(const Packet32h& a) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h lowOut = plog(low);
+  Packet16h highOut = plog(high);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+// plog2
+template <>
+EIGEN_STRONG_INLINE Packet32h plog2<Packet32h>(const Packet32h& a) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h lowOut = plog2(low);
+  Packet16h highOut = plog2(high);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+// plog1p
+template <>
+EIGEN_STRONG_INLINE Packet32h plog1p<Packet32h>(const Packet32h& a) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h lowOut = plog1p(low);
+  Packet16h highOut = plog1p(high);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+// pexp
+template <>
+EIGEN_STRONG_INLINE Packet32h pexp<Packet32h>(const Packet32h& a) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h lowOut = pexp(low);
+  Packet16h highOut = pexp(high);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+// pexpm1
+template <>
+EIGEN_STRONG_INLINE Packet32h pexpm1<Packet32h>(const Packet32h& a) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h lowOut = pexpm1(low);
+  Packet16h highOut = pexpm1(high);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+// ptanh
+template <>
+EIGEN_STRONG_INLINE Packet32h ptanh<Packet32h>(const Packet32h& a) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h lowOut = ptanh(low);
+  Packet16h highOut = ptanh(high);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+// pfrexp
+template <>
+EIGEN_STRONG_INLINE Packet32h pfrexp<Packet32h>(const Packet32h& a, Packet32h& exponent) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h exp1 = _mm256_undefined_si256();
+  Packet16h exp2 = _mm256_undefined_si256();
+
+  Packet16h lowOut = pfrexp(low, exp1);
+  Packet16h highOut = pfrexp(high, exp2);
+
+  exponent = combine2Packet16h(exp1, exp2);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+// pldexp
+template <>
+EIGEN_STRONG_INLINE Packet32h pldexp<Packet32h>(const Packet32h& a, const Packet32h& exponent) {
+  Packet16h low;
+  Packet16h high;
+  extract2Packet16h(a, low, high);
+
+  Packet16h exp1;
+  Packet16h exp2;
+  extract2Packet16h(exponent, exp1, exp2);
+
+  Packet16h lowOut = pldexp(low, exp1);
+  Packet16h highOut = pldexp(high, exp2);
+
+  return combine2Packet16h(lowOut, highOut);
+}
+
+}  // end namespace internal
+}  // end namespace Eigen
+
+#endif  // EIGEN_PACKET_MATH_FP16_AVX512_H
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 8027cb5..482064e 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -1514,16 +1514,13 @@
 
 template <typename Packet>
 EIGEN_STRONG_INLINE Packet ploadu_common(const __UNPACK_TYPE__(Packet) * from) {
-  EIGEN_DEBUG_ALIGNED_LOAD
-#if defined(EIGEN_VECTORIZE_VSX) || !defined(_BIG_ENDIAN)
   EIGEN_DEBUG_UNALIGNED_LOAD
+#if defined(EIGEN_VECTORIZE_VSX)
   return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from));
 #else
-  Packet16uc MSQ, LSQ;
-  Packet16uc mask;
-  MSQ = vec_ld(0, (unsigned char*)from);   // most significant quadword
-  LSQ = vec_ld(15, (unsigned char*)from);  // least significant quadword
-  mask = vec_lvsl(0, from);                // create the permute mask
+  Packet16uc MSQ = vec_ld(0, (unsigned char*)from);   // most significant quadword
+  Packet16uc LSQ = vec_ld(15, (unsigned char*)from);  // least significant quadword
+  Packet16uc mask = vec_lvsl(0, from);                // create the permute mask
   // TODO: Add static_cast here
   return (Packet)vec_perm(MSQ, LSQ, mask);  // align the data
 #endif
@@ -1733,7 +1730,7 @@
 template <typename Packet>
 EIGEN_STRONG_INLINE void pstoreu_common(__UNPACK_TYPE__(Packet) * to, const Packet& from) {
   EIGEN_DEBUG_UNALIGNED_STORE
-#if defined(EIGEN_VECTORIZE_VSX) || !defined(_BIG_ENDIAN)
+#if defined(EIGEN_VECTORIZE_VSX)
   vec_xst(from, 0, to);
 #else
   // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
@@ -2069,7 +2066,7 @@
   input = padd<Packet4ui>(input, rounding_bias);
 
   const EIGEN_DECLARE_CONST_FAST_Packet4ui(nan, 0x7FC00000);
-#ifdef _ARCH_PWR9
+#if defined(_ARCH_PWR9) && defined(EIGEN_VECTORIZE_VSX)
   Packet4bi nan_selector = vec_test_data_class(p4f, __VEC_CLASS_FP_NAN);
   input = vec_sel(input, p4ui_nan, nan_selector);
 
@@ -2178,7 +2175,7 @@
   Packet8bi rounding_bias = vec_cmplt(lsb, p4f2);
   Packet8us input = psub<Packet8us>(p4f, reinterpret_cast<Packet8us>(rounding_bias));
 
-#ifdef _ARCH_PWR9
+#if defined(_ARCH_PWR9) && defined(EIGEN_VECTORIZE_VSX)
   Packet4bi nan_selector_lo = vec_test_data_class(lo, __VEC_CLASS_FP_NAN);
   Packet4bi nan_selector_hi = vec_test_data_class(hi, __VEC_CLASS_FP_NAN);
   Packet8us nan_selector =
@@ -3400,9 +3397,17 @@
   return reinterpret_cast<Packet2d>(vec_cmpeq(a, b));
 }
 template <>
+#ifdef __POWER8_VECTOR__
 EIGEN_STRONG_INLINE Packet2l pcmp_eq(const Packet2l& a, const Packet2l& b) {
   return reinterpret_cast<Packet2l>(vec_cmpeq(a, b));
 }
+#else
+EIGEN_STRONG_INLINE Packet2l pcmp_eq(const Packet2l& a, const Packet2l& b) {
+  Packet4i halves = reinterpret_cast<Packet4i>(vec_cmpeq(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(b)));
+  Packet4i flipped = vec_perm(halves, halves, p16uc_COMPLEX32_REV);
+  return reinterpret_cast<Packet2l>(pand(halves, flipped));
+}
+#endif
 template <>
 EIGEN_STRONG_INLINE Packet2d pcmp_lt_or_nan(const Packet2d& a, const Packet2d& b) {
   Packet2d c = reinterpret_cast<Packet2d>(vec_cmpge(a, b));
diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h
index a8cb228..95697f3 100644
--- a/Eigen/src/Core/arch/Default/Half.h
+++ b/Eigen/src/Core/arch/Default/Half.h
@@ -522,11 +522,6 @@
 #endif
 }
 
-union float32_bits {
-  unsigned int u;
-  float f;
-};
-
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff) {
 #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
     (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
@@ -549,46 +544,45 @@
   return h;
 
 #else
-  float32_bits f;
-  f.f = ff;
-
-  const float32_bits f32infty = {255 << 23};
-  const float32_bits f16max = {(127 + 16) << 23};
-  const float32_bits denorm_magic = {((127 - 15) + (23 - 10) + 1) << 23};
-  unsigned int sign_mask = 0x80000000u;
+  uint32_t f_bits = Eigen::numext::bit_cast<uint32_t>(ff);
+  const uint32_t f32infty_bits = {255 << 23};
+  const uint32_t f16max_bits = {(127 + 16) << 23};
+  const uint32_t denorm_magic_bits = {((127 - 15) + (23 - 10) + 1) << 23};
+  const uint32_t sign_mask = 0x80000000u;
   __half_raw o;
-  o.x = static_cast<numext::uint16_t>(0x0u);
+  o.x = static_cast<uint16_t>(0x0u);
 
-  unsigned int sign = f.u & sign_mask;
-  f.u ^= sign;
+  const uint32_t sign = f_bits & sign_mask;
+  f_bits ^= sign;
 
   // NOTE all the integer compares in this function can be safely
   // compiled into signed compares since all operands are below
   // 0x80000000. Important if you want fast straight SSE2 code
   // (since there's no unsigned PCMPGTD).
 
-  if (f.u >= f16max.u) {                         // result is Inf or NaN (all exponent bits set)
-    o.x = (f.u > f32infty.u) ? 0x7e00 : 0x7c00;  // NaN->qNaN and Inf->Inf
-  } else {                                       // (De)normalized number or zero
-    if (f.u < (113 << 23)) {                     // resulting FP16 is subnormal or zero
+  if (f_bits >= f16max_bits) {                         // result is Inf or NaN (all exponent bits set)
+    o.x = (f_bits > f32infty_bits) ? 0x7e00 : 0x7c00;  // NaN->qNaN and Inf->Inf
+  } else {                                             // (De)normalized number or zero
+    if (f_bits < (113 << 23)) {                        // resulting FP16 is subnormal or zero
       // use a magic value to align our 10 mantissa bits at the bottom of
       // the float. as long as FP addition is round-to-nearest-even this
       // just works.
-      f.f += denorm_magic.f;
+      f_bits = Eigen::numext::bit_cast<uint32_t>(Eigen::numext::bit_cast<float>(f_bits) +
+                                                 Eigen::numext::bit_cast<float>(denorm_magic_bits));
 
       // and one integer subtract of the bias later, we have our final float!
-      o.x = static_cast<numext::uint16_t>(f.u - denorm_magic.u);
+      o.x = static_cast<numext::uint16_t>(f_bits - denorm_magic_bits);
     } else {
-      unsigned int mant_odd = (f.u >> 13) & 1;  // resulting mantissa is odd
+      const uint32_t mant_odd = (f_bits >> 13) & 1;  // resulting mantissa is odd
 
       // update exponent, rounding bias part 1
       // Equivalent to `f.u += ((unsigned int)(15 - 127) << 23) + 0xfff`, but
       // without arithmetic overflow.
-      f.u += 0xc8000fffU;
+      f_bits += 0xc8000fffU;
       // rounding bias part 2
-      f.u += mant_odd;
+      f_bits += mant_odd;
       // take the bits!
-      o.x = static_cast<numext::uint16_t>(f.u >> 13);
+      o.x = static_cast<numext::uint16_t>(f_bits >> 13);
     }
   }
 
@@ -611,24 +605,23 @@
 #elif defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
   return static_cast<float>(h.x);
 #else
-  const float32_bits magic = {113 << 23};
-  const unsigned int shifted_exp = 0x7c00 << 13;  // exponent mask after shift
-  float32_bits o;
-
-  o.u = (h.x & 0x7fff) << 13;            // exponent/mantissa bits
-  unsigned int exp = shifted_exp & o.u;  // just the exponent
-  o.u += (127 - 15) << 23;               // exponent adjust
+  const float magic = Eigen::numext::bit_cast<float>(static_cast<uint32_t>(113 << 23));
+  const uint32_t shifted_exp = 0x7c00 << 13;  // exponent mask after shift
+  uint32_t o_bits = (h.x & 0x7fff) << 13;     // exponent/mantissa bits
+  const uint32_t exp = shifted_exp & o_bits;  // just the exponent
+  o_bits += (127 - 15) << 23;                 // exponent adjust
 
   // handle exponent special cases
-  if (exp == shifted_exp) {   // Inf/NaN?
-    o.u += (128 - 16) << 23;  // extra exp adjust
-  } else if (exp == 0) {      // Zero/Denormal?
-    o.u += 1 << 23;           // extra exp adjust
-    o.f -= magic.f;           // renormalize
+  if (exp == shifted_exp) {      // Inf/NaN?
+    o_bits += (128 - 16) << 23;  // extra exp adjust
+  } else if (exp == 0) {         // Zero/Denormal?
+    o_bits += 1 << 23;           // extra exp adjust
+    // renormalize
+    o_bits = Eigen::numext::bit_cast<uint32_t>(Eigen::numext::bit_cast<float>(o_bits) - magic);
   }
 
-  o.u |= (h.x & 0x8000) << 16;  // sign bit
-  return o.f;
+  o_bits |= (h.x & 0x8000) << 16;  // sign bit
+  return Eigen::numext::bit_cast<float>(o_bits);
 #endif
 }
 
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index fcde64a..2488be4 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -497,7 +497,7 @@
 namespace internal {
 template <typename Scalar>
 struct stem_function {
-  typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+  typedef internal::make_complex_t<Scalar> ComplexScalar;
   typedef ComplexScalar type(ComplexScalar, int);
 };
 }  // namespace internal
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 2e3f59e..39a117e 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -698,6 +698,12 @@
 }
 
 template <typename A, typename B>
+inline constexpr int size_prefer_fixed(A a, B b) {
+  plain_enum_asserts(a, b);
+  return int(a) == Dynamic ? int(b) : int(a);
+}
+
+template <typename A, typename B>
 inline constexpr bool enum_eq_not_dynamic(A a, B b) {
   plain_enum_asserts(a, b);
   if ((int)a == Dynamic || (int)b == Dynamic) return false;
@@ -745,6 +751,9 @@
 constexpr bool is_constant_evaluated() { return false; }
 #endif
 
+template <typename Scalar>
+using make_complex_t = std::conditional_t<NumTraits<Scalar>::IsComplex, Scalar, std::complex<Scalar>>;
+
 }  // end namespace internal
 
 }  // end namespace Eigen
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index cecbee8..a42bb0f 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -885,8 +885,12 @@
 };
 
 template <typename T, bool Vectorized>
-struct scalar_div_cost<std::complex<T>, Vectorized> {
-  enum { value = 2 * scalar_div_cost<T>::value + 6 * NumTraits<T>::MulCost + 3 * NumTraits<T>::AddCost };
+struct scalar_div_cost<T, Vectorized, std::enable_if_t<NumTraits<T>::IsComplex>> {
+  using RealScalar = typename NumTraits<T>::Real;
+  enum {
+    value =
+        2 * scalar_div_cost<RealScalar>::value + 6 * NumTraits<RealScalar>::MulCost + 3 * NumTraits<RealScalar>::AddCost
+  };
 };
 
 template <bool Vectorized>
diff --git a/Eigen/src/Eigenvalues/ComplexEigenSolver.h b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
index 60a24a8..50fa3b8 100644
--- a/Eigen/src/Eigenvalues/ComplexEigenSolver.h
+++ b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
@@ -70,7 +70,7 @@
    * \c float or \c double) and just \c Scalar if #Scalar is
    * complex.
    */
-  typedef std::complex<RealScalar> ComplexScalar;
+  typedef internal::make_complex_t<Scalar> ComplexScalar;
 
   /** \brief Type for vector of eigenvalues as returned by eigenvalues().
    *
diff --git a/Eigen/src/Eigenvalues/ComplexSchur.h b/Eigen/src/Eigenvalues/ComplexSchur.h
index a33e46e..22433f2 100644
--- a/Eigen/src/Eigenvalues/ComplexSchur.h
+++ b/Eigen/src/Eigenvalues/ComplexSchur.h
@@ -75,7 +75,7 @@
    * \c float or \c double) and just \c Scalar if #Scalar is
    * complex.
    */
-  typedef std::complex<RealScalar> ComplexScalar;
+  typedef internal::make_complex_t<Scalar> ComplexScalar;
 
   /** \brief Type for the matrices in the Schur decomposition.
    *
diff --git a/Eigen/src/Eigenvalues/EigenSolver.h b/Eigen/src/Eigenvalues/EigenSolver.h
index f73d58f..9dba7bd 100644
--- a/Eigen/src/Eigenvalues/EigenSolver.h
+++ b/Eigen/src/Eigenvalues/EigenSolver.h
@@ -89,7 +89,7 @@
    * \c float or \c double) and just \c Scalar if #Scalar is
    * complex.
    */
-  typedef std::complex<RealScalar> ComplexScalar;
+  typedef internal::make_complex_t<Scalar> ComplexScalar;
 
   /** \brief Type for vector of eigenvalues as returned by eigenvalues().
    *
diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
index b114cfa..c0a61dc 100644
--- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
+++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
@@ -83,7 +83,7 @@
    * \c float or \c double) and just \c Scalar if #Scalar is
    * complex.
    */
-  typedef std::complex<RealScalar> ComplexScalar;
+  typedef internal::make_complex_t<Scalar> ComplexScalar;
 
   /** \brief Type for vector of real scalar values eigenvalues as returned by betas().
    *
diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h
index 3466f51..5915387 100644
--- a/Eigen/src/Eigenvalues/RealQZ.h
+++ b/Eigen/src/Eigenvalues/RealQZ.h
@@ -69,7 +69,7 @@
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
   };
   typedef typename MatrixType::Scalar Scalar;
-  typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+  typedef internal::make_complex_t<Scalar> ComplexScalar;
   typedef Eigen::Index Index;  ///< \deprecated since Eigen 3.3
 
   typedef Matrix<ComplexScalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> EigenvalueType;
diff --git a/Eigen/src/Eigenvalues/RealSchur.h b/Eigen/src/Eigenvalues/RealSchur.h
index 5cef658..54a74e2 100644
--- a/Eigen/src/Eigenvalues/RealSchur.h
+++ b/Eigen/src/Eigenvalues/RealSchur.h
@@ -66,7 +66,7 @@
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
   };
   typedef typename MatrixType::Scalar Scalar;
-  typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+  typedef internal::make_complex_t<Scalar> ComplexScalar;
   typedef Eigen::Index Index;  ///< \deprecated since Eigen 3.3
 
   typedef Matrix<ComplexScalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> EigenvalueType;
diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
index e3154b4..8fdeb84 100644
--- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
+++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
@@ -31,8 +31,6 @@
 template <typename MatrixType, typename Rhs, typename Dest, typename Preconditioner>
 bool bicgstab(const MatrixType& mat, const Rhs& rhs, Dest& x, const Preconditioner& precond, Index& iters,
               typename Dest::RealScalar& tol_error) {
-  using std::abs;
-  using std::sqrt;
   typedef typename Dest::RealScalar RealScalar;
   typedef typename Dest::Scalar Scalar;
   typedef Matrix<Scalar, Dynamic, 1> VectorType;
@@ -43,14 +41,15 @@
   VectorType r = rhs - mat * x;
   VectorType r0 = r;
 
-  RealScalar r0_sqnorm = r0.squaredNorm();
-  RealScalar rhs_sqnorm = rhs.squaredNorm();
-  if (rhs_sqnorm == 0) {
+  RealScalar r0_norm = r0.stableNorm();
+  RealScalar r_norm = r0_norm;
+  RealScalar rhs_norm = rhs.stableNorm();
+  if (rhs_norm == 0) {
     x.setZero();
     return true;
   }
   Scalar rho(1);
-  Scalar alpha(1);
+  Scalar alpha(0);
   Scalar w(1);
 
   VectorType v = VectorType::Zero(n), p = VectorType::Zero(n);
@@ -59,21 +58,22 @@
 
   VectorType s(n), t(n);
 
-  RealScalar tol2 = tol * tol * rhs_sqnorm;
-  RealScalar eps2 = NumTraits<Scalar>::epsilon() * NumTraits<Scalar>::epsilon();
+  RealScalar eps = NumTraits<Scalar>::epsilon();
   Index i = 0;
   Index restarts = 0;
 
-  while (r.squaredNorm() > tol2 && i < maxIters) {
+  while (r_norm > tol && i < maxIters) {
     Scalar rho_old = rho;
-
     rho = r0.dot(r);
-    if (abs(rho) < eps2 * r0_sqnorm) {
+    if (Eigen::numext::abs(rho) / Eigen::numext::maxi(r0_norm, r_norm) < eps * Eigen::numext::mini(r0_norm, r_norm)) {
       // The new residual vector became too orthogonal to the arbitrarily chosen direction r0
       // Let's restart with a new r0:
       r = rhs - mat * x;
       r0 = r;
-      rho = r0_sqnorm = r.squaredNorm();
+      rho = r.squaredNorm();
+      r0_norm = r.stableNorm();
+      alpha = Scalar(0);
+      w = Scalar(1);
       if (restarts++ == 0) i = 0;
     }
     Scalar beta = (rho / rho_old) * (alpha / w);
@@ -82,23 +82,38 @@
     y = precond.solve(p);
 
     v.noalias() = mat * y;
-
-    alpha = rho / r0.dot(v);
+    Scalar theta = r0.dot(v);
+    // If r0 and v are nearly orthogonal (|cos ∠(r0, v)| < eps), alpha = rho / theta is unreliable: restart with a random r0.
+    RealScalar v_norm = v.stableNorm();
+    if (Eigen::numext::abs(theta) / Eigen::numext::maxi(r0_norm, v_norm) < eps * Eigen::numext::mini(r0_norm, v_norm)) {
+      r = rhs - mat * x;
+      r0.setRandom();
+      r0_norm = r0.stableNorm();
+      rho = Scalar(1);
+      alpha = Scalar(0);
+      w = Scalar(1);
+      if (restarts++ == 0) i = 0;
+      continue;
+    }
+    alpha = rho / theta;
     s = r - alpha * v;
 
     z = precond.solve(s);
     t.noalias() = mat * z;
 
     RealScalar tmp = t.squaredNorm();
-    if (tmp > RealScalar(0))
+    if (tmp > RealScalar(0)) {
       w = t.dot(s) / tmp;
-    else
+    } else {
       w = Scalar(0);
+    }
     x += alpha * y + w * z;
     r = s - w * t;
+    r_norm = r.stableNorm();
     ++i;
   }
-  tol_error = sqrt(r.squaredNorm() / rhs_sqnorm);
+
+  tol_error = r_norm / rhs_norm;
   iters = i;
   return true;
 }
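
The rho and theta guards above share one pattern: a scale-invariant near-orthogonality test on stable norms. A minimal standalone sketch of that pattern (not part of the patch; the helper name is made up):

    // dot / max(na, nb) < eps * min(na, nb) is algebraically the same as
    // |dot| < eps * na * nb, but never forms the product of the two norms,
    // which could overflow or underflow for badly scaled problems.
    #include <Eigen/Dense>
    #include <algorithm>
    #include <cmath>

    bool nearly_orthogonal(const Eigen::VectorXd& a, const Eigen::VectorXd& b, double eps) {
      const double na = a.stableNorm();
      const double nb = b.stableNorm();
      if (na == 0.0 || nb == 0.0) return true;  // the zero vector is orthogonal to everything
      return std::abs(a.dot(b)) / std::max(na, nb) < eps * std::min(na, nb);
    }
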
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index 466834a..3e57764 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -243,7 +243,10 @@
       the LU decomposition.
     */
   inline RealScalar rcond() const {
-    eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
+    eigen_assert(m_isInitialized && "FullPivLU is not initialized.");
+    if (!isInvertible()) {
+      return RealScalar(0);
+    }
     return internal::rcond_estimate_helper(m_l1_norm, *this);
   }
 
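
With this change, a FullPivLU of a singular matrix reports a condition-number estimate of zero instead of forwarding to the estimate helper. Minimal usage sketch, public FullPivLU API only:

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::MatrixXd A(3, 3);
      A << 1, 2, 3,
           2, 4, 6,   // second row is twice the first, so A is rank deficient
           1, 0, 1;
      Eigen::FullPivLU<Eigen::MatrixXd> lu(A);
      std::cout << "invertible: " << lu.isInvertible()   // prints 0
                << ", rcond: " << lu.rcond() << "\n";    // prints 0 with this change
      return 0;
    }
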
diff --git a/debug/msvc/eigen.natvis b/debug/msvc/eigen.natvis
index da89857..22cf346 100644
--- a/debug/msvc/eigen.natvis
+++ b/debug/msvc/eigen.natvis
@@ -1,235 +1,235 @@
-<?xml version="1.0" encoding="utf-8"?>

-

-<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">

-

-  <!-- Fixed x Fixed Matrix -->

-  <Type Name="Eigen::Matrix&lt;*,*,*,*,*,*&gt;">      

-      <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>

-      <DisplayString>[{$T2}, {$T3}] (fixed matrix)</DisplayString>

-      <Expand>

-        <ArrayItems Condition="Flags%2"> <!-- row major layout -->

-          <Rank>2</Rank>

-          <Size>$i==0 ? $T2 : $T3</Size>

-          <ValuePointer>m_storage.m_data.array</ValuePointer>

-        </ArrayItems>

-        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->

-          <Direction>Backward</Direction>

-          <Rank>2</Rank>

-          <Size>$i==0 ? $T2 : $T3</Size>

-          <ValuePointer>m_storage.m_data.array</ValuePointer>

-        </ArrayItems>

-      </Expand>

-  </Type>

-  

-  <!-- 2 x 2 Matrix -->

-  <Type Name="Eigen::Matrix&lt;*,2,2,*,*,*&gt;">      

-      <AlternativeType Name="Eigen::Array&lt;*,2,2,*,*,*&gt;"/>

-      <DisplayString>[2, 2] (fixed matrix)</DisplayString>

-      <Expand>

-        <Synthetic Name="[row 0]" Condition="Flags%2">

-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>

-        </Synthetic>

-        <Synthetic Name="[row 0]" Condition="!(Flags%2)">

-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]})</DisplayString>

-        </Synthetic>

-        <Synthetic Name="[row 1]" Condition="Flags%2">

-          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>

-        </Synthetic>

-        <Synthetic Name="[row 1]" Condition="!(Flags%2)">

-          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]})</DisplayString>

-        </Synthetic>        

-      </Expand>

-  </Type>

-  

-  <!-- 3 x 3 Matrix -->

-  <Type Name="Eigen::Matrix&lt;*,3,3,*,*,*&gt;">      

-      <AlternativeType Name="Eigen::Array&lt;*,3,3,*,*,*&gt;"/>

-      <DisplayString>[3, 3] (fixed matrix)</DisplayString>

-      <Expand>

-        <Synthetic Name="[row 0]" Condition="Flags%2">

-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>

-        </Synthetic>

-        <Synthetic Name="[row 0]" Condition="!(Flags%2)">

-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]})</DisplayString>

-        </Synthetic>

-        <Synthetic Name="[row 1]" Condition="Flags%2">

-          <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]})</DisplayString>

-        </Synthetic>

-        <Synthetic Name="[row 1]" Condition="!(Flags%2)">

-          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]})</DisplayString>

-        </Synthetic>

-        <Synthetic Name="[row 2]" Condition="Flags%2">

-          <DisplayString>({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]})</DisplayString>

-        </Synthetic>

-        <Synthetic Name="[row 2]" Condition="!(Flags%2)">

-          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]})</DisplayString>

-        </Synthetic>        

-      </Expand>

-  </Type>

-  

-  <!-- 4 x 4 Matrix -->

-  <Type Name="Eigen::Matrix&lt;*,4,4,*,*,*&gt;">      

-      <AlternativeType Name="Eigen::Array&lt;*,4,4,*,*,*&gt;"/>

-      <DisplayString>[4, 4] (fixed matrix)</DisplayString>

-      <Expand>

-        <Synthetic Name="[row 0]" Condition="Flags%2">

-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>

-        </Synthetic>

-        <Synthetic Name="[row 0]" Condition="!(Flags%2)">

-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]})</DisplayString>

-        </Synthetic>

-        <Synthetic Name="[row 1]" Condition="Flags%2">

-          <DisplayString>({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]})</DisplayString>

-        </Synthetic>

-        <Synthetic Name="[row 1]" Condition="!(Flags%2)">

-          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]})</DisplayString>

-        </Synthetic>

-        <Synthetic Name="[row 2]" Condition="Flags%2">

-          <DisplayString>({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]})</DisplayString>

-        </Synthetic>

-        <Synthetic Name="[row 2]" Condition="!(Flags%2)">

-          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]})</DisplayString>

-        </Synthetic>

-        <Synthetic Name="[row 3]" Condition="Flags%2">

-          <DisplayString>({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]})</DisplayString>

-        </Synthetic>

-        <Synthetic Name="[row 3]" Condition="!(Flags%2)">

-          <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]})</DisplayString>

-        </Synthetic>        

-      </Expand>

-  </Type>  

-  

-  <!-- Dynamic x Dynamic Matrix -->

-  <Type Name="Eigen::Matrix&lt;*,-1,-1,*,*,*&gt;">      

-      <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>

-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

-      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix)</DisplayString>

-      <Expand>

-        <ArrayItems Condition="Flags%2"> <!-- row major layout -->

-          <Rank>2</Rank>

-          <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>

-          <ValuePointer>m_storage.m_data</ValuePointer>

-        </ArrayItems>

-        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->

-          <Direction>Backward</Direction>

-          <Rank>2</Rank>

-          <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>

-          <ValuePointer>m_storage.m_data</ValuePointer>

-        </ArrayItems>

-      </Expand>

-  </Type>

-  

-  <!-- Fixed x Dynamic Matrix -->

-  <Type Name="Eigen::Matrix&lt;*,*,-1,*,*,*&gt;">

-      <AlternativeType Name="Eigen::Array&lt;*,*,-1,*,*,*&gt;"/>

-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

-      <DisplayString Condition="m_storage.m_data != 0">[{$T2}, {m_storage.m_cols}] (dynamic column matrix)</DisplayString>

-      <Expand>

-        <ArrayItems Condition="Flags%2"> <!-- row major layout -->

-          <Rank>2</Rank>

-          <Size>$i==0 ? $T2 : m_storage.m_cols</Size>

-          <ValuePointer>m_storage.m_data</ValuePointer>

-        </ArrayItems>

-        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->

-          <Direction>Backward</Direction>

-          <Rank>2</Rank>

-          <Size>$i==0 ? $T2 : m_storage.m_cols</Size>

-          <ValuePointer>m_storage.m_data</ValuePointer>

-        </ArrayItems>

-      </Expand>

-  </Type>

-  

-  <!-- Dynamic x Fixed Matrix -->

-  <Type Name="Eigen::Matrix&lt;*,-1,*,*,*,*&gt;">

-      <AlternativeType Name="Eigen::Array&lt;*,-1,*,*,*,*&gt;"/>

-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

-      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {$T2}] (dynamic row matrix)</DisplayString>

-      <Expand>

-        <ArrayItems Condition="Flags%2"> <!-- row major layout -->

-          <Rank>2</Rank>

-          <Size>$i==0 ? m_storage.m_rows : $T2</Size>

-          <ValuePointer>m_storage.m_data</ValuePointer>

-        </ArrayItems>

-        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->

-          <Direction>Backward</Direction>

-          <Rank>2</Rank>

-          <Size>$i==0 ? m_storage.m_rows : $T2</Size>

-          <ValuePointer>m_storage.m_data</ValuePointer>

-        </ArrayItems>

-      </Expand>

-  </Type>

-  

-  <!-- Dynamic Column Vector -->

-  <Type Name="Eigen::Matrix&lt;*,1,-1,*,*,*&gt;">

-      <AlternativeType Name="Eigen::Array&lt;*,1,-1,*,*,*&gt;"/>

-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

-      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_cols}] (dynamic column vector)</DisplayString>

-      <Expand>

-        <Item Name="[size]">m_storage.m_cols</Item>

-        <ArrayItems>

-          <Size>m_storage.m_cols</Size>

-          <ValuePointer>m_storage.m_data</ValuePointer>

-        </ArrayItems>

-      </Expand>

-  </Type>

-  

-  <!-- Dynamic Row Vector -->

-  <Type Name="Eigen::Matrix&lt;*,-1,1,*,*,*&gt;">

-      <AlternativeType Name="Eigen::Array&lt;*,-1,1,*,*,*&gt;"/>

-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

-      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}] (dynamic row vector)</DisplayString>

-      <Expand>

-        <Item Name="[size]">m_storage.m_rows</Item>

-        <ArrayItems>

-          <Size>m_storage.m_rows</Size>

-          <ValuePointer>m_storage.m_data</ValuePointer>

-        </ArrayItems>

-      </Expand>

-  </Type>

-  

-  <!-- Fixed Vector -->

-  <Type Name="Eigen::Matrix&lt;*,1,1,*,*,*&gt;">

-      <AlternativeType Name="Eigen::Array&lt;*,1,1,*,*,*&gt;"/>

-      <DisplayString>[1] ({m_storage.m_data.array[0]})</DisplayString>

-      <Expand>

-        <Item Name="[x]">m_storage.m_data.array[0]</Item>

-      </Expand>

-  </Type>

-  

-  <Type Name="Eigen::Matrix&lt;*,2,1,*,*,*&gt;">

-      <AlternativeType Name="Eigen::Matrix&lt;*,1,2,*,*,*&gt;"/>

-      <AlternativeType Name="Eigen::Array&lt;*,2,1,*,*,*&gt;"/>

-      <AlternativeType Name="Eigen::Array&lt;*,1,2,*,*,*&gt;"/>

-      <DisplayString>[2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>

-      <Expand>

-        <Item Name="[x]">m_storage.m_data.array[0]</Item>

-        <Item Name="[y]">m_storage.m_data.array[1]</Item>

-      </Expand>

-  </Type>

-  

-  <Type Name="Eigen::Matrix&lt;*,3,1,*,*,*&gt;">

-      <AlternativeType Name="Eigen::Matrix&lt;*,1,3,*,*,*&gt;"/>

-      <AlternativeType Name="Eigen::Array&lt;*,3,1,*,*,*&gt;"/>

-      <AlternativeType Name="Eigen::Array&lt;*,1,3,*,*,*&gt;"/>

-      <DisplayString>[3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>

-      <Expand>

-        <Item Name="[x]">m_storage.m_data.array[0]</Item>

-        <Item Name="[y]">m_storage.m_data.array[1]</Item>

-        <Item Name="[z]">m_storage.m_data.array[2]</Item>

-      </Expand>

-  </Type>

-  

-    <Type Name="Eigen::Matrix&lt;*,4,1,*,*,*&gt;">

-      <AlternativeType Name="Eigen::Matrix&lt;*,1,4,*,*,*&gt;"/>

-      <AlternativeType Name="Eigen::Array&lt;*,4,1,*,*,*&gt;"/>

-      <AlternativeType Name="Eigen::Array&lt;*,1,4,*,*,*&gt;"/>

-      <DisplayString>[4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>

-      <Expand>

-        <Item Name="[x]">m_storage.m_data.array[0]</Item>

-        <Item Name="[y]">m_storage.m_data.array[1]</Item>

-        <Item Name="[z]">m_storage.m_data.array[2]</Item>

-        <Item Name="[w]">m_storage.m_data.array[3]</Item>

-      </Expand>

-  </Type>

-

-</AutoVisualizer>

+<?xml version="1.0" encoding="utf-8"?>
+
+<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
+
+  <!-- Fixed x Fixed Matrix -->
+  <Type Name="Eigen::Matrix&lt;*,*,*,*,*,*&gt;">      
+      <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>
+      <DisplayString>[{$T2}, {$T3}] (fixed matrix)</DisplayString>
+      <Expand>
+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+          <Rank>2</Rank>
+          <Size>$i==0 ? $T2 : $T3</Size>
+          <ValuePointer>m_storage.m_data.array</ValuePointer>
+        </ArrayItems>
+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+          <Direction>Backward</Direction>
+          <Rank>2</Rank>
+          <Size>$i==0 ? $T2 : $T3</Size>
+          <ValuePointer>m_storage.m_data.array</ValuePointer>
+        </ArrayItems>
+      </Expand>
+  </Type>
+  
+  <!-- 2 x 2 Matrix -->
+  <Type Name="Eigen::Matrix&lt;*,2,2,*,*,*&gt;">      
+      <AlternativeType Name="Eigen::Array&lt;*,2,2,*,*,*&gt;"/>
+      <DisplayString>[2, 2] (fixed matrix)</DisplayString>
+      <Expand>
+        <Synthetic Name="[row 0]" Condition="Flags%2">
+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 0]" Condition="!(Flags%2)">
+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 1]" Condition="Flags%2">
+          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 1]" Condition="!(Flags%2)">
+          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]})</DisplayString>
+        </Synthetic>        
+      </Expand>
+  </Type>
+  
+  <!-- 3 x 3 Matrix -->
+  <Type Name="Eigen::Matrix&lt;*,3,3,*,*,*&gt;">      
+      <AlternativeType Name="Eigen::Array&lt;*,3,3,*,*,*&gt;"/>
+      <DisplayString>[3, 3] (fixed matrix)</DisplayString>
+      <Expand>
+        <Synthetic Name="[row 0]" Condition="Flags%2">
+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 0]" Condition="!(Flags%2)">
+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 1]" Condition="Flags%2">
+          <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 1]" Condition="!(Flags%2)">
+          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 2]" Condition="Flags%2">
+          <DisplayString>({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 2]" Condition="!(Flags%2)">
+          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]})</DisplayString>
+        </Synthetic>        
+      </Expand>
+  </Type>
+  
+  <!-- 4 x 4 Matrix -->
+  <Type Name="Eigen::Matrix&lt;*,4,4,*,*,*&gt;">      
+      <AlternativeType Name="Eigen::Array&lt;*,4,4,*,*,*&gt;"/>
+      <DisplayString>[4, 4] (fixed matrix)</DisplayString>
+      <Expand>
+        <Synthetic Name="[row 0]" Condition="Flags%2">
+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 0]" Condition="!(Flags%2)">
+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 1]" Condition="Flags%2">
+          <DisplayString>({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 1]" Condition="!(Flags%2)">
+          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 2]" Condition="Flags%2">
+          <DisplayString>({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 2]" Condition="!(Flags%2)">
+          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 3]" Condition="Flags%2">
+          <DisplayString>({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]})</DisplayString>
+        </Synthetic>
+        <Synthetic Name="[row 3]" Condition="!(Flags%2)">
+          <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]})</DisplayString>
+        </Synthetic>        
+      </Expand>
+  </Type>  
+  
+  <!-- Dynamic x Dynamic Matrix -->
+  <Type Name="Eigen::Matrix&lt;*,-1,-1,*,*,*&gt;">      
+      <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>
+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix)</DisplayString>
+      <Expand>
+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+          <Rank>2</Rank>
+          <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
+          <ValuePointer>m_storage.m_data</ValuePointer>
+        </ArrayItems>
+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+          <Direction>Backward</Direction>
+          <Rank>2</Rank>
+          <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
+          <ValuePointer>m_storage.m_data</ValuePointer>
+        </ArrayItems>
+      </Expand>
+  </Type>
+  
+  <!-- Fixed x Dynamic Matrix -->
+  <Type Name="Eigen::Matrix&lt;*,*,-1,*,*,*&gt;">
+      <AlternativeType Name="Eigen::Array&lt;*,*,-1,*,*,*&gt;"/>
+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+      <DisplayString Condition="m_storage.m_data != 0">[{$T2}, {m_storage.m_cols}] (dynamic column matrix)</DisplayString>
+      <Expand>
+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+          <Rank>2</Rank>
+          <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
+          <ValuePointer>m_storage.m_data</ValuePointer>
+        </ArrayItems>
+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+          <Direction>Backward</Direction>
+          <Rank>2</Rank>
+          <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
+          <ValuePointer>m_storage.m_data</ValuePointer>
+        </ArrayItems>
+      </Expand>
+  </Type>
+  
+  <!-- Dynamic x Fixed Matrix -->
+  <Type Name="Eigen::Matrix&lt;*,-1,*,*,*,*&gt;">
+      <AlternativeType Name="Eigen::Array&lt;*,-1,*,*,*,*&gt;"/>
+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {$T2}] (dynamic row matrix)</DisplayString>
+      <Expand>
+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+          <Rank>2</Rank>
+          <Size>$i==0 ? m_storage.m_rows : $T2</Size>
+          <ValuePointer>m_storage.m_data</ValuePointer>
+        </ArrayItems>
+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+          <Direction>Backward</Direction>
+          <Rank>2</Rank>
+          <Size>$i==0 ? m_storage.m_rows : $T2</Size>
+          <ValuePointer>m_storage.m_data</ValuePointer>
+        </ArrayItems>
+      </Expand>
+  </Type>
+  
+  <!-- Dynamic Column Vector -->
+  <Type Name="Eigen::Matrix&lt;*,1,-1,*,*,*&gt;">
+      <AlternativeType Name="Eigen::Array&lt;*,1,-1,*,*,*&gt;"/>
+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_cols}] (dynamic column vector)</DisplayString>
+      <Expand>
+        <Item Name="[size]">m_storage.m_cols</Item>
+        <ArrayItems>
+          <Size>m_storage.m_cols</Size>
+          <ValuePointer>m_storage.m_data</ValuePointer>
+        </ArrayItems>
+      </Expand>
+  </Type>
+  
+  <!-- Dynamic Row Vector -->
+  <Type Name="Eigen::Matrix&lt;*,-1,1,*,*,*&gt;">
+      <AlternativeType Name="Eigen::Array&lt;*,-1,1,*,*,*&gt;"/>
+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}] (dynamic row vector)</DisplayString>
+      <Expand>
+        <Item Name="[size]">m_storage.m_rows</Item>
+        <ArrayItems>
+          <Size>m_storage.m_rows</Size>
+          <ValuePointer>m_storage.m_data</ValuePointer>
+        </ArrayItems>
+      </Expand>
+  </Type>
+  
+  <!-- Fixed Vector -->
+  <Type Name="Eigen::Matrix&lt;*,1,1,*,*,*&gt;">
+      <AlternativeType Name="Eigen::Array&lt;*,1,1,*,*,*&gt;"/>
+      <DisplayString>[1] ({m_storage.m_data.array[0]})</DisplayString>
+      <Expand>
+        <Item Name="[x]">m_storage.m_data.array[0]</Item>
+      </Expand>
+  </Type>
+  
+  <Type Name="Eigen::Matrix&lt;*,2,1,*,*,*&gt;">
+      <AlternativeType Name="Eigen::Matrix&lt;*,1,2,*,*,*&gt;"/>
+      <AlternativeType Name="Eigen::Array&lt;*,2,1,*,*,*&gt;"/>
+      <AlternativeType Name="Eigen::Array&lt;*,1,2,*,*,*&gt;"/>
+      <DisplayString>[2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
+      <Expand>
+        <Item Name="[x]">m_storage.m_data.array[0]</Item>
+        <Item Name="[y]">m_storage.m_data.array[1]</Item>
+      </Expand>
+  </Type>
+  
+  <Type Name="Eigen::Matrix&lt;*,3,1,*,*,*&gt;">
+      <AlternativeType Name="Eigen::Matrix&lt;*,1,3,*,*,*&gt;"/>
+      <AlternativeType Name="Eigen::Array&lt;*,3,1,*,*,*&gt;"/>
+      <AlternativeType Name="Eigen::Array&lt;*,1,3,*,*,*&gt;"/>
+      <DisplayString>[3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
+      <Expand>
+        <Item Name="[x]">m_storage.m_data.array[0]</Item>
+        <Item Name="[y]">m_storage.m_data.array[1]</Item>
+        <Item Name="[z]">m_storage.m_data.array[2]</Item>
+      </Expand>
+  </Type>
+  
+    <Type Name="Eigen::Matrix&lt;*,4,1,*,*,*&gt;">
+      <AlternativeType Name="Eigen::Matrix&lt;*,1,4,*,*,*&gt;"/>
+      <AlternativeType Name="Eigen::Array&lt;*,4,1,*,*,*&gt;"/>
+      <AlternativeType Name="Eigen::Array&lt;*,1,4,*,*,*&gt;"/>
+      <DisplayString>[4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
+      <Expand>
+        <Item Name="[x]">m_storage.m_data.array[0]</Item>
+        <Item Name="[y]">m_storage.m_data.array[1]</Item>
+        <Item Name="[z]">m_storage.m_data.array[2]</Item>
+        <Item Name="[w]">m_storage.m_data.array[3]</Item>
+      </Expand>
+  </Type>
+
+</AutoVisualizer>
diff --git a/debug/msvc/eigen_autoexp_part.dat b/debug/msvc/eigen_autoexp_part.dat
index 35ef580..273c10d 100644
--- a/debug/msvc/eigen_autoexp_part.dat
+++ b/debug/msvc/eigen_autoexp_part.dat
@@ -1,295 +1,295 @@
-; ***************************************************************

-; * Eigen Visualizer

-; *

-; * Author: Hauke Heibel <hauke.heibel@gmail.com>

-; *

-; * Support the enhanced debugging of the following Eigen

-; * types (*: any, +:fixed dimension) :

-; *

-; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*>

-; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*>

-; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*>

-; * - Eigen::Matrix<*,-1,-1,*,*,*>

-; * - Eigen::Matrix<*,+,-1,*,*,*>

-; * - Eigen::Matrix<*,-1,+,*,*,*>

-; * - Eigen::Matrix<*,+,+,*,*,*>

-; *

-; * Matrices are displayed properly independently of the memory

-; * alignment (RowMajor vs. ColMajor).

-; *

-; * This file is distributed WITHOUT ANY WARRANTY. Please ensure

-; * that your original autoexp.dat file is copied to a safe 

-; * place before proceeding with its modification.

-; ***************************************************************

-

-[Visualizer]

-

-; Fixed size 4-vectors

-Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{

-   children

-   (

-      #(

-        [internals]: [$c,!],

-         x : ($c.m_storage.m_data.array)[0],

-         y : ($c.m_storage.m_data.array)[1],

-         z : ($c.m_storage.m_data.array)[2],

-         w : ($c.m_storage.m_data.array)[3]

-      )

-   )

-

-   preview

-   (

-      #(

-        "[",

-        4,

-        "](",

-        #array(expr: $e.m_storage.m_data.array[$i], size: 4),

-        ")"

-      )

-   )

-}

-

-; Fixed size 3-vectors

-Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{

-   children

-   (

-      #(

-        [internals]: [$c,!],

-         x : ($c.m_storage.m_data.array)[0],

-         y : ($c.m_storage.m_data.array)[1],

-         z : ($c.m_storage.m_data.array)[2]

-      )

-   )

-

-   preview

-   (

-      #(

-        "[",

-        3,

-        "](",

-        #array(expr: $e.m_storage.m_data.array[$i], size: 3),

-        ")"

-      )

-   )

-}

-

-; Fixed size 2-vectors

-Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{

-   children

-   (

-      #(

-        [internals]: [$c,!],

-         x : ($c.m_storage.m_data.array)[0],

-         y : ($c.m_storage.m_data.array)[1]

-      )

-   )

-

-   preview

-   (

-      #(

-        "[",

-        2,

-        "](",

-        #array(expr: $e.m_storage.m_data.array[$i], size: 2),

-        ")"

-      )

-   )

-}

-

-; Fixed size 1-vectors

-Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{

-   children

-   (

-      #(

-        [internals]: [$c,!],

-         x : ($c.m_storage.m_data.array)[0]

-      )

-   )

-

-   preview

-   (

-      #(

-        "[",

-        1,

-        "](",

-        #array(expr: $e.m_storage.m_data.array[$i], size: 1),

-        ")"

-      )

-   )

-}

-

-; Dynamic matrices (ColMajor and RowMajor support)

-Eigen::Matrix<*,-1,-1,*,*,*>{

-  children

-   (

-      #(

-         [internals]: [$c,!],

-         rows: $c.m_storage.m_rows,

-         cols: $c.m_storage.m_cols,

-         ; Check for RowMajorBit

-         #if ($c.Flags & 0x1) (

-             #array(

-                rank: 2,

-                base: 0,

-                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 

-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols

-             )

-         ) #else (

-             #array(

-                rank: 2,

-                base: 0,

-                expr: ($c.m_storage.m_data)[$i],

-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols

-             )

-         )

-      )

-   )

-

-   preview

-   (

-     #(

-         "[",

-           $c.m_storage.m_rows,

-         ",",

-           $c.m_storage.m_cols,

-         "](",

-           #array(

-            expr :    [($c.m_storage.m_data)[$i],g],

-            size :    $c.m_storage.m_rows*$c.m_storage.m_cols

-           ),

-         ")"

-      )

-   )

-}

-

-; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support)

-Eigen::Matrix<*,*,-1,*,*,*>{

-  children

-   (

-      #(

-         [internals]: [$c,!],

-         rows: $c.RowsAtCompileTime,

-         cols: $c.m_storage.m_cols,

-         ; Check for RowMajorBit

-         #if ($c.Flags & 0x1) (

-             #array(

-                rank: 2,

-                base: 0,

-                expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)],

-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols

-             )

-         ) #else (

-             #array(

-                rank: 2,

-                base: 0,

-                expr: ($c.m_storage.m_data)[$i],

-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols

-             )

-         )

-      )

-   )

-

-   preview

-   (

-     #(

-         "[",

-           $c.RowsAtCompileTime,

-         ",",

-           $c.m_storage.m_cols,

-         "](",

-           #array(

-            expr :    [($c.m_storage.m_data)[$i],g],

-            size :    $c.RowsAtCompileTime*$c.m_storage.m_cols

-           ),

-         ")"

-      )

-   )

-}

-

-; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support)

-Eigen::Matrix<*,-1,*,*,*,*>{

-  children

-   (

-      #(

-         [internals]: [$c,!],

-         rows: $c.m_storage.m_rows,

-         cols: $c.ColsAtCompileTime,

-         ; Check for RowMajorBit

-         #if ($c.Flags & 0x1) (

-             #array(

-                rank: 2,

-                base: 0,

-                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 

-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime

-             )

-         ) #else (

-             #array(

-                rank: 2,

-                base: 0,

-                expr: ($c.m_storage.m_data)[$i],

-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime

-             )

-         )

-      )

-   )

-

-   preview

-   (

-     #(

-         "[",

-           $c.m_storage.m_rows,

-         ",",

-           $c.ColsAtCompileTime,

-         "](",

-           #array(

-            expr :    [($c.m_storage.m_data)[$i],g],

-            size :    $c.m_storage.m_rows*$c.ColsAtCompileTime

-           ),

-         ")"

-      )

-   )

-}

-

-; Fixed size matrix (ColMajor and RowMajor support)

-Eigen::Matrix<*,*,*,*,*,*>{

-  children

-   (

-      #(

-         [internals]: [$c,!],

-         rows: $c.RowsAtCompileTime,

-         cols: $c.ColsAtCompileTime,

-         ; Check for RowMajorBit

-         #if ($c.Flags & 0x1) (

-             #array(

-                rank: 2,

-                base: 0,

-                expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], 

-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime

-             )

-         ) #else (

-             #array(

-                rank: 2,

-                base: 0,

-                expr: ($c.m_storage.m_data.array)[$i],

-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime

-             )

-         )

-      )

-   )

-

-   preview

-   (

-     #(

-         "[",

-           $c.RowsAtCompileTime,

-         ",",

-           $c.ColsAtCompileTime,

-         "](",

-           #array(

-            expr :    [($c.m_storage.m_data.array)[$i],g],

-            size :    $c.RowsAtCompileTime*$c.ColsAtCompileTime

-           ),

-         ")"

-      )

-   )

-}

+; ***************************************************************
+; * Eigen Visualizer
+; *
+; * Author: Hauke Heibel <hauke.heibel@gmail.com>
+; *
+; * Support the enhanced debugging of the following Eigen
+; * types (*: any, +:fixed dimension) :
+; *
+; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*>
+; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*>
+; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*>
+; * - Eigen::Matrix<*,-1,-1,*,*,*>
+; * - Eigen::Matrix<*,+,-1,*,*,*>
+; * - Eigen::Matrix<*,-1,+,*,*,*>
+; * - Eigen::Matrix<*,+,+,*,*,*>
+; *
+; * Matrices are displayed properly independently of the memory
+; * alignment (RowMajor vs. ColMajor).
+; *
+; * This file is distributed WITHOUT ANY WARRANTY. Please ensure
+; * that your original autoexp.dat file is copied to a safe 
+; * place before proceeding with its modification.
+; ***************************************************************
+
+[Visualizer]
+
+; Fixed size 4-vectors
+Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{
+   children
+   (
+      #(
+        [internals]: [$c,!],
+         x : ($c.m_storage.m_data.array)[0],
+         y : ($c.m_storage.m_data.array)[1],
+         z : ($c.m_storage.m_data.array)[2],
+         w : ($c.m_storage.m_data.array)[3]
+      )
+   )
+
+   preview
+   (
+      #(
+        "[",
+        4,
+        "](",
+        #array(expr: $e.m_storage.m_data.array[$i], size: 4),
+        ")"
+      )
+   )
+}
+
+; Fixed size 3-vectors
+Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{
+   children
+   (
+      #(
+        [internals]: [$c,!],
+         x : ($c.m_storage.m_data.array)[0],
+         y : ($c.m_storage.m_data.array)[1],
+         z : ($c.m_storage.m_data.array)[2]
+      )
+   )
+
+   preview
+   (
+      #(
+        "[",
+        3,
+        "](",
+        #array(expr: $e.m_storage.m_data.array[$i], size: 3),
+        ")"
+      )
+   )
+}
+
+; Fixed size 2-vectors
+Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{
+   children
+   (
+      #(
+        [internals]: [$c,!],
+         x : ($c.m_storage.m_data.array)[0],
+         y : ($c.m_storage.m_data.array)[1]
+      )
+   )
+
+   preview
+   (
+      #(
+        "[",
+        2,
+        "](",
+        #array(expr: $e.m_storage.m_data.array[$i], size: 2),
+        ")"
+      )
+   )
+}
+
+; Fixed size 1-vectors
+Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{
+   children
+   (
+      #(
+        [internals]: [$c,!],
+         x : ($c.m_storage.m_data.array)[0]
+      )
+   )
+
+   preview
+   (
+      #(
+        "[",
+        1,
+        "](",
+        #array(expr: $e.m_storage.m_data.array[$i], size: 1),
+        ")"
+      )
+   )
+}
+
+; Dynamic matrices (ColMajor and RowMajor support)
+Eigen::Matrix<*,-1,-1,*,*,*>{
+  children
+   (
+      #(
+         [internals]: [$c,!],
+         rows: $c.m_storage.m_rows,
+         cols: $c.m_storage.m_cols,
+         ; Check for RowMajorBit
+         #if ($c.Flags & 0x1) (
+             #array(
+                rank: 2,
+                base: 0,
+                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 
+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
+             )
+         ) #else (
+             #array(
+                rank: 2,
+                base: 0,
+                expr: ($c.m_storage.m_data)[$i],
+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
+             )
+         )
+      )
+   )
+
+   preview
+   (
+     #(
+         "[",
+           $c.m_storage.m_rows,
+         ",",
+           $c.m_storage.m_cols,
+         "](",
+           #array(
+            expr :    [($c.m_storage.m_data)[$i],g],
+            size :    $c.m_storage.m_rows*$c.m_storage.m_cols
+           ),
+         ")"
+      )
+   )
+}
+
+; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support)
+Eigen::Matrix<*,*,-1,*,*,*>{
+  children
+   (
+      #(
+         [internals]: [$c,!],
+         rows: $c.RowsAtCompileTime,
+         cols: $c.m_storage.m_cols,
+         ; Check for RowMajorBit
+         #if ($c.Flags & 0x1) (
+             #array(
+                rank: 2,
+                base: 0,
+                expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)],
+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
+             )
+         ) #else (
+             #array(
+                rank: 2,
+                base: 0,
+                expr: ($c.m_storage.m_data)[$i],
+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
+             )
+         )
+      )
+   )
+
+   preview
+   (
+     #(
+         "[",
+           $c.RowsAtCompileTime,
+         ",",
+           $c.m_storage.m_cols,
+         "](",
+           #array(
+            expr :    [($c.m_storage.m_data)[$i],g],
+            size :    $c.RowsAtCompileTime*$c.m_storage.m_cols
+           ),
+         ")"
+      )
+   )
+}
+
+; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support)
+Eigen::Matrix<*,-1,*,*,*,*>{
+  children
+   (
+      #(
+         [internals]: [$c,!],
+         rows: $c.m_storage.m_rows,
+         cols: $c.ColsAtCompileTime,
+         ; Check for RowMajorBit
+         #if ($c.Flags & 0x1) (
+             #array(
+                rank: 2,
+                base: 0,
+                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 
+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
+             )
+         ) #else (
+             #array(
+                rank: 2,
+                base: 0,
+                expr: ($c.m_storage.m_data)[$i],
+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
+             )
+         )
+      )
+   )
+
+   preview
+   (
+     #(
+         "[",
+           $c.m_storage.m_rows,
+         ",",
+           $c.ColsAtCompileTime,
+         "](",
+           #array(
+            expr :    [($c.m_storage.m_data)[$i],g],
+            size :    $c.m_storage.m_rows*$c.ColsAtCompileTime
+           ),
+         ")"
+      )
+   )
+}
+
+; Fixed size matrix (ColMajor and RowMajor support)
+Eigen::Matrix<*,*,*,*,*,*>{
+  children
+   (
+      #(
+         [internals]: [$c,!],
+         rows: $c.RowsAtCompileTime,
+         cols: $c.ColsAtCompileTime,
+         ; Check for RowMajorBit
+         #if ($c.Flags & 0x1) (
+             #array(
+                rank: 2,
+                base: 0,
+                expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], 
+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
+             )
+         ) #else (
+             #array(
+                rank: 2,
+                base: 0,
+                expr: ($c.m_storage.m_data.array)[$i],
+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
+             )
+         )
+      )
+   )
+
+   preview
+   (
+     #(
+         "[",
+           $c.RowsAtCompileTime,
+         ",",
+           $c.ColsAtCompileTime,
+         "](",
+           #array(
+            expr :    [($c.m_storage.m_data.array)[$i],g],
+            size :    $c.RowsAtCompileTime*$c.ColsAtCompileTime
+           ),
+         ")"
+      )
+   )
+}
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
index 439336c..0e582a7 100644
--- a/doc/Doxyfile.in
+++ b/doc/Doxyfile.in
@@ -112,11 +112,13 @@
 DISABLE_INDEX          = YES
 FULL_SIDEBAR           = NO
 ENUM_VALUES_PER_LINE   = 1
-FORMULA_FONTSIZE       = 12
+USE_MATHJAX            = @EIGEN_DOXY_USE_MATHJAX@
 MATHJAX_RELPATH        = https://cdn.jsdelivr.net/npm/mathjax@2
 MATHJAX_EXTENSIONS     = TeX/AMSmath \
                          TeX/AMSsymbols
 GENERATE_LATEX         = NO
+EXTRA_PACKAGES         = amssymb \
+                         amsmath
 MACRO_EXPANSION        = YES
 EXPAND_ONLY_PREDEF     = YES
 PREDEFINED             = EIGEN_EMPTY_STRUCT \
@@ -160,6 +162,7 @@
 TAGFILES               = ${EIGEN_DOXY_TAGFILES}
 GENERATE_TAGFILE       = ${Eigen_BINARY_DIR}/doc/${EIGEN_DOXY_PROJECT_NAME}.doxytags
 EXTERNAL_GROUPS        = NO
+EXTERNAL_PAGES         = NO
 HIDE_UNDOC_RELATIONS   = NO
 HAVE_DOT               = YES
 COLLABORATION_GRAPH    = NO
@@ -172,3 +175,4 @@
 DOT_GRAPH_MAX_NODES    = 300
 GENERATE_DEPRECATEDLIST = NO
 GENERATE_TODOLIST      = NO
+WARN_AS_ERROR          = FAIL_ON_WARNINGS_PRINT
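
For context, the MathJax/LaTeX settings above exist to render Doxygen \f$...\f$ formulas in the headers. A hypothetical doc comment of that kind (not taken from the sources):

    #include <Eigen/Dense>

    /** \brief Computes the squared Euclidean norm \f$ \|x\|_2^2 = \sum_i x_i^2 \f$ of a vector. */
    inline double squared_norm_example(const Eigen::VectorXd& x) { return x.squaredNorm(); }
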
diff --git a/test/CustomComplex.h b/test/CustomComplex.h
new file mode 100644
index 0000000..048f65b
--- /dev/null
+++ b/test/CustomComplex.h
@@ -0,0 +1,132 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2025 The Eigen Authors.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TEST_CUSTOM_COMPLEX_H
+#define EIGEN_TEST_CUSTOM_COMPLEX_H
+
+#include <ostream>
+#include <sstream>
+
+namespace custom_complex {
+
+template <typename Real>
+struct CustomComplex {
+  CustomComplex() : re{0}, im{0} {}
+  CustomComplex(const CustomComplex& other) = default;
+  CustomComplex(CustomComplex&& other) = default;
+  CustomComplex& operator=(const CustomComplex& other) = default;
+  CustomComplex& operator=(CustomComplex&& other) = default;
+  CustomComplex(Real x) : re{x}, im{0} {}
+  CustomComplex(Real x, Real y) : re{x}, im{y} {}
+
+  CustomComplex operator+(const CustomComplex& other) const { return CustomComplex(re + other.re, im + other.im); }
+
+  CustomComplex operator-() const { return CustomComplex(-re, -im); }
+
+  CustomComplex operator-(const CustomComplex& other) const { return CustomComplex(re - other.re, im - other.im); }
+
+  CustomComplex operator*(const CustomComplex& other) const {
+    return CustomComplex(re * other.re - im * other.im, re * other.im + im * other.re);
+  }
+
+  CustomComplex operator/(const CustomComplex& other) const {
+    // Smith's complex division (https://arxiv.org/pdf/1210.4539.pdf),
+    // guards against over/under-flow.
+    const bool scale_imag = numext::abs(other.im) <= numext::abs(other.re);
+    const Real rscale = scale_imag ? Real(1) : other.re / other.im;
+    const Real iscale = scale_imag ? other.im / other.re : Real(1);
+    const Real denominator = other.re * rscale + other.im * iscale;
+    return CustomComplex((re * rscale + im * iscale) / denominator, (im * rscale - re * iscale) / denominator);
+  }
+
+  CustomComplex& operator+=(const CustomComplex& other) {
+    *this = *this + other;
+    return *this;
+  }
+  CustomComplex& operator-=(const CustomComplex& other) {
+    *this = *this - other;
+    return *this;
+  }
+  CustomComplex& operator*=(const CustomComplex& other) {
+    *this = *this * other;
+    return *this;
+  }
+  CustomComplex& operator/=(const CustomComplex& other) {
+    *this = *this / other;
+    return *this;
+  }
+
+  bool operator==(const CustomComplex& other) const {
+    return numext::equal_strict(re, other.re) && numext::equal_strict(im, other.im);
+  }
+  bool operator!=(const CustomComplex& other) const { return !(*this == other); }
+
+  friend CustomComplex operator+(const Real& a, const CustomComplex& b) { return CustomComplex(a + b.re, b.im); }
+
+  friend CustomComplex operator-(const Real& a, const CustomComplex& b) { return CustomComplex(a - b.re, -b.im); }
+
+  friend CustomComplex operator*(const Real& a, const CustomComplex& b) { return CustomComplex(a * b.re, a * b.im); }
+
+  friend CustomComplex operator*(const CustomComplex& a, const Real& b) { return CustomComplex(a.re * b, a.im * b); }
+
+  friend CustomComplex operator/(const CustomComplex& a, const Real& b) { return CustomComplex(a.re / b, a.im / b); }
+
+  friend std::ostream& operator<<(std::ostream& stream, const CustomComplex& x) {
+    std::stringstream ss;
+    ss << "(" << x.re << ", " << x.im << ")";
+    stream << ss.str();
+    return stream;
+  }
+
+  Real re;
+  Real im;
+};
+
+template <typename Real>
+Real real(const CustomComplex<Real>& x) {
+  return x.re;
+}
+template <typename Real>
+Real imag(const CustomComplex<Real>& x) {
+  return x.im;
+}
+template <typename Real>
+CustomComplex<Real> conj(const CustomComplex<Real>& x) {
+  return CustomComplex<Real>(x.re, -x.im);
+}
+template <typename Real>
+CustomComplex<Real> sqrt(const CustomComplex<Real>& x) {
+  return Eigen::internal::complex_sqrt(x);
+}
+template <typename Real>
+Real abs(const CustomComplex<Real>& x) {
+  return Eigen::numext::sqrt(x.re * x.re + x.im * x.im);
+}
+
+}  // namespace custom_complex
+
+template <typename Real>
+using CustomComplex = custom_complex::CustomComplex<Real>;
+
+namespace Eigen {
+template <typename Real>
+struct NumTraits<CustomComplex<Real>> : NumTraits<Real> {
+  enum { IsComplex = 1 };
+};
+
+namespace numext {
+template <typename Real>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isfinite)(const CustomComplex<Real>& x) {
+  return (numext::isfinite)(x.re) && (numext::isfinite)(x.im);
+}
+
+}  // namespace numext
+}  // namespace Eigen
+
+#endif  // EIGEN_TEST_CUSTOM_COMPLEX_H
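
A hedged sketch of how this scalar can be used from client or test code once NumTraits is specialized as above. CustomComplex.h calls numext:: unqualified, so Eigen's names must be visible before including it (the test harness provides the equivalent):

    #include <Eigen/Dense>
    #include <iostream>
    using namespace Eigen;
    #include "CustomComplex.h"

    int main() {
      using C = CustomComplex<double>;
      Matrix<C, 2, 2> M;
      M << C(1, 0), C(0, -1),
           C(0, 1), C(2, 0);
      Matrix<C, 2, 2> M2 = M * M;     // expression evaluation goes through the operators above
      std::cout << M2(0, 0) << "\n";  // uses the ostream operator defined in the header
      return 0;
    }
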
diff --git a/test/bicgstab.cpp b/test/bicgstab.cpp
index 3f53e3e..7ff2f3d 100644
--- a/test/bicgstab.cpp
+++ b/test/bicgstab.cpp
@@ -26,8 +26,63 @@
   // CALL_SUBTEST( check_sparse_square_solving(bicgstab_colmajor_ssor)     );
 }
 
+// https://gitlab.com/libeigen/eigen/-/issues/2856
+void test_2856() {
+  Eigen::MatrixXd D = Eigen::MatrixXd::Identity(14, 14);
+  D(6, 13) = 1;
+  D(13, 12) = 1;
+  using CSRMatrix = Eigen::SparseMatrix<double, Eigen::RowMajor>;
+  CSRMatrix A = D.sparseView();
+
+  Eigen::VectorXd b = Eigen::VectorXd::Zero(14);
+  b(12) = -1001;
+
+  Eigen::BiCGSTAB<CSRMatrix> solver;
+  solver.compute(A);
+  Eigen::VectorXd x = solver.solve(b);
+  Eigen::VectorXd expected = Eigen::VectorXd::Zero(14);
+  expected(6) = -1001;
+  expected(12) = -1001;
+  expected(13) = 1001;
+  VERIFY_IS_EQUAL(x, expected);
+
+  Eigen::VectorXd residual = b - A * x;
+  VERIFY(residual.isZero());
+}
+
+// https://gitlab.com/libeigen/eigen/-/issues/2899
+void test_2899() {
+  Eigen::MatrixXd A = Eigen::MatrixXd::Zero(4, 4);  // entries not assigned below are zero
+  A(0, 0) = 1;
+  A(1, 0) = -1.0 / 6;
+  A(1, 1) = 2.0 / 3;
+  A(1, 2) = -1.0 / 6;
+  A(1, 3) = -1.0 / 3;
+  A(2, 1) = -1.0 / 3;
+  A(2, 2) = 1;
+  A(2, 3) = -2.0 / 3;
+  A(3, 1) = -1.0 / 3;
+  A(3, 2) = -1.0 / 3;
+  A(3, 3) = 2.0 / 3;
+  Eigen::VectorXd b(4);
+  b(0) = 0;
+  b(1) = 1;
+  b(2) = 1;
+  b(3) = 1;
+  Eigen::BiCGSTAB<Eigen::MatrixXd> solver;
+  solver.compute(A);
+  Eigen::VectorXd x = solver.solve(b);
+  Eigen::VectorXd expected(4);
+  expected << 0, 15, 18, 18;
+  VERIFY_IS_APPROX(x, expected);
+  Eigen::VectorXd residual = b - A * x;
+  VERIFY(residual.isZero());
+}
+
 EIGEN_DECLARE_TEST(bicgstab) {
   CALL_SUBTEST_1((test_bicgstab_T<double, int>()));
   CALL_SUBTEST_2((test_bicgstab_T<std::complex<double>, int>()));
   CALL_SUBTEST_3((test_bicgstab_T<double, long int>()));
+  CALL_SUBTEST_4(test_2856());
+  CALL_SUBTEST_5(test_2899());
 }
diff --git a/test/eigensolver_complex.cpp b/test/eigensolver_complex.cpp
index afb24b9..76846a9 100644
--- a/test/eigensolver_complex.cpp
+++ b/test/eigensolver_complex.cpp
@@ -12,6 +12,7 @@
 #include <limits>
 #include <Eigen/Eigenvalues>
 #include <Eigen/LU>
+#include "CustomComplex.h"
 
 template <typename MatrixType>
 bool find_pivot(typename MatrixType::Scalar tol, MatrixType& diffs, Index col = 0) {
@@ -165,5 +166,8 @@
   // Test problem size constructors
   CALL_SUBTEST_5(ComplexEigenSolver<MatrixXf> tmp(s));
 
+  // Test custom complex scalar type.
+  CALL_SUBTEST_6(eigensolver(Matrix<CustomComplex<double>, 5, 5>()));
+
   TEST_SET_BUT_UNUSED_VARIABLE(s)
 }
diff --git a/test/lu.cpp b/test/lu.cpp
index 1792c2b..b20bcfc 100644
--- a/test/lu.cpp
+++ b/test/lu.cpp
@@ -195,6 +195,29 @@
   VERIFY_RAISES_ASSERT(plu.inverse())
 }
 
+// rcond() returns 0 for a rank-deficient matrix.
+// https://gitlab.com/libeigen/eigen/-/issues/2889
+void test_2889() {
+  Eigen::MatrixXd A =
+      Eigen::MatrixXd({{0.0000000000000000, 0.0000000000000000, 1.0000000000000000, 0.0000000000000000,
+                        0.34149999916553497, 0.0000000000000000, 0.79877008515664061},
+                       {0.0000000000000000, 1.0000000000000000, 0.0000000000000000, 0.29200000315904617,
+                        0.0000000000000000, -0.37149999849498272, -0.16425902650844920},
+                       {0.0000000000000000, 0.0000000000000000, 1.0000000000000000, 0.0000000000000000,
+                        0.34149999916553497, 0.0000000000000000, 0.79877008515664061},
+                       {0.0000000000000000, 1.0000000000000000, 0.0000000000000000, 0.040500000119209290,
+                        0.0000000000000000, -0.30099999904632568, -0.081170580429391403},
+                       {1.0000000000000000, 0.0000000000000000, 0.0000000000000000, 0.0000000000000000,
+                        0.0000000000000000, 0.0000000000000000, -0.0000000000000000},
+                       {0.0000000000000000, 0.70710689672598170, 0.70710666564709435, 0.027000000700354562,
+                        0.025455838867477515, -0.025455847186317101, -0.0068972271572272821},
+                       {1.0000000000000000, 0.0000000000000000, 0.0000000000000000, 0.0000000000000000,
+                        0.0000000000000000, 0.0000000000000000, -0.0000000000000000}});
+  Eigen::FullPivLU<Eigen::MatrixXd> lu_factorization(A);
+  double rcond = lu_factorization.rcond();
+  VERIFY_IS_EQUAL(rcond, 0.0);
+}
+
 EIGEN_DECLARE_TEST(lu) {
   for (int i = 0; i < g_repeat; i++) {
     CALL_SUBTEST_1(lu_non_invertible<Matrix3f>());
@@ -229,7 +252,9 @@
     CALL_SUBTEST_7((lu_non_invertible<Matrix<float, Dynamic, 16> >()));
 
     // Test problem size constructors
-    CALL_SUBTEST_9(PartialPivLU<MatrixXf>(10));
-    CALL_SUBTEST_9(FullPivLU<MatrixXf>(10, 20););
+    CALL_SUBTEST_8(PartialPivLU<MatrixXf>(10));
+    CALL_SUBTEST_8(FullPivLU<MatrixXf>(10, 20););
+
+    CALL_SUBTEST_9(test_2889());
   }
 }
diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp
index dc1a5c7..724fa40 100644
--- a/test/vectorization_logic.cpp
+++ b/test/vectorization_logic.cpp
@@ -251,6 +251,24 @@
                         (Matrix1::Flags & RowMajorBit) ? PacketSize : 2 > (1, 2), DefaultTraversal, CompleteUnrolling));
     }
 
+    // the actual packet type used by the assignment evaluator is not necessarily PacketType for small fixed-size arrays
+    if (internal::unpacket_traits<typename internal::find_best_packet<Scalar, 2>::type>::size > 2) {
+      // the expression should not be vectorized if the size is too small
+      using Vector2 = Matrix<Scalar, 2, 1, ColMajor>;
+      using VectorMax3 = Matrix<Scalar, Dynamic, 1, ColMajor, 3, 1>;
+      VERIFY(test_assign(Vector2(), Vector2(), LinearTraversal, InnerUnrolling + CompleteUnrolling));
+      VERIFY(test_assign(VectorMax3(), Vector2(), LinearTraversal, InnerUnrolling + CompleteUnrolling));
+      VERIFY(test_assign(Vector2(), VectorMax3(), LinearTraversal, InnerUnrolling + CompleteUnrolling));
+      VERIFY(test_assign(VectorMax3(), VectorMax3(), LinearTraversal, NoUnrolling));
+    }
+
+    if (PacketSize > 1 && PacketSize < 8) {
+      // the size of the expression should be deduced at compile time by considering both the lhs and rhs
+      using Lhs = Matrix<Scalar, 7, Dynamic, ColMajor>;
+      using Rhs = Matrix<Scalar, Dynamic, 7, ColMajor>;
+      VERIFY(test_assign(Lhs(), Rhs(), -1, InnerUnrolling + CompleteUnrolling));
+    }
+
     VERIFY(
         test_redux(Matrix44c().template block<2 * PacketSize, 1>(1, 2), LinearVectorizedTraversal, CompleteUnrolling));
 
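
Illustration (not part of the test) of the shape deduction exercised by the last new check: the destination fixes the row count and the source fixes the column count, so the assignment is known to be 7x7 at compile time even though each type alone is partially dynamic.

    #include <Eigen/Dense>

    void copy_7x7(Eigen::Matrix<float, 7, Eigen::Dynamic>& dst,
                  const Eigen::Matrix<float, Eigen::Dynamic, 7>& src) {
      dst = src;  // run-time sizes must still agree; dst is resized to 7 columns
    }
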
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index 2a59530..aad1647 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -27,6 +27,10 @@
   static constexpr int NumDimensions = XprTraits::NumDimensions;
   static constexpr int Layout = XprTraits::Layout;
   typedef typename XprTraits::PointerType PointerType;
+  enum {
+    // Broadcast is read-only.
+    Flags = traits<XprType>::Flags & ~LvalueBit
+  };
 };
 
 template <typename Broadcast, typename XprType>
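
Usage sketch for the read-only broadcast (unsupported Tensor module; the function name is made up): reading a broadcast expression is unchanged, while using one as an assignment target is not supported.

    #include <unsupported/Eigen/CXX11/Tensor>

    void broadcast_example() {
      Eigen::Tensor<float, 2> t(2, 3);
      t.setConstant(1.0f);
      Eigen::array<Eigen::Index, 2> bcast = {{2, 2}};  // repeat twice along each dimension
      Eigen::Tensor<float, 2> u = t.broadcast(bcast);  // 4 x 6 result, source is read-only
      // t.broadcast(bcast).setConstant(0.0f);         // writing through a broadcast: not supported
      (void)u;
    }
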
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
index 7521559..b9d6f37 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
@@ -15,7 +15,7 @@
 
 namespace Eigen {
 
-template <bool NeedUprade>
+template <bool IsReal>
 struct MakeComplex {
   template <typename T>
   EIGEN_DEVICE_FUNC T operator()(const T& val) const {
@@ -26,16 +26,8 @@
 template <>
 struct MakeComplex<true> {
   template <typename T>
-  EIGEN_DEVICE_FUNC std::complex<T> operator()(const T& val) const {
-    return std::complex<T>(val, 0);
-  }
-};
-
-template <>
-struct MakeComplex<false> {
-  template <typename T>
-  EIGEN_DEVICE_FUNC std::complex<T> operator()(const std::complex<T>& val) const {
-    return val;
+  EIGEN_DEVICE_FUNC internal::make_complex_t<T> operator()(const T& val) const {
+    return internal::make_complex_t<T>(val, T(0));
   }
 };
 
@@ -49,17 +41,17 @@
 
 template <>
 struct PartOf<RealPart> {
-  template <typename T>
-  T operator()(const std::complex<T>& val) const {
-    return val.real();
+  template <typename T, typename EnableIf = std::enable_if_t<NumTraits<T>::IsComplex>>
+  typename NumTraits<T>::Real operator()(const T& val) const {
+    return Eigen::numext::real(val);
   }
 };
 
 template <>
 struct PartOf<ImagPart> {
-  template <typename T>
-  T operator()(const std::complex<T>& val) const {
-    return val.imag();
+  template <typename T, typename EnableIf = std::enable_if_t<NumTraits<T>::IsComplex>>
+  typename NumTraits<T>::Real operator()(const T& val) const {
+    return Eigen::numext::imag(val);
   }
 };
 
@@ -67,8 +59,9 @@
 template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
 struct traits<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir> > : public traits<XprType> {
   typedef traits<XprType> XprTraits;
-  typedef typename NumTraits<typename XprTraits::Scalar>::Real RealScalar;
-  typedef typename std::complex<RealScalar> ComplexScalar;
+  typedef typename XprTraits::Scalar Scalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  typedef make_complex_t<Scalar> ComplexScalar;
   typedef typename XprTraits::Scalar InputScalar;
   typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>
       OutputScalar;
@@ -109,7 +102,7 @@
  public:
   typedef typename Eigen::internal::traits<TensorFFTOp>::Scalar Scalar;
   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename std::complex<RealScalar> ComplexScalar;
+  typedef internal::make_complex_t<Scalar> ComplexScalar;
   typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>
       OutputScalar;
   typedef OutputScalar CoeffReturnType;
@@ -137,7 +130,7 @@
   typedef DSizes<Index, NumDims> Dimensions;
   typedef typename XprType::Scalar Scalar;
   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-  typedef typename std::complex<RealScalar> ComplexScalar;
+  typedef internal::make_complex_t<Scalar> ComplexScalar;
   typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
   typedef internal::traits<XprType> XprTraits;
   typedef typename XprTraits::Scalar InputScalar;
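The rewritten functors rely on Eigen::numext::real/imag and internal::make_complex_t rather than hard-coding std::complex, so scalar types with a custom complex counterpart keep working. A small sketch of the numext accessors the new code leans on (standard scalars assumed):

    #include <Eigen/Core>
    #include <complex>

    int main() {
      std::complex<double> z(3.0, 4.0);
      double re = Eigen::numext::real(z);  // 3.0
      double im = Eigen::numext::imag(z);  // 4.0
      // For a real scalar the imaginary part is simply zero.
      double x = 5.0;
      bool ok = (re == 3.0) && (im == 4.0) && (Eigen::numext::imag(x) == 0.0);
      return ok ? 0 : 1;
    }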
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h
index 7061f51..e12923d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h
@@ -52,7 +52,13 @@
   typedef TensorEvaluator<Expr, Device> EvalType;
 
   TensorLazyEvaluatorReadOnly(const Expr& expr, const Device& device) : m_impl(expr, device), m_dummy(Scalar(0)) {
-    m_dims = m_impl.dimensions();
+    EIGEN_STATIC_ASSERT(
+        internal::array_size<Dimensions>::value == internal::array_size<typename EvalType::Dimensions>::value,
+        "Dimension sizes must match.");
+    const auto& other_dims = m_impl.dimensions();
+    for (std::size_t i = 0; i < m_dims.size(); ++i) {
+      m_dims[i] = other_dims[i];
+    }
     m_impl.evalSubExprsIfNeeded(NULL);
   }
   virtual ~TensorLazyEvaluatorReadOnly() { m_impl.cleanup(); }
@@ -86,14 +92,12 @@
   EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex index) { return this->m_impl.coeffRef(index); }
 };
 
-template <typename Dimensions, typename Expr, typename Device>
-class TensorLazyEvaluator : public std::conditional_t<bool(internal::is_lvalue<Expr>::value),
-                                                      TensorLazyEvaluatorWritable<Dimensions, Expr, Device>,
-                                                      TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> > {
+template <typename Dimensions, typename Expr, typename Device, bool IsWritable>
+class TensorLazyEvaluator : public std::conditional_t<IsWritable, TensorLazyEvaluatorWritable<Dimensions, Expr, Device>,
+                                                      TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device>> {
  public:
-  typedef std::conditional_t<bool(internal::is_lvalue<Expr>::value),
-                             TensorLazyEvaluatorWritable<Dimensions, Expr, Device>,
-                             TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> >
+  typedef std::conditional_t<IsWritable, TensorLazyEvaluatorWritable<Dimensions, Expr, Device>,
+                             TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device>>
       Base;
   typedef typename Base::Scalar Scalar;
 
@@ -101,24 +105,15 @@
   virtual ~TensorLazyEvaluator() {}
 };
 
-}  // namespace internal
-
-/** \class TensorRef
- * \ingroup CXX11_Tensor_Module
- *
- * \brief A reference to a tensor expression
- * The expression will be evaluated lazily (as much as possible).
- *
- */
-template <typename PlainObjectType>
-class TensorRef : public TensorBase<TensorRef<PlainObjectType> > {
+template <typename Derived>
+class TensorRefBase : public TensorBase<Derived> {
  public:
-  typedef TensorRef<PlainObjectType> Self;
+  typedef typename traits<Derived>::PlainObjectType PlainObjectType;
   typedef typename PlainObjectType::Base Base;
-  typedef typename Eigen::internal::nested<Self>::type Nested;
-  typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind;
-  typedef typename internal::traits<PlainObjectType>::Index Index;
-  typedef typename internal::traits<PlainObjectType>::Scalar Scalar;
+  typedef typename Eigen::internal::nested<Derived>::type Nested;
+  typedef typename traits<PlainObjectType>::StorageKind StorageKind;
+  typedef typename traits<PlainObjectType>::Index Index;
+  typedef typename traits<PlainObjectType>::Scalar Scalar;
   typedef typename NumTraits<Scalar>::Real RealScalar;
   typedef typename Base::CoeffReturnType CoeffReturnType;
   typedef Scalar* PointerType;
@@ -138,33 +133,17 @@
   };
 
   //===- Tensor block evaluation strategy (see TensorBlock.h) -----------===//
-  typedef internal::TensorBlockNotImplemented TensorBlock;
+  typedef TensorBlockNotImplemented TensorBlock;
   //===------------------------------------------------------------------===//
 
-  EIGEN_STRONG_INLINE TensorRef() : m_evaluator(NULL) {}
+  EIGEN_STRONG_INLINE TensorRefBase() : m_evaluator(NULL) {}
 
-  template <typename Expression>
-  EIGEN_STRONG_INLINE TensorRef(const Expression& expr)
-      : m_evaluator(new internal::TensorLazyEvaluator<Dimensions, Expression, DefaultDevice>(expr, DefaultDevice())) {
-    m_evaluator->incrRefCount();
-  }
-
-  template <typename Expression>
-  EIGEN_STRONG_INLINE TensorRef& operator=(const Expression& expr) {
-    unrefEvaluator();
-    m_evaluator = new internal::TensorLazyEvaluator<Dimensions, Expression, DefaultDevice>(expr, DefaultDevice());
-    m_evaluator->incrRefCount();
-    return *this;
-  }
-
-  ~TensorRef() { unrefEvaluator(); }
-
-  TensorRef(const TensorRef& other) : TensorBase<TensorRef<PlainObjectType> >(other), m_evaluator(other.m_evaluator) {
+  TensorRefBase(const TensorRefBase& other) : TensorBase<Derived>(other), m_evaluator(other.m_evaluator) {
     eigen_assert(m_evaluator->refCount() > 0);
     m_evaluator->incrRefCount();
   }
 
-  TensorRef& operator=(const TensorRef& other) {
+  TensorRefBase& operator=(const TensorRefBase& other) {
     if (this != &other) {
       unrefEvaluator();
       m_evaluator = other.m_evaluator;
@@ -174,6 +153,28 @@
     return *this;
   }
 
+  template <typename Expression,
+            typename EnableIf = std::enable_if_t<!std::is_same<std::decay_t<Expression>, Derived>::value>>
+  EIGEN_STRONG_INLINE TensorRefBase(const Expression& expr)
+      : m_evaluator(new TensorLazyEvaluator<Dimensions, Expression, DefaultDevice,
+                                            /*IsWritable=*/!std::is_const<PlainObjectType>::value &&
+                                                bool(is_lvalue<Expression>::value)>(expr, DefaultDevice())) {
+    m_evaluator->incrRefCount();
+  }
+
+  template <typename Expression,
+            typename EnableIf = std::enable_if_t<!std::is_same<std::decay_t<Expression>, Derived>::value>>
+  EIGEN_STRONG_INLINE TensorRefBase& operator=(const Expression& expr) {
+    unrefEvaluator();
+    m_evaluator = new TensorLazyEvaluator < Dimensions, Expression, DefaultDevice,
+    /*IsWritable=*/!std::is_const<PlainObjectType>::value&& bool(is_lvalue<Expression>::value) >
+        (expr, DefaultDevice());
+    m_evaluator->incrRefCount();
+    return *this;
+  }
+
+  ~TensorRefBase() { unrefEvaluator(); }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return m_evaluator->dimensions().size(); }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(Index n) const { return m_evaluator->dimensions()[n]; }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_evaluator->dimensions(); }
@@ -188,12 +189,6 @@
     const array<Index, num_indices> indices{{firstIndex, otherIndices...}};
     return coeff(indices);
   }
-  template <typename... IndexTypes>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) {
-    const std::size_t num_indices = (sizeof...(otherIndices) + 1);
-    const array<Index, num_indices> indices{{firstIndex, otherIndices...}};
-    return coeffRef(indices);
-  }
 
   template <std::size_t NumIndices>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(const array<Index, NumIndices>& indices) const {
@@ -212,6 +207,70 @@
     }
     return m_evaluator->coeff(index);
   }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const { return m_evaluator->coeff(index); }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_evaluator->coeffRef(index); }
+
+ protected:
+  TensorLazyBaseEvaluator<Dimensions, Scalar>* evaluator() { return m_evaluator; }
+
+ private:
+  EIGEN_STRONG_INLINE void unrefEvaluator() {
+    if (m_evaluator) {
+      m_evaluator->decrRefCount();
+      if (m_evaluator->refCount() == 0) {
+        delete m_evaluator;
+      }
+    }
+  }
+
+  TensorLazyBaseEvaluator<Dimensions, Scalar>* m_evaluator;
+};
+
+}  // namespace internal
+
+/**
+ * \ingroup CXX11_Tensor_Module
+ *
+ * \brief A reference to a tensor expression
+ * The expression will be evaluated lazily (as much as possible).
+ *
+ */
+template <typename PlainObjectType>
+class TensorRef : public internal::TensorRefBase<TensorRef<PlainObjectType>> {
+  typedef internal::TensorRefBase<TensorRef<PlainObjectType>> Base;
+
+ public:
+  using Scalar = typename Base::Scalar;
+  using Dimensions = typename Base::Dimensions;
+
+  EIGEN_STRONG_INLINE TensorRef() : Base() {}
+
+  template <typename Expression>
+  EIGEN_STRONG_INLINE TensorRef(const Expression& expr) : Base(expr) {
+    EIGEN_STATIC_ASSERT(internal::is_lvalue<Expression>::value,
+                        "Expression must be mutable to create a mutable TensorRef<Expression>.  Did you mean "
+                        "TensorRef<const Expression>?");
+  }
+
+  template <typename Expression>
+  EIGEN_STRONG_INLINE TensorRef& operator=(const Expression& expr) {
+    EIGEN_STATIC_ASSERT(internal::is_lvalue<Expression>::value,
+                        "Expression must be mutable to create a mutable TensorRef<Expression>.  Did you mean "
+                        "TensorRef<const Expression>?");
+    return Base::operator=(expr).derived();
+  }
+
+  TensorRef& operator=(const TensorRef& other) { return Base::operator=(other).derived(); }
+
+  template <typename... IndexTypes>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) {
+    const std::size_t num_indices = (sizeof...(otherIndices) + 1);
+    const array<Index, num_indices> indices{{firstIndex, otherIndices...}};
+    return coeffRef(indices);
+  }
+
   template <std::size_t NumIndices>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices) {
     const Dimensions& dims = this->dimensions();
@@ -227,24 +286,37 @@
         index = index * dims[i] + indices[i];
       }
     }
-    return m_evaluator->coeffRef(index);
+    return Base::evaluator()->coeffRef(index);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const { return m_evaluator->coeff(index); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return Base::evaluator()->coeffRef(index); }
+};
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_evaluator->coeffRef(index); }
+/**
+ * \ingroup CXX11_Tensor_Module
+ *
+ * \brief A reference to a constant tensor expression
+ * The expression will be evaluated lazily (as much as possible).
+ *
+ */
+template <typename PlainObjectType>
+class TensorRef<const PlainObjectType> : public internal::TensorRefBase<TensorRef<const PlainObjectType>> {
+  typedef internal::TensorRefBase<TensorRef<const PlainObjectType>> Base;
 
- private:
-  EIGEN_STRONG_INLINE void unrefEvaluator() {
-    if (m_evaluator) {
-      m_evaluator->decrRefCount();
-      if (m_evaluator->refCount() == 0) {
-        delete m_evaluator;
-      }
-    }
+ public:
+  EIGEN_STRONG_INLINE TensorRef() : Base() {}
+
+  template <typename Expression>
+  EIGEN_STRONG_INLINE TensorRef(const Expression& expr) : Base(expr) {}
+
+  template <typename Expression>
+  EIGEN_STRONG_INLINE TensorRef& operator=(const Expression& expr) {
+    return Base::operator=(expr).derived();
   }
 
-  internal::TensorLazyBaseEvaluator<Dimensions, Scalar>* m_evaluator;
+  TensorRef(const TensorRef& other) : Base(other) {}
+
+  TensorRef& operator=(const TensorRef& other) { return Base::operator=(other).derived(); }
 };
 
 // evaluator for rvalues
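With the split into TensorRefBase plus mutable and const specializations, read-only expressions must now be held as TensorRef<const ...>, while mutable references keep coeffRef access; an illustrative sketch (not part of the patch):

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 2> a(2, 2), b(2, 2);
      a.setRandom();
      b.setRandom();

      // An rvalue expression is read-only: use the const specialization.
      Eigen::TensorRef<const Eigen::Tensor<float, 2>> sum_ref(a + b);

      // An lvalue tensor can still be referenced mutably.
      Eigen::TensorRef<Eigen::Tensor<float, 2>> a_ref(a);
      a_ref.coeffRef(0, 0) = 1.0f;

      return sum_ref(1, 1) == a(1, 1) + b(1, 1) ? 0 : 1;
    }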
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
index 017b4ff..f5954d6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
@@ -95,8 +95,9 @@
   typedef typename MakePointer<Scalar>::Type PointerType;
 };
 
-template <typename PlainObjectType>
-struct traits<TensorRef<PlainObjectType> > : public traits<PlainObjectType> {
+template <typename PlainObjectType_>
+struct traits<TensorRef<PlainObjectType_> > : public traits<PlainObjectType_> {
+  typedef PlainObjectType_ PlainObjectType;
   typedef traits<PlainObjectType> BaseTraits;
   typedef typename BaseTraits::Scalar Scalar;
   typedef typename BaseTraits::StorageKind StorageKind;
diff --git a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
index 182bd2e..6f6df3e 100644
--- a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
+++ b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
@@ -111,12 +111,13 @@
   typedef typename MatrixType::Scalar Scalar;
   typedef typename MatrixType::StorageIndex StorageIndex;
   typedef typename MatrixType::RealScalar RealScalar;
+  typedef internal::make_complex_t<Scalar> ComplexScalar;
   typedef Preconditioner_ Preconditioner;
   typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix;
   typedef Matrix<RealScalar, Dynamic, Dynamic> DenseRealMatrix;
   typedef Matrix<Scalar, Dynamic, 1> DenseVector;
   typedef Matrix<RealScalar, Dynamic, 1> DenseRealVector;
-  typedef Matrix<std::complex<RealScalar>, Dynamic, 1> ComplexVector;
+  typedef Matrix<ComplexScalar, Dynamic, 1> ComplexVector;
 
   /** Default constructor. */
   DGMRES()
@@ -389,15 +390,15 @@
   Index j = 0;
   while (j < it - 1) {
     if (T(j + 1, j) == Scalar(0)) {
-      eig(j) = std::complex<RealScalar>(T(j, j), RealScalar(0));
+      eig(j) = ComplexScalar(T(j, j), RealScalar(0));
       j++;
     } else {
-      eig(j) = std::complex<RealScalar>(T(j, j), T(j + 1, j));
-      eig(j + 1) = std::complex<RealScalar>(T(j, j + 1), T(j + 1, j + 1));
+      eig(j) = ComplexScalar(T(j, j), T(j + 1, j));
+      eig(j + 1) = ComplexScalar(T(j, j + 1), T(j + 1, j + 1));
       j++;
     }
   }
-  if (j < it - 1) eig(j) = std::complex<RealScalar>(T(j, j), RealScalar(0));
+  if (j < it - 1) eig(j) = ComplexScalar(T(j, j), RealScalar(0));
   return eig;
 }
 
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
index ff955e1..a28aa96 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
@@ -23,8 +23,10 @@
  *
  * This struct is used by CwiseUnaryOp to scale a matrix by \f$ 2^{-s} \f$.
  */
-template <typename RealScalar>
+template <typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
 struct MatrixExponentialScalingOp {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+
   /** \brief Constructor.
    *
    * \param[in] squarings  The integer \f$ s \f$ in this document.
@@ -35,20 +37,30 @@
    *
    * \param[in,out] x  The scalar to be scaled, becoming \f$ 2^{-s} x \f$.
    */
-  inline const RealScalar operator()(const RealScalar& x) const {
+  inline const Scalar operator()(const Scalar& x) const {
     using std::ldexp;
-    return ldexp(x, -m_squarings);
+    return Scalar(ldexp(Eigen::numext::real(x), -m_squarings), ldexp(Eigen::numext::imag(x), -m_squarings));
   }
 
-  typedef std::complex<RealScalar> ComplexScalar;
+ private:
+  int m_squarings;
+};
+
+template <typename Scalar>
+struct MatrixExponentialScalingOp<Scalar, /*IsComplex=*/false> {
+  /** \brief Constructor.
+   *
+   * \param[in] squarings  The integer \f$ s \f$ in this document.
+   */
+  MatrixExponentialScalingOp(int squarings) : m_squarings(squarings) {}
 
   /** \brief Scale a matrix coefficient.
    *
    * \param[in,out] x  The scalar to be scaled, becoming \f$ 2^{-s} x \f$.
    */
-  inline const ComplexScalar operator()(const ComplexScalar& x) const {
+  inline const Scalar operator()(const Scalar& x) const {
     using std::ldexp;
-    return ComplexScalar(ldexp(x.real(), -m_squarings), ldexp(x.imag(), -m_squarings));
+    return ldexp(x, -m_squarings);
   }
 
  private:
@@ -220,6 +232,7 @@
 
 template <typename MatrixType>
 struct matrix_exp_computeUV<MatrixType, float> {
+  using Scalar = typename traits<MatrixType>::Scalar;
   template <typename ArgType>
   static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings) {
     using std::frexp;
@@ -234,7 +247,7 @@
       const float maxnorm = 3.925724783138660f;
       frexp(l1norm / maxnorm, &squarings);
       if (squarings < 0) squarings = 0;
-      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<float>(squarings));
+      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<Scalar>(squarings));
       matrix_exp_pade7(A, U, V);
     }
   }
@@ -242,12 +255,12 @@
 
 template <typename MatrixType>
 struct matrix_exp_computeUV<MatrixType, double> {
-  typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar;
+  using Scalar = typename traits<MatrixType>::Scalar;
   template <typename ArgType>
   static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings) {
     using std::frexp;
     using std::pow;
-    const RealScalar l1norm = arg.cwiseAbs().colwise().sum().maxCoeff();
+    const double l1norm = arg.cwiseAbs().colwise().sum().maxCoeff();
     squarings = 0;
     if (l1norm < 1.495585217958292e-002) {
       matrix_exp_pade3(arg, U, V);
@@ -258,10 +271,10 @@
     } else if (l1norm < 2.097847961257068e+000) {
       matrix_exp_pade9(arg, U, V);
     } else {
-      const RealScalar maxnorm = 5.371920351148152;
+      const double maxnorm = 5.371920351148152;
       frexp(l1norm / maxnorm, &squarings);
       if (squarings < 0) squarings = 0;
-      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<RealScalar>(squarings));
+      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<Scalar>(squarings));
       matrix_exp_pade13(A, U, V);
     }
   }
@@ -271,6 +284,7 @@
 struct matrix_exp_computeUV<MatrixType, long double> {
   template <typename ArgType>
   static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings) {
+    using Scalar = typename traits<MatrixType>::Scalar;
 #if LDBL_MANT_DIG == 53  // double precision
     matrix_exp_computeUV<MatrixType, double>::run(arg, U, V, squarings);
 
@@ -295,7 +309,7 @@
       const long double maxnorm = 4.0246098906697353063L;
       frexp(l1norm / maxnorm, &squarings);
       if (squarings < 0) squarings = 0;
-      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings));
+      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<Scalar>(squarings));
       matrix_exp_pade13(A, U, V);
     }
 
@@ -315,7 +329,7 @@
       const long double maxnorm = 3.2579440895405400856599663723517L;
       frexp(l1norm / maxnorm, &squarings);
       if (squarings < 0) squarings = 0;
-      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings));
+      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<Scalar>(squarings));
       matrix_exp_pade17(A, U, V);
     }
 
@@ -335,7 +349,7 @@
       const long double maxnorm = 2.884233277829519311757165057717815L;
       frexp(l1norm / maxnorm, &squarings);
       if (squarings < 0) squarings = 0;
-      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings));
+      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<Scalar>(squarings));
       matrix_exp_pade17(A, U, V);
     }
 
@@ -382,9 +396,7 @@
 void matrix_exp_compute(const ArgType& arg, ResultType& result, false_type)  // default
 {
   typedef typename ArgType::PlainObject MatrixType;
-  typedef typename traits<MatrixType>::Scalar Scalar;
-  typedef typename NumTraits<Scalar>::Real RealScalar;
-  typedef typename std::complex<RealScalar> ComplexScalar;
+  typedef make_complex_t<typename traits<MatrixType>::Scalar> ComplexScalar;
   result = arg.matrixFunction(internal::stem_function_exp<ComplexScalar>);
 }
 
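Splitting the functor on NumTraits<Scalar>::IsComplex lets ldexp be applied component-wise for complex scalars and directly for real ones; the operation is simply multiplication by 2^(-s). A sketch of the complex branch's arithmetic (not the library code itself):

    #include <cmath>
    #include <complex>

    // Multiply x by 2^(-squarings) without computing an explicit power,
    // mirroring what the complex specialization of the functor does.
    std::complex<double> scale(const std::complex<double>& x, int squarings) {
      return {std::ldexp(x.real(), -squarings), std::ldexp(x.imag(), -squarings)};
    }

    int main() {
      std::complex<double> z(8.0, 4.0);
      std::complex<double> s = scale(z, 2);  // (2.0, 1.0)
      return (s == std::complex<double>(2.0, 1.0)) ? 0 : 1;
    }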
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
index 68336a5..0c18ad6 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
@@ -382,7 +382,7 @@
     static const int Rows = Traits::RowsAtCompileTime, Cols = Traits::ColsAtCompileTime;
     static const int MaxRows = Traits::MaxRowsAtCompileTime, MaxCols = Traits::MaxColsAtCompileTime;
 
-    typedef std::complex<Scalar> ComplexScalar;
+    typedef internal::make_complex_t<Scalar> ComplexScalar;
     typedef Matrix<ComplexScalar, Rows, Cols, 0, MaxRows, MaxCols> ComplexMatrix;
 
     ComplexMatrix CA = A.template cast<ComplexScalar>();
@@ -476,7 +476,7 @@
     typedef typename internal::nested_eval<Derived, 10>::type NestedEvalType;
     typedef internal::remove_all_t<NestedEvalType> NestedEvalTypeClean;
     typedef internal::traits<NestedEvalTypeClean> Traits;
-    typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+    typedef internal::make_complex_t<Scalar> ComplexScalar;
     typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, Traits::RowsAtCompileTime, Traits::ColsAtCompileTime>
         DynMatrixType;
 
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
index 4228166..398971e 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
@@ -330,7 +330,7 @@
     typedef typename internal::nested_eval<Derived, 10>::type DerivedEvalType;
     typedef internal::remove_all_t<DerivedEvalType> DerivedEvalTypeClean;
     typedef internal::traits<DerivedEvalTypeClean> Traits;
-    typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+    typedef internal::make_complex_t<Scalar> ComplexScalar;
     typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, Traits::RowsAtCompileTime, Traits::ColsAtCompileTime>
         DynMatrixType;
     typedef internal::MatrixLogarithmAtomic<DynMatrixType> AtomicType;
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h b/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
index bff619a..a420ee7 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
@@ -91,7 +91,7 @@
   enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime };
   typedef typename MatrixType::Scalar Scalar;
   typedef typename MatrixType::RealScalar RealScalar;
-  typedef std::complex<RealScalar> ComplexScalar;
+  typedef internal::make_complex_t<Scalar> ComplexScalar;
   typedef Block<MatrixType, Dynamic, Dynamic> ResultType;
 
   const MatrixType& m_A;
@@ -380,7 +380,7 @@
   Index cols() const { return m_A.cols(); }
 
  private:
-  typedef std::complex<RealScalar> ComplexScalar;
+  typedef internal::make_complex_t<Scalar> ComplexScalar;
   typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime>
       ComplexMatrix;
 
@@ -628,7 +628,7 @@
 class MatrixComplexPowerReturnValue : public ReturnByValue<MatrixComplexPowerReturnValue<Derived> > {
  public:
   typedef typename Derived::PlainObject PlainObject;
-  typedef typename std::complex<typename Derived::RealScalar> ComplexScalar;
+  typedef internal::make_complex_t<typename Derived::Scalar> ComplexScalar;
 
   /**
    * \brief Constructor.
@@ -685,7 +685,7 @@
 }
 
 template <typename Derived>
-const MatrixComplexPowerReturnValue<Derived> MatrixBase<Derived>::pow(const std::complex<RealScalar>& p) const {
+const MatrixComplexPowerReturnValue<Derived> MatrixBase<Derived>::pow(const internal::make_complex_t<Scalar>& p) const {
   return MatrixComplexPowerReturnValue<Derived>(derived(), p);
 }
 
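The complex-power overload now takes internal::make_complex_t<Scalar> rather than std::complex<RealScalar>; for standard scalars the call site is unchanged, e.g. (hedged usage sketch):

    #include <unsupported/Eigen/MatrixFunctions>
    #include <complex>

    int main() {
      Eigen::Matrix2d A;
      A << 2.0, 0.0,
           0.0, 3.0;
      // Complex exponent: the result is a complex-valued matrix.
      Eigen::Matrix2cd B = A.pow(std::complex<double>(0.5, 0.0));
      return B.real().isApprox(A.cwiseSqrt()) ? 0 : 1;
    }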
diff --git a/unsupported/Eigen/src/Polynomials/PolynomialSolver.h b/unsupported/Eigen/src/Polynomials/PolynomialSolver.h
index 8c0ce3b..aa357a4 100644
--- a/unsupported/Eigen/src/Polynomials/PolynomialSolver.h
+++ b/unsupported/Eigen/src/Polynomials/PolynomialSolver.h
@@ -35,7 +35,7 @@
 
   typedef Scalar_ Scalar;
   typedef typename NumTraits<Scalar>::Real RealScalar;
-  typedef std::complex<RealScalar> RootType;
+  typedef internal::make_complex_t<Scalar> RootType;
   typedef Matrix<RootType, Deg_, 1> RootsType;
 
   typedef DenseIndex Index;
@@ -308,7 +308,7 @@
   typedef std::conditional_t<NumTraits<Scalar>::IsComplex, ComplexEigenSolver<CompanionMatrixType>,
                              EigenSolver<CompanionMatrixType> >
       EigenSolverType;
-  typedef std::conditional_t<NumTraits<Scalar>::IsComplex, Scalar, std::complex<Scalar> > ComplexScalar;
+  typedef internal::make_complex_t<Scalar_> ComplexScalar;
 
  public:
   /** Computes the complex roots of a new polynomial. */
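RootType is now derived via internal::make_complex_t<Scalar>, so for a real Scalar the roots are still returned as std::complex values; for reference, a minimal solver usage sketch:

    #include <unsupported/Eigen/Polynomials>

    int main() {
      // p(x) = x^2 - 1, coefficients given in increasing order of degree.
      Eigen::Vector3d coeffs(-1.0, 0.0, 1.0);
      Eigen::PolynomialSolver<double, 2> solver(coeffs);
      // roots() yields complex scalars (std::complex<double> here).
      const auto& roots = solver.roots();
      return roots.size() == 2 ? 0 : 1;
    }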
diff --git a/unsupported/test/cxx11_tensor_morphing.cpp b/unsupported/test/cxx11_tensor_morphing.cpp
index 0672572..55d4291 100644
--- a/unsupported/test/cxx11_tensor_morphing.cpp
+++ b/unsupported/test/cxx11_tensor_morphing.cpp
@@ -138,7 +138,7 @@
   TensorMap<Tensor<const T, 1>> m(b, 1);
   DSizes<DenseIndex, 1> offsets;
   offsets[0] = 0;
-  TensorRef<Tensor<const T, 1>> slice_ref(m.slice(offsets, m.dimensions()));
+  TensorRef<const Tensor<const T, 1>> slice_ref(m.slice(offsets, m.dimensions()));
   VERIFY_IS_EQUAL(slice_ref(0), 42);
 }
 
diff --git a/unsupported/test/cxx11_tensor_ref.cpp b/unsupported/test/cxx11_tensor_ref.cpp
index d5ff196..cf09749 100644
--- a/unsupported/test/cxx11_tensor_ref.cpp
+++ b/unsupported/test/cxx11_tensor_ref.cpp
@@ -49,8 +49,8 @@
   Tensor<int, 1> input2(6);
   input2.setRandom();
 
-  TensorRef<Tensor<int, 1>> ref3(input1 + input2);
-  TensorRef<Tensor<int, 1>> ref4 = input1 + input2;
+  TensorRef<const Tensor<int, 1>> ref3(input1 + input2);
+  TensorRef<const Tensor<int, 1>> ref4 = input1 + input2;
 
   VERIFY_IS_NOT_EQUAL(ref3.data(), input1.data());
   VERIFY_IS_NOT_EQUAL(ref4.data(), input1.data());
@@ -144,7 +144,7 @@
 
   Tensor<float, 3> result(3, 5, 7);
   result.setRandom();
-  TensorRef<Tensor<float, 3>> result_ref(result);
+  TensorRef<const Tensor<float, 3>> result_ref(result);
 
   Tensor<float, 3> bias(3, 5, 7);
   bias.setRandom();
@@ -192,7 +192,7 @@
   paddings[2] = std::make_pair(3, 4);
   paddings[3] = std::make_pair(0, 0);
   DSizes<Eigen::DenseIndex, 4> shuffle_dims(0, 1, 2, 3);
-  TensorRef<Tensor<const float, 4>> ref(m.pad(paddings));
+  TensorRef<const Tensor<const float, 4>> ref(m.pad(paddings));
   array<std::pair<ptrdiff_t, ptrdiff_t>, 4> trivial;
   trivial[0] = std::make_pair(0, 0);
   trivial[1] = std::make_pair(0, 0);