Update Eigen to commit:a30ecb7221a46824b85cad5f9016efe6e5871d69 CHANGELOG ========= a30ecb722 - Don't use the fast implementation if EIGEN_GPU_CC, since integer_packet is not defined for float4 used by the GPU compiler (even on host). 5a0a165c0 - fix broken asserts 0b5873893 - Fix two corner cases in the new implementation of logistic sigmoid. 5d7ffe2ca - drop COPYING.GPL a32e6a404 - Explicit type casting 8d81a2339 - Reduce usage of reserved names c61b3cb0d - Fix IterativeSolverBase referring to itself as ConjugateGradient 80ccacc71 - Fix accuracy of logistic sigmoid 8b8125c57 - Make sure the scalar and vectorized path for array.exp() return consistent values. c9df98b07 - Fix Gcc8.5 warning about missing base class initialisation (#2404) 47eac2107 - Make fixed-size Matrix and Array trivially copyable after C++20 c4b1dd2f6 - Add support for Cray, Fujitsu, and Intel ICX compilers 96dc37a03 - Some fixes/cleanups for numeric_limits & fix for related bug in psqrt ed27d988c - Fixes #i2411 7b5a8b6bc - Improve plog: 20% speedup for float + handle denormals a491c7f89 - Allow specifying inner & outer stride for CWiseUnaryView - fixes #2398 27a78e4f9 - Some serialization API changes were made in commit... 9210e71fb - ensure that eigen::internal::size is not found by ADL, rename to ssize and... 7244a74ab - Add bounds checking to Eigen serializer ba91839d7 - Remove user survey from Doxygen header a4098ac67 - Fix duplicate include guard *ALTIVEC_H -> *ZVECTOR_H 22a347b9d - Remove unused EIGEN_HAS_STATIC_ARRAY_TEMPLATE d705eb5f8 - Revert "Select AVX2 even if the data size is not a multiple of 8" 8eab7b688 - Improve exp<float>(): Don't flush denormal results +4% speedup. 6e95c0cd9 - Add missing internal namespace d3675b2e7 - Add vectorization_logic_1 test to list of CI smoketests c06c3e52a - Include immintrin.h if F16C is available and vectorization is disabled f7a056bf0 - Small fixes 2a6594de2 - Small cleanup of GDB pretty printer code dee6428a7 - fixed clang warnings about alignment change and floating point precision d0b4b75fb - Simplify logical_xor() e93a07177 - Fix a bug introduced in !751. PiperOrigin-RevId: 423164395 Change-Id: I8c7351eccfca04dccb07ca7ec6cfb0cbb3cd5a83

commit: e783d933fde3ec0640a5bd254ddbb3ae5d23a726 [log] [tgz]
author: Rasmus Munk Larsen <rmlarsen@google.com> Thu Jan 20 14:51:11 2022 -0800
committer: Antonio Sanchez <cantonios@google.com> Mon Sep 26 15:57:10 2022 -0700
tree: 825873f71c9b7e7666a4b19b93dacd1120153ddc
parent: 51987b83142c8ddc4e6cfd6ef3c14dc4bab140e2 [diff]
diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h
index b652852..1242d19 100644
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h

@@ -21,6 +21,10 @@
   typedef ArrayXpr XprKind;
   typedef ArrayBase<Array<Scalar_, Rows_, Cols_, Options_, MaxRows_, MaxCols_> > XprBase;
 };
+
+template<typename Scalar_, int Rows_, int Cols_, int Options_, int MaxRows_, int MaxCols_>
+struct has_trivially_copyable_storage<Array<Scalar_, Rows_, Cols_, Options_, MaxRows_, MaxCols_> >
+    : has_trivially_copyable_storage<Matrix<Scalar_, Rows_, Cols_, Options_, MaxRows_, MaxCols_> > {};
 }
 
 /** \class Array
@@ -120,6 +124,12 @@
       return Base::_set(other);
     }
 
+#if EIGEN_COMP_HAS_P0848R3
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Array& operator=(
+        const Array& other) requires internal::has_trivially_copyable_storage<Array>::value = default;
+#endif
+
     /** Default constructor.
       *
       * For fixed-size matrices, does nothing.
@@ -159,6 +169,13 @@
       return *this;
     }
 
+#if EIGEN_COMP_HAS_P0848R3
+    EIGEN_DEVICE_FUNC Array(Array&& other) EIGEN_NOEXCEPT
+        requires internal::has_trivially_copyable_storage<Array>::value = default;
+    EIGEN_DEVICE_FUNC Array& operator=(Array&& other) EIGEN_NOEXCEPT
+        requires internal::has_trivially_copyable_storage<Array>::value = default;
+#endif
+
     /** \copydoc PlainObjectBase(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args)
      *
      * Example: \include Array_variadic_ctor_cxx11.cpp
@@ -266,6 +283,12 @@
             : Base(other)
     { }
 
+#if EIGEN_COMP_HAS_P0848R3
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Array(const Array& other) requires internal::has_trivially_copyable_storage<Array>::value =
+        default;
+#endif
+
   private:
     struct PrivateType {};
   public:

diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h
index 28397e5..2d6fe42 100644
--- a/Eigen/src/Core/ArrayBase.h
+++ b/Eigen/src/Core/ArrayBase.h

@@ -112,6 +112,11 @@
       return derived();
     }
     
+#if EIGEN_COMP_HAS_P0848R3
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayBase& operator=(
+        const ArrayBase& other) requires internal::has_trivially_copyable_storage<Derived>::value = default;
+#endif
+
     /** Set all the entries to \a value.
       * \sa DenseBase::setConstant(), DenseBase::fill() */
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE

diff --git a/Eigen/src/Core/BandMatrix.h b/Eigen/src/Core/BandMatrix.h
index a8d8b19..913a967 100644
--- a/Eigen/src/Core/BandMatrix.h
+++ b/Eigen/src/Core/BandMatrix.h

@@ -235,17 +235,17 @@
     internal::variable_if_dynamic<Index, Subs>   m_subs;
 };
 
-template<typename _CoefficientsType,int Rows_, int Cols_, int Supers_, int Subs_,int Options_>
+template<typename CoefficientsType_,int Rows_, int Cols_, int Supers_, int Subs_,int Options_>
 class BandMatrixWrapper;
 
-template<typename _CoefficientsType,int Rows_, int Cols_, int Supers_, int Subs_,int Options_>
-struct traits<BandMatrixWrapper<_CoefficientsType,Rows_,Cols_,Supers_,Subs_,Options_> >
+template<typename CoefficientsType_,int Rows_, int Cols_, int Supers_, int Subs_,int Options_>
+struct traits<BandMatrixWrapper<CoefficientsType_,Rows_,Cols_,Supers_,Subs_,Options_> >
 {
-  typedef typename _CoefficientsType::Scalar Scalar;
-  typedef typename _CoefficientsType::StorageKind StorageKind;
-  typedef typename _CoefficientsType::StorageIndex StorageIndex;
+  typedef typename CoefficientsType_::Scalar Scalar;
+  typedef typename CoefficientsType_::StorageKind StorageKind;
+  typedef typename CoefficientsType_::StorageIndex StorageIndex;
   enum {
-    CoeffReadCost = internal::traits<_CoefficientsType>::CoeffReadCost,
+    CoeffReadCost = internal::traits<CoefficientsType_>::CoeffReadCost,
     RowsAtCompileTime = Rows_,
     ColsAtCompileTime = Cols_,
     MaxRowsAtCompileTime = Rows_,
@@ -256,11 +256,11 @@
     Options = Options_,
     DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic
   };
-  typedef _CoefficientsType CoefficientsType;
+  typedef CoefficientsType_ CoefficientsType;
 };
 
-template<typename _CoefficientsType,int Rows_, int Cols_, int Supers_, int Subs_,int Options_>
-class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsType,Rows_,Cols_,Supers_,Subs_,Options_> >
+template<typename CoefficientsType_,int Rows_, int Cols_, int Supers_, int Subs_,int Options_>
+class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<CoefficientsType_,Rows_,Cols_,Supers_,Subs_,Options_> >
 {
   public:
 
@@ -339,9 +339,9 @@
   typedef BandShape Shape;
 };
 
-template<typename _CoefficientsType,int Rows_, int Cols_, int Supers_, int Subs_,int Options_>
-struct evaluator_traits<BandMatrixWrapper<_CoefficientsType,Rows_,Cols_,Supers_,Subs_,Options_> >
-  : public evaluator_traits_base<BandMatrixWrapper<_CoefficientsType,Rows_,Cols_,Supers_,Subs_,Options_> >
+template<typename CoefficientsType_,int Rows_, int Cols_, int Supers_, int Subs_,int Options_>
+struct evaluator_traits<BandMatrixWrapper<CoefficientsType_,Rows_,Cols_,Supers_,Subs_,Options_> >
+  : public evaluator_traits_base<BandMatrixWrapper<CoefficientsType_,Rows_,Cols_,Supers_,Subs_,Options_> >
 {
   typedef BandShape Shape;
 };

diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h
index 585323c..afbea86 100644
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h

@@ -23,7 +23,7 @@
   typedef typename traits<XprType>::StorageKind StorageKind;
   typedef typename traits<XprType>::XprKind XprKind;
   typedef typename ref_selector<XprType>::type XprTypeNested;
-  typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
+  typedef typename remove_reference<XprTypeNested>::type XprTypeNested_;
   enum{
     MatrixRows = traits<XprType>::RowsAtCompileTime,
     MatrixCols = traits<XprType>::ColsAtCompileTime,

diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 4a20312..ac8c8e5 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h

@@ -812,11 +812,11 @@
 
 // -------------------- CwiseUnaryView --------------------
 
-template<typename UnaryOp, typename ArgType>
-struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
-  : evaluator_base<CwiseUnaryView<UnaryOp, ArgType> >
+template<typename UnaryOp, typename ArgType, typename StrideType>
+struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType, StrideType>, IndexBased>
+  : evaluator_base<CwiseUnaryView<UnaryOp, ArgType, StrideType> >
 {
-  typedef CwiseUnaryView<UnaryOp, ArgType> XprType;
+  typedef CwiseUnaryView<UnaryOp, ArgType, StrideType> XprType;
 
   enum {
     CoeffReadCost = int(evaluator<ArgType>::CoeffReadCost) + int(functor_traits<UnaryOp>::Cost),
@@ -902,7 +902,8 @@
       m_innerStride(map.innerStride()),
       m_outerStride(map.outerStride())
   {
-    EIGEN_STATIC_ASSERT(check_implication(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1),
+    EIGEN_STATIC_ASSERT(check_implication((evaluator<Derived>::Flags & PacketAccessBit) != 0,
+                                          internal::inner_stride_at_compile_time<Derived>::ret == 1),
                         PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
   }

diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h
index a3828d1..ea491c6 100644
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h

@@ -45,10 +45,10 @@
                                       typename traits<Rhs>::StorageIndex>::type StorageIndex;
   typedef typename Lhs::Nested LhsNested;
   typedef typename Rhs::Nested RhsNested;
-  typedef typename remove_reference<LhsNested>::type _LhsNested;
-  typedef typename remove_reference<RhsNested>::type _RhsNested;
+  typedef typename remove_reference<LhsNested>::type LhsNested_;
+  typedef typename remove_reference<RhsNested>::type RhsNested_;
   enum {
-    Flags = cwise_promote_storage_order<typename traits<Lhs>::StorageKind,typename traits<Rhs>::StorageKind,_LhsNested::Flags & RowMajorBit,_RhsNested::Flags & RowMajorBit>::value
+    Flags = cwise_promote_storage_order<typename traits<Lhs>::StorageKind,typename traits<Rhs>::StorageKind,LhsNested_::Flags & RowMajorBit,RhsNested_::Flags & RowMajorBit>::value
   };
 };
 } // end namespace internal
@@ -102,8 +102,8 @@
 
     typedef typename internal::ref_selector<LhsType>::type LhsNested;
     typedef typename internal::ref_selector<RhsType>::type RhsNested;
-    typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
-    typedef typename internal::remove_reference<RhsNested>::type _RhsNested;
+    typedef typename internal::remove_reference<LhsNested>::type LhsNested_;
+    typedef typename internal::remove_reference<RhsNested>::type RhsNested_;
 
 #if EIGEN_COMP_MSVC
     //Required for Visual Studio or the Copy constructor will probably not get inlined!
@@ -131,10 +131,10 @@
 
     /** \returns the left hand side nested expression */
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const _LhsNested& lhs() const { return m_lhs; }
+    const LhsNested_& lhs() const { return m_lhs; }
     /** \returns the right hand side nested expression */
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const _RhsNested& rhs() const { return m_rhs; }
+    const RhsNested_& rhs() const { return m_rhs; }
     /** \returns the functor representing the binary operation */
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const BinaryOp& functor() const { return m_functor; }

diff --git a/Eigen/src/Core/CwiseTernaryOp.h b/Eigen/src/Core/CwiseTernaryOp.h
index 52a0ae7..393279bc 100644
--- a/Eigen/src/Core/CwiseTernaryOp.h
+++ b/Eigen/src/Core/CwiseTernaryOp.h

@@ -43,10 +43,10 @@
   typedef typename Arg1::Nested Arg1Nested;
   typedef typename Arg2::Nested Arg2Nested;
   typedef typename Arg3::Nested Arg3Nested;
-  typedef typename remove_reference<Arg1Nested>::type _Arg1Nested;
-  typedef typename remove_reference<Arg2Nested>::type _Arg2Nested;
-  typedef typename remove_reference<Arg3Nested>::type _Arg3Nested;
-  enum { Flags = _Arg1Nested::Flags & RowMajorBit };
+  typedef typename remove_reference<Arg1Nested>::type Arg1Nested_;
+  typedef typename remove_reference<Arg2Nested>::type Arg2Nested_;
+  typedef typename remove_reference<Arg3Nested>::type Arg3Nested_;
+  enum { Flags = Arg1Nested_::Flags & RowMajorBit };
 };
 }  // end namespace internal
 
@@ -115,9 +115,9 @@
   typedef typename internal::ref_selector<Arg1Type>::type Arg1Nested;
   typedef typename internal::ref_selector<Arg2Type>::type Arg2Nested;
   typedef typename internal::ref_selector<Arg3Type>::type Arg3Nested;
-  typedef typename internal::remove_reference<Arg1Nested>::type _Arg1Nested;
-  typedef typename internal::remove_reference<Arg2Nested>::type _Arg2Nested;
-  typedef typename internal::remove_reference<Arg3Nested>::type _Arg3Nested;
+  typedef typename internal::remove_reference<Arg1Nested>::type Arg1Nested_;
+  typedef typename internal::remove_reference<Arg2Nested>::type Arg2Nested_;
+  typedef typename internal::remove_reference<Arg3Nested>::type Arg3Nested_;
 
   EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE CwiseTernaryOp(const Arg1& a1, const Arg2& a2,
@@ -165,13 +165,13 @@
 
   /** \returns the first argument nested expression */
   EIGEN_DEVICE_FUNC
-  const _Arg1Nested& arg1() const { return m_arg1; }
+  const Arg1Nested_& arg1() const { return m_arg1; }
   /** \returns the first argument nested expression */
   EIGEN_DEVICE_FUNC
-  const _Arg2Nested& arg2() const { return m_arg2; }
+  const Arg2Nested_& arg2() const { return m_arg2; }
   /** \returns the third argument nested expression */
   EIGEN_DEVICE_FUNC
-  const _Arg3Nested& arg3() const { return m_arg3; }
+  const Arg3Nested_& arg3() const { return m_arg3; }
   /** \returns the functor representing the ternary operation */
   EIGEN_DEVICE_FUNC
   const TernaryOp& functor() const { return m_functor; }

diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h
index f2c6518..d9985c0 100644
--- a/Eigen/src/Core/CwiseUnaryOp.h
+++ b/Eigen/src/Core/CwiseUnaryOp.h

@@ -24,9 +24,9 @@
                      UnaryOp(const typename XprType::Scalar&)
                    >::type Scalar;
   typedef typename XprType::Nested XprTypeNested;
-  typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
+  typedef typename remove_reference<XprTypeNested>::type XprTypeNested_;
   enum {
-    Flags = _XprTypeNested::Flags & RowMajorBit
+    Flags = XprTypeNested_::Flags & RowMajorBit
   };
 };
 }

diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h
index 9fc1dcd..fabb3f8 100644
--- a/Eigen/src/Core/CwiseUnaryView.h
+++ b/Eigen/src/Core/CwiseUnaryView.h

@@ -15,32 +15,37 @@
 namespace Eigen {
 
 namespace internal {
-template<typename ViewOp, typename MatrixType>
-struct traits<CwiseUnaryView<ViewOp, MatrixType> >
+template<typename ViewOp, typename MatrixType, typename StrideType>
+struct traits<CwiseUnaryView<ViewOp, MatrixType, StrideType> >
  : traits<MatrixType>
 {
   typedef typename result_of<
                      ViewOp(const typename traits<MatrixType>::Scalar&)
                    >::type Scalar;
   typedef typename MatrixType::Nested MatrixTypeNested;
-  typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
+  typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNested_;
   enum {
     FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
-    Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions
+    Flags = traits<MatrixTypeNested_>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions
     MatrixTypeInnerStride =  inner_stride_at_compile_time<MatrixType>::ret,
     // need to cast the sizeof's from size_t to int explicitly, otherwise:
     // "error: no integral type can represent all of the enumerator values
-    InnerStrideAtCompileTime = MatrixTypeInnerStride == Dynamic
-                             ? int(Dynamic)
-                             : int(MatrixTypeInnerStride) * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)),
-    OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret == Dynamic
-                             ? int(Dynamic)
-                             : outer_stride_at_compile_time<MatrixType>::ret * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar))
+    InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
+                             ? (MatrixTypeInnerStride == Dynamic
+                               ? int(Dynamic)
+                               : int(MatrixTypeInnerStride) * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)))
+                             : int(StrideType::InnerStrideAtCompileTime),
+
+    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
+                             ? (outer_stride_at_compile_time<MatrixType>::ret == Dynamic
+                               ? int(Dynamic)
+                               : outer_stride_at_compile_time<MatrixType>::ret * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)))
+                             : int(StrideType::OuterStrideAtCompileTime)
   };
 };
 }
 
-template<typename ViewOp, typename MatrixType, typename StorageKind>
+template<typename ViewOp, typename MatrixType, typename StrideType, typename StorageKind>
 class CwiseUnaryViewImpl;
 
 /** \class CwiseUnaryView
@@ -56,12 +61,12 @@
   *
   * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
   */
-template<typename ViewOp, typename MatrixType>
-class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind>
+template<typename ViewOp, typename MatrixType, typename StrideType>
+class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType, typename internal::traits<MatrixType>::StorageKind>
 {
   public:
 
-    typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
+    typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType, StrideType, typename internal::traits<MatrixType>::StorageKind>::Base Base;
     EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
     typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
     typedef typename internal::remove_all<MatrixType>::type NestedExpression;
@@ -93,22 +98,22 @@
 };
 
 // Generic API dispatcher
-template<typename ViewOp, typename XprType, typename StorageKind>
+template<typename ViewOp, typename XprType, typename StrideType, typename StorageKind>
 class CwiseUnaryViewImpl
-  : public internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type
+  : public internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType, StrideType> >::type
 {
 public:
-  typedef typename internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type Base;
+  typedef typename internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType, StrideType> >::type Base;
 };
 
-template<typename ViewOp, typename MatrixType>
-class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
-  : public internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type
+template<typename ViewOp, typename MatrixType, typename StrideType>
+class CwiseUnaryViewImpl<ViewOp,MatrixType,StrideType,Dense>
+  : public internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType, StrideType> >::type
 {
   public:
 
-    typedef CwiseUnaryView<ViewOp, MatrixType> Derived;
-    typedef typename internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type Base;
+    typedef CwiseUnaryView<ViewOp, MatrixType,StrideType> Derived;
+    typedef typename internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType,StrideType> >::type Base;
 
     EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl)
@@ -118,12 +123,16 @@
 
     EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const
     {
-      return derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
+      return StrideType::InnerStrideAtCompileTime != 0
+             ? int(StrideType::InnerStrideAtCompileTime)
+             : derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
     }
 
     EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const
     {
-      return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
+      return StrideType::OuterStrideAtCompileTime != 0
+             ? int(StrideType::OuterStrideAtCompileTime)
+             : derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
     }
   protected:
     EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CwiseUnaryViewImpl)

diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index d62c851..6d5a100 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h

@@ -275,6 +275,11 @@
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator=(const DenseBase& other);
 
+#if EIGEN_COMP_HAS_P0848R3
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseBase& operator=(
+        const DenseBase& other) requires internal::has_trivially_copyable_storage<Derived>::value = default;
+#endif
+
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
     Derived& operator=(const EigenBase<OtherDerived> &other);

diff --git a/Eigen/src/Core/Diagonal.h b/Eigen/src/Core/Diagonal.h
index 6d8df3d..2bf4527 100644
--- a/Eigen/src/Core/Diagonal.h
+++ b/Eigen/src/Core/Diagonal.h

@@ -20,8 +20,8 @@
   *
   * \brief Expression of a diagonal/subdiagonal/superdiagonal in a matrix
   *
-  * \param MatrixType the type of the object in which we are taking a sub/main/super diagonal
-  * \param DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal.
+  * \tparam MatrixType the type of the object in which we are taking a sub/main/super diagonal
+  * \tparam DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal.
   *              A positive value means a superdiagonal, a negative value means a subdiagonal.
   *              You can also use DynamicIndex so the index can be set at runtime.
   *
@@ -40,7 +40,7 @@
  : traits<MatrixType>
 {
   typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
-  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
+  typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNested_;
   typedef typename MatrixType::StorageKind StorageKind;
   enum {
     RowsAtCompileTime = (int(DiagIndex) == DynamicIndex || int(MatrixType::SizeAtCompileTime) == Dynamic) ? Dynamic
@@ -54,7 +54,7 @@
                                            MatrixType::MaxColsAtCompileTime - plain_enum_max( DiagIndex, 0))),
     MaxColsAtCompileTime = 1,
     MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
-    Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions
+    Flags = (unsigned int)MatrixTypeNested_::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions
     MatrixTypeOuterStride = outer_stride_at_compile_time<MatrixType>::ret,
     InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1,
     OuterStrideAtCompileTime = 0

diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h
index d5053d6..453f69b 100644
--- a/Eigen/src/Core/DiagonalMatrix.h
+++ b/Eigen/src/Core/DiagonalMatrix.h

@@ -118,9 +118,9 @@
   *
   * \brief Represents a diagonal matrix with its storage
   *
-  * \param Scalar_ the type of coefficients
-  * \param SizeAtCompileTime the dimension of the matrix, or Dynamic
-  * \param MaxSizeAtCompileTime the dimension of the matrix, or Dynamic. This parameter is optional and defaults
+  * \tparam Scalar_ the type of coefficients
+  * \tparam SizeAtCompileTime the dimension of the matrix, or Dynamic
+  * \tparam MaxSizeAtCompileTime the dimension of the matrix, or Dynamic. This parameter is optional and defaults
   *        to SizeAtCompileTime. Most of the time, you do not need to specify it.
   *
   * \sa class DiagonalWrapper
@@ -261,7 +261,7 @@
   *
   * \brief Expression of a diagonal matrix
   *
-  * \param DiagonalVectorType_ the type of the vector of diagonal coefficients
+  * \tparam DiagonalVectorType_ the type of the vector of diagonal coefficients
   *
   * This class is an expression of a diagonal matrix, but not storing its own vector of diagonal coefficients,
   * instead wrapping an existing vector expression. It is the return type of MatrixBase::asDiagonal()

diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index 387b6ce..a7c20f5 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h

@@ -125,8 +125,8 @@
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
 MatrixBase<Derived>::normalized() const
 {
-  typedef typename internal::nested_eval<Derived,2>::type _Nested;
-  _Nested n(derived());
+  typedef typename internal::nested_eval<Derived,2>::type Nested_;
+  Nested_ n(derived());
   RealScalar z = n.squaredNorm();
   // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
   if(z>RealScalar(0))
@@ -168,8 +168,8 @@
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
 MatrixBase<Derived>::stableNormalized() const
 {
-  typedef typename internal::nested_eval<Derived,3>::type _Nested;
-  _Nested n(derived());
+  typedef typename internal::nested_eval<Derived,3>::type Nested_;
+  Nested_ n(derived());
   RealScalar w = n.cwiseAbs().maxCoeff();
   RealScalar z = (n/w).squaredNorm();
   if(z>RealScalar(0))

diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index 33b667d..0d24a00 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h

@@ -52,17 +52,17 @@
 
 template<typename Lhs, typename Rhs> struct product_type
 {
-  typedef typename remove_all<Lhs>::type _Lhs;
-  typedef typename remove_all<Rhs>::type _Rhs;
+  typedef typename remove_all<Lhs>::type Lhs_;
+  typedef typename remove_all<Rhs>::type Rhs_;
   enum {
-    MaxRows = traits<_Lhs>::MaxRowsAtCompileTime,
-    Rows    = traits<_Lhs>::RowsAtCompileTime,
-    MaxCols = traits<_Rhs>::MaxColsAtCompileTime,
-    Cols    = traits<_Rhs>::ColsAtCompileTime,
-    MaxDepth = min_size_prefer_fixed(traits<_Lhs>::MaxColsAtCompileTime,
-                                     traits<_Rhs>::MaxRowsAtCompileTime),
-    Depth = min_size_prefer_fixed(traits<_Lhs>::ColsAtCompileTime,
-                                  traits<_Rhs>::RowsAtCompileTime)
+    MaxRows = traits<Lhs_>::MaxRowsAtCompileTime,
+    Rows    = traits<Lhs_>::RowsAtCompileTime,
+    MaxCols = traits<Rhs_>::MaxColsAtCompileTime,
+    Cols    = traits<Rhs_>::ColsAtCompileTime,
+    MaxDepth = min_size_prefer_fixed(traits<Lhs_>::MaxColsAtCompileTime,
+                                     traits<Rhs_>::MaxRowsAtCompileTime),
+    Depth = min_size_prefer_fixed(traits<Lhs_>::ColsAtCompileTime,
+                                  traits<Rhs_>::RowsAtCompileTime)
   };
 
   // the splitting into different lines of code here, introducing the _select enums and the typedef below,

diff --git a/Eigen/src/Core/IndexedView.h b/Eigen/src/Core/IndexedView.h
index d27ec80..e7ca88b 100644
--- a/Eigen/src/Core/IndexedView.h
+++ b/Eigen/src/Core/IndexedView.h

@@ -124,10 +124,10 @@
   {}
 
   /** \returns number of rows */
-  Index rows() const { return internal::size(m_rowIndices); }
+  Index rows() const { return internal::index_list_size(m_rowIndices); }
 
   /** \returns number of columns */
-  Index cols() const { return internal::size(m_colIndices); }
+  Index cols() const { return internal::index_list_size(m_colIndices); }
 
   /** \returns the nested expression */
   const typename internal::remove_all<XprType>::type&

diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 44c0e25..77051a0 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h

@@ -52,6 +52,14 @@
     Alignment = actual_alignment
   };
 };
+
+template<typename Scalar_, int Rows_, int Cols_, int Options_, int MaxRows_, int MaxCols_>
+struct has_trivially_copyable_storage<Matrix<Scalar_, Rows_, Cols_, Options_, MaxRows_, MaxCols_> >
+{
+  // Must be identical to the type of PlainObjectBase::m_storage.
+  typedef DenseStorage<Scalar_, internal::size_at_compile_time<MaxRows_, MaxCols_>::ret, Rows_, Cols_, Options_> Storage;
+  static const bool value = std::is_trivially_copyable<Storage>::value;
+};
 }
 
 /** \class Matrix
@@ -210,6 +218,12 @@
       return Base::_set(other);
     }
 
+#if EIGEN_COMP_HAS_P0848R3
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Matrix& operator=(
+        const Matrix& other) requires internal::has_trivially_copyable_storage<Matrix>::value = default;
+#endif
+
     /** \internal
       * \brief Copies the value of the expression \a other into \c *this with automatic resizing.
       *
@@ -279,6 +293,13 @@
       return *this;
     }
 
+#if EIGEN_COMP_HAS_P0848R3
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(Matrix&& other) EIGEN_NOEXCEPT
+        requires internal::has_trivially_copyable_storage<Matrix>::value = default;
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT
+        requires internal::has_trivially_copyable_storage<Matrix>::value = default;
+#endif
+
     /** \copydoc PlainObjectBase(const Scalar&, const Scalar&, const Scalar&,  const Scalar&, const ArgTypes&... args)
      *
      * Example: \include Matrix_variadic_ctor_cxx11.cpp
@@ -404,6 +425,12 @@
     EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other)
     { }
 
+#if EIGEN_COMP_HAS_P0848R3
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Matrix(const Matrix& other) requires internal::has_trivially_copyable_storage<Matrix>::value =
+        default;
+#endif
+
     /** \brief Copy constructor for generic expressions.
       * \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
       */

diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 70d0cf7..89a4110 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h

@@ -140,6 +140,11 @@
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator=(const MatrixBase& other);
 
+#if EIGEN_COMP_HAS_P0848R3
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MatrixBase& operator=(
+        const MatrixBase& other) requires internal::has_trivially_copyable_storage<Derived>::value = default;
+#endif
+
     // We cannot inherit here via Base::operator= since it is causing
     // trouble with MSVC.
 

diff --git a/Eigen/src/Core/PartialReduxEvaluator.h b/Eigen/src/Core/PartialReduxEvaluator.h
index a179688..b6f31f9 100644
--- a/Eigen/src/Core/PartialReduxEvaluator.h
+++ b/Eigen/src/Core/PartialReduxEvaluator.h

@@ -154,16 +154,16 @@
                   : TraversalSize==0 ? 1
                   : int(TraversalSize) * int(evaluator<ArgType>::CoeffReadCost) + int(CostOpType::value),
     
-    _ArgFlags = evaluator<ArgType>::Flags,
+    ArgFlags_ = evaluator<ArgType>::Flags,
 
-    _Vectorizable =  bool(int(_ArgFlags)&PacketAccessBit)
+    Vectorizable_ =  bool(int(ArgFlags_)&PacketAccessBit)
                   && bool(MemberOp::Vectorizable)
-                  && (Direction==int(Vertical) ? bool(_ArgFlags&RowMajorBit) : (_ArgFlags&RowMajorBit)==0)
+                  && (Direction==int(Vertical) ? bool(ArgFlags_&RowMajorBit) : (ArgFlags_&RowMajorBit)==0)
                   && (TraversalSize!=0),
                   
     Flags = (traits<XprType>::Flags&RowMajorBit)
           | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit)))
-          | (_Vectorizable ? PacketAccessBit : 0)
+          | (Vectorizable_ ? PacketAccessBit : 0)
           | LinearAccessBit,
     
     Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized

diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h
index a6910e2..1b4195a 100644
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h

@@ -391,20 +391,20 @@
 
 
 namespace internal {
-template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename StorageIndex_, int _PacketAccess>
-struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, StorageIndex_>,_PacketAccess> >
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename StorageIndex_, int PacketAccess_>
+struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, StorageIndex_>,PacketAccess_> >
  : traits<Matrix<StorageIndex_,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
 {
   typedef PermutationStorage StorageKind;
-  typedef Map<const Matrix<StorageIndex_, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
+  typedef Map<const Matrix<StorageIndex_, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, PacketAccess_> IndicesType;
   typedef StorageIndex_ StorageIndex;
   typedef void Scalar;
 };
 }
 
-template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename StorageIndex_, int _PacketAccess>
-class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, StorageIndex_>,_PacketAccess>
-  : public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, StorageIndex_>,_PacketAccess> >
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename StorageIndex_, int PacketAccess_>
+class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, StorageIndex_>,PacketAccess_>
+  : public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, StorageIndex_>,PacketAccess_> >
 {
     typedef PermutationBase<Map> Base;
     typedef internal::traits<Map> Traits;

diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 4367ea5..33ae507 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h

@@ -463,6 +463,12 @@
       return _set(other);
     }
 
+#if EIGEN_COMP_HAS_P0848R3
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE PlainObjectBase& operator=(
+        const PlainObjectBase& other) requires internal::has_trivially_copyable_storage<Derived>::value = default;
+#endif
+
     /** \sa MatrixBase::lazyAssign() */
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
@@ -514,10 +520,27 @@
       return *this;
     }
 
+#if EIGEN_COMP_HAS_P0848R3
+    EIGEN_DEVICE_FUNC
+    PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT
+        requires internal::has_trivially_copyable_storage<Derived>::value = default;
+
+    EIGEN_DEVICE_FUNC
+    PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT
+        requires internal::has_trivially_copyable_storage<Derived>::value = default;
+#endif
+
     /** Copy constructor */
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other)
       : Base(), m_storage(other.m_storage) { }
+
+#if EIGEN_COMP_HAS_P0848R3
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE PlainObjectBase(
+        const PlainObjectBase& other) requires internal::has_trivially_copyable_storage<Derived>::value = default;
+#endif
+
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols)
       : m_storage(size, rows, cols)

diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index aff3572..5f9ffe8 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h

@@ -841,19 +841,19 @@
     StorageOrder_ = (Derived::MaxRowsAtCompileTime==1 && Derived::MaxColsAtCompileTime!=1) ? RowMajor
                   : (Derived::MaxColsAtCompileTime==1 && Derived::MaxRowsAtCompileTime!=1) ? ColMajor
                   : MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
-    _SameStorageOrder = StorageOrder_ == (MatrixFlags & RowMajorBit ? RowMajor : ColMajor),
+    SameStorageOrder_ = StorageOrder_ == (MatrixFlags & RowMajorBit ? RowMajor : ColMajor),
 
-    _ScalarAccessOnDiag =  !((int(StorageOrder_) == ColMajor && int(ProductOrder) == OnTheLeft)
+    ScalarAccessOnDiag_ =  !((int(StorageOrder_) == ColMajor && int(ProductOrder) == OnTheLeft)
                            ||(int(StorageOrder_) == RowMajor && int(ProductOrder) == OnTheRight)),
-    _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
+    SameTypes_ = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
     // FIXME currently we need same types, but in the future the next rule should be the one
-    //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
-    _Vectorizable =   bool(int(MatrixFlags)&PacketAccessBit)
-                  &&  _SameTypes
-                  && (_SameStorageOrder || (MatrixFlags&LinearAccessBit)==LinearAccessBit)
-                  && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
-    _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
-    Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
+    //Vectorizable_ = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (SameTypes_ && bool(int(DiagFlags)&PacketAccessBit))),
+    Vectorizable_ =   bool(int(MatrixFlags)&PacketAccessBit)
+                  &&  SameTypes_
+                  && (SameStorageOrder_ || (MatrixFlags&LinearAccessBit)==LinearAccessBit)
+                  && (ScalarAccessOnDiag_ || (bool(int(DiagFlags)&PacketAccessBit))),
+    LinearAccessMask_ = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
+    Flags = ((HereditaryBits|LinearAccessMask_) & (unsigned int)(MatrixFlags)) | (Vectorizable_ ? PacketAccessBit : 0),
     Alignment = evaluator<MatrixType>::Alignment,
 
     AsScalarProduct =     (DiagonalType::SizeAtCompileTime==1)

diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h
index 3e1d99c..c31dfcc 100644
--- a/Eigen/src/Core/Ref.h
+++ b/Eigen/src/Core/Ref.h

@@ -16,16 +16,16 @@
 
 namespace internal {
 
-template<typename _PlainObjectType, int Options_, typename _StrideType>
-struct traits<Ref<_PlainObjectType, Options_, _StrideType> >
-  : public traits<Map<_PlainObjectType, Options_, _StrideType> >
+template<typename PlainObjectType_, int Options_, typename StrideType_>
+struct traits<Ref<PlainObjectType_, Options_, StrideType_> >
+  : public traits<Map<PlainObjectType_, Options_, StrideType_> >
 {
-  typedef _PlainObjectType PlainObjectType;
-  typedef _StrideType StrideType;
+  typedef PlainObjectType_ PlainObjectType;
+  typedef StrideType_ StrideType;
   enum {
     Options = Options_,
-    Flags = traits<Map<_PlainObjectType, Options_, _StrideType> >::Flags | NestByRefBit,
-    Alignment = traits<Map<_PlainObjectType, Options_, _StrideType> >::Alignment
+    Flags = traits<Map<PlainObjectType_, Options_, StrideType_> >::Flags | NestByRefBit,
+    Alignment = traits<Map<PlainObjectType_, Options_, StrideType_> >::Alignment
   };
 
   template<typename Derived> struct match {

diff --git a/Eigen/src/Core/Replicate.h b/Eigen/src/Core/Replicate.h
index 0ee9a10..6b5f9fe 100644
--- a/Eigen/src/Core/Replicate.h
+++ b/Eigen/src/Core/Replicate.h

@@ -23,7 +23,7 @@
   typedef typename traits<MatrixType>::StorageKind StorageKind;
   typedef typename traits<MatrixType>::XprKind XprKind;
   typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
-  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
+  typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNested_;
   enum {
     RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic
                       ? Dynamic
@@ -64,7 +64,7 @@
   : public internal::dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type
 {
     typedef typename internal::traits<Replicate>::MatrixTypeNested MatrixTypeNested;
-    typedef typename internal::traits<Replicate>::_MatrixTypeNested _MatrixTypeNested;
+    typedef typename internal::traits<Replicate>::MatrixTypeNested_ MatrixTypeNested_;
   public:
 
     typedef typename internal::dense_xpr_base<Replicate>::type Base;
@@ -96,7 +96,7 @@
     inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); }
 
     EIGEN_DEVICE_FUNC
-    const _MatrixTypeNested& nestedExpression() const
+    const MatrixTypeNested_& nestedExpression() const
     {
       return m_matrix;
     }

diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h
index 7ad5023..dbc6929 100644
--- a/Eigen/src/Core/Reverse.h
+++ b/Eigen/src/Core/Reverse.h

@@ -26,13 +26,13 @@
   typedef typename traits<MatrixType>::StorageKind StorageKind;
   typedef typename traits<MatrixType>::XprKind XprKind;
   typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
-  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
+  typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNested_;
   enum {
     RowsAtCompileTime = MatrixType::RowsAtCompileTime,
     ColsAtCompileTime = MatrixType::ColsAtCompileTime,
     MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-    Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit)
+    Flags = MatrixTypeNested_::Flags & (RowMajorBit | LvalueBit)
   };
 };
 

diff --git a/Eigen/src/Core/Select.h b/Eigen/src/Core/Select.h
index f2a2a03..d9ed2b2 100644
--- a/Eigen/src/Core/Select.h
+++ b/Eigen/src/Core/Select.h

@@ -19,9 +19,9 @@
   *
   * \brief Expression of a coefficient wise version of the C++ ternary operator ?:
   *
-  * \param ConditionMatrixType the type of the \em condition expression which must be a boolean matrix
-  * \param ThenMatrixType the type of the \em then expression
-  * \param ElseMatrixType the type of the \em else expression
+  * \tparam ConditionMatrixType the type of the \em condition expression which must be a boolean matrix
+  * \tparam ThenMatrixType the type of the \em then expression
+  * \tparam ElseMatrixType the type of the \em else expression
   *
   * This class represents an expression of a coefficient wise version of the C++ ternary operator ?:.
   * It is the return type of DenseBase::select() and most of the time this is the only way it is used.

diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h
index 6b11247..7096058 100644
--- a/Eigen/src/Core/SelfAdjointView.h
+++ b/Eigen/src/Core/SelfAdjointView.h

@@ -20,8 +20,8 @@
   *
   * \brief Expression of a selfadjoint matrix from a triangular part of a dense matrix
   *
-  * \param MatrixType the type of the dense matrix storing the coefficients
-  * \param TriangularPart can be either \c #Lower or \c #Upper
+  * \tparam MatrixType the type of the dense matrix storing the coefficients
+  * \tparam TriangularPart can be either \c #Lower or \c #Upper
   *
   * This class is an expression of a sefladjoint matrix from a triangular part of a matrix
   * with given dense storage of the coefficients. It is the return type of MatrixBase::selfadjointView()

diff --git a/Eigen/src/Core/Transpositions.h b/Eigen/src/Core/Transpositions.h
index 6996ca2..84a9773 100644
--- a/Eigen/src/Core/Transpositions.h
+++ b/Eigen/src/Core/Transpositions.h

@@ -201,11 +201,11 @@
 
 
 namespace internal {
-template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename StorageIndex_, int _PacketAccess>
-struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,StorageIndex_>,_PacketAccess> >
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename StorageIndex_, int PacketAccess_>
+struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,StorageIndex_>,PacketAccess_> >
  : traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,StorageIndex_> >
 {
-  typedef Map<const Matrix<StorageIndex_,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType;
+  typedef Map<const Matrix<StorageIndex_,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, PacketAccess_> IndicesType;
   typedef StorageIndex_ StorageIndex;
   typedef TranspositionsStorage StorageKind;
 };

diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index 9376a79..28f515f 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h

@@ -155,8 +155,8 @@
   *
   * \brief Expression of a triangular part in a matrix
   *
-  * \param MatrixType the type of the object in which we are taking the triangular part
-  * \param Mode the kind of triangular matrix expression to construct. Can be #Upper,
+  * \tparam MatrixType the type of the object in which we are taking the triangular part
+  * \tparam Mode the kind of triangular matrix expression to construct. Can be #Upper,
   *             #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower.
   *             This is in fact a bit field; it must have either #Upper or #Lower,
   *             and additionally it may have #UnitDiag or #ZeroDiag or neither.

diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h
index 17b9d0b..d517dff 100644
--- a/Eigen/src/Core/arch/AVX/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX/MathFunctions.h

@@ -101,17 +101,22 @@
 template <>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
 Packet8f psqrt<Packet8f>(const Packet8f& _x) {
-  Packet8f minus_half_x = pmul(_x, pset1<Packet8f>(-0.5f));
-  Packet8f denormal_mask = pandnot(
-      pcmp_lt(_x, pset1<Packet8f>((std::numeric_limits<float>::min)())),
-      pcmp_lt(_x, pzero(_x)));
+  const Packet8f minus_half_x = pmul(_x, pset1<Packet8f>(-0.5f));
+  const Packet8f negative_mask = pcmp_lt(_x, pzero(_x));
+  const Packet8f denormal_mask =
+      pandnot(pcmp_lt(_x, pset1<Packet8f>((std::numeric_limits<float>::min)())),
+              negative_mask);
 
   // Compute approximate reciprocal sqrt.
-  Packet8f x = _mm256_rsqrt_ps(_x);
+  Packet8f rs = _mm256_rsqrt_ps(_x);
+  // Flush negative arguments to zero. This is a workaround which ensures
+  // that sqrt of a negative denormal returns -NaN, despite _mm256_rsqrt_ps
+  // returning -Inf for such values.
+  const Packet8f x_flushed = pandnot(_x, negative_mask);
   // Do a single step of Newton's iteration.
-  x = pmul(x, pmadd(minus_half_x, pmul(x,x), pset1<Packet8f>(1.5f)));
+  rs = pmul(rs, pmadd(minus_half_x, pmul(rs,rs), pset1<Packet8f>(1.5f)));
   // Flush results for denormals to zero.
-  return pandnot(pmul(_x,x), denormal_mask);
+  return pandnot(pmul(x_flushed, rs), denormal_mask);
 }
 
 #else
@@ -131,10 +136,10 @@
 #if EIGEN_FAST_MATH
 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
 Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
-  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
-  _EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
-  _EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
-  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
+  EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
+  EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
+  EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
+  EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
 
   Packet8f neg_half = pmul(_x, p8f_minus_half);
 
@@ -164,14 +169,14 @@
 #else
 template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
 Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
-  _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
+  EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
   return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(_x));
 }
 #endif
 
 template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
 Packet4d prsqrt<Packet4d>(const Packet4d& _x) {
-  _EIGEN_DECLARE_CONST_Packet4d(one, 1.0);
+  EIGEN_DECLARE_CONST_Packet4d(one, 1.0);
   return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(_x));
 }
 

diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index fdf1afb..ef73d35 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h

@@ -42,16 +42,16 @@
 template<> struct is_arithmetic<Packet8h> { enum { value = true }; };
 template<> struct is_arithmetic<Packet8bf> { enum { value = true }; };
 
-#define _EIGEN_DECLARE_CONST_Packet8f(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet8f(NAME,X) \
   const Packet8f p8f_##NAME = pset1<Packet8f>(X)
 
-#define _EIGEN_DECLARE_CONST_Packet4d(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet4d(NAME,X) \
   const Packet4d p4d_##NAME = pset1<Packet4d>(X)
 
-#define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \
   const Packet8f p8f_##NAME = _mm256_castsi256_ps(pset1<Packet8i>(X))
 
-#define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
   const Packet8i p8i_##NAME = pset1<Packet8i>(X)
 
 // Use the packet_traits defined in AVX512/PacketMath.h instead if we're going

diff --git a/Eigen/src/Core/arch/AVX512/MathFunctions.h b/Eigen/src/Core/arch/AVX512/MathFunctions.h
index 26ab168..54be6cf 100644
--- a/Eigen/src/Core/arch/AVX512/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX512/MathFunctions.h

@@ -19,22 +19,22 @@
 // Disable the code for older versions of gcc that don't support many of the required avx512 instrinsics.
 #if EIGEN_GNUC_AT_LEAST(5, 3) || EIGEN_COMP_CLANG  || EIGEN_COMP_MSVC >= 1923
 
-#define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
+#define EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
   const Packet16f p16f_##NAME = pset1<Packet16f>(X)
 
-#define _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \
+#define EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \
   const Packet16f p16f_##NAME =  preinterpret<Packet16f,Packet16i>(pset1<Packet16i>(X))
 
-#define _EIGEN_DECLARE_CONST_Packet8d(NAME, X) \
+#define EIGEN_DECLARE_CONST_Packet8d(NAME, X) \
   const Packet8d p8d_##NAME = pset1<Packet8d>(X)
 
-#define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
+#define EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
   const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))
 
-#define _EIGEN_DECLARE_CONST_Packet16bf(NAME, X) \
+#define EIGEN_DECLARE_CONST_Packet16bf(NAME, X) \
   const Packet16bf p16bf_##NAME = pset1<Packet16bf>(X)
 
-#define _EIGEN_DECLARE_CONST_Packet16bf_FROM_INT(NAME, X) \
+#define EIGEN_DECLARE_CONST_Packet16bf_FROM_INT(NAME, X) \
   const Packet16bf p16bf_##NAME =  preinterpret<Packet16bf,Packet16i>(pset1<Packet16i>(X))
 
 template <>
@@ -73,21 +73,21 @@
 template <>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
 pexp<Packet16f>(const Packet16f& _x) {
-  _EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
-  _EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
-  _EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
+  EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
+  EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
+  EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
 
-  _EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
-  _EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
+  EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
+  EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
 
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
+  EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
 
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
-  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
+  EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
+  EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
+  EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
+  EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
+  EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
+  EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
 
   // Clamp x.
   Packet16f x = pmax(pmin(_x, p16f_exp_hi), p16f_exp_lo);
@@ -98,7 +98,7 @@
 
   // Get r = x - m*ln(2). Note that we can do this without losing more than one
   // ulp precision due to the FMA instruction.
-  _EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
+  EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
   Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
   Packet16f r2 = pmul(r, r);
   Packet16f r3 = pmul(r2, r);
@@ -227,9 +227,9 @@
 template <>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
 prsqrt<Packet16f>(const Packet16f& _x) {
-  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inf, 0x7f800000);
-  _EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
-  _EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
+  EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inf, 0x7f800000);
+  EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
+  EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
 
   Packet16f neg_half = pmul(_x, p16f_minus_half);
 
@@ -257,7 +257,7 @@
 
 template <>
 EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
-  _EIGEN_DECLARE_CONST_Packet16f(one, 1.0f);
+  EIGEN_DECLARE_CONST_Packet16f(one, 1.0f);
   return _mm512_div_ps(p16f_one, _mm512_sqrt_ps(x));
 }
 #endif
@@ -270,9 +270,9 @@
 template <>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
 prsqrt<Packet8d>(const Packet8d& _x) {
-  _EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
-  _EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
-  _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(inf, 0x7ff0000000000000LL);
+  EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
+  EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
+  EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(inf, 0x7ff0000000000000LL);
 
   Packet8d neg_half = pmul(_x, p8d_minus_half);
 
@@ -307,7 +307,7 @@
 #else
 template <>
 EIGEN_STRONG_INLINE Packet8d prsqrt<Packet8d>(const Packet8d& x) {
-  _EIGEN_DECLARE_CONST_Packet8d(one, 1.0f);
+  EIGEN_DECLARE_CONST_Packet8d(one, 1.0f);
   return _mm512_div_pd(p8d_one, _mm512_sqrt_pd(x));
 }
 #endif

diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index c6b632f..efb0242 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h

@@ -41,34 +41,34 @@
 
 // We don't want to write the same code all the time, but we need to reuse the constants
 // and it doesn't really work to declare them global, so we define macros instead
-#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
+#define EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
   Packet4f p4f_##NAME = {X, X, X, X}
 
-#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
+#define EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
   Packet4i p4i_##NAME = vec_splat_s32(X)
 
-#define _EIGEN_DECLARE_CONST_FAST_Packet4ui(NAME,X) \
+#define EIGEN_DECLARE_CONST_FAST_Packet4ui(NAME,X) \
   Packet4ui p4ui_##NAME = {X, X, X, X}
 
-#define _EIGEN_DECLARE_CONST_FAST_Packet8us(NAME,X) \
+#define EIGEN_DECLARE_CONST_FAST_Packet8us(NAME,X) \
   Packet8us p8us_##NAME = {X, X, X, X, X, X, X, X}
 
-#define _EIGEN_DECLARE_CONST_FAST_Packet16uc(NAME,X) \
+#define EIGEN_DECLARE_CONST_FAST_Packet16uc(NAME,X) \
   Packet16uc p16uc_##NAME = {X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X}
 
-#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
   Packet4f p4f_##NAME = pset1<Packet4f>(X)
 
-#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
   Packet4i p4i_##NAME = pset1<Packet4i>(X)
 
-#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
   Packet2d p2d_##NAME = pset1<Packet2d>(X)
 
-#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
   Packet2l p2l_##NAME = pset1<Packet2l>(X)
 
-#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
   const Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(pset1<Packet4i>(X))
 
 #define DST_CHAN 1
@@ -76,15 +76,15 @@
 #define __UNPACK_TYPE__(PACKETNAME) typename unpacket_traits<PACKETNAME>::type 
 
 // These constants are endian-agnostic
-static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}
-static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
-static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1}
-static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16}
-static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1}
-static _EIGEN_DECLARE_CONST_FAST_Packet4ui(SIGN, 0x80000000u);
-static _EIGEN_DECLARE_CONST_FAST_Packet4ui(PREV0DOT5, 0x3EFFFFFFu);
-static _EIGEN_DECLARE_CONST_FAST_Packet8us(ONE,1); //{ 1, 1, 1, 1, 1, 1, 1, 1}
-static _EIGEN_DECLARE_CONST_FAST_Packet16uc(ONE,1);
+static EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}
+static EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
+static EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1}
+static EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16}
+static EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1}
+static EIGEN_DECLARE_CONST_FAST_Packet4ui(SIGN, 0x80000000u);
+static EIGEN_DECLARE_CONST_FAST_Packet4ui(PREV0DOT5, 0x3EFFFFFFu);
+static EIGEN_DECLARE_CONST_FAST_Packet8us(ONE,1); //{ 1, 1, 1, 1, 1, 1, 1, 1}
+static EIGEN_DECLARE_CONST_FAST_Packet16uc(ONE,1);
 static Packet4f p4f_MZERO = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000}
 #ifndef __VSX__
 static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0}
@@ -1182,7 +1182,7 @@
 template<> EIGEN_STRONG_INLINE Packet16c pabs(const Packet16c& a) { return vec_abs(a); }
 template<> EIGEN_STRONG_INLINE Packet16uc pabs(const Packet16uc& a) { return a; }
 template<> EIGEN_STRONG_INLINE Packet8bf  pabs(const Packet8bf& a) {
-  _EIGEN_DECLARE_CONST_FAST_Packet8us(abs_mask,0x7FFF);
+  EIGEN_DECLARE_CONST_FAST_Packet8us(abs_mask,0x7FFF);
   return pand<Packet8us>(p8us_abs_mask, a);
 }
 
@@ -1194,38 +1194,38 @@
 { return vec_sl(a,reinterpret_cast<Packet4ui>(pset1<Packet4i>(N))); }
 template<int N> EIGEN_STRONG_INLINE Packet4f plogical_shift_left(const Packet4f& a)
 {
-  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N);
+  const EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N);
   Packet4ui r = vec_sl(reinterpret_cast<Packet4ui>(a), p4ui_mask);
   return reinterpret_cast<Packet4f>(r);
 }
 
 template<int N> EIGEN_STRONG_INLINE Packet4f plogical_shift_right(const Packet4f& a)
 {
-  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N);
+  const EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N);
   Packet4ui r = vec_sr(reinterpret_cast<Packet4ui>(a), p4ui_mask);
   return reinterpret_cast<Packet4f>(r);
 }
 
 template<int N> EIGEN_STRONG_INLINE Packet4ui plogical_shift_right(const Packet4ui& a)
 {
-  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N);
+  const EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N);
   return vec_sr(a, p4ui_mask);
 }
 
 template<int N> EIGEN_STRONG_INLINE Packet4ui plogical_shift_left(const Packet4ui& a)
 {
-  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N);
+  const EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N);
   return vec_sl(a, p4ui_mask);
 }
 
 template<int N> EIGEN_STRONG_INLINE Packet8us plogical_shift_left(const Packet8us& a)
 {
-  const _EIGEN_DECLARE_CONST_FAST_Packet8us(mask, N);
+  const EIGEN_DECLARE_CONST_FAST_Packet8us(mask, N);
   return vec_sl(a, p8us_mask);
 }
 template<int N> EIGEN_STRONG_INLINE Packet8us plogical_shift_right(const Packet8us& a)
 {
-  const _EIGEN_DECLARE_CONST_FAST_Packet8us(mask, N);
+  const EIGEN_DECLARE_CONST_FAST_Packet8us(mask, N);
   return vec_sr(a, p8us_mask);
 }
 
@@ -1234,7 +1234,7 @@
 }
 
 EIGEN_STRONG_INLINE Packet4f Bf16ToF32Odd(const Packet8bf& bf){
-  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(high_mask, 0xFFFF0000);
+  const EIGEN_DECLARE_CONST_FAST_Packet4ui(high_mask, 0xFFFF0000);
   return pand<Packet4f>(
     reinterpret_cast<Packet4f>(bf.m_val),
     reinterpret_cast<Packet4f>(p4ui_high_mask)
@@ -1244,7 +1244,7 @@
 // Simple interleaving of bool masks, prevents true values from being
 // converted to NaNs.
 EIGEN_STRONG_INLINE Packet8bf F32ToBf16Bool(Packet4f even, Packet4f odd) {
-  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(high_mask, 0xFFFF0000);
+  const EIGEN_DECLARE_CONST_FAST_Packet4ui(high_mask, 0xFFFF0000);
   Packet4f bf_odd, bf_even;
   bf_odd = pand(reinterpret_cast<Packet4f>(p4ui_high_mask), odd);
   bf_even = plogical_shift_right<16>(even);
@@ -1256,18 +1256,18 @@
   Packet4ui lsb = plogical_shift_right<16>(input);
   lsb = pand<Packet4ui>(lsb, reinterpret_cast<Packet4ui>(p4i_ONE));
 
-  _EIGEN_DECLARE_CONST_FAST_Packet4ui(BIAS,0x7FFFu);
+  EIGEN_DECLARE_CONST_FAST_Packet4ui(BIAS,0x7FFFu);
   Packet4ui rounding_bias = padd<Packet4ui>(lsb, p4ui_BIAS);
   input = padd<Packet4ui>(input, rounding_bias);
 
   //Test NaN and Subnormal - Begin
-  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(exp_mask, 0x7F800000);
+  const EIGEN_DECLARE_CONST_FAST_Packet4ui(exp_mask, 0x7F800000);
   Packet4ui exp = pand<Packet4ui>(p4ui_exp_mask, reinterpret_cast<Packet4ui>(p4f));
 
-  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mantissa_mask, 0x7FFFFF);
+  const EIGEN_DECLARE_CONST_FAST_Packet4ui(mantissa_mask, 0x7FFFFF);
   Packet4ui mantissa = pand<Packet4ui>(p4ui_mantissa_mask, reinterpret_cast<Packet4ui>(p4f));
 
-  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(max_exp, 0x7F800000);
+  const EIGEN_DECLARE_CONST_FAST_Packet4ui(max_exp, 0x7F800000);
   Packet4bi is_max_exp = vec_cmpeq(exp, p4ui_max_exp);
   Packet4bi is_zero_exp = vec_cmpeq(exp, reinterpret_cast<Packet4ui>(p4i_ZERO));
 
@@ -1282,7 +1282,7 @@
       reinterpret_cast<Packet4ui>(is_mant_zero)
   );
 
-  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(nan, 0x7FC00000);
+  const EIGEN_DECLARE_CONST_FAST_Packet4ui(nan, 0x7FC00000);
   input = vec_sel(input, p4ui_nan, nan_selector);
   input = vec_sel(input, reinterpret_cast<Packet4ui>(p4f), subnormal_selector);
   //Test NaN and Subnormal - End
@@ -2210,7 +2210,7 @@
   Packet4f float_odd = Bf16ToF32Odd(a);
   Packet4ui int_even = pcast<Packet4f, Packet4ui>(float_even);
   Packet4ui int_odd = pcast<Packet4f, Packet4ui>(float_odd);
-  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(low_mask, 0x0000FFFF);
+  const EIGEN_DECLARE_CONST_FAST_Packet4ui(low_mask, 0x0000FFFF);
   Packet4ui low_even = pand<Packet4ui>(int_even, p4ui_low_mask);
   Packet4ui low_odd = pand<Packet4ui>(int_odd, p4ui_low_mask);
 
@@ -2233,7 +2233,7 @@
 
 template<> EIGEN_STRONG_INLINE Packet8bf pcast<Packet8us, Packet8bf>(const Packet8us& a) {
   //short -> int -> float -> bfloat16
-  const _EIGEN_DECLARE_CONST_FAST_Packet4ui(low_mask, 0x0000FFFF);
+  const EIGEN_DECLARE_CONST_FAST_Packet4ui(low_mask, 0x0000FFFF);
   Packet4ui int_cast = reinterpret_cast<Packet4ui>(a);
   Packet4ui int_even = pand<Packet4ui>(int_cast, p4ui_low_mask);
   Packet4ui int_odd = plogical_shift_right<16>(int_cast);

diff --git a/Eigen/src/Core/arch/Default/BFloat16.h b/Eigen/src/Core/arch/Default/BFloat16.h
index 36df320..fb99d86 100644
--- a/Eigen/src/Core/arch/Default/BFloat16.h
+++ b/Eigen/src/Core/arch/Default/BFloat16.h

@@ -137,21 +137,25 @@
   static EIGEN_CONSTEXPR const bool has_infinity = true;
   static EIGEN_CONSTEXPR const bool has_quiet_NaN = true;
   static EIGEN_CONSTEXPR const bool has_signaling_NaN = true;
-  static EIGEN_CONSTEXPR const float_denorm_style has_denorm = std::denorm_absent;
+  static EIGEN_CONSTEXPR const float_denorm_style has_denorm = std::denorm_present;
   static EIGEN_CONSTEXPR const bool has_denorm_loss = false;
   static EIGEN_CONSTEXPR const std::float_round_style round_style = numeric_limits<float>::round_style;
-  static EIGEN_CONSTEXPR const bool is_iec559 = false;
+  static EIGEN_CONSTEXPR const bool is_iec559 = true;
+  // The C++ standard defines this as "true if the set of values representable
+  // by the type is finite." BFloat16 has finite precision.
   static EIGEN_CONSTEXPR const bool is_bounded = true;
   static EIGEN_CONSTEXPR const bool is_modulo = false;
   static EIGEN_CONSTEXPR const int digits = 8;
   static EIGEN_CONSTEXPR const int digits10 = 2;
   static EIGEN_CONSTEXPR const int max_digits10 = 4;
-  static EIGEN_CONSTEXPR const int radix = 2;
+  static EIGEN_CONSTEXPR const int radix = numeric_limits<float>::radix;
   static EIGEN_CONSTEXPR const int min_exponent = numeric_limits<float>::min_exponent;
   static EIGEN_CONSTEXPR const int min_exponent10 = numeric_limits<float>::min_exponent10;
   static EIGEN_CONSTEXPR const int max_exponent = numeric_limits<float>::max_exponent;
   static EIGEN_CONSTEXPR const int max_exponent10 = numeric_limits<float>::max_exponent10;
   static EIGEN_CONSTEXPR const bool traps = numeric_limits<float>::traps;
+  // IEEE754: "The implementer shall choose how tininess is detected, but shall
+  // detect tininess in the same way for all operations in radix two"
   static EIGEN_CONSTEXPR const bool tinyness_before = numeric_limits<float>::tinyness_before;
 
   static EIGEN_CONSTEXPR Eigen::bfloat16 (min)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0080); }
@@ -161,7 +165,7 @@
   static EIGEN_CONSTEXPR Eigen::bfloat16 round_error() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3f00); }
   static EIGEN_CONSTEXPR Eigen::bfloat16 infinity() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f80); }
   static EIGEN_CONSTEXPR Eigen::bfloat16 quiet_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0); }
-  static EIGEN_CONSTEXPR Eigen::bfloat16 signaling_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f81); }
+  static EIGEN_CONSTEXPR Eigen::bfloat16 signaling_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fa0); }
   static EIGEN_CONSTEXPR Eigen::bfloat16 denorm_min() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0001); }
 };
 

diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 5f2a130..137daa8 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h

@@ -170,33 +170,14 @@
 EIGEN_UNUSED
 Packet plog_impl_float(const Packet _x)
 {
-  Packet x = _x;
-
   const Packet cst_1              = pset1<Packet>(1.0f);
-  const Packet cst_neg_half       = pset1<Packet>(-0.5f);
-  // The smallest non denormalized float number.
-  const Packet cst_min_norm_pos   = pset1frombits<Packet>( 0x00800000u);
   const Packet cst_minus_inf      = pset1frombits<Packet>( 0xff800000u);
   const Packet cst_pos_inf        = pset1frombits<Packet>( 0x7f800000u);
 
-  // Polynomial coefficients.
   const Packet cst_cephes_SQRTHF = pset1<Packet>(0.707106781186547524f);
-  const Packet cst_cephes_log_p0 = pset1<Packet>(7.0376836292E-2f);
-  const Packet cst_cephes_log_p1 = pset1<Packet>(-1.1514610310E-1f);
-  const Packet cst_cephes_log_p2 = pset1<Packet>(1.1676998740E-1f);
-  const Packet cst_cephes_log_p3 = pset1<Packet>(-1.2420140846E-1f);
-  const Packet cst_cephes_log_p4 = pset1<Packet>(+1.4249322787E-1f);
-  const Packet cst_cephes_log_p5 = pset1<Packet>(-1.6668057665E-1f);
-  const Packet cst_cephes_log_p6 = pset1<Packet>(+2.0000714765E-1f);
-  const Packet cst_cephes_log_p7 = pset1<Packet>(-2.4999993993E-1f);
-  const Packet cst_cephes_log_p8 = pset1<Packet>(+3.3333331174E-1f);
-
-  // Truncate input values to the minimum positive normal.
-  x = pmax(x, cst_min_norm_pos);
-
-  Packet e;
+  Packet e, x;
   // extract significant in the range [0.5,1) and exponent
-  x = pfrexp(x,e);
+  x = pfrexp(_x,e);
 
   // part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
   // and shift by -1. The values are then centered around 0, which improves
@@ -211,24 +192,22 @@
   e = psub(e, pand(cst_1, mask));
   x = padd(x, tmp);
 
-  Packet x2 = pmul(x, x);
-  Packet x3 = pmul(x2, x);
+  // Polynomial coefficients for rational (3,3) r(x) = p(x)/q(x)
+  // approximating log(1+x) on [sqrt(0.5)-1;sqrt(2)-1].
+  const Packet cst_p1 = pset1<Packet>(1.0000000190281136f);
+  const Packet cst_p2 = pset1<Packet>(1.0000000190281063f);
+  const Packet cst_p3 = pset1<Packet>(0.18256296349849254f);
+  const Packet cst_q1 = pset1<Packet>(1.4999999999999927f);
+  const Packet cst_q2 = pset1<Packet>(0.59923249590823520f);
+  const Packet cst_q3 = pset1<Packet>(0.049616247954120038f);
 
-  // Evaluate the polynomial approximant of degree 8 in three parts, probably
-  // to improve instruction-level parallelism.
-  Packet y, y1, y2;
-  y  = pmadd(cst_cephes_log_p0, x, cst_cephes_log_p1);
-  y1 = pmadd(cst_cephes_log_p3, x, cst_cephes_log_p4);
-  y2 = pmadd(cst_cephes_log_p6, x, cst_cephes_log_p7);
-  y  = pmadd(y, x, cst_cephes_log_p2);
-  y1 = pmadd(y1, x, cst_cephes_log_p5);
-  y2 = pmadd(y2, x, cst_cephes_log_p8);
-  y  = pmadd(y, x3, y1);
-  y  = pmadd(y, x3, y2);
-  y  = pmul(y, x3);
-
-  y = pmadd(cst_neg_half, x2, y);
-  x = padd(x, y);
+  Packet p = pmadd(x, cst_p3, cst_p2);
+  p = pmadd(x, p, cst_p1);
+  p = pmul(x, p);
+  Packet q = pmadd(x, cst_q3, cst_q2);
+  q = pmadd(x, q, cst_q1);
+  q = pmadd(x, q, cst_1);
+  x = pdiv(p, q);
 
   // Add the logarithm of the exponent back to the result of the interpolation.
   if (base2) {
@@ -284,8 +263,6 @@
 
   const Packet cst_1              = pset1<Packet>(1.0);
   const Packet cst_neg_half       = pset1<Packet>(-0.5);
-  // The smallest non denormalized double.
-  const Packet cst_min_norm_pos   = pset1frombits<Packet>( static_cast<uint64_t>(0x0010000000000000ull));
   const Packet cst_minus_inf      = pset1frombits<Packet>( static_cast<uint64_t>(0xfff0000000000000ull));
   const Packet cst_pos_inf        = pset1frombits<Packet>( static_cast<uint64_t>(0x7ff0000000000000ull));
 
@@ -307,9 +284,6 @@
   const Packet cst_cephes_log_q4 = pset1<Packet>(7.11544750618563894466E1);
   const Packet cst_cephes_log_q5 = pset1<Packet>(2.31251620126765340583E1);
 
-  // Truncate input values to the minimum positive normal.
-  x = pmax(x, cst_min_norm_pos);
-
   Packet e;
   // extract significant in the range [0.5,1) and exponent
   x = pfrexp(x,e);
@@ -435,24 +409,24 @@
 // Exponential function. Works by writing "x = m*log(2) + r" where
 // "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
 // "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
+// exp(r) is computed using a 6th order minimax polynomial approximation.
 template <typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 EIGEN_UNUSED
 Packet pexp_float(const Packet _x)
 {
   const Packet cst_zero   = pset1<Packet>(0.0f);
-  const Packet cst_1      = pset1<Packet>(1.0f);
+  const Packet cst_one    = pset1<Packet>(1.0f);
   const Packet cst_half   = pset1<Packet>(0.5f);
-  const Packet cst_exp_hi = pset1<Packet>( 88.723f);
-  const Packet cst_exp_lo = pset1<Packet>(-88.723f);
+  const Packet cst_exp_hi = pset1<Packet>(88.723f);
+  const Packet cst_exp_lo = pset1<Packet>(-104.f);
 
   const Packet cst_cephes_LOG2EF = pset1<Packet>(1.44269504088896341f);
-  const Packet cst_cephes_exp_p0 = pset1<Packet>(1.9875691500E-4f);
-  const Packet cst_cephes_exp_p1 = pset1<Packet>(1.3981999507E-3f);
-  const Packet cst_cephes_exp_p2 = pset1<Packet>(8.3334519073E-3f);
-  const Packet cst_cephes_exp_p3 = pset1<Packet>(4.1665795894E-2f);
-  const Packet cst_cephes_exp_p4 = pset1<Packet>(1.6666665459E-1f);
-  const Packet cst_cephes_exp_p5 = pset1<Packet>(5.0000001201E-1f);
+  const Packet cst_p2 = pset1<Packet>(0.49999988079071044921875f);
+  const Packet cst_p3 = pset1<Packet>(0.16666518151760101318359375f);
+  const Packet cst_p4 = pset1<Packet>(4.166965186595916748046875e-2f);
+  const Packet cst_p5 = pset1<Packet>(8.36894474923610687255859375e-3f);
+  const Packet cst_p6 = pset1<Packet>(1.37449637986719608306884765625e-3f);
 
   // Clamp x.
   Packet zero_mask = pcmp_lt(_x, cst_exp_lo);
@@ -470,18 +444,15 @@
   Packet r = pmadd(m, cst_cephes_exp_C1, x);
   r = pmadd(m, cst_cephes_exp_C2, r);
 
-  Packet r2 = pmul(r, r);
-  Packet r3 = pmul(r2, r);
-
-  // Evaluate the polynomial approximant,improved by instruction-level parallelism.
-  Packet y, y1, y2;
-  y  = pmadd(cst_cephes_exp_p0, r, cst_cephes_exp_p1);
-  y1 = pmadd(cst_cephes_exp_p3, r, cst_cephes_exp_p4);
-  y2 = padd(r, cst_1);
-  y  = pmadd(y, r, cst_cephes_exp_p2);
-  y1 = pmadd(y1, r, cst_cephes_exp_p5);
-  y  = pmadd(y, r3, y1);
-  y  = pmadd(y, r2, y2);
+  // Evaluate the 6th order polynomial approximation to exp(r)
+  // with r in the interval [-ln(2)/2;ln(2)/2].
+  const Packet r2 = pmul(r, r);
+  Packet p_even = pmadd(r2, cst_p6, cst_p4);
+  const Packet p_odd = pmadd(r2, cst_p5, cst_p3);
+  p_even = pmadd(r2, p_even, cst_p2);
+  const Packet p_low = padd(r, cst_one);
+  Packet y = pmadd(r, p_odd, p_even);
+  y = pmadd(r2, y, p_low);
 
   // Return 2^m * exp(r).
   // TODO: replace pldexp with faster implementation since y in [-1, 1).
@@ -494,7 +465,7 @@
 Packet pexp_double(const Packet _x)
 {
   Packet x = _x;
-  const Packet cst_zero = pset1<Packet>(0.0f);
+  const Packet cst_zero = pset1<Packet>(0.0);
   const Packet cst_1 = pset1<Packet>(1.0);
   const Packet cst_2 = pset1<Packet>(2.0);
   const Packet cst_half = pset1<Packet>(0.5);

diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h
index 4155335..876045e 100644
--- a/Eigen/src/Core/arch/Default/Half.h
+++ b/Eigen/src/Core/arch/Default/Half.h

@@ -218,29 +218,33 @@
   static EIGEN_CONSTEXPR const float_denorm_style has_denorm = denorm_present;
   static EIGEN_CONSTEXPR const bool has_denorm_loss = false;
   static EIGEN_CONSTEXPR const std::float_round_style round_style = std::round_to_nearest;
-  static EIGEN_CONSTEXPR const bool is_iec559 = false;
-  static EIGEN_CONSTEXPR const bool is_bounded = false;
+  static EIGEN_CONSTEXPR const bool is_iec559 = true;
+  // The C++ standard defines this as "true if the set of values representable
+  // by the type is finite." Half has finite precision.
+  static EIGEN_CONSTEXPR const bool is_bounded = true;
   static EIGEN_CONSTEXPR const bool is_modulo = false;
   static EIGEN_CONSTEXPR const int digits = 11;
   static EIGEN_CONSTEXPR const int digits10 = 3;      // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
   static EIGEN_CONSTEXPR const int max_digits10 = 5;  // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
-  static EIGEN_CONSTEXPR const int radix = 2;
+  static EIGEN_CONSTEXPR const int radix = numeric_limits<float>::radix;
   static EIGEN_CONSTEXPR const int min_exponent = -13;
   static EIGEN_CONSTEXPR const int min_exponent10 = -4;
   static EIGEN_CONSTEXPR const int max_exponent = 16;
   static EIGEN_CONSTEXPR const int max_exponent10 = 4;
-  static EIGEN_CONSTEXPR const bool traps = true;
-  static EIGEN_CONSTEXPR const bool tinyness_before = false;
+  static EIGEN_CONSTEXPR const bool traps = numeric_limits<float>::traps;
+  // IEEE754: "The implementer shall choose how tininess is detected, but shall
+  // detect tininess in the same way for all operations in radix two"
+  static EIGEN_CONSTEXPR const bool tinyness_before = std::numeric_limits<float>::tinyness_before;
 
-  static EIGEN_CONSTEXPR Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); }
+  static EIGEN_CONSTEXPR Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x0400); }
   static EIGEN_CONSTEXPR Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); }
   static EIGEN_CONSTEXPR Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); }
-  static EIGEN_CONSTEXPR Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); }
+  static EIGEN_CONSTEXPR Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x1400); }
   static EIGEN_CONSTEXPR Eigen::half round_error() { return Eigen::half_impl::raw_uint16_to_half(0x3800); }
   static EIGEN_CONSTEXPR Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); }
   static EIGEN_CONSTEXPR Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
   static EIGEN_CONSTEXPR Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7d00); }
-  static EIGEN_CONSTEXPR Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); }
+  static EIGEN_CONSTEXPR Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x0001); }
 };
 
 // If std::numeric_limits<T> is specialized, should also specialize

diff --git a/Eigen/src/Core/arch/MSA/MathFunctions.h b/Eigen/src/Core/arch/MSA/MathFunctions.h
index bbebde5..c3295a51 100644
--- a/Eigen/src/Core/arch/MSA/MathFunctions.h
+++ b/Eigen/src/Core/arch/MSA/MathFunctions.h

@@ -35,20 +35,20 @@
 template <>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
 plog<Packet4f>(const Packet4f& _x) {
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292e-2f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, -1.1514610310e-1f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740e-1f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, -1.2420140846e-1f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, +1.4249322787e-1f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, -1.6668057665e-1f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, +2.0000714765e-1f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, -2.4999993993e-1f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, +3.3333331174e-1f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
-  static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
-  static _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292e-2f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, -1.1514610310e-1f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740e-1f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, -1.2420140846e-1f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, +1.4249322787e-1f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, -1.6668057665e-1f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, +2.0000714765e-1f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, -2.4999993993e-1f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, +3.3333331174e-1f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
+  static EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+  static EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
 
   // Convert negative argument into NAN (quiet negative, to be specific).
   Packet4f zero = (Packet4f)__builtin_msa_ldi_w(0);
@@ -125,19 +125,19 @@
 pexp<Packet4f>(const Packet4f& _x) {
   // Limiting single-precision pexp's argument to [-128, +128] lets pexp
   // reach 0 and INFINITY naturally.
-  static _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -128.0f);
-  static _EIGEN_DECLARE_CONST_Packet4f(exp_hi, +128.0f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500e-4f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507e-3f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073e-3f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894e-2f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459e-1f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201e-1f);
-  static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
-  static _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
+  static EIGEN_DECLARE_CONST_Packet4f(exp_lo, -128.0f);
+  static EIGEN_DECLARE_CONST_Packet4f(exp_hi, +128.0f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500e-4f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507e-3f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073e-3f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894e-2f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459e-1f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201e-1f);
+  static EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+  static EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
 
   Packet4f x = _x;
 
@@ -176,21 +176,21 @@
 template <>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
 ptanh<Packet4f>(const Packet4f& _x) {
-  static _EIGEN_DECLARE_CONST_Packet4f(tanh_tiny, 1e-4f);
-  static _EIGEN_DECLARE_CONST_Packet4f(tanh_hi, 9.0f);
+  static EIGEN_DECLARE_CONST_Packet4f(tanh_tiny, 1e-4f);
+  static EIGEN_DECLARE_CONST_Packet4f(tanh_hi, 9.0f);
   // The monomial coefficients of the numerator polynomial (odd).
-  static _EIGEN_DECLARE_CONST_Packet4f(alpha_1, 4.89352455891786e-3f);
-  static _EIGEN_DECLARE_CONST_Packet4f(alpha_3, 6.37261928875436e-4f);
-  static _EIGEN_DECLARE_CONST_Packet4f(alpha_5, 1.48572235717979e-5f);
-  static _EIGEN_DECLARE_CONST_Packet4f(alpha_7, 5.12229709037114e-8f);
-  static _EIGEN_DECLARE_CONST_Packet4f(alpha_9, -8.60467152213735e-11f);
-  static _EIGEN_DECLARE_CONST_Packet4f(alpha_11, 2.00018790482477e-13f);
-  static _EIGEN_DECLARE_CONST_Packet4f(alpha_13, -2.76076847742355e-16f);
+  static EIGEN_DECLARE_CONST_Packet4f(alpha_1, 4.89352455891786e-3f);
+  static EIGEN_DECLARE_CONST_Packet4f(alpha_3, 6.37261928875436e-4f);
+  static EIGEN_DECLARE_CONST_Packet4f(alpha_5, 1.48572235717979e-5f);
+  static EIGEN_DECLARE_CONST_Packet4f(alpha_7, 5.12229709037114e-8f);
+  static EIGEN_DECLARE_CONST_Packet4f(alpha_9, -8.60467152213735e-11f);
+  static EIGEN_DECLARE_CONST_Packet4f(alpha_11, 2.00018790482477e-13f);
+  static EIGEN_DECLARE_CONST_Packet4f(alpha_13, -2.76076847742355e-16f);
   // The monomial coefficients of the denominator polynomial (even).
-  static _EIGEN_DECLARE_CONST_Packet4f(beta_0, 4.89352518554385e-3f);
-  static _EIGEN_DECLARE_CONST_Packet4f(beta_2, 2.26843463243900e-3f);
-  static _EIGEN_DECLARE_CONST_Packet4f(beta_4, 1.18534705686654e-4f);
-  static _EIGEN_DECLARE_CONST_Packet4f(beta_6, 1.19825839466702e-6f);
+  static EIGEN_DECLARE_CONST_Packet4f(beta_0, 4.89352518554385e-3f);
+  static EIGEN_DECLARE_CONST_Packet4f(beta_2, 2.26843463243900e-3f);
+  static EIGEN_DECLARE_CONST_Packet4f(beta_4, 1.18534705686654e-4f);
+  static EIGEN_DECLARE_CONST_Packet4f(beta_6, 1.19825839466702e-6f);
 
   Packet4f x = pabs(_x);
   Packet4i tiny_mask = __builtin_msa_fclt_w(x, p4f_tanh_tiny);
@@ -231,19 +231,19 @@
 
 template <bool sine>
 Packet4f psincos_inner_msa_float(const Packet4f& _x) {
-  static _EIGEN_DECLARE_CONST_Packet4f(sincos_max_arg, 13176795.0f);  // Approx. (2**24) / (4/Pi).
-  static _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1, -0.78515625f);
-  static _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
-  static _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
-  static _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891e-4f);
-  static _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736e-3f);
-  static _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611e-1f);
-  static _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948e-5f);
-  static _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765e-3f);
-  static _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827e-2f);
-  static _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f);  // 4/Pi.
-  static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
-  static _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
+  static EIGEN_DECLARE_CONST_Packet4f(sincos_max_arg, 13176795.0f);  // Approx. (2**24) / (4/Pi).
+  static EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1, -0.78515625f);
+  static EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
+  static EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
+  static EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891e-4f);
+  static EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736e-3f);
+  static EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611e-1f);
+  static EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948e-5f);
+  static EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765e-3f);
+  static EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827e-2f);
+  static EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f);  // 4/Pi.
+  static EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+  static EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
 
   Packet4f x = pabs(_x);
 
@@ -328,21 +328,21 @@
 pexp<Packet2d>(const Packet2d& _x) {
   // Limiting double-precision pexp's argument to [-1024, +1024] lets pexp
   // reach 0 and INFINITY naturally.
-  static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -1024.0);
-  static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, +1024.0);
-  static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
-  static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
-  static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
-  static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
-  static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
-  static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
-  static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
-  static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
-  static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
-  static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
-  static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
-  static _EIGEN_DECLARE_CONST_Packet2d(1, 1.0);
-  static _EIGEN_DECLARE_CONST_Packet2d(2, 2.0);
+  static EIGEN_DECLARE_CONST_Packet2d(exp_lo, -1024.0);
+  static EIGEN_DECLARE_CONST_Packet2d(exp_hi, +1024.0);
+  static EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
+  static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
+  static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
+  static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
+  static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
+  static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
+  static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
+  static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
+  static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
+  static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
+  static EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
+  static EIGEN_DECLARE_CONST_Packet2d(1, 1.0);
+  static EIGEN_DECLARE_CONST_Packet2d(2, 2.0);
 
   Packet2d x = _x;
 

diff --git a/Eigen/src/Core/arch/MSA/PacketMath.h b/Eigen/src/Core/arch/MSA/PacketMath.h
index 3dc95a1..f03dbed 100644
--- a/Eigen/src/Core/arch/MSA/PacketMath.h
+++ b/Eigen/src/Core/arch/MSA/PacketMath.h

@@ -53,9 +53,9 @@
 typedef v4i32 Packet4i;
 typedef v4u32 Packet4ui;
 
-#define _EIGEN_DECLARE_CONST_Packet4f(NAME, X) const Packet4f p4f_##NAME = { X, X, X, X }
-#define _EIGEN_DECLARE_CONST_Packet4i(NAME, X) const Packet4i p4i_##NAME = { X, X, X, X }
-#define _EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = { X, X, X, X }
+#define EIGEN_DECLARE_CONST_Packet4f(NAME, X) const Packet4f p4f_##NAME = { X, X, X, X }
+#define EIGEN_DECLARE_CONST_Packet4i(NAME, X) const Packet4i p4i_##NAME = { X, X, X, X }
+#define EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = { X, X, X, X }
 
 inline std::ostream& operator<<(std::ostream& os, const Packet4f& value) {
   os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
@@ -823,9 +823,9 @@
 typedef v2i64 Packet2l;
 typedef v2u64 Packet2ul;
 
-#define _EIGEN_DECLARE_CONST_Packet2d(NAME, X) const Packet2d p2d_##NAME = { X, X }
-#define _EIGEN_DECLARE_CONST_Packet2l(NAME, X) const Packet2l p2l_##NAME = { X, X }
-#define _EIGEN_DECLARE_CONST_Packet2ul(NAME, X) const Packet2ul p2ul_##NAME = { X, X }
+#define EIGEN_DECLARE_CONST_Packet2d(NAME, X) const Packet2d p2d_##NAME = { X, X }
+#define EIGEN_DECLARE_CONST_Packet2l(NAME, X) const Packet2l p2l_##NAME = { X, X }
+#define EIGEN_DECLARE_CONST_Packet2ul(NAME, X) const Packet2ul p2ul_##NAME = { X, X }
 
 inline std::ostream& operator<<(std::ostream& os, const Packet2d& value) {
   os << "[ " << value[0] << ", " << value[1] << " ]";

diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index fd6b14b..008dd7a 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h

@@ -376,7 +376,7 @@
 #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
 
 // See bug 1325, clang fails to call vld1q_u64.
-#if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML
+#if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML || EIGEN_COMP_CPE
   static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
 #else
   const uint64_t  p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };

diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 707f7d7..dda0a1e 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h

@@ -139,13 +139,13 @@
 #define vec4f_duplane(a, p) \
   vdupq_lane_f32(vget_low_f32(a), p)
 
-#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
   const Packet4f p4f_##NAME = pset1<Packet4f>(X)
 
-#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
   const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int32_t>(X))
 
-#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
   const Packet4i p4i_##NAME = pset1<Packet4i>(X)
 
 #if EIGEN_ARCH_ARM64

diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index 61e9406..60308ce 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h

@@ -137,7 +137,7 @@
 
 template<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet2cf>(const Packet2cf& a)
 {
-  std::complex<float> res;
+  alignas(alignof(__m64)) std::complex<float> res;
   _mm_storel_pi((__m64*)&res, a.v);
   return res;
 }

diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index 4bdb9af..5a063d3 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h

@@ -75,8 +75,6 @@
   return pcos_float(_x);
 }
 
-#if EIGEN_FAST_MATH
-
 // Functions for sqrt.
 // The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
 // of Newton's method, at a cost of 1-2 bits of precision as opposed to the
@@ -85,11 +83,13 @@
 // it can be inlined and pipelined with other computations, further reducing its
 // effective latency. This is similar to Quake3's fast inverse square root.
 // For detail see here: http://www.beyond3d.com/content/articles/8/
-template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+#if EIGEN_FAST_MATH
+template<>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
 Packet4f psqrt<Packet4f>(const Packet4f& _x)
 {
-  Packet4f minus_half_x = pmul(_x, pset1<Packet4f>(-0.5f));
-  Packet4f denormal_mask = pandnot(
+  const Packet4f minus_half_x = pmul(_x, pset1<Packet4f>(-0.5f));
+  const Packet4f denormal_mask = pandnot(
       pcmp_lt(_x, pset1<Packet4f>((std::numeric_limits<float>::min)())),
       pcmp_lt(_x, pzero(_x)));
 
@@ -118,10 +118,10 @@
 
 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
 Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
-  _EIGEN_DECLARE_CONST_Packet4f(one_point_five, 1.5f);
-  _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5f);
-  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inf, 0x7f800000u);
-  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(flt_min, 0x00800000u);
+  EIGEN_DECLARE_CONST_Packet4f(one_point_five, 1.5f);
+  EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5f);
+  EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inf, 0x7f800000u);
+  EIGEN_DECLARE_CONST_Packet4f_FROM_INT(flt_min, 0x00800000u);
 
   Packet4f neg_half = pmul(_x, p4f_minus_half);
 

diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 45e219c..a843226 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h

@@ -108,16 +108,16 @@
 #define vec2d_duplane(a,p) \
   vec2d_swizzle2(a,a,(p<<1)|p)
 
-#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
   const Packet4f p4f_##NAME = pset1<Packet4f>(X)
 
-#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
   const Packet2d p2d_##NAME = pset1<Packet2d>(X)
 
-#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
   const Packet4f p4f_##NAME = pset1frombits<Packet4f>(X)
 
-#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
   const Packet4i p4i_##NAME = pset1<Packet4i>(X)
 
 

diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h
index acb04fb..df5c8d4 100644
--- a/Eigen/src/Core/arch/ZVector/Complex.h
+++ b/Eigen/src/Core/arch/ZVector/Complex.h

@@ -8,8 +8,8 @@
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
-#ifndef EIGEN_COMPLEX32_ALTIVEC_H
-#define EIGEN_COMPLEX32_ALTIVEC_H
+#ifndef EIGEN_COMPLEX32_ZVECTOR_H
+#define EIGEN_COMPLEX32_ZVECTOR_H
 
 #include "../../InternalHeaderCheck.h"
 
@@ -425,4 +425,4 @@
 
 } // end namespace Eigen
 
-#endif // EIGEN_COMPLEX32_ALTIVEC_H
+#endif // EIGEN_COMPLEX32_ZVECTOR_H

diff --git a/Eigen/src/Core/arch/ZVector/MathFunctions.h b/Eigen/src/Core/arch/ZVector/MathFunctions.h
index c86e185..7ee14ab 100644
--- a/Eigen/src/Core/arch/ZVector/MathFunctions.h
+++ b/Eigen/src/Core/arch/ZVector/MathFunctions.h

@@ -13,8 +13,8 @@
  * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
  */
 
-#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
-#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
+#ifndef EIGEN_MATH_FUNCTIONS_ZVECTOR_H
+#define EIGEN_MATH_FUNCTIONS_ZVECTOR_H
 
 #include "../../InternalHeaderCheck.h"
 
@@ -23,69 +23,69 @@
 namespace internal {
 
 #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
-static _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
-static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
-static _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
-static _EIGEN_DECLARE_CONST_Packet4i(23, 23);
+static EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+static EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+static EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+static EIGEN_DECLARE_CONST_Packet4i(23, 23);
 
-static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
+static EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
 
 /* the smallest non denormalized float number */
-static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos,  0x00800000);
-static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf,     0xff800000); // -1.f/0.f
-static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan,     0xffffffff);
+static EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos,  0x00800000);
+static EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf,     0xff800000); // -1.f/0.f
+static EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan,     0xffffffff);
   
 /* natural logarithm computed for 4 simultaneous float
   return NaN for x <= 0
 */
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
 
-static _EIGEN_DECLARE_CONST_Packet4f(exp_hi,  88.3762626647950f);
-static _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
+static EIGEN_DECLARE_CONST_Packet4f(exp_hi,  88.3762626647950f);
+static EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
 
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
 
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
-static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
+static EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
 #endif
 
-static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
-static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
-static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
+static EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
+static EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
+static EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
 
-static _EIGEN_DECLARE_CONST_Packet2d(exp_hi,  709.437);
-static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
+static EIGEN_DECLARE_CONST_Packet2d(exp_hi,  709.437);
+static EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
 
-static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
+static EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
 
-static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
-static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
-static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
+static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
+static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
+static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
 
-static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
-static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
-static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
-static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
+static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
+static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
+static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
+static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
 
-static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
-static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
+static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
+static EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
 
 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
 Packet2d pexp<Packet2d>(const Packet2d& _x)
@@ -232,4 +232,4 @@
 
 }  // end namespace Eigen
 
-#endif  // EIGEN_MATH_FUNCTIONS_ALTIVEC_H
+#endif  // EIGEN_MATH_FUNCTIONS_ZVECTOR_H

diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h
index 3d81aed..26b6f0d 100755
--- a/Eigen/src/Core/arch/ZVector/PacketMath.h
+++ b/Eigen/src/Core/arch/ZVector/PacketMath.h

@@ -66,48 +66,48 @@
 // We don't want to write the same code all the time, but we need to reuse the constants
 // and it doesn't really work to declare them global, so we define macros instead
 
-#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
+#define EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
   Packet4i p4i_##NAME = reinterpret_cast<Packet4i>(vec_splat_s32(X))
 
-#define _EIGEN_DECLARE_CONST_FAST_Packet2d(NAME,X) \
+#define EIGEN_DECLARE_CONST_FAST_Packet2d(NAME,X) \
   Packet2d p2d_##NAME = reinterpret_cast<Packet2d>(vec_splat_s64(X))
 
-#define _EIGEN_DECLARE_CONST_FAST_Packet2l(NAME,X) \
+#define EIGEN_DECLARE_CONST_FAST_Packet2l(NAME,X) \
   Packet2l p2l_##NAME = reinterpret_cast<Packet2l>(vec_splat_s64(X))
 
-#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
   Packet4i p4i_##NAME = pset1<Packet4i>(X)
 
-#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
   Packet2d p2d_##NAME = pset1<Packet2d>(X)
 
-#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
   Packet2l p2l_##NAME = pset1<Packet2l>(X)
 
 // These constants are endian-agnostic
-static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
-static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE, 1); //{ 1, 1, 1, 1}
+static EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
+static EIGEN_DECLARE_CONST_FAST_Packet4i(ONE, 1); //{ 1, 1, 1, 1}
 
-static _EIGEN_DECLARE_CONST_FAST_Packet2d(ZERO, 0);
-static _EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0);
-static _EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1);
+static EIGEN_DECLARE_CONST_FAST_Packet2d(ZERO, 0);
+static EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0);
+static EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1);
 
 static Packet2d p2d_ONE = { 1.0, 1.0 };
 static Packet2d p2d_ZERO_ = { numext::bit_cast<double>(0x8000000000000000ull),
                               numext::bit_cast<double>(0x8000000000000000ull) };
 
 #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
-#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
+#define EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
   Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(vec_splat_s32(X))
 
-#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
   Packet4f p4f_##NAME = pset1<Packet4f>(X)
 
-#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+#define EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
   const Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(pset1<Packet4i>(X))
 
-static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}
-static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1}
+static EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}
+static EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1}
 static Packet4f p4f_MZERO = { 0x80000000, 0x80000000, 0x80000000, 0x80000000};
 #endif
 
@@ -119,9 +119,9 @@
 static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
 
 // Mask alignment
-#define _EIGEN_MASK_ALIGNMENT	0xfffffffffffffff0
+#define EIGEN_MASK_ALIGNMENT	0xfffffffffffffff0
 
-#define _EIGEN_ALIGNED_PTR(x)	((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
+#define EIGEN_ALIGNED_PTR(x)	((std::ptrdiff_t)(x) & EIGEN_MASK_ALIGNMENT)
 
 // Handle endianness properly while loading constants
 // Define global static constants:

diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index 044f7dd..d56aae5 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h

@@ -278,7 +278,7 @@
   */
 template<typename Scalar> struct scalar_exp_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
-  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::exp(a); }
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return internal::pexp(a); }
   template <typename Packet>
   EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
 };
@@ -1026,87 +1026,105 @@
   template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Packet packetOp(const Packet& x) const {
     const Packet one = pset1<Packet>(T(1));
-    return pdiv(one, padd(one, pexp(pnegate(x))));
+    const Packet inf = pset1<Packet>(NumTraits<T>::infinity());
+    const Packet e = pexp(x);
+    const Packet inf_mask = pcmp_eq(e, inf);
+    return pselect(inf_mask, one, pdiv(e, padd(one, e)));
   }
 };
 
-#ifndef EIGEN_GPU_COMPILE_PHASE
+// TODO(rmlarsen): Enable the following on host when integer_packet is defined 
+// for the relevant packet types.
+#ifdef EIGEN_GPU_CC
+
 /** \internal
   * \brief Template specialization of the logistic function for float.
-  *
-  *  Uses just a 9/10-degree rational interpolant which
-  *  interpolates 1/(1+exp(-x)) - 0.5 up to a couple of ulps in the range
-  *  [-9, 18]. Below -9 we use the more accurate approximation
-  *  1/(1+exp(-x)) ~= exp(x), and above 18 the logistic function is 1 within
-  *  one ulp. The shifted logistic is interpolated because it was easier to
-  *  make the fit converge.
-  *
+  * Computes S(x) = exp(x) / (1 + exp(x)), where exp(x) is implemented
+  * using an algorithm partly adopted from the implementation of
+  * pexp_float. See the individual steps described in the code below.
+  * Note that compared to pexp, we use an additional outer multiplicative
+  * range reduction step using the identity exp(x) = exp(x/2)^2.
+  * This prevert us from having to call ldexp on values that could produce
+  * a denormal result, which allows us to call the faster implementation in
+  * pldexp_fast_impl<Packet>::run(p, m).
+  * The final squaring, however, doubles the error bound on the final
+  * approximation. Exhaustive testing shows that we have a worst case error
+  * of 4.5 ulps (compared to computing S(x) in double precision), which is
+  * acceptable.
   */
 template <>
 struct scalar_logistic_op<float> {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator()(const float& x) const {
-    return packetOp(x);
+    // Truncate at the first point where the interpolant is exactly one.
+    const float cst_exp_hi = 16.6355324f;
+    const float e = numext::exp(numext::mini(x, cst_exp_hi));
+    return e / (1.0f + e);
   }
 
-  template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  Packet packetOp(const Packet& _x) const {
-    const Packet cutoff_lower = pset1<Packet>(-9.f);
-    const Packet lt_mask = pcmp_lt<Packet>(_x, cutoff_lower);
-    const bool any_small = predux_any(lt_mask);
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
+  packetOp(const Packet& _x) const {
+    const Packet cst_zero = pset1<Packet>(0.0f);
+    const Packet cst_one = pset1<Packet>(1.0f);
+    const Packet cst_half = pset1<Packet>(0.5f);
+    // Truncate at the first point where the interpolant is exactly one.
+    const Packet cst_exp_hi = pset1<Packet>(16.6355324f);
+    const Packet cst_exp_lo = pset1<Packet>(-104.f);
 
-    // The upper cut-off is the smallest x for which the rational approximation evaluates to 1.
-    // Choosing this value saves us a few instructions clamping the results at the end.
-#ifdef EIGEN_VECTORIZE_FMA
-    const Packet cutoff_upper = pset1<Packet>(15.7243833541870117f);
-#else
-    const Packet cutoff_upper = pset1<Packet>(15.6437711715698242f);
-#endif
-    const Packet x = pmin(_x, cutoff_upper);
+    // Clamp x to the non-trivial range where S(x). Outside this
+    // interval the correctly rounded value of S(x) is either zero
+    // or one.
+    Packet zero_mask = pcmp_lt(_x, cst_exp_lo);
+    Packet x = pmin(_x, cst_exp_hi);
 
-    // The monomial coefficients of the numerator polynomial (odd).
-    const Packet alpha_1 = pset1<Packet>(2.48287947061529e-01f);
-    const Packet alpha_3 = pset1<Packet>(8.51377133304701e-03f);
-    const Packet alpha_5 = pset1<Packet>(6.08574864600143e-05f);
-    const Packet alpha_7 = pset1<Packet>(1.15627324459942e-07f);
-    const Packet alpha_9 = pset1<Packet>(4.37031012579801e-11f);
+    // 1. Multiplicative range reduction:
+    // Reduce the range of x by a factor of 2. This avoids having
+    // to compute exp(x) accurately where the result is a denormalized
+    // value.
+    x = pmul(x, cst_half);
 
-    // The monomial coefficients of the denominator polynomial (even).
-    const Packet beta_0 = pset1<Packet>(9.93151921023180e-01f);
-    const Packet beta_2 = pset1<Packet>(1.16817656904453e-01f);
-    const Packet beta_4 = pset1<Packet>(1.70198817374094e-03f);
-    const Packet beta_6 = pset1<Packet>(6.29106785017040e-06f);
-    const Packet beta_8 = pset1<Packet>(5.76102136993427e-09f);
-    const Packet beta_10 = pset1<Packet>(6.10247389755681e-13f);
+    // 2. Subtractive range reduction:
+    // Express exp(x) as exp(m*ln(2) + r) = 2^m*exp(r), start by extracting
+    // m = floor(x/ln(2) + 0.5), such that x = m*ln(2) + r.
+    const Packet cst_cephes_LOG2EF = pset1<Packet>(1.44269504088896341f);
+    Packet m = pfloor(pmadd(x, cst_cephes_LOG2EF, cst_half));
+    // Get r = x - m*ln(2). We use a trick from Cephes where the term
+    // m*ln(2) is subtracted out in two parts, m*C1+m*C2 = m*ln(2),
+    // to avoid accumulating truncation errors.
+    const Packet cst_cephes_exp_C1 = pset1<Packet>(-0.693359375f);
+    const Packet cst_cephes_exp_C2 = pset1<Packet>(2.12194440e-4f);
+    Packet r = pmadd(m, cst_cephes_exp_C1, x);
+    r = pmadd(m, cst_cephes_exp_C2, r);
 
-    // Since the polynomials are odd/even, we need x^2.
-    const Packet x2 = pmul(x, x);
+    // 3. Compute an approximation to exp(r) using a degree 5 minimax polynomial.
+    // We compute even and odd terms separately to increase instruction level
+    // parallelism.
+    Packet r2 = pmul(r, r);
+    const Packet cst_p2 = pset1<Packet>(0.49999141693115234375f);
+    const Packet cst_p3 = pset1<Packet>(0.16666877269744873046875f);
+    const Packet cst_p4 = pset1<Packet>(4.1898667812347412109375e-2f);
+    const Packet cst_p5 = pset1<Packet>(8.33471305668354034423828125e-3f);
 
-    // Evaluate the numerator polynomial p.
-    Packet p = pmadd(x2, alpha_9, alpha_7);
-    p = pmadd(x2, p, alpha_5);
-    p = pmadd(x2, p, alpha_3);
-    p = pmadd(x2, p, alpha_1);
-    p = pmul(x, p);
+    const Packet p_even = pmadd(r2, cst_p4, cst_p2);
+    const Packet p_odd = pmadd(r2, cst_p5, cst_p3);
+    const Packet p_low = padd(r, cst_one);
+    Packet p = pmadd(r, p_odd, p_even);
+    p = pmadd(r2, p, p_low);
 
-    // Evaluate the denominator polynomial q.
-    Packet q = pmadd(x2, beta_10, beta_8);
-    q = pmadd(x2, q, beta_6);
-    q = pmadd(x2, q, beta_4);
-    q = pmadd(x2, q, beta_2);
-    q = pmadd(x2, q, beta_0);
-    // Divide the numerator by the denominator and shift it up.
-    const Packet logistic = padd(pdiv(p, q), pset1<Packet>(0.5f));
-    if (EIGEN_PREDICT_FALSE(any_small)) {
-      const Packet exponential = pexp(_x);
-      return pselect(lt_mask, exponential, logistic);
-    } else {
-      return logistic;
-    }
+    // 4. Undo subtractive range reduction exp(m*ln(2) + r) = 2^m * exp(r).
+    Packet e = pldexp_fast_impl<Packet>::run(p, m);
+
+    // 5. Undo multiplicative range reduction by using exp(r) = exp(r/2)^2.
+    e = pmul(e, e);
+
+    // Return exp(x) / (1 + exp(x))
+    return pselect(zero_mask, cst_zero, pdiv(e, padd(cst_one, e)));
   }
 };
 #endif  // #ifndef EIGEN_GPU_COMPILE_PHASE
 
+
 template <typename T>
 struct functor_traits<scalar_logistic_op<T> > {
   enum {

diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 938f5fb..7580887 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h

@@ -378,12 +378,12 @@
 template <typename T1, typename T2, typename T3>
 struct packet_conditional<GEBPPacketHalf, T1, T2, T3> { typedef T2 type; };
 
-#define PACKET_DECL_COND_PREFIX(prefix, name, packet_size)         \
+#define PACKET_DECL_COND_POSTFIX(postfix, name, packet_size)       \
   typedef typename packet_conditional<packet_size,                 \
                                       typename packet_traits<name ## Scalar>::type, \
                                       typename packet_traits<name ## Scalar>::half, \
                                       typename unpacket_traits<typename packet_traits<name ## Scalar>::half>::half>::type \
-  prefix ## name ## Packet
+  name ## Packet ## postfix
 
 #define PACKET_DECL_COND(name, packet_size)                        \
   typedef typename packet_conditional<packet_size,                 \
@@ -392,12 +392,12 @@
                                       typename unpacket_traits<typename packet_traits<name ## Scalar>::half>::half>::type \
   name ## Packet
 
-#define PACKET_DECL_COND_SCALAR_PREFIX(prefix, packet_size)        \
+#define PACKET_DECL_COND_SCALAR_POSTFIX(postfix, packet_size)      \
   typedef typename packet_conditional<packet_size,                 \
                                       typename packet_traits<Scalar>::type, \
                                       typename packet_traits<Scalar>::half, \
                                       typename unpacket_traits<typename packet_traits<Scalar>::half>::half>::type \
-  prefix ## ScalarPacket
+  ScalarPacket ## postfix
 
 #define PACKET_DECL_COND_SCALAR(packet_size)                       \
   typedef typename packet_conditional<packet_size,                 \
@@ -424,17 +424,17 @@
   typedef RhsScalar_ RhsScalar;
   typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
 
-  PACKET_DECL_COND_PREFIX(_, Lhs, PacketSize_);
-  PACKET_DECL_COND_PREFIX(_, Rhs, PacketSize_);
-  PACKET_DECL_COND_PREFIX(_, Res, PacketSize_);
+  PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_);
+  PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_);
+  PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_);
 
   enum {
     ConjLhs = ConjLhs_,
     ConjRhs = ConjRhs_,
-    Vectorizable = unpacket_traits<_LhsPacket>::vectorizable && unpacket_traits<_RhsPacket>::vectorizable,
-    LhsPacketSize = Vectorizable ? unpacket_traits<_LhsPacket>::size : 1,
-    RhsPacketSize = Vectorizable ? unpacket_traits<_RhsPacket>::size : 1,
-    ResPacketSize = Vectorizable ? unpacket_traits<_ResPacket>::size : 1,
+    Vectorizable = unpacket_traits<LhsPacket_>::vectorizable && unpacket_traits<RhsPacket_>::vectorizable,
+    LhsPacketSize = Vectorizable ? unpacket_traits<LhsPacket_>::size : 1,
+    RhsPacketSize = Vectorizable ? unpacket_traits<RhsPacket_>::size : 1,
+    ResPacketSize = Vectorizable ? unpacket_traits<ResPacket_>::size : 1,
     
     NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
 
@@ -459,9 +459,9 @@
   };
 
 
-  typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
-  typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
-  typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+  typedef typename conditional<Vectorizable,LhsPacket_,LhsScalar>::type LhsPacket;
+  typedef typename conditional<Vectorizable,RhsPacket_,RhsScalar>::type RhsPacket;
+  typedef typename conditional<Vectorizable,ResPacket_,ResScalar>::type ResPacket;
   typedef LhsPacket LhsPacket4Packing;
 
   typedef QuadPacket<RhsPacket> RhsPacketx4;
@@ -553,17 +553,17 @@
   typedef RealScalar RhsScalar;
   typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
 
-  PACKET_DECL_COND_PREFIX(_, Lhs, PacketSize_);
-  PACKET_DECL_COND_PREFIX(_, Rhs, PacketSize_);
-  PACKET_DECL_COND_PREFIX(_, Res, PacketSize_);
+  PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_);
+  PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_);
+  PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_);
 
   enum {
     ConjLhs = ConjLhs_,
     ConjRhs = false,
-    Vectorizable = unpacket_traits<_LhsPacket>::vectorizable && unpacket_traits<_RhsPacket>::vectorizable,
-    LhsPacketSize = Vectorizable ? unpacket_traits<_LhsPacket>::size : 1,
-    RhsPacketSize = Vectorizable ? unpacket_traits<_RhsPacket>::size : 1,
-    ResPacketSize = Vectorizable ? unpacket_traits<_ResPacket>::size : 1,
+    Vectorizable = unpacket_traits<LhsPacket_>::vectorizable && unpacket_traits<RhsPacket_>::vectorizable,
+    LhsPacketSize = Vectorizable ? unpacket_traits<LhsPacket_>::size : 1,
+    RhsPacketSize = Vectorizable ? unpacket_traits<RhsPacket_>::size : 1,
+    ResPacketSize = Vectorizable ? unpacket_traits<ResPacket_>::size : 1,
     
     NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
     nr = 4,
@@ -578,9 +578,9 @@
     RhsProgress = 1
   };
 
-  typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
-  typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
-  typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+  typedef typename conditional<Vectorizable,LhsPacket_,LhsScalar>::type LhsPacket;
+  typedef typename conditional<Vectorizable,RhsPacket_,RhsScalar>::type RhsPacket;
+  typedef typename conditional<Vectorizable,ResPacket_,ResScalar>::type ResPacket;
   typedef LhsPacket LhsPacket4Packing;
 
   typedef QuadPacket<RhsPacket> RhsPacketx4;
@@ -765,9 +765,9 @@
   typedef std::complex<RealScalar>  RhsScalar;
   typedef std::complex<RealScalar>  ResScalar;
   
-  PACKET_DECL_COND_PREFIX(_, Lhs, PacketSize_);
-  PACKET_DECL_COND_PREFIX(_, Rhs, PacketSize_);
-  PACKET_DECL_COND_PREFIX(_, Res, PacketSize_);
+  PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_);
+  PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_);
+  PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_);
   PACKET_DECL_COND(Real, PacketSize_);
   PACKET_DECL_COND_SCALAR(PacketSize_);
 
@@ -776,8 +776,8 @@
     ConjRhs = ConjRhs_,
     Vectorizable = unpacket_traits<RealPacket>::vectorizable
                 && unpacket_traits<ScalarPacket>::vectorizable,
-    ResPacketSize   = Vectorizable ? unpacket_traits<_ResPacket>::size : 1,
-    LhsPacketSize = Vectorizable ? unpacket_traits<_LhsPacket>::size : 1,
+    ResPacketSize   = Vectorizable ? unpacket_traits<ResPacket_>::size : 1,
+    LhsPacketSize = Vectorizable ? unpacket_traits<LhsPacket_>::size : 1,
     RhsPacketSize = Vectorizable ? unpacket_traits<RhsScalar>::size : 1,
     RealPacketSize  = Vectorizable ? unpacket_traits<RealPacket>::size : 1,
 
@@ -931,25 +931,25 @@
   typedef Scalar      RhsScalar;
   typedef Scalar      ResScalar;
 
-  PACKET_DECL_COND_PREFIX(_, Lhs, PacketSize_);
-  PACKET_DECL_COND_PREFIX(_, Rhs, PacketSize_);
-  PACKET_DECL_COND_PREFIX(_, Res, PacketSize_);
-  PACKET_DECL_COND_PREFIX(_, Real, PacketSize_);
-  PACKET_DECL_COND_SCALAR_PREFIX(_, PacketSize_);
+  PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_);
+  PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_);
+  PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_);
+  PACKET_DECL_COND_POSTFIX(_, Real, PacketSize_);
+  PACKET_DECL_COND_SCALAR_POSTFIX(_, PacketSize_);
 
-#undef PACKET_DECL_COND_SCALAR_PREFIX
-#undef PACKET_DECL_COND_PREFIX
+#undef PACKET_DECL_COND_SCALAR_POSTFIX
+#undef PACKET_DECL_COND_POSTFIX
 #undef PACKET_DECL_COND_SCALAR
 #undef PACKET_DECL_COND
 
   enum {
     ConjLhs = false,
     ConjRhs = ConjRhs_,
-    Vectorizable = unpacket_traits<_RealPacket>::vectorizable
-                && unpacket_traits<_ScalarPacket>::vectorizable,
-    LhsPacketSize = Vectorizable ? unpacket_traits<_LhsPacket>::size : 1,
-    RhsPacketSize = Vectorizable ? unpacket_traits<_RhsPacket>::size : 1,
-    ResPacketSize = Vectorizable ? unpacket_traits<_ResPacket>::size : 1,
+    Vectorizable = unpacket_traits<RealPacket_>::vectorizable
+                && unpacket_traits<ScalarPacket_>::vectorizable,
+    LhsPacketSize = Vectorizable ? unpacket_traits<LhsPacket_>::size : 1,
+    RhsPacketSize = Vectorizable ? unpacket_traits<RhsPacket_>::size : 1,
+    ResPacketSize = Vectorizable ? unpacket_traits<ResPacket_>::size : 1,
     
     NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
     // FIXME: should depend on NumberOfRegisters
@@ -960,9 +960,9 @@
     RhsProgress = 1
   };
 
-  typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
-  typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
-  typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+  typedef typename conditional<Vectorizable,LhsPacket_,LhsScalar>::type LhsPacket;
+  typedef typename conditional<Vectorizable,RhsPacket_,RhsScalar>::type RhsPacket;
+  typedef typename conditional<Vectorizable,ResPacket_,ResScalar>::type ResPacket;
   typedef LhsPacket LhsPacket4Packing;
   typedef QuadPacket<RhsPacket> RhsPacketx4;
   typedef ResPacket AccPacket;

diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
index 465294b..9728f30 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h

@@ -213,13 +213,13 @@
     typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
     typedef internal::blas_traits<Lhs> LhsBlasTraits;
     typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
-    typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
+    typedef typename internal::remove_all<ActualLhs>::type ActualLhs_;
     typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
     
     typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
     typedef internal::blas_traits<Rhs> RhsBlasTraits;
     typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
-    typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
+    typedef typename internal::remove_all<ActualRhs>::type ActualRhs_;
     typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
 
     Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
@@ -229,19 +229,19 @@
 
     enum {
       StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,
-      UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1,
-      UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1
+      UseLhsDirectly = ActualLhs_::InnerStrideAtCompileTime==1,
+      UseRhsDirectly = ActualRhs_::InnerStrideAtCompileTime==1
     };
     
     internal::gemv_static_vector_if<Scalar,Lhs::SizeAtCompileTime,Lhs::MaxSizeAtCompileTime,!UseLhsDirectly> static_lhs;
     ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(),
       (UseLhsDirectly ? const_cast<Scalar*>(actualLhs.data()) : static_lhs.data()));
-    if(!UseLhsDirectly) Map<typename _ActualLhs::PlainObject>(actualLhsPtr, actualLhs.size()) = actualLhs;
+    if(!UseLhsDirectly) Map<typename ActualLhs_::PlainObject>(actualLhsPtr, actualLhs.size()) = actualLhs;
     
     internal::gemv_static_vector_if<Scalar,Rhs::SizeAtCompileTime,Rhs::MaxSizeAtCompileTime,!UseRhsDirectly> static_rhs;
     ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(),
       (UseRhsDirectly ? const_cast<Scalar*>(actualRhs.data()) : static_rhs.data()));
-    if(!UseRhsDirectly) Map<typename _ActualRhs::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+    if(!UseRhsDirectly) Map<typename ActualRhs_::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
     
     
     selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
@@ -259,13 +259,13 @@
     typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
     typedef internal::blas_traits<Lhs> LhsBlasTraits;
     typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
-    typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
+    typedef typename internal::remove_all<ActualLhs>::type ActualLhs_;
     typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
     
     typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
     typedef internal::blas_traits<Rhs> RhsBlasTraits;
     typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
-    typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
+    typedef typename internal::remove_all<ActualRhs>::type ActualRhs_;
     typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
 
     typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
@@ -275,8 +275,8 @@
 
     enum {
       IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
-      LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0,
-      RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0,
+      LhsIsRowMajor = ActualLhs_::Flags&RowMajorBit ? 1 : 0,
+      RhsIsRowMajor = ActualRhs_::Flags&RowMajorBit ? 1 : 0,
       SkipDiag = (UpLo&(UnitDiag|ZeroDiag))!=0
     };
 
@@ -286,7 +286,7 @@
     Index depth = actualLhs.cols();
 
     typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar,
-          MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualRhs::MaxColsAtCompileTime> BlockingType;
+          MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, ActualRhs_::MaxColsAtCompileTime> BlockingType;
 
     BlockingType blocking(size, size, depth, 1, false);
 

diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index b775dbe..e7dd61d 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h

@@ -36,31 +36,31 @@
 {
   typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
 
-#define PACKET_DECL_COND_PREFIX(prefix, name, packet_size)                        \
+#define PACKET_DECL_COND_POSTFIX(postfix, name, packet_size)                        \
   typedef typename gemv_packet_cond<packet_size,                                  \
                                     typename packet_traits<name ## Scalar>::type, \
                                     typename packet_traits<name ## Scalar>::half, \
                                     typename unpacket_traits<typename packet_traits<name ## Scalar>::half>::half>::type \
-  prefix ## name ## Packet
+  name ## Packet ## postfix
 
-  PACKET_DECL_COND_PREFIX(_, Lhs, PacketSize_);
-  PACKET_DECL_COND_PREFIX(_, Rhs, PacketSize_);
-  PACKET_DECL_COND_PREFIX(_, Res, PacketSize_);
-#undef PACKET_DECL_COND_PREFIX
+  PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_);
+  PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_);
+  PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_);
+#undef PACKET_DECL_COND_POSTFIX
 
 public:
   enum {
-        Vectorizable = unpacket_traits<_LhsPacket>::vectorizable &&
-        unpacket_traits<_RhsPacket>::vectorizable &&
-        int(unpacket_traits<_LhsPacket>::size)==int(unpacket_traits<_RhsPacket>::size),
-        LhsPacketSize = Vectorizable ? unpacket_traits<_LhsPacket>::size : 1,
-        RhsPacketSize = Vectorizable ? unpacket_traits<_RhsPacket>::size : 1,
-        ResPacketSize = Vectorizable ? unpacket_traits<_ResPacket>::size : 1
+        Vectorizable = unpacket_traits<LhsPacket_>::vectorizable &&
+        unpacket_traits<RhsPacket_>::vectorizable &&
+        int(unpacket_traits<LhsPacket_>::size)==int(unpacket_traits<RhsPacket_>::size),
+        LhsPacketSize = Vectorizable ? unpacket_traits<LhsPacket_>::size : 1,
+        RhsPacketSize = Vectorizable ? unpacket_traits<RhsPacket_>::size : 1,
+        ResPacketSize = Vectorizable ? unpacket_traits<ResPacket_>::size : 1
   };
 
-  typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
-  typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
-  typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+  typedef typename conditional<Vectorizable,LhsPacket_,LhsScalar>::type LhsPacket;
+  typedef typename conditional<Vectorizable,RhsPacket_,RhsScalar>::type RhsPacket;
+  typedef typename conditional<Vectorizable,ResPacket_,ResScalar>::type ResPacket;
 };
 
 

diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h
index 629ce1d..026bc19 100644
--- a/Eigen/src/Core/products/SelfadjointProduct.h
+++ b/Eigen/src/Core/products/SelfadjointProduct.h

@@ -57,14 +57,14 @@
     typedef typename MatrixType::Scalar Scalar;
     typedef internal::blas_traits<OtherType> OtherBlasTraits;
     typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
-    typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
+    typedef typename internal::remove_all<ActualOtherType>::type ActualOtherType_;
     typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());
 
     Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
 
     enum {
       StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,
-      UseOtherDirectly = _ActualOtherType::InnerStrideAtCompileTime==1
+      UseOtherDirectly = ActualOtherType_::InnerStrideAtCompileTime==1
     };
     internal::gemv_static_vector_if<Scalar,OtherType::SizeAtCompileTime,OtherType::MaxSizeAtCompileTime,!UseOtherDirectly> static_other;
 
@@ -72,7 +72,7 @@
       (UseOtherDirectly ? const_cast<Scalar*>(actualOther.data()) : static_other.data()));
       
     if(!UseOtherDirectly)
-      Map<typename _ActualOtherType::PlainObject>(actualOtherPtr, actualOther.size()) = actualOther;
+      Map<typename ActualOtherType_::PlainObject>(actualOtherPtr, actualOther.size()) = actualOther;
     
     selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
                               OtherBlasTraits::NeedToConjugate  && NumTraits<Scalar>::IsComplex,
@@ -89,21 +89,21 @@
     typedef typename MatrixType::Scalar Scalar;
     typedef internal::blas_traits<OtherType> OtherBlasTraits;
     typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
-    typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
+    typedef typename internal::remove_all<ActualOtherType>::type ActualOtherType_;
     typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());
 
     Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
 
     enum {
       IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
-      OtherIsRowMajor = _ActualOtherType::Flags&RowMajorBit ? 1 : 0
+      OtherIsRowMajor = ActualOtherType_::Flags&RowMajorBit ? 1 : 0
     };
 
     Index size = mat.cols();
     Index depth = actualOther.cols();
 
     typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,Scalar,Scalar,
-              MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualOtherType::MaxColsAtCompileTime> BlockingType;
+              MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, ActualOtherType_::MaxColsAtCompileTime> BlockingType;
 
     BlockingType blocking(size, size, depth, 1, false);
 

diff --git a/Eigen/src/Core/products/SelfadjointRank2Update.h b/Eigen/src/Core/products/SelfadjointRank2Update.h
index 3153009..44c3381 100644
--- a/Eigen/src/Core/products/SelfadjointRank2Update.h
+++ b/Eigen/src/Core/products/SelfadjointRank2Update.h

@@ -65,12 +65,12 @@
 {
   typedef internal::blas_traits<DerivedU> UBlasTraits;
   typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
-  typedef typename internal::remove_all<ActualUType>::type _ActualUType;
+  typedef typename internal::remove_all<ActualUType>::type ActualUType_;
   typename internal::add_const_on_value_type<ActualUType>::type actualU = UBlasTraits::extract(u.derived());
 
   typedef internal::blas_traits<DerivedV> VBlasTraits;
   typedef typename VBlasTraits::DirectLinearAccessType ActualVType;
-  typedef typename internal::remove_all<ActualVType>::type _ActualVType;
+  typedef typename internal::remove_all<ActualVType>::type ActualVType_;
   typename internal::add_const_on_value_type<ActualVType>::type actualV = VBlasTraits::extract(v.derived());
 
   // If MatrixType is row major, then we use the routine for lower triangular in the upper triangular case and
@@ -82,8 +82,8 @@
   if (IsRowMajor)
     actualAlpha = numext::conj(actualAlpha);
 
-  typedef typename internal::remove_all<typename internal::conj_expr_if<int(IsRowMajor) ^ int(UBlasTraits::NeedToConjugate), _ActualUType>::type>::type UType;
-  typedef typename internal::remove_all<typename internal::conj_expr_if<int(IsRowMajor) ^ int(VBlasTraits::NeedToConjugate), _ActualVType>::type>::type VType;
+  typedef typename internal::remove_all<typename internal::conj_expr_if<int(IsRowMajor) ^ int(UBlasTraits::NeedToConjugate), ActualUType_>::type>::type UType;
+  typedef typename internal::remove_all<typename internal::conj_expr_if<int(IsRowMajor) ^ int(VBlasTraits::NeedToConjugate), ActualVType_>::type>::type VType;
   internal::selfadjoint_rank2_update_selector<Scalar, Index, UType, VType,
     (IsRowMajor ? int(UpLo==Upper ? Lower : Upper) : UpLo)>
     ::run(_expression().const_cast_derived().data(),_expression().outerStride(),UType(actualU),VType(actualV),actualAlpha);

diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index 2268593..d2dd702 100755
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h

@@ -405,7 +405,7 @@
 {
   typedef typename traits<XprType>::Scalar Scalar;
   typedef const XprType& ExtractType;
-  typedef XprType _ExtractType;
+  typedef XprType ExtractType_;
   enum {
     IsComplex = NumTraits<Scalar>::IsComplex,
     IsTransposed = false,
@@ -418,7 +418,7 @@
   };
   typedef typename conditional<bool(HasUsableDirectAccess),
     ExtractType,
-    typename _ExtractType::PlainObject
+    typename ExtractType_::PlainObject
     >::type DirectLinearAccessType;
   static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return x; }
   static inline EIGEN_DEVICE_FUNC const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
@@ -500,8 +500,8 @@
   typedef typename NestedXpr::Scalar Scalar;
   typedef blas_traits<NestedXpr> Base;
   typedef Transpose<NestedXpr> XprType;
-  typedef Transpose<const typename Base::_ExtractType>  ExtractType; // const to get rid of a compile error; anyway blas traits are only used on the RHS
-  typedef Transpose<const typename Base::_ExtractType> _ExtractType;
+  typedef Transpose<const typename Base::ExtractType_>  ExtractType; // const to get rid of a compile error; anyway blas traits are only used on the RHS
+  typedef Transpose<const typename Base::ExtractType_> ExtractType_;
   typedef typename conditional<bool(Base::HasUsableDirectAccess),
     ExtractType,
     typename ExtractType::PlainObject

diff --git a/Eigen/src/Core/util/ConfigureVectorization.h b/Eigen/src/Core/util/ConfigureVectorization.h
index ba2049b..4ba1370 100644
--- a/Eigen/src/Core/util/ConfigureVectorization.h
+++ b/Eigen/src/Core/util/ConfigureVectorization.h

@@ -420,9 +420,11 @@
   // We can use the optimized fp16 to float and float to fp16 conversion routines
   #define EIGEN_HAS_FP16_C
 
-  #if EIGEN_COMP_CLANG
-    // Workaround for clang: The FP16C intrinsics for clang are included by
-    // immintrin.h, as opposed to emmintrin.h as suggested by Intel:
+  #if EIGEN_COMP_GNUC
+    // Make sure immintrin.h is included, even if e.g. vectorization is
+    // explicitly disabled (see also issue #2395).
+    // Note that FP16C intrinsics for gcc and clang are included by immintrin.h,
+    // as opposed to emmintrin.h as suggested by Intel:
     // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=FP16C&expand=1711
     #include <immintrin.h>
   #endif

diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h
index d7e8a13..7e1c76b 100755
--- a/Eigen/src/Core/util/DisableStupidWarnings.h
+++ b/Eigen/src/Core/util/DisableStupidWarnings.h

@@ -54,7 +54,7 @@
     #pragma clang diagnostic ignored "-Wc11-extensions"
   #endif
 
-#elif defined __GNUC__
+#elif defined __GNUC__ && !defined(__FUJITSU)
 
   #if (!defined(EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS)) &&  (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
     #pragma GCC diagnostic push

diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index 2f33599..f3df372 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h

@@ -42,6 +42,11 @@
 
 template< typename T> struct evaluator;
 
+template<typename T> struct has_trivially_copyable_storage
+{
+  static const bool value = false;
+};
+
 } // end namespace internal
 
 template<typename T> struct NumTraits;
@@ -78,7 +83,6 @@
 template<typename MatrixType> class Conjugate;
 template<typename NullaryOp, typename MatrixType>         class CwiseNullaryOp;
 template<typename UnaryOp,   typename MatrixType>         class CwiseUnaryOp;
-template<typename ViewOp,    typename MatrixType>         class CwiseUnaryView;
 template<typename BinaryOp,  typename Lhs, typename Rhs>  class CwiseBinaryOp;
 template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>  class CwiseTernaryOp;
 template<typename Decomposition, typename Rhstype>        class Solve;
@@ -87,7 +91,7 @@
 template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product;
 
 template<typename Derived> class DiagonalBase;
-template<typename _DiagonalVectorType> class DiagonalWrapper;
+template<typename DiagonalVectorType_> class DiagonalWrapper;
 template<typename Scalar_, int SizeAtCompileTime, int MaxSizeAtCompileTime=SizeAtCompileTime> class DiagonalMatrix;
 template<typename MatrixType, typename DiagonalType, int ProductOrder> class DiagonalProduct;
 template<typename MatrixType, int Index = 0> class Diagonal;
@@ -108,6 +112,7 @@
 template<typename Derived> class RefBase;
 template<typename PlainObjectType, int Options = 0,
          typename StrideType = typename internal::conditional<PlainObjectType::IsVectorAtCompileTime,InnerStride<1>,OuterStride<> >::type > class Ref;
+template<typename ViewOp,    typename MatrixType, typename StrideType = Stride<0,0>>         class CwiseUnaryView;
 
 template<typename Derived> class TriangularBase;
 template<typename MatrixType, unsigned int Mode> class TriangularView;
@@ -272,8 +277,8 @@
 template<typename Scalar,int Dim> class AlignedBox;
 template<typename Scalar, int Options = AutoAlign> class Quaternion;
 template<typename Scalar,int Dim,int Mode,int Options_=AutoAlign> class Transform;
-template <typename Scalar_, int _AmbientDim, int Options=AutoAlign> class ParametrizedLine;
-template <typename Scalar_, int _AmbientDim, int Options=AutoAlign> class Hyperplane;
+template <typename Scalar_, int AmbientDim_, int Options=AutoAlign> class ParametrizedLine;
+template <typename Scalar_, int AmbientDim_, int Options=AutoAlign> class Hyperplane;
 template<typename Scalar> class UniformScaling;
 template<typename MatrixType,int Direction> class Homogeneous;
 

diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 0aafb9e..d0ac15a 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h

@@ -91,13 +91,20 @@
   #define EIGEN_COMP_LLVM 0
 #endif
 
-/// \internal EIGEN_COMP_ICC set to __INTEL_COMPILER if the compiler is Intel compiler, 0 otherwise
+/// \internal EIGEN_COMP_ICC set to __INTEL_COMPILER if the compiler is Intel icc compiler, 0 otherwise
 #if defined(__INTEL_COMPILER)
   #define EIGEN_COMP_ICC __INTEL_COMPILER
 #else
   #define EIGEN_COMP_ICC 0
 #endif
 
+/// \internal EIGEN_COMP_CLANGICC set to __INTEL_CLANG_COMPILER if the compiler is Intel icx compiler, 0 otherwise
+#if defined(__INTEL_CLANG_COMPILER)
+  #define EIGEN_COMP_CLANGICC __INTEL_CLANG_COMPILER
+#else
+  #define EIGEN_COMP_CLANGICC 0
+#endif
+
 /// \internal EIGEN_COMP_MINGW set to 1 if the compiler is mingw
 #if defined(__MINGW32__)
   #define EIGEN_COMP_MINGW 1
@@ -197,9 +204,45 @@
   #define EIGEN_COMP_EMSCRIPTEN 0
 #endif
 
+/// \internal EIGEN_COMP_FCC set to FCC version if the compiler is Fujitsu Compiler (traditional mode)
+/// \note The Fujitsu C/C++ compiler uses the traditional mode based
+/// on EDG g++ 6.1 by default or if envoked with the -Nnoclang flag
+#if defined(__FUJITSU)
+  #define EIGEN_COMP_FCC (__FCC_major__*100+__FCC_minor__*10+__FCC_patchlevel__)
+#else
+  #define EIGEN_COMP_FCC 0
+#endif
+
+/// \internal EIGEN_COMP_CLANGFCC set to FCC version if the compiler is Fujitsu Compiler (Clang mode)
+/// \note The Fujitsu C/C++ compiler uses the non-traditional mode
+/// based on Clang 7.1.0 if envoked with the -Nclang flag
+#if defined(__CLANG_FUJITSU)
+  #define EIGEN_COMP_CLANGFCC (__FCC_major__*100+__FCC_minor__*10+__FCC_patchlevel__)
+#else
+  #define EIGEN_COMP_CLANGFCC 0
+#endif
+
+/// \internal EIGEN_COMP_CPE set to CPE version if the compiler is HPE Cray Compiler (GCC based)
+/// \note This is the SVE-enabled C/C++ compiler from the HPE Cray
+/// Programming Environment (CPE) based on Cray GCC 8.1
+#if defined(_CRAYC) && !defined(__clang__)
+  #define EIGEN_COMP_CPE (_RELEASE_MAJOR*100+_RELEASE_MINOR*10+_RELEASE_PATCHLEVEL)
+#else
+  #define EIGEN_COMP_CPE 0
+#endif
+
+/// \internal EIGEN_COMP_CLANGCPE set to CPE version if the compiler is HPE Cray Compiler (Clang based)
+/// \note This is the C/C++ compiler from the HPE Cray Programming
+/// Environment (CPE) based on Cray Clang 11.0 without SVE-support
+#if defined(_CRAYC) && defined(__clang__)
+  #define EIGEN_COMP_CLANGCPE (_RELEASE_MAJOR*100+_RELEASE_MINOR*10+_RELEASE_PATCHLEVEL)
+#else
+  #define EIGEN_COMP_CLANGCPE 0
+#endif
+
 
 /// \internal EIGEN_GNUC_STRICT set to 1 if the compiler is really GCC and not a compatible compiler (e.g., ICC, clang, mingw, etc.)
-#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM || EIGEN_COMP_EMSCRIPTEN)
+#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_CLANGICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM || EIGEN_COMP_EMSCRIPTEN || EIGEN_COMP_FCC || EIGEN_COMP_CLANGFCC || EIGEN_COMP_CPE || EIGEN_COMP_CLANGCPE)
   #define EIGEN_COMP_GNUC_STRICT 1
 #else
   #define EIGEN_COMP_GNUC_STRICT 0
@@ -616,6 +659,7 @@
   #define EIGEN_COMP_CXXVER 03
 #endif
 
+
 // The macros EIGEN_HAS_CXX?? defines a rough estimate of available c++ features
 // but in practice we should not rely on them but rather on the availability of
 // individual features as defined later.
@@ -1044,6 +1088,16 @@
     }
 #endif
 
+/**
+ * \internal
+ * \brief Macro for conditionally trivial special member functions for supporting trivially copyable types.
+ * This feature is officially known as C++20's P0848R3 and is enabled on supported compilers.
+ */
+#if (EIGEN_COMP_CXXVER >= 20) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MSVC_STRICT)
+#define EIGEN_COMP_HAS_P0848R3 1
+#else
+#define EIGEN_COMP_HAS_P0848R3 0
+#endif
 
 /**
  * \internal

diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index b641f97..c0232c2 100755
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h

@@ -212,20 +212,33 @@
   enum { value = N };
 };
 
+
 /** \internal
-  * Analogue of the std::size free function.
-  * It returns the size of the container or view \a x of type \c T
+  * Analogue of the std::ssize free function.
+  * It returns the signed size of the container or view \a x of type \c T
   *
   * It currently supports:
   *  - any types T defining a member T::size() const
   *  - plain C arrays as T[N]
   *
+  * For C++20, this function just forwards to `std::ssize`, or any ADL discoverable `ssize` function.
   */
-template<typename T>
-EIGEN_CONSTEXPR Index size(const T& x) { return x.size(); }
+#if EIGEN_COMP_CXXVER < 20
+template <typename T>
+EIGEN_CONSTEXPR auto index_list_size(const T& x) {
+  using R = std::common_type_t<std::ptrdiff_t, std::make_signed_t<decltype(x.size())>>;
+  return static_cast<R>(x.size());
+}
 
-template<typename T,std::size_t N>
-EIGEN_CONSTEXPR Index size(const T (&) [N]) { return N; }
+template<typename T, std::ptrdiff_t N>
+EIGEN_CONSTEXPR std::ptrdiff_t index_list_size(const T (&)[N]) { return N; }
+#else
+template <typename T>
+EIGEN_CONSTEXPR auto index_list_size(T&& x) {
+  using std::ssize;
+  return ssize(std::forward<T>(x));
+}
+#endif // EIGEN_COMP_CXXVER
 
 /** \internal
   * Convenient struct to get the result type of a nullary, unary, binary, or
@@ -646,7 +659,7 @@
 
 /// \internal Calculate logical XOR at compile time
 inline constexpr bool logical_xor(bool a, bool b) {
-  return (a || b) && !(a && b);
+  return a != b;
 }
 
 /// \internal Calculate logical IMPLIES at compile time

diff --git a/Eigen/src/Core/util/ReenableStupidWarnings.h b/Eigen/src/Core/util/ReenableStupidWarnings.h
index f830de1..7021e6d 100644
--- a/Eigen/src/Core/util/ReenableStupidWarnings.h
+++ b/Eigen/src/Core/util/ReenableStupidWarnings.h

@@ -12,7 +12,7 @@
     #pragma warning pop
   #elif defined __clang__
     #pragma clang diagnostic pop
-  #elif defined __GNUC__  &&  (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
+  #elif defined __GNUC__  &&  !defined(__FUJITSU) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
     #pragma GCC diagnostic pop
   #endif
 

diff --git a/Eigen/src/Core/util/Serializer.h b/Eigen/src/Core/util/Serializer.h
index 7ec5eed..b77c5de 100644
--- a/Eigen/src/Core/util/Serializer.h
+++ b/Eigen/src/Core/util/Serializer.h

@@ -45,11 +45,14 @@
   
   /**
    * Serializes a value to a byte buffer.
-   * \param dest the destination buffer.
-   * \param T the value to serialize.
+   * \param dest the destination buffer; if this is nullptr, does nothing.
+   * \param end the end of the destination buffer.
+   * \param value the value to serialize.
    * \return the next memory address past the end of the serialized data.
    */
-  EIGEN_DEVICE_FUNC uint8_t* serialize(uint8_t* dest, const T& value) {
+  EIGEN_DEVICE_FUNC uint8_t* serialize(uint8_t* dest, uint8_t* end, const T& value) {
+    if (EIGEN_PREDICT_FALSE(dest == nullptr)) return nullptr;
+    if (EIGEN_PREDICT_FALSE(dest + sizeof(value) > end)) return nullptr;
     EIGEN_USING_STD(memcpy)
     memcpy(dest, &value, sizeof(value));
     return dest + sizeof(value);
@@ -57,11 +60,14 @@
   
   /**
    * Deserializes a value from a byte buffer.
-   * \param src the source buffer.
+   * \param src the source buffer; if this is nullptr, does nothing.
+   * \param end the end of the source buffer.
    * \param value the value to populate.
-   * \return the next unprocessed memory address.
+   * \return the next unprocessed memory address; nullptr if parsing errors are detected.
    */
-  EIGEN_DEVICE_FUNC uint8_t* deserialize(uint8_t* src, T& value) const {
+  EIGEN_DEVICE_FUNC const uint8_t* deserialize(const uint8_t* src, const uint8_t* end, T& value) const {
+    if (EIGEN_PREDICT_FALSE(src == nullptr)) return nullptr;
+    if (EIGEN_PREDICT_FALSE(src + sizeof(value) > end)) return nullptr;
     EIGEN_USING_STD(memcpy)
     memcpy(&value, src, sizeof(value));
     return src + sizeof(value);
@@ -84,7 +90,9 @@
     return sizeof(Header) + sizeof(Scalar) * value.size();
   }
   
-  EIGEN_DEVICE_FUNC uint8_t* serialize(uint8_t* dest, const Derived& value) {
+  EIGEN_DEVICE_FUNC uint8_t* serialize(uint8_t* dest, uint8_t* end, const Derived& value) {
+    if (EIGEN_PREDICT_FALSE(dest == nullptr)) return nullptr;
+    if (EIGEN_PREDICT_FALSE(dest + size(value) > end)) return nullptr;
     const size_t header_bytes = sizeof(Header);
     const size_t data_bytes = sizeof(Scalar) * value.size();
     Header header = {value.rows(), value.cols()};
@@ -95,14 +103,17 @@
     return dest + data_bytes;
   }
   
-  EIGEN_DEVICE_FUNC uint8_t* deserialize(uint8_t* src, Derived& value) const {
+  EIGEN_DEVICE_FUNC const uint8_t* deserialize(const uint8_t* src, const uint8_t* end, Derived& value) const {
+    if (EIGEN_PREDICT_FALSE(src == nullptr)) return nullptr;
+    if (EIGEN_PREDICT_FALSE(src + sizeof(Header) > end)) return nullptr;
     const size_t header_bytes = sizeof(Header);
     Header header;
     EIGEN_USING_STD(memcpy)
     memcpy(&header, src, header_bytes);
     src += header_bytes;
-    value.resize(header.rows, header.cols);
     const size_t data_bytes = sizeof(Scalar) * header.rows * header.cols;
+    if (EIGEN_PREDICT_FALSE(src + data_bytes > end)) return nullptr;
+    value.resize(header.rows, header.cols);
     memcpy(value.data(), src, data_bytes);
     return src + data_bytes;
   }
@@ -134,17 +145,17 @@
   }
   
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  uint8_t* serialize(uint8_t* dest, const T1& value, const Ts&... args) {
+  uint8_t* serialize(uint8_t* dest, uint8_t* end, const T1& value, const Ts&... args) {
     Serializer serializer;
-    dest = serializer.serialize(dest, value);
-    return serialize_impl<N-1, Ts...>::serialize(dest, args...);
+    dest = serializer.serialize(dest, end, value);
+    return serialize_impl<N-1, Ts...>::serialize(dest, end, args...);
   }
   
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  uint8_t* deserialize(uint8_t* src, T1& value, Ts&... args) {
+  const uint8_t* deserialize(const uint8_t* src, const uint8_t* end, T1& value, Ts&... args) {
     Serializer serializer;
-    src = serializer.deserialize(src, value);
-    return serialize_impl<N-1, Ts...>::deserialize(src, args...);
+    src = serializer.deserialize(src, end, value);
+    return serialize_impl<N-1, Ts...>::deserialize(src, end, args...);
   }
 };
 
@@ -155,10 +166,10 @@
   size_t serialize_size() { return 0; }
   
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  uint8_t* serialize(uint8_t* dest) { return dest; }
+  uint8_t* serialize(uint8_t* dest, uint8_t* /*end*/) { return dest; }
   
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  uint8_t* deserialize(uint8_t* src) { return src; }
+  const uint8_t* deserialize(const uint8_t* src, const uint8_t* /*end*/) { return src; }
 };
 
 }  // namespace internal
@@ -179,27 +190,29 @@
 /**
  * Serialize a set of values to the byte buffer.
  * 
- * \param dest output byte buffer.
+ * \param dest output byte buffer; if this is nullptr, does nothing.
+ * \param end the end of the output byte buffer.
  * \param args ... arguments to serialize in sequence.
  * \return the next address after all serialized values.
  */
 template<typename... Args>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-uint8_t* serialize(uint8_t* dest, const Args&... args) {
-  return internal::serialize_impl<sizeof...(args), Args...>::serialize(dest, args...);
+uint8_t* serialize(uint8_t* dest, uint8_t* end, const Args&... args) {
+  return internal::serialize_impl<sizeof...(args), Args...>::serialize(dest, end, args...);
 }
 
 /**
  * Deserialize a set of values from the byte buffer.
  * 
- * \param src input byte buffer.
+ * \param src input byte buffer; if this is nullptr, does nothing.
+ * \param end the end of input byte buffer.
  * \param args ... arguments to deserialize in sequence.
- * \return the next address after all parsed values.
+ * \return the next address after all parsed values; nullptr if parsing errors are detected.
  */
 template<typename... Args>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-uint8_t* deserialize(uint8_t* src, Args&... args) {
-  return internal::serialize_impl<sizeof...(args), Args...>::deserialize(src, args...);
+const uint8_t* deserialize(const uint8_t* src, const uint8_t* end, Args&... args) {
+  return internal::serialize_impl<sizeof...(args), Args...>::deserialize(src, end, args...);
 }
 
 }  // namespace Eigen

diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index a1314e9..73bad6c 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h

@@ -180,32 +180,8 @@
 
 template<typename T> struct unpacket_traits;
 
-// If we vectorize regardless of alignment, pick the full-sized packet if:
-//
-// * The size is large enough;
-// * Picking it will result in less operations than picking the half size.
-//   Consider the case where the size is 12, the full packet is 8, and the
-//   half packet is 4. If we pick the full packet we'd have 1 + 4 operations,
-//   but only 3 operations if we pick the half-packet.
-//
-// The reason why we only do this with EIGEN_UNALIGNED_VECTORIZE is that if
-// we chose packets which do not divide the data size exactly we're going to
-// be left with some possibly unaligned data at the end.
-#if EIGEN_UNALIGNED_VECTORIZE
-template<int Size, typename PacketType,
-         bool Stop =
-           Size==Dynamic ||
-           (Size >= unpacket_traits<PacketType>::size &&
-             // If the packet size is 1 we're always good -- it will always divide things perfectly.
-             // We have this check since otherwise 1/2 would be 0 in the division below.
-             (unpacket_traits<PacketType>::size == 1 ||
-               (Size/unpacket_traits<PacketType>::size + Size%unpacket_traits<PacketType>::size) <=
-               (Size/(unpacket_traits<PacketType>::size/2) + Size%(unpacket_traits<PacketType>::size/2)))) ||
-           is_same<PacketType,typename unpacket_traits<PacketType>::half>::value>
-#else
 template<int Size, typename PacketType,
          bool Stop = Size==Dynamic || (Size%unpacket_traits<PacketType>::size)==0 || is_same<PacketType,typename unpacket_traits<PacketType>::half>::value>
-#endif
 struct find_best_packet_helper;
 
 template< int Size, typename PacketType>
@@ -527,8 +503,8 @@
 template<typename XprType, typename CastType> struct cast_return_type
 {
   typedef typename XprType::Scalar CurrentScalarType;
-  typedef typename remove_all<CastType>::type _CastType;
-  typedef typename _CastType::Scalar NewScalarType;
+  typedef typename remove_all<CastType>::type CastType_;
+  typedef typename CastType_::Scalar NewScalarType;
   typedef typename conditional<is_same<CurrentScalarType,NewScalarType>::value,
                               const XprType&,CastType>::type type;
 };
commit	e783d933fde3ec0640a5bd254ddbb3ae5d23a726	[log] [tgz]
author	Rasmus Munk Larsen <rmlarsen@google.com>	Thu Jan 20 14:51:11 2022 -0800
committer	Antonio Sanchez <cantonios@google.com>	Mon Sep 26 15:57:10 2022 -0700
tree	825873f71c9b7e7666a4b19b93dacd1120153ddc
parent	51987b83142c8ddc4e6cfd6ef3c14dc4bab140e2 [diff]