Update Eigen to commit:b3bf8d6a13585ff248c079402654647d298de60b

CHANGELOG
=========
b3bf8d6a1 - Try to reduce size of GEBP kernel for non-ARM targets.
13b69fc1b - Try to reduce compilation time/memory for GEBP kernel using EIGEN_IF_CONSTEXPR
3c4637640 - Remove unused typedef.
ed8cda3ce - Move EIGEN_NEON_GEBP_NR macro to the right place in GeneralBlockPanelKernel.h
e2ea86651 - Add a macro to set the nr trait in the BEBP kernel for NEON.
23299632c - Use 3px8/2px8/1px8/1x8 gebp_kernel on arm64-neon
7b2901e2a - Add vectorized integer division for int32 with AVX512, AVX or SSE.
5ffe7b92e - [ROCm] fixed gpuGetDevice unused message
f913a4067 - Revert "Add AVX int32_t pdiv"
273e0c884 - Revert "Add constexpr, test for C++14 constexpr."
ea84e7ad6 - Add AVX int32_t pdiv

PiperOrigin-RevId: 477587641
Change-Id: Ia98922011c3750ff540a831d4a5cb61cda8fc2fc
diff --git a/Eigen/Core b/Eigen/Core
index 623d735..48c2121 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -177,6 +177,10 @@
 #include "src/Core/arch/Default/TypeCasting.h"
 #include "src/Core/arch/Default/GenericPacketMathFunctionsFwd.h"
 
+#ifndef EIGEN_GPU_COMPILE_PHASE
+  #include <csignal>
+#endif
+
 #if defined EIGEN_VECTORIZE_AVX512
   #if defined EIGEN_VECTORIZE_AVX512FP16
     #include "src/Core/arch/AVX512/PacketMathFP16.h"
diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h
index ee82214..7be8971 100644
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -130,7 +130,7 @@
       *
       * \sa resize(Index,Index)
       */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Array() : Base()
     {
       EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h
index 70be252..28397e5 100644
--- a/Eigen/src/Core/ArrayBase.h
+++ b/Eigen/src/Core/ArrayBase.h
@@ -148,7 +148,7 @@
       * \sa MatrixBase::array() */
     EIGEN_DEVICE_FUNC
     MatrixWrapper<Derived> matrix() { return MatrixWrapper<Derived>(derived()); }
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     const MatrixWrapper<const Derived> matrix() const { return MatrixWrapper<const Derived>(derived()); }
 
 //     template<typename Dest>
diff --git a/Eigen/src/Core/ArrayWrapper.h b/Eigen/src/Core/ArrayWrapper.h
index 34d4963..e65b8fb 100644
--- a/Eigen/src/Core/ArrayWrapper.h
+++ b/Eigen/src/Core/ArrayWrapper.h
@@ -59,7 +59,7 @@
 
     using Base::coeffRef;
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
 
     EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
@@ -89,10 +89,10 @@
     }
 
     template<typename Dest>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     inline void evalTo(Dest& dst) const { dst = m_expression; }
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     const internal::remove_all_t<NestedExpressionType>&
     nestedExpression() const
     {
@@ -157,7 +157,7 @@
 
     using Base::coeffRef;
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
 
     EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
@@ -186,7 +186,7 @@
       return m_expression.coeffRef(index);
     }
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     const internal::remove_all_t<NestedExpressionType>&
     nestedExpression() const
     {
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index 7f988ec..dc716d3 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -63,7 +63,7 @@
 template<typename Derived>
 template <typename OtherDerived>
 EIGEN_DEVICE_FUNC
-EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
 {
   internal::call_assignment(derived(), other.derived());
   return derived();
@@ -71,7 +71,7 @@
 
 template<typename Derived>
 template <typename OtherDerived>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other)
 {
   internal::call_assignment(derived(), other.derived());
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index fe548b5..8fb1f81 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -208,7 +208,7 @@
     inner = Index % DstXprType::InnerSizeAtCompileTime
   };
 
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
   {
     kernel.assignCoeffByOuterInner(outer, inner);
     copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
@@ -224,7 +224,7 @@
 template<typename Kernel, int Index_, int Stop>
 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
 {
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel, Index outer)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
   {
     kernel.assignCoeffByOuterInner(outer, Index_);
     copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
@@ -234,7 +234,7 @@
 template<typename Kernel, int Stop>
 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
 {
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel&, Index) { }
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
 };
 
 /***********************
@@ -276,7 +276,7 @@
     DstAlignment = Kernel::AssignmentTraits::DstAlignment
   };
 
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
   {
     kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
     enum { NextIndex = Index + unpacket_traits<PacketType>::size };
@@ -341,7 +341,7 @@
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
 {
-  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE EIGEN_CONSTEXPR run(Kernel &kernel)
+  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
   {
     for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
       for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
@@ -354,7 +354,7 @@
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
 {
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
   {
     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
     copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
@@ -364,7 +364,7 @@
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
 {
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
   {
     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
 
@@ -484,7 +484,7 @@
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
 {
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
   {
     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
     copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
@@ -494,7 +494,7 @@
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
 {
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel &kernel)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
   {
     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
     typedef typename Kernel::AssignmentTraits Traits;
@@ -630,7 +630,7 @@
   typedef typename AssignmentTraits::PacketType PacketType;
 
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
     : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
   {
@@ -650,19 +650,19 @@
   EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const EIGEN_NOEXCEPT { return m_src; }
 
   /// Assign src(row,col) to dst(row,col) through the assignment functor.
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void assignCoeff(Index row, Index col)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
   {
     m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
   }
 
   /// \sa assignCoeff(Index,Index)
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void assignCoeff(Index index)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
   {
     m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
   }
 
   /// \sa assignCoeff(Index,Index)
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void assignCoeffByOuterInner(Index outer, Index inner)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
   {
     Index row = rowIndexByOuterInner(outer, inner);
     Index col = colIndexByOuterInner(outer, inner);
@@ -671,7 +671,7 @@
 
 
   template<int StoreMode, int LoadMode, typename PacketType>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void assignPacket(Index row, Index col)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
   {
     m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
   }
@@ -683,14 +683,14 @@
   }
 
   template<int StoreMode, int LoadMode, typename PacketType>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void assignPacketByOuterInner(Index outer, Index inner)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
   {
     Index row = rowIndexByOuterInner(outer, inner);
     Index col = colIndexByOuterInner(outer, inner);
     assignPacket<StoreMode,LoadMode,PacketType>(row, col);
   }
 
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index rowIndexByOuterInner(Index outer, Index inner)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
   {
     typedef typename DstEvaluatorType::ExpressionTraits Traits;
     return int(Traits::RowsAtCompileTime) == 1 ? 0
@@ -699,7 +699,7 @@
       : inner;
   }
 
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index colIndexByOuterInner(Index outer, Index inner)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
   {
     typedef typename DstEvaluatorType::ExpressionTraits Traits;
     return int(Traits::ColsAtCompileTime) == 1 ? 0
@@ -747,7 +747,7 @@
 ***************************************************************************/
 
 template<typename DstXprType,typename SrcXprType, typename Functor>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/)
 {
   EIGEN_ONLY_USED_FOR_DEBUG(dst);
@@ -756,7 +756,7 @@
 }
 
 template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/)
 {
   Index dstRows = src.rows();
@@ -831,13 +831,13 @@
 // does not has to bother about these annoying details.
 
 template<typename Dst, typename Src>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 void call_assignment(Dst& dst, const Src& src)
 {
   call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
 }
 template<typename Dst, typename Src>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 void call_assignment(const Dst& dst, const Src& src)
 {
   call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
@@ -853,7 +853,7 @@
 }
 
 template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 void call_assignment(Dst& dst, const Src& src, const Func& func, std::enable_if_t<!evaluator_assume_aliasing<Src>::value, void*> = 0)
 {
   call_assignment_no_alias(dst, src, func);
@@ -937,7 +937,7 @@
 }
 
 // forward declaration
-template<typename Dst, typename Src> EIGEN_CONSTEXPR void check_for_aliasing(const Dst &dst, const Src &src);
+template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
 
 // Generic Dense to Dense assignment
 // Note that the last template argument "Weak" is needed to make it possible to perform
@@ -945,7 +945,7 @@
 template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
 struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
 {
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
   {
 #ifndef EIGEN_NO_DEBUG
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h
index 24b96c7..19c4b68 100644
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -116,7 +116,7 @@
 
     /** Column or Row constructor
       */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Block(XprType& xpr, Index i) : Impl(xpr,i)
     {
       eigen_assert( (i>=0) && (
@@ -126,7 +126,7 @@
 
     /** Fixed-size constructor
       */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Block(XprType& xpr, Index startRow, Index startCol)
       : Impl(xpr, startRow, startCol)
     {
@@ -137,7 +137,7 @@
 
     /** Dynamic-size constructor
       */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Block(XprType& xpr,
           Index startRow, Index startCol,
           Index blockRows, Index blockCols)
@@ -161,9 +161,9 @@
   public:
     typedef Impl Base;
     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {}
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {}
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
       : Impl(xpr, startRow, startCol, blockRows, blockCols) {}
 };
@@ -186,7 +186,7 @@
 
     /** Column or Row constructor
       */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     inline BlockImpl_dense(XprType& xpr, Index i)
       : m_xpr(xpr),
         // It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime,
@@ -201,7 +201,7 @@
 
     /** Fixed-size constructor
       */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
       : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
                     m_blockRows(BlockRows), m_blockCols(BlockCols)
@@ -209,7 +209,7 @@
 
     /** Dynamic-size constructor
       */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     inline BlockImpl_dense(XprType& xpr,
           Index startRow, Index startCol,
           Index blockRows, Index blockCols)
@@ -217,8 +217,8 @@
                     m_blockRows(blockRows), m_blockCols(blockCols)
     {}
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const { return m_blockRows.value(); }
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const { return m_blockCols.value(); }
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); }
 
     EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index rowId, Index colId)
@@ -296,7 +296,7 @@
     EIGEN_DEVICE_FUNC inline Index outerStride() const;
     #endif
 
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const internal::remove_all_t<XprTypeNested>& nestedExpression() const
     {
       return m_xpr;
@@ -344,7 +344,7 @@
 
     /** Column or Row constructor
       */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     BlockImpl_dense(XprType& xpr, Index i)
       : Base(xpr.data() + i * (    ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor))
                                 || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()),
@@ -352,37 +352,31 @@
              BlockCols==1 ? 1 : xpr.cols()),
         m_xpr(xpr),
         m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
-        m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0),
-        m_outerStride(internal::traits<BlockType>::HasSameStorageOrderAsXprType
-                    ? m_xpr.outerStride()
-                    : m_xpr.innerStride())
+        m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)
     {
+      init();
     }
 
     /** Fixed-size constructor
       */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
       : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)),
-        m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
-        m_outerStride(internal::traits<BlockType>::HasSameStorageOrderAsXprType
-                    ? m_xpr.outerStride()
-                    : m_xpr.innerStride())
+        m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
     {
+      init();
     }
 
     /** Dynamic-size constructor
       */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     BlockImpl_dense(XprType& xpr,
           Index startRow, Index startCol,
           Index blockRows, Index blockCols)
       : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols),
-        m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
-        m_outerStride(internal::traits<BlockType>::HasSameStorageOrderAsXprType
-                    ? m_xpr.outerStride()
-                    : m_xpr.innerStride())
+        m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
     {
+      init();
     }
 
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -428,15 +422,21 @@
     /** \internal used by allowAligned() */
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)
-      : Base(data, blockRows, blockCols), m_xpr(xpr),
-        m_outerStride(internal::traits<BlockType>::HasSameStorageOrderAsXprType
-                    ? m_xpr.outerStride()
-                    : m_xpr.innerStride())
+      : Base(data, blockRows, blockCols), m_xpr(xpr)
     {
+      init();
     }
     #endif
 
   protected:
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    void init()
+    {
+      m_outerStride = internal::traits<BlockType>::HasSameStorageOrderAsXprType
+                    ? m_xpr.outerStride()
+                    : m_xpr.innerStride();
+    }
+
     XprTypeNested m_xpr;
     const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
     const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h
index 80d96ba..20e5bd9 100644
--- a/Eigen/src/Core/BooleanRedux.h
+++ b/Eigen/src/Core/BooleanRedux.h
@@ -25,7 +25,7 @@
     j = (UnrollCount-1) % InnerSize
   };
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline bool run(const Derived &mat)
+  EIGEN_DEVICE_FUNC static inline bool run(const Derived &mat)
   {
     return all_unroller<Derived, UnrollCount-1, InnerSize>::run(mat) && mat.coeff(IsRowMajor ? i : j, IsRowMajor ? j : i);
   }
@@ -34,13 +34,13 @@
 template<typename Derived, int InnerSize>
 struct all_unroller<Derived, 0, InnerSize>
 {
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline bool run(const Derived &/*mat*/) { return true; }
+  EIGEN_DEVICE_FUNC static inline bool run(const Derived &/*mat*/) { return true; }
 };
 
 template<typename Derived, int InnerSize>
 struct all_unroller<Derived, Dynamic, InnerSize>
 {
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline bool run(const Derived &) { return false; }
+  EIGEN_DEVICE_FUNC static inline bool run(const Derived &) { return false; }
 };
 
 template<typename Derived, int UnrollCount, int InnerSize>
@@ -52,7 +52,7 @@
     j = (UnrollCount-1) % InnerSize
   };
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline bool run(const Derived &mat)
+  EIGEN_DEVICE_FUNC static inline bool run(const Derived &mat)
   {
     return any_unroller<Derived, UnrollCount-1, InnerSize>::run(mat) || mat.coeff(IsRowMajor ? i : j, IsRowMajor ? j : i);
   }
@@ -61,13 +61,13 @@
 template<typename Derived, int InnerSize>
 struct any_unroller<Derived, 0, InnerSize>
 {
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline bool run(const Derived & /*mat*/) { return false; }
+  EIGEN_DEVICE_FUNC static inline bool run(const Derived & /*mat*/) { return false; }
 };
 
 template<typename Derived, int InnerSize>
 struct any_unroller<Derived, Dynamic, InnerSize>
 {
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline bool run(const Derived &) { return false; }
+  EIGEN_DEVICE_FUNC static inline bool run(const Derived &) { return false; }
 };
 
 } // end namespace internal
@@ -80,7 +80,7 @@
   * \sa any(), Cwise::operator<()
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline bool DenseBase<Derived>::all() const
+EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::all() const
 {
   typedef internal::evaluator<Derived> Evaluator;
   enum {
@@ -104,7 +104,7 @@
   * \sa all()
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline bool DenseBase<Derived>::any() const
+EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::any() const
 {
   typedef internal::evaluator<Derived> Evaluator;
   enum {
@@ -128,7 +128,7 @@
   * \sa all(), any()
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Eigen::Index DenseBase<Derived>::count() const
+EIGEN_DEVICE_FUNC inline Eigen::Index DenseBase<Derived>::count() const
 {
   return derived().template cast<bool>().template cast<Index>().sum();
 }
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 4022d7b..1729507 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -92,7 +92,7 @@
 struct evaluator : public unary_evaluator<T>
 {
   typedef unary_evaluator<T> Base;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit evaluator(const T& xpr) : Base(xpr) {}
 };
 
@@ -102,7 +102,7 @@
 struct evaluator<const T>
   : evaluator<T>
 {
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit evaluator(const T& xpr) : evaluator<T>(xpr) {}
 };
 
@@ -120,10 +120,8 @@
   // noncopyable:
   // Don't make this class inherit noncopyable as this kills EBO (Empty Base Optimization)
   // and make complex evaluator much larger than then should do.
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR evaluator_base() = default;
-#if defined(EIGEN_GPUCC)
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~evaluator_base() = default;
-#endif
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator_base() {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~evaluator_base() {}
 private:
   EIGEN_DEVICE_FUNC evaluator_base(const evaluator_base&);
   EIGEN_DEVICE_FUNC const evaluator_base& operator=(const evaluator_base&);
@@ -139,7 +137,7 @@
 // this helper permits to completely eliminate m_outerStride if it is known at compiletime.
 template<typename Scalar,int OuterStride> class plainobjectbase_evaluator_data {
 public:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr)
   {
 #ifndef EIGEN_INTERNAL_DEBUGGING
@@ -154,10 +152,9 @@
 
 template<typename Scalar> class plainobjectbase_evaluator_data<Scalar,Dynamic> {
 public:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
-  plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride)
-    : data(ptr), m_outerStride(outerStride) {}
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr), m_outerStride(outerStride) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Index outerStride() const { return m_outerStride; }
   const Scalar *data;
 protected:
@@ -189,21 +186,21 @@
                                                                         : RowsAtCompileTime
   };
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   evaluator()
     : m_d(0,OuterStrideAtCompileTime)
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit evaluator(const PlainObjectType& m)
-    : m_d(m.data(), IsVectorAtCompileTime ? 0 : m.outerStride())
+    : m_d(m.data(),IsVectorAtCompileTime ? 0 : m.outerStride())
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index row, Index col) const
   {
     if (IsRowMajor)
@@ -212,13 +209,13 @@
       return m_d.data[row + col * m_d.outerStride()];
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index index) const
   {
     return m_d.data[index];
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Scalar& coeffRef(Index row, Index col)
   {
     if (IsRowMajor)
@@ -227,7 +224,7 @@
       return const_cast<Scalar*>(m_d.data)[row + col * m_d.outerStride()];
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Scalar& coeffRef(Index index)
   {
     return const_cast<Scalar*>(m_d.data)[index];
@@ -280,10 +277,10 @@
 {
   typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   evaluator() {}
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit evaluator(const XprType& m)
     : evaluator<PlainObjectBase<XprType> >(m)
   { }
@@ -295,10 +292,10 @@
 {
   typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   evaluator() {}
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit evaluator(const XprType& m)
     : evaluator<PlainObjectBase<XprType> >(m)
   { }
@@ -318,31 +315,31 @@
     Alignment = evaluator<ArgType>::Alignment
   };
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {}
 
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index row, Index col) const
   {
     return m_argImpl.coeff(col, row);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index index) const
   {
     return m_argImpl.coeff(index);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Scalar& coeffRef(Index row, Index col)
   {
     return m_argImpl.coeffRef(col, row);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   typename XprType::Scalar& coeffRef(Index index)
   {
     return m_argImpl.coeffRef(index);
@@ -402,7 +399,7 @@
 template<typename Scalar,typename NullaryOp>
 struct nullary_wrapper<Scalar,NullaryOp,true,false,false>
 {
-  template <typename IndexType> EIGEN_CONSTEXPR
+  template <typename IndexType>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType=0, IndexType=0) const { return op(); }
   template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType=0, IndexType=0) const { return op.template packetOp<T>(); }
 };
@@ -411,7 +408,7 @@
 struct nullary_wrapper<Scalar,NullaryOp,false,false,true>
 {
   template <typename IndexType>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Scalar operator()(const NullaryOp& op, IndexType i, IndexType j=0) const { return op(i,j); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j=0) const { return op(i,j); }
   template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j=0) const { return op.template packetOp<T>(i,j); }
 };
 
@@ -516,7 +513,7 @@
     Alignment = AlignedMax
   };
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR explicit evaluator(const XprType& n)
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n)
     : m_functor(n.functor()), m_wrapper()
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
@@ -525,14 +522,14 @@
   typedef typename XprType::CoeffReturnType CoeffReturnType;
 
   template <typename IndexType>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(IndexType row, IndexType col) const
   {
     return m_wrapper(m_functor, row, col);
   }
 
   template <typename IndexType>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(IndexType index) const
   {
     return m_wrapper(m_functor,index);
@@ -573,7 +570,7 @@
     Alignment = evaluator<ArgType>::Alignment
   };
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit unary_evaluator(const XprType& op) : m_d(op)
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost);
@@ -582,13 +579,13 @@
 
   typedef typename XprType::CoeffReturnType CoeffReturnType;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index row, Index col) const
   {
     return m_d.func()(m_d.argImpl.coeff(row, col));
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index index) const
   {
     return m_d.func()(m_d.argImpl.coeff(index));
@@ -613,9 +610,9 @@
   // this helper permits to completely eliminate the functor if it is empty
   struct Data
   {
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Data(const XprType& xpr) : op(xpr.functor()), argImpl(xpr.nestedExpression()) {}
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const UnaryOp& func() const { return op; }
     UnaryOp op;
     evaluator<ArgType> argImpl;
@@ -730,7 +727,7 @@
   typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
   typedef binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > Base;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit evaluator(const XprType& xpr) : Base(xpr) {}
 };
 
@@ -759,7 +756,7 @@
     Alignment = plain_enum_min(evaluator<Lhs>::Alignment, evaluator<Rhs>::Alignment)
   };
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit binary_evaluator(const XprType& xpr) : m_d(xpr)
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);
@@ -768,13 +765,13 @@
 
   typedef typename XprType::CoeffReturnType CoeffReturnType;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index row, Index col) const
   {
     return m_d.func()(m_d.lhsImpl.coeff(row, col), m_d.rhsImpl.coeff(row, col));
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index index) const
   {
     return m_d.func()(m_d.lhsImpl.coeff(index), m_d.rhsImpl.coeff(index));
@@ -801,9 +798,9 @@
   // this helper permits to completely eliminate the functor if it is empty
   struct Data
   {
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Data(const XprType& xpr) : op(xpr.functor()), lhsImpl(xpr.lhs()), rhsImpl(xpr.rhs()) {}
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const BinaryOp& func() const { return op; }
     BinaryOp op;
     evaluator<Lhs> lhsImpl;
@@ -856,7 +853,7 @@
     return m_d.func()(m_d.argImpl.coeffRef(row, col));
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Scalar& coeffRef(Index index)
   {
     return m_d.func()(m_d.argImpl.coeffRef(index));
@@ -867,9 +864,9 @@
   // this helper permits to completely eliminate the functor if it is empty
   struct Data
   {
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Data(const XprType& xpr) : op(xpr.functor()), argImpl(xpr.nestedExpression()) {}
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const UnaryOp& func() const { return op; }
     UnaryOp op;
     evaluator<ArgType> argImpl;
@@ -899,7 +896,7 @@
     CoeffReadCost = NumTraits<Scalar>::ReadCost
   };
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit mapbase_evaluator(const XprType& map)
     : m_data(const_cast<PointerType>(map.data())),
       m_innerStride(map.innerStride()),
@@ -911,32 +908,32 @@
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index row, Index col) const
   {
     return m_data[col * colStride() + row * rowStride()];
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index index) const
   {
     return m_data[index * m_innerStride.value()];
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Scalar& coeffRef(Index row, Index col)
   {
     return m_data[col * colStride() + row * rowStride()];
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Scalar& coeffRef(Index index)
   {
     return m_data[index * m_innerStride.value()];
   }
 
   template<int LoadMode, typename PacketType>
-  EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_STRONG_INLINE
   PacketType packet(Index row, Index col) const
   {
     PointerType ptr = m_data + row * rowStride() + col * colStride();
@@ -1007,7 +1004,7 @@
     Alignment = int(MapOptions)&int(AlignedMask)
   };
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR explicit evaluator(const XprType& map)
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map)
     : mapbase_evaluator<XprType, PlainObjectType>(map)
   { }
 };
@@ -1025,7 +1022,7 @@
     Alignment = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Alignment
   };
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit evaluator(const XprType& ref)
     : mapbase_evaluator<XprType, PlainObjectType>(ref)
   { }
@@ -1081,7 +1078,7 @@
     Alignment = plain_enum_min(evaluator<ArgType>::Alignment, Alignment0)
   };
   typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit evaluator(const XprType& block) : block_evaluator_type(block)
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
@@ -1095,7 +1092,7 @@
 {
   typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit block_evaluator(const XprType& block)
     : unary_evaluator<XprType>(block)
   {}
@@ -1107,7 +1104,7 @@
 {
   typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit unary_evaluator(const XprType& block)
     : m_argImpl(block.nestedExpression()),
       m_startRow(block.startRow()),
@@ -1123,13 +1120,13 @@
     ForwardLinearAccess = (InnerPanel || int(XprType::IsRowMajor)==int(ArgType::IsRowMajor)) && bool(evaluator<ArgType>::Flags&LinearAccessBit)
   };
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index row, Index col) const
   {
     return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index index) const
   {
     return linear_coeff_impl(index, bool_constant<ForwardLinearAccess>());
@@ -1185,12 +1182,12 @@
   }
 
 protected:
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType linear_coeff_impl(Index index, internal::true_type /* ForwardLinearAccess */) const
   {
     return m_argImpl.coeff(m_linear_offset.value() + index);
   }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType linear_coeff_impl(Index index, internal::false_type /* not ForwardLinearAccess */) const
   {
     return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
@@ -1224,7 +1221,7 @@
   typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
   typedef typename XprType::Scalar Scalar;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit block_evaluator(const XprType& block)
     : mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
   {
@@ -1392,25 +1389,25 @@
     Alignment = evaluator<ArgType>::Alignment
   };
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}
 
   typedef typename ArgType::Scalar Scalar;
   typedef typename ArgType::CoeffReturnType CoeffReturnType;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index row, Index col) const
   {
     return m_argImpl.coeff(row, col);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index index) const
   {
     return m_argImpl.coeff(index);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Scalar& coeffRef(Index row, Index col)
   {
     return m_argImpl.coeffRef(row, col);
@@ -1460,7 +1457,7 @@
 {
   typedef MatrixWrapper<TArgType> XprType;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit unary_evaluator(const XprType& wrapper)
     : evaluator_wrapper_base<MatrixWrapper<TArgType> >(wrapper.nestedExpression())
   { }
@@ -1472,7 +1469,7 @@
 {
   typedef ArrayWrapper<TArgType> XprType;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit unary_evaluator(const XprType& wrapper)
     : evaluator_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression())
   { }
@@ -1623,7 +1620,7 @@
     Alignment = 0
   };
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit evaluator(const XprType& diagonal)
     : m_argImpl(diagonal.nestedExpression()),
       m_index(diagonal.index())
@@ -1632,13 +1629,13 @@
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index row, Index) const
   {
     return m_argImpl.coeff(row + rowOffset(), row + colOffset());
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index index) const
   {
     return m_argImpl.coeff(index + rowOffset(), index + colOffset());
diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h
index 0710560..21a061a 100644
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -111,7 +111,7 @@
     CwiseBinaryOp(const CwiseBinaryOp<BinaryOp,LhsType,RhsType>&) = default;
 #endif
 
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp())
       : m_lhs(aLhs), m_rhs(aRhs), m_functor(func)
     {
@@ -130,13 +130,13 @@
     }
 
     /** \returns the left hand side nested expression */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const LhsNested_& lhs() const { return m_lhs; }
     /** \returns the right hand side nested expression */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const RhsNested_& rhs() const { return m_rhs; }
     /** \returns the functor representing the binary operation */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const BinaryOp& functor() const { return m_functor; }
 
   protected:
@@ -173,7 +173,7 @@
   */
 template<typename Derived>
 template<typename OtherDerived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Derived &
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
 MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
 {
   call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index c509726..a62f54d 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -66,7 +66,7 @@
     typedef typename internal::dense_xpr_base<CwiseNullaryOp>::type Base;
     EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp())
       : m_rows(rows), m_cols(cols), m_functor(func)
     {
@@ -82,7 +82,7 @@
     Index cols() const { return m_cols.value(); }
 
     /** \returns the functor representing the nullary operation */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     const NullaryOp& functor() const { return m_functor; }
 
   protected:
@@ -107,7 +107,7 @@
   */
 template<typename Derived>
 template<typename CustomNullaryOp>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 #ifndef EIGEN_PARSED_BY_DOXYGEN
 const CwiseNullaryOp<CustomNullaryOp,typename DenseBase<Derived>::PlainObject>
 #else
@@ -138,7 +138,7 @@
   */
 template<typename Derived>
 template<typename CustomNullaryOp>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 #ifndef EIGEN_PARSED_BY_DOXYGEN
 const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
 #else
@@ -162,7 +162,7 @@
   */
 template<typename Derived>
 template<typename CustomNullaryOp>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 #ifndef EIGEN_PARSED_BY_DOXYGEN
 const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
 #else
@@ -187,7 +187,7 @@
   * \sa class CwiseNullaryOp
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)
 {
   return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_constant_op<Scalar>(value));
@@ -209,7 +209,7 @@
   * \sa class CwiseNullaryOp
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Constant(Index size, const Scalar& value)
 {
   return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value));
@@ -225,7 +225,7 @@
   * \sa class CwiseNullaryOp
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Constant(const Scalar& value)
 {
   EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
@@ -334,7 +334,7 @@
   * \sa setConstant(), Constant(), class CwiseNullaryOp
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void DenseBase<Derived>::fill(const Scalar& val)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
 {
   setConstant(val);
 }
@@ -344,7 +344,7 @@
   * \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes()
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Derived& DenseBase<Derived>::setConstant(const Scalar& val)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
 {
   return derived() = Constant(rows(), cols(), val);
 }
@@ -472,7 +472,7 @@
   * \sa Zero(), Zero(Index)
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Zero(Index rows, Index cols)
 {
   return Constant(rows, cols, Scalar(0));
@@ -495,7 +495,7 @@
   * \sa Zero(), Zero(Index,Index)
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Zero(Index size)
 {
   return Constant(size, Scalar(0));
@@ -512,7 +512,7 @@
   * \sa Zero(Index), Zero(Index,Index)
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Zero()
 {
   return Constant(Scalar(0));
@@ -628,7 +628,7 @@
   * \sa Ones(), Ones(Index), isOnes(), class Ones
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Ones(Index rows, Index cols)
 {
   return Constant(rows, cols, Scalar(1));
@@ -651,7 +651,7 @@
   * \sa Ones(), Ones(Index,Index), isOnes(), class Ones
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Ones(Index newSize)
 {
   return Constant(newSize, Scalar(1));
@@ -668,7 +668,7 @@
   * \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Ones()
 {
   return Constant(Scalar(1));
@@ -780,7 +780,7 @@
   * \sa Identity(), setIdentity(), isIdentity()
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const typename MatrixBase<Derived>::IdentityReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
 MatrixBase<Derived>::Identity(Index rows, Index cols)
 {
   return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>());
@@ -797,7 +797,7 @@
   * \sa Identity(Index,Index), setIdentity(), isIdentity()
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE  EIGEN_CONSTEXPR const typename MatrixBase<Derived>::IdentityReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
 MatrixBase<Derived>::Identity()
 {
   EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
@@ -901,7 +901,7 @@
   * \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
   return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i);
@@ -916,7 +916,7 @@
   * \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
   return BasisReturnType(SquareMatrixType::Identity(),i);
@@ -929,7 +929,7 @@
   * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
 { return Derived::Unit(0); }
 
 /** \returns an expression of the Y axis unit vector (0,1{,0}^*)
@@ -939,7 +939,7 @@
   * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
 { return Derived::Unit(1); }
 
 /** \returns an expression of the Z axis unit vector (0,0,1{,0}^*)
@@ -949,7 +949,7 @@
   * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
 { return Derived::Unit(2); }
 
 /** \returns an expression of the W axis unit vector (0,0,0,1)
@@ -959,7 +959,7 @@
   * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
 { return Derived::Unit(3); }
 
 /** \brief Set the coefficients of \c *this to the i-th unit (basis) vector
diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h
index 0900ab1..ff7d0b9 100644
--- a/Eigen/src/Core/CwiseUnaryOp.h
+++ b/Eigen/src/Core/CwiseUnaryOp.h
@@ -63,7 +63,7 @@
     typedef typename internal::ref_selector<XprType>::type XprTypeNested;
     typedef internal::remove_all_t<XprType> NestedExpression;
 
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
       : m_xpr(xpr), m_functor(func) {}
 
@@ -73,16 +73,16 @@
     Index cols() const EIGEN_NOEXCEPT { return m_xpr.cols(); }
 
     /** \returns the functor representing the unary operation */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const UnaryOp& functor() const { return m_functor; }
 
     /** \returns the nested expression */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const internal::remove_all_t<XprTypeNested>&
     nestedExpression() const { return m_xpr; }
 
     /** \returns the nested expression */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     internal::remove_all_t<XprTypeNested>&
     nestedExpression() { return m_xpr; }
 
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index a5b590a..6e17779 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -311,19 +311,19 @@
     CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);
 
     typedef Transpose<Derived> TransposeReturnType;
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     TransposeReturnType transpose();
     typedef Transpose<const Derived> ConstTransposeReturnType;
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     const ConstTransposeReturnType transpose() const;
     EIGEN_DEVICE_FUNC
     void transposeInPlace();
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static const ConstantReturnType
+    EIGEN_DEVICE_FUNC static const ConstantReturnType
     Constant(Index rows, Index cols, const Scalar& value);
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static const ConstantReturnType
+    EIGEN_DEVICE_FUNC static const ConstantReturnType
     Constant(Index size, const Scalar& value);
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static const ConstantReturnType
+    EIGEN_DEVICE_FUNC static const ConstantReturnType
     Constant(const Scalar& value);
 
     EIGEN_DEPRECATED EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
@@ -336,25 +336,25 @@
     EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
     LinSpaced(const Scalar& low, const Scalar& high);
 
-    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
     static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
     NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
-    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
     static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
     NullaryExpr(Index size, const CustomNullaryOp& func);
-    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
     static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
     NullaryExpr(const CustomNullaryOp& func);
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static const ConstantReturnType Zero(Index rows, Index cols);
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static const ConstantReturnType Zero(Index size);
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static const ConstantReturnType Zero();
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static const ConstantReturnType Ones(Index rows, Index cols);
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static const ConstantReturnType Ones(Index size);
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static const ConstantReturnType Ones();
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero();
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones();
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR void fill(const Scalar& value);
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Derived& setConstant(const Scalar& value);
+    EIGEN_DEVICE_FUNC void fill(const Scalar& value);
+    EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value);
     EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
     EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high);
     EIGEN_DEVICE_FUNC Derived& setZero();
@@ -392,7 +392,7 @@
       *
       * \warning Be careful with eval() and the auto C++ keyword, as detailed in this \link TopicPitfalls_auto_keyword page \endlink.
       */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE EvalReturnType eval() const
     {
       // Even though MSVC does not honor strong inlining when the return type
@@ -417,7 +417,7 @@
       *
       */
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     void swap(PlainObjectBase<OtherDerived>& other)
     {
       eigen_assert(rows()==other.rows() && cols()==other.cols());
@@ -432,26 +432,26 @@
     template<bool Enable> EIGEN_DEVICE_FUNC
     inline std::conditional_t<Enable,ForceAlignedAccess<Derived>,Derived&> forceAlignedAccessIf();
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Scalar sum() const;
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Scalar mean() const;
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Scalar trace() const;
+    EIGEN_DEVICE_FUNC Scalar sum() const;
+    EIGEN_DEVICE_FUNC Scalar mean() const;
+    EIGEN_DEVICE_FUNC Scalar trace() const;
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Scalar prod() const;
+    EIGEN_DEVICE_FUNC Scalar prod() const;
 
     template<int NaNPropagation>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR typename internal::traits<Derived>::Scalar minCoeff() const;
+    EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const;
     template<int NaNPropagation>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR typename internal::traits<Derived>::Scalar maxCoeff() const;
+    EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
 
 
     // By default, the fastest version with undefined NaN propagation semantics is
     // used.
     // TODO(rmlarsen): Replace with default template argument when we move to
     // c++11 or beyond.
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline typename internal::traits<Derived>::Scalar minCoeff() const {
+    EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Scalar minCoeff() const {
       return minCoeff<PropagateFast>();
     }
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline typename internal::traits<Derived>::Scalar maxCoeff() const {
+    EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Scalar maxCoeff() const {
       return maxCoeff<PropagateFast>();
     }
 
@@ -491,7 +491,7 @@
     }
   
     template<typename BinaryOp>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     Scalar redux(const BinaryOp& func) const;
 
     template<typename Visitor>
@@ -519,9 +519,9 @@
       return derived().coeff(0,0);
     }
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool all() const;
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool any() const;
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index count() const;
+    EIGEN_DEVICE_FUNC bool all() const;
+    EIGEN_DEVICE_FUNC bool any() const;
+    EIGEN_DEVICE_FUNC Index count() const;
 
     typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
     typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType;
@@ -536,10 +536,10 @@
     * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
     */
     //Code moved here due to a CUDA compiler bug
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline ConstRowwiseReturnType rowwise() const {
+    EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const {
       return ConstRowwiseReturnType(derived());
     }
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR RowwiseReturnType rowwise();
+    EIGEN_DEVICE_FUNC RowwiseReturnType rowwise();
 
     /** \returns a VectorwiseOp wrapper of *this broadcasting and partial reductions
     *
@@ -548,10 +548,10 @@
     *
     * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
     */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline ConstColwiseReturnType colwise() const {
+    EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const {
       return ConstColwiseReturnType(derived());
     }
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR ColwiseReturnType colwise();
+    EIGEN_DEVICE_FUNC ColwiseReturnType colwise();
 
     typedef CwiseNullaryOp<internal::scalar_random_op<Scalar>,PlainObject> RandomReturnType;
     static const RandomReturnType Random(Index rows, Index cols);
@@ -661,7 +661,7 @@
   protected:
     EIGEN_DEFAULT_COPY_CONSTRUCTOR(DenseBase)
     /** Default constructor. Do nothing. */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR DenseBase()
+    EIGEN_DEVICE_FUNC DenseBase()
     {
       /* Just checks for self-consistency of the flags.
        * Only do it when debugging Eigen, as this borders on paranoia and could slow compilation down
diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h
index 9608bbb..7f0bcf4 100644
--- a/Eigen/src/Core/DenseCoeffsBase.h
+++ b/Eigen/src/Core/DenseCoeffsBase.h
@@ -95,7 +95,7 @@
       *
       * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const
       */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
     {
       eigen_internal_assert(row >= 0 && row < rows()
@@ -114,7 +114,7 @@
       *
       * \sa operator()(Index,Index), operator[](Index)
       */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const
     {
       eigen_assert(row >= 0 && row < rows()
@@ -137,7 +137,7 @@
       * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const
       */
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType
     coeff(Index index) const
     {
@@ -156,7 +156,7 @@
       * z() const, w() const
       */
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType
     operator[](Index index) const
     {
@@ -176,7 +176,7 @@
       * z() const, w() const
       */
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType
     operator()(Index index) const
     {
@@ -186,13 +186,13 @@
 
     /** equivalent to operator[](0).  */
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType
     x() const { return (*this)[0]; }
 
     /** equivalent to operator[](1).  */
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType
     y() const
     {
@@ -202,7 +202,7 @@
 
     /** equivalent to operator[](2).  */
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType
     z() const
     {
@@ -212,7 +212,7 @@
 
     /** equivalent to operator[](3).  */
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType
     w() const
     {
@@ -348,7 +348,7 @@
       return internal::evaluator<Derived>(derived()).coeffRef(row,col);
     }
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     coeffRefByOuterInner(Index outer, Index inner)
     {
@@ -361,7 +361,7 @@
       * \sa operator[](Index)
       */
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     operator()(Index row, Index col)
     {
@@ -386,7 +386,7 @@
       * \sa operator[](Index), coeff(Index) const, coeffRef(Index,Index)
       */
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     coeffRef(Index index)
     {
@@ -403,7 +403,7 @@
       * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
       */
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     operator[](Index index)
     {
@@ -422,7 +422,7 @@
       * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
       */
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     operator()(Index index)
     {
@@ -432,13 +432,13 @@
 
     /** equivalent to operator[](0).  */
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     x() { return (*this)[0]; }
 
     /** equivalent to operator[](1).  */
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     y()
     {
@@ -448,7 +448,7 @@
 
     /** equivalent to operator[](2).  */
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     z()
     {
@@ -458,7 +458,7 @@
 
     /** equivalent to operator[](3).  */
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     w()
     {
diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h
index 61c4862..5e2763e 100644
--- a/Eigen/src/Core/DenseStorage.h
+++ b/Eigen/src/Core/DenseStorage.h
@@ -27,7 +27,7 @@
 struct constructor_without_unaligned_array_assert {};
 
 template<typename T, int Size>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC
 void check_static_allocation_size()
 {
   // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit
@@ -87,7 +87,7 @@
 {
   EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   plain_array()
   {
     EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);
@@ -106,7 +106,7 @@
 {
   EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   plain_array()
   {
     EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);
@@ -125,7 +125,7 @@
 {
   EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   plain_array()
   {
     EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);
@@ -144,7 +144,7 @@
 {
   EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   plain_array()
   {
     EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);
@@ -162,7 +162,7 @@
 struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
 {
   T array[1];
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR plain_array() = default;
+  EIGEN_DEVICE_FUNC plain_array() {}
   EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
 };
 
@@ -211,7 +211,7 @@
 {
     internal::plain_array<T,Size,Options_> m_data;
   public:
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR DenseStorage() {
+    EIGEN_DEVICE_FUNC DenseStorage() {
       EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
     }
     EIGEN_DEVICE_FUNC
@@ -241,27 +241,27 @@
     EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return Rows_;}
     EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) EIGEN_NOEXCEPT {return Cols_;}
     EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR void resize(Index,Index,Index) {}
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR const T *data() const { return m_data.array; }
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR T *data() { return m_data.array; }
+    EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
 };
 
 // null matrix
 template<typename T, int Rows_, int Cols_, int Options_> class DenseStorage<T, 0, Rows_, Cols_, Options_>
 {
   public:
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR DenseStorage() {}
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {}
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR DenseStorage(const DenseStorage&) {}
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR DenseStorage& operator=(const DenseStorage&) { return *this; }
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR DenseStorage(Index,Index,Index) {}
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR void swap(DenseStorage& ) {}
+    EIGEN_DEVICE_FUNC DenseStorage() {}
+    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {}
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {}
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }
+    EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {}
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {}
     EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return Rows_;}
     EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) EIGEN_NOEXCEPT {return Cols_;}
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR void conservativeResize(Index,Index,Index) {}
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR void resize(Index,Index,Index) {}
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR const T *data() const { return 0; }
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR T *data() { return 0; }
+    EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
+    EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
+    EIGEN_DEVICE_FUNC const T *data() const { return 0; }
+    EIGEN_DEVICE_FUNC T *data() { return 0; }
 };
 
 // more specializations for null matrices; these are necessary to resolve ambiguities
@@ -320,10 +320,10 @@
     internal::plain_array<T,Size,Options_> m_data;
     Index m_rows;
   public:
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR DenseStorage() : m_rows(0) {}
+    EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
     EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
       : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR DenseStorage(const DenseStorage& other)
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
       : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows)
     {
       internal::plain_array_helper::copy(other.m_data, m_rows * Cols_, m_data);
@@ -344,12 +344,12 @@
       internal::plain_array_helper::swap(m_data, m_rows * Cols_, other.m_data, other.m_rows * Cols_);
       numext::swap(m_rows, other.m_rows);
     }
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows(void) const EIGEN_NOEXCEPT {return m_rows;}
+    EIGEN_DEVICE_FUNC Index rows(void) const EIGEN_NOEXCEPT {return m_rows;}
     EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols(void) const EIGEN_NOEXCEPT {return Cols_;}
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR void resize(Index, Index rows, Index) { m_rows = rows; }
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR const T *data() const { return m_data.array; }
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR T *data() { return m_data.array; }
+    EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
+    EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; }
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
 };
 
 // dynamic-size matrix with fixed-size storage and fixed height
diff --git a/Eigen/src/Core/Diagonal.h b/Eigen/src/Core/Diagonal.h
index ce4d259..4af17dd 100644
--- a/Eigen/src/Core/Diagonal.h
+++ b/Eigen/src/Core/Diagonal.h
@@ -71,7 +71,7 @@
     typedef typename internal::dense_xpr_base<Diagonal>::type Base;
     EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index)
     {
       eigen_assert( a_index <= m_matrix.cols() && -a_index <= m_matrix.rows() );
@@ -79,7 +79,7 @@
 
     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     inline Index rows() const
     {
       return m_index.value()<0 ? numext::mini<Index>(m_matrix.cols(),m_matrix.rows()+m_index.value())
@@ -146,14 +146,14 @@
       return m_matrix.coeff(idx+rowOffset(), idx+colOffset());
     }
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     inline const internal::remove_all_t<typename MatrixType::Nested>&
     nestedExpression() const
     {
       return m_matrix;
     }
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     inline Index index() const
     {
       return m_index.value();
@@ -185,7 +185,7 @@
   *
   * \sa class Diagonal */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline typename MatrixBase<Derived>::DiagonalReturnType
+EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::DiagonalReturnType
 MatrixBase<Derived>::diagonal()
 {
   return DiagonalReturnType(derived());
@@ -193,7 +193,7 @@
 
 /** This is the const version of diagonal(). */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline
+EIGEN_DEVICE_FUNC inline
 const typename MatrixBase<Derived>::ConstDiagonalReturnType
 MatrixBase<Derived>::diagonal() const
 {
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index c48db0b..0c13192 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -28,7 +28,7 @@
   typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
   typedef typename conj_prod::result_type ResScalar;
   EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_STRONG_INLINE
   static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
   {
     return a.template binaryExpr<conj_prod>(b).sum();
@@ -41,7 +41,7 @@
   typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
   typedef typename conj_prod::result_type ResScalar;
   EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_STRONG_INLINE
   static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
   {
     return a.transpose().template binaryExpr<conj_prod>(b).sum();
@@ -64,7 +64,7 @@
 template<typename Derived>
 template<typename OtherDerived>
 EIGEN_DEVICE_FUNC
-EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_STRONG_INLINE
 typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
 MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
 {
@@ -90,7 +90,7 @@
   * \sa dot(), norm(), lpNorm()
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
 {
   return numext::real((*this).cwiseAbs2().sum());
 }
diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h
index 01a25bc..105488d 100644
--- a/Eigen/src/Core/EigenBase.h
+++ b/Eigen/src/Core/EigenBase.h
@@ -44,14 +44,14 @@
   typedef typename internal::traits<Derived>::StorageKind StorageKind;
 
   /** \returns a reference to the derived object */
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   Derived& derived() { return *static_cast<Derived*>(this); }
   /** \returns a const reference to the derived object */
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   const Derived& derived() const { return *static_cast<const Derived*>(this); }
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-  Derived& const_cast_derived() const
+  EIGEN_DEVICE_FUNC
+  inline Derived& const_cast_derived() const
   { return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }
   EIGEN_DEVICE_FUNC
   inline const Derived& const_derived() const
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index 37e9117..661a3c4 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -396,7 +396,7 @@
   */
 template<typename Derived>
 template<typename OtherDerived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 const Product<Derived, OtherDerived>
 MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
 {
@@ -439,7 +439,7 @@
   */
 template<typename Derived>
 template<typename OtherDerived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 const Product<Derived,OtherDerived,LazyProduct>
 MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
 {
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index cbb64d9..b67c4ed 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -214,10 +214,10 @@
 preinterpret(const Packet& a); /* { return reinterpret_cast<const Target&>(a); } */
 
 /** \internal \returns a + b (coeff-wise) */
-template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 padd(const Packet& a, const Packet& b) { return a+b; }
 // Avoid compiler warning for boolean algebra.
-template<> EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool
+template<> EIGEN_DEVICE_FUNC inline bool
 padd(const bool& a, const bool& b) { return a || b; }
 
 /** \internal \returns a packet version of \a *from, (un-aligned masked add)
@@ -230,14 +230,14 @@
 
 
 /** \internal \returns a - b (coeff-wise) */
-template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 psub(const Packet& a, const Packet& b) { return a-b; }
 
 /** \internal \returns -a (coeff-wise) */
-template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pnegate(const Packet& a) { return -a; }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool
+template<> EIGEN_DEVICE_FUNC inline bool
 pnegate(const bool& a) { return !a; }
 
 /** \internal \returns conj(a) (coeff-wise) */
@@ -245,14 +245,14 @@
 pconj(const Packet& a) { return numext::conj(a); }
 
 /** \internal \returns a * b (coeff-wise) */
-template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pmul(const Packet& a, const Packet& b) { return a*b; }
 // Avoid compiler warning for boolean algebra.
-template<> EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool
+template<> EIGEN_DEVICE_FUNC inline bool
 pmul(const bool& a, const bool& b) { return a && b; }
 
 /** \internal \returns a / b (coeff-wise) */
-template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pdiv(const Packet& a, const Packet& b) { return a/b; }
 
 // In the generic case, memset to all one bits.
@@ -475,7 +475,7 @@
 template<int NaNPropagation>
 struct pminmax_impl {
   template <typename Packet, typename Op>
-  static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Packet run(const Packet& a, const Packet& b, Op op) {
+  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
     return op(a,b);
   }
 };
@@ -520,25 +520,25 @@
 
 /** \internal \returns the min of \a a and \a b  (coeff-wise).
     If \a a or \b b is NaN, the return value is implementation defined. */
-template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pmin(const Packet& a, const Packet& b) { return numext::mini(a,b); }
 
 /** \internal \returns the min of \a a and \a b  (coeff-wise).
     NaNPropagation determines the NaN propagation semantics. */
 template <int NaNPropagation, typename Packet>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Packet pmin(const Packet& a, const Packet& b) {
+EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
   return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
 }
 
 /** \internal \returns the max of \a a and \a b  (coeff-wise)
     If \a a or \b b is NaN, the return value is implementation defined. */
-template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pmax(const Packet& a, const Packet& b) { return numext::maxi(a, b); }
 
 /** \internal \returns the max of \a a and \a b  (coeff-wise).
     NaNPropagation determines the NaN propagation semantics. */
 template <int NaNPropagation, typename Packet>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Packet pmax(const Packet& a, const Packet& b) {
+EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
   return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet,(pmax<Packet>)));
 }
 
@@ -1047,28 +1047,28 @@
 // FMA instructions.
 /** \internal \returns a * b + c (coeff-wise) */
 template <typename Packet>
-EIGEN_DEVICE_FUNC Packet pmadd(const Packet& a, const Packet& b,
+EIGEN_DEVICE_FUNC inline Packet pmadd(const Packet& a, const Packet& b,
                                       const Packet& c) {
   return padd(pmul(a, b), c);
 }
 
 /** \internal \returns a * b - c (coeff-wise) */
 template <typename Packet>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Packet pmsub(const Packet& a, const Packet& b,
+EIGEN_DEVICE_FUNC inline Packet pmsub(const Packet& a, const Packet& b,
                                       const Packet& c) {
   return psub(pmul(a, b), c);
 }
 
 /** \internal \returns -(a * b) + c (coeff-wise) */
 template <typename Packet>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Packet pnmadd(const Packet& a, const Packet& b,
+EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b,
                                        const Packet& c) {
   return padd(pnegate(pmul(a, b)), c);
 }
 
 /** \internal \returns -(a * b) - c (coeff-wise) */
 template <typename Packet>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Packet pnmsub(const Packet& a, const Packet& b,
+EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b,
                                        const Packet& c) {
   return psub(pnegate(pmul(a, b)), c);
 }
diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h
index fb4e817..56d1ff8 100644
--- a/Eigen/src/Core/Map.h
+++ b/Eigen/src/Core/Map.h
@@ -103,8 +103,8 @@
 
     typedef typename Base::PointerType PointerType;
     typedef PointerType PointerArgType;
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-    PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
+    EIGEN_DEVICE_FUNC
+    inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
 
     EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
     inline Index innerStride() const
@@ -127,8 +127,8 @@
       * \param dataPtr pointer to the array to map
       * \param stride optional Stride object, passing the strides.
       */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-    explicit Map(PointerArgType dataPtr, const StrideType& stride = StrideType())
+    EIGEN_DEVICE_FUNC
+    explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType())
       : Base(cast_to_pointer_type(dataPtr)), m_stride(stride)
     {
     }
@@ -152,8 +152,8 @@
       * \param cols the number of columns of the matrix expression
       * \param stride optional Stride object, passing the strides.
       */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-    Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType())
+    EIGEN_DEVICE_FUNC
+    inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType())
       : Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride)
     {
     }
diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h
index 5d7fee1..bf8c163 100644
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -101,33 +101,33 @@
       *
       * \sa innerStride(), outerStride()
       */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR const Scalar* data() const { return m_data; }
+    EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; }
 
     /** \copydoc PlainObjectBase::coeff(Index,Index) const */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-    const Scalar& coeff(Index rowId, Index colId) const
+    EIGEN_DEVICE_FUNC
+    inline const Scalar& coeff(Index rowId, Index colId) const
     {
       return m_data[colId * colStride() + rowId * rowStride()];
     }
 
     /** \copydoc PlainObjectBase::coeff(Index) const */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-    const Scalar& coeff(Index index) const
+    EIGEN_DEVICE_FUNC
+    inline const Scalar& coeff(Index index) const
     {
       EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
       return m_data[index * innerStride()];
     }
 
     /** \copydoc PlainObjectBase::coeffRef(Index,Index) const */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-    const Scalar& coeffRef(Index rowId, Index colId) const
+    EIGEN_DEVICE_FUNC
+    inline const Scalar& coeffRef(Index rowId, Index colId) const
     {
       return this->m_data[colId * colStride() + rowId * rowStride()];
     }
 
     /** \copydoc PlainObjectBase::coeffRef(Index) const */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-    const Scalar& coeffRef(Index index) const
+    EIGEN_DEVICE_FUNC
+    inline const Scalar& coeffRef(Index index) const
     {
       EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
       return this->m_data[index * innerStride()];
@@ -150,8 +150,8 @@
     }
 
     /** \internal Constructor for fixed size matrices or vectors */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-    explicit MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
+    EIGEN_DEVICE_FUNC
+    explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
     {
       EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
       checkSanity<Derived>();
@@ -171,8 +171,8 @@
     }
 
     /** \internal Constructor for dynamically sized matrices */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-    MapBase(PointerType dataPtr, Index rows, Index cols)
+    EIGEN_DEVICE_FUNC
+    inline MapBase(PointerType dataPtr, Index rows, Index cols)
             : m_data(dataPtr), m_rows(rows), m_cols(cols)
     {
       eigen_assert( (dataPtr == 0)
@@ -203,7 +203,7 @@
     }
 
     template<typename T>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     void checkSanity(std::enable_if_t<internal::traits<T>::Alignment==0,void*> = 0) const
     {}
 
@@ -253,8 +253,8 @@
                     const Scalar
                   > ScalarWithConstIfNotLvalue;
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-    const Scalar* data() const { return this->m_data; }
+    EIGEN_DEVICE_FUNC
+    inline const Scalar* data() const { return this->m_data; }
     EIGEN_DEVICE_FUNC
     inline ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error
 
@@ -286,9 +286,9 @@
                 (this->m_data + index * innerStride(), val);
     }
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR explicit MapBase(PointerType dataPtr) : Base(dataPtr) {}
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {}
+    EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
+    EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
+    EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {}
 
     EIGEN_DEVICE_FUNC
     Derived& operator=(const MapBase& other)
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 1c5fab0..af8b029 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -71,8 +71,8 @@
 struct real_default_impl
 {
   typedef typename NumTraits<Scalar>::Real RealScalar;
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-  static RealScalar run(const Scalar& x)
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
   {
     return x;
   }
@@ -82,8 +82,8 @@
 struct real_default_impl<Scalar,true>
 {
   typedef typename NumTraits<Scalar>::Real RealScalar;
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-  static RealScalar run(const Scalar& x)
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
   {
     using std::real;
     return real(x);
@@ -293,7 +293,7 @@
 template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
 struct conj_default_impl
 {
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   static inline Scalar run(const Scalar& x)
   {
     return x;
@@ -303,7 +303,7 @@
 template<typename Scalar>
 struct conj_default_impl<Scalar,true>
 {
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   static inline Scalar run(const Scalar& x)
   {
     using std::conj;
@@ -328,7 +328,7 @@
 struct abs2_impl_default
 {
   typedef typename NumTraits<Scalar>::Real RealScalar;
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   static inline RealScalar run(const Scalar& x)
   {
     return x*x;
@@ -339,7 +339,7 @@
 struct abs2_impl_default<Scalar, true> // IsComplex
 {
   typedef typename NumTraits<Scalar>::Real RealScalar;
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   static inline RealScalar run(const Scalar& x)
   {
     return x.real()*x.real() + x.imag()*x.imag();
@@ -350,8 +350,8 @@
 struct abs2_impl
 {
   typedef typename NumTraits<Scalar>::Real RealScalar;
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-  static RealScalar run(const Scalar& x)
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
   {
     return abs2_impl_default<Scalar,NumTraits<Scalar>::IsComplex>::run(x);
   }
@@ -433,8 +433,8 @@
 struct norm1_default_impl<Scalar,true>
 {
   typedef typename NumTraits<Scalar>::Real RealScalar;
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-  static RealScalar run(const Scalar& x)
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
   {
     EIGEN_USING_STD(abs);
     return abs(x.real()) + abs(x.imag());
@@ -480,8 +480,8 @@
 template<typename OldType, typename NewType, typename EnableIf = void>
 struct cast_impl
 {
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-  static NewType run(const OldType& x)
+  EIGEN_DEVICE_FUNC
+  static inline NewType run(const OldType& x)
   {
     return static_cast<NewType>(x);
   }
@@ -495,8 +495,8 @@
     !NumTraits<OldType>::IsComplex && NumTraits<NewType>::IsComplex
   >>
 {
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-  static NewType run(const OldType& x)
+  EIGEN_DEVICE_FUNC
+  static inline NewType run(const OldType& x)
   {
     typedef typename NumTraits<NewType>::Real NewReal;
     return static_cast<NewType>(static_cast<NewReal>(x));
@@ -506,8 +506,8 @@
 // here, for once, we're plainly returning NewType: we don't want cast to do weird things.
 
 template<typename OldType, typename NewType>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-NewType cast(const OldType& x)
+EIGEN_DEVICE_FUNC
+inline NewType cast(const OldType& x)
 {
   return cast_impl<OldType, NewType>::run(x);
 }
@@ -1134,7 +1134,7 @@
 
 #if (!defined(EIGEN_GPUCC) || defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC))
 template<typename T>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC
 EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
 {
   EIGEN_USING_STD(min)
@@ -1142,7 +1142,7 @@
 }
 
 template<typename T>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC
 EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
 {
   EIGEN_USING_STD(max)
@@ -1150,7 +1150,7 @@
 }
 #else
 template<typename T>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC
 EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
 {
   return y < x ? y : x;
@@ -1180,7 +1180,7 @@
 }
 
 template<typename T>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC
 EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
 {
   return x < y ? y : x;
@@ -1281,8 +1281,8 @@
 
 
 template<typename Scalar>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x)
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x)
 {
   return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);
 }
@@ -1330,22 +1330,22 @@
 }
 
 template<typename Scalar>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x)
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x)
 {
   return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x);
 }
 
 template<typename Scalar>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-EIGEN_MATHFUNC_RETVAL(sign, Scalar) sign(const Scalar& x)
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(sign, Scalar) sign(const Scalar& x)
 {
   return EIGEN_MATHFUNC_IMPL(sign, Scalar)::run(x);
 }
 
 template<typename Scalar>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x)
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x)
 {
   return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x);
 }
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index f65c7a6..23acd8a 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -221,7 +221,7 @@
       * remain row-vectors and vectors remain vectors.
       */
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase<OtherDerived>& other)
     {
       return Base::_set(other);
@@ -234,14 +234,14 @@
       * \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
       */
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase<OtherDerived> &other)
     {
       return Base::operator=(other);
     }
 
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue<OtherDerived>& func)
     {
       return Base::operator=(func);
@@ -257,7 +257,7 @@
       *
       * \sa resize(Index,Index)
       */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Matrix() : Base()
     {
       EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
@@ -269,7 +269,7 @@
       : Base(internal::constructor_without_unaligned_array_assert())
     { EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
 
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Matrix(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
       : Base(std::move(other)) {}
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -287,7 +287,7 @@
      * \sa Matrix(const std::initializer_list<std::initializer_list<Scalar>>&)
      */
     template <typename... ArgTypes>
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Matrix(const Scalar& a0, const Scalar& a1, const Scalar& a2,  const Scalar& a3, const ArgTypes&... args)
       : Base(a0, a1, a2, a3, args...) {}
 
@@ -312,7 +312,7 @@
       *
       * \sa Matrix(const Scalar& a0, const Scalar& a1, const Scalar& a2,  const Scalar& a3, const ArgTypes&... args)
       */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     explicit EIGEN_STRONG_INLINE Matrix(const std::initializer_list<std::initializer_list<Scalar>>& list) : Base(list) {}
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -400,7 +400,7 @@
 
 
     /** \brief Copy constructor */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other)
     { }
 
@@ -408,7 +408,7 @@
       * \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
       */
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)
       : Base(other.derived())
     { }
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index d5b78f3..85132cc 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -144,11 +144,11 @@
     // trouble with MSVC.
 
     template <typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator=(const DenseBase<OtherDerived>& other);
 
     template <typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     Derived& operator=(const EigenBase<OtherDerived>& other);
 
     template<typename OtherDerived>
@@ -156,19 +156,19 @@
     Derived& operator=(const ReturnByValue<OtherDerived>& other);
 
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator+=(const MatrixBase<OtherDerived>& other);
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator-=(const MatrixBase<OtherDerived>& other);
 
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     const Product<Derived,OtherDerived>
     operator*(const MatrixBase<OtherDerived> &other) const;
 
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     const Product<Derived,OtherDerived,LazyProduct>
     lazyProduct(const MatrixBase<OtherDerived> &other) const;
 
@@ -192,11 +192,11 @@
     operator*(const SkewSymmetricBase<SkewDerived> &skew) const;
 
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
     dot(const MatrixBase<OtherDerived>& other) const;
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR RealScalar squaredNorm() const;
+    EIGEN_DEVICE_FUNC RealScalar squaredNorm() const;
     EIGEN_DEVICE_FUNC RealScalar norm() const;
     RealScalar stableNorm() const;
     RealScalar blueNorm() const;
@@ -210,11 +210,11 @@
     EIGEN_DEVICE_FUNC void adjointInPlace();
 
     typedef Diagonal<Derived> DiagonalReturnType;
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     DiagonalReturnType diagonal();
 
     typedef Diagonal<const Derived> ConstDiagonalReturnType;
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     const ConstDiagonalReturnType diagonal() const;
 
     template<int Index>
@@ -234,10 +234,10 @@
     template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };
 
     template<unsigned int Mode>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     typename TriangularViewReturnType<Mode>::Type triangularView();
     template<unsigned int Mode>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
 
     template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
@@ -252,14 +252,14 @@
 
     const SparseView<Derived> sparseView(const Scalar& m_reference = Scalar(0),
                                          const typename NumTraits<Scalar>::Real& m_epsilon = NumTraits<Scalar>::dummy_precision()) const;
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static const IdentityReturnType Identity();
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static const IdentityReturnType Identity(Index rows, Index cols);
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static const BasisReturnType Unit(Index size, Index i);
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static const BasisReturnType Unit(Index i);
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static const BasisReturnType UnitX();
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static const BasisReturnType UnitY();
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static const BasisReturnType UnitZ();
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static const BasisReturnType UnitW();
+    EIGEN_DEVICE_FUNC static const IdentityReturnType Identity();
+    EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i);
+    EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i);
+    EIGEN_DEVICE_FUNC static const BasisReturnType UnitX();
+    EIGEN_DEVICE_FUNC static const BasisReturnType UnitY();
+    EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ();
+    EIGEN_DEVICE_FUNC static const BasisReturnType UnitW();
 
     EIGEN_DEVICE_FUNC
     const DiagonalWrapper<const Derived> asDiagonal() const;
@@ -292,7 +292,7 @@
       *          fuzzy comparison such as isApprox()
       * \sa isApprox(), operator!= */
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline bool operator==(const MatrixBase<OtherDerived>& other) const
+    EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase<OtherDerived>& other) const
     { return cwiseEqual(other).all(); }
 
     /** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other.
@@ -300,7 +300,7 @@
       *          fuzzy comparison such as isApprox()
       * \sa isApprox(), operator== */
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline bool operator!=(const MatrixBase<OtherDerived>& other) const
+    EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase<OtherDerived>& other) const
     { return cwiseNotEqual(other).any(); }
 
     NoAlias<Derived,Eigen::MatrixBase > EIGEN_DEVICE_FUNC noalias();
@@ -312,7 +312,7 @@
     template<bool Enable> inline const Derived& forceAlignedAccessIf() const { return derived(); }
     template<bool Enable> inline Derived& forceAlignedAccessIf() { return derived(); }
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Scalar trace() const;
+    EIGEN_DEVICE_FUNC Scalar trace() const;
 
     template<int p> EIGEN_DEVICE_FUNC RealScalar lpNorm() const;
 
@@ -321,10 +321,10 @@
 
     /** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix
       * \sa ArrayBase::matrix() */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR ArrayWrapper<Derived> array() { return ArrayWrapper<Derived>(derived()); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return ArrayWrapper<Derived>(derived()); }
     /** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix
       * \sa ArrayBase::matrix() */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const ArrayWrapper<const Derived> array() const { return ArrayWrapper<const Derived>(derived()); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const { return ArrayWrapper<const Derived>(derived()); }
 
 /////////// LU module ///////////
 
@@ -395,7 +395,7 @@
     };
     #endif // EIGEN_PARSED_BY_DOXYGEN
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
 #ifndef EIGEN_PARSED_BY_DOXYGEN
     inline typename cross_product_return_type<OtherDerived>::type
 #else
diff --git a/Eigen/src/Core/PartialReduxEvaluator.h b/Eigen/src/Core/PartialReduxEvaluator.h
index 818c7eb..693fc35 100644
--- a/Eigen/src/Core/PartialReduxEvaluator.h
+++ b/Eigen/src/Core/PartialReduxEvaluator.h
@@ -169,7 +169,7 @@
     Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
   };
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR explicit evaluator(const XprType xpr)
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr)
     : m_arg(xpr.nestedExpression()), m_functor(xpr.functor())
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : (TraversalSize==0 ? 1 : int(CostOpType::value)));
@@ -178,13 +178,13 @@
 
   typedef typename XprType::CoeffReturnType CoeffReturnType;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   const Scalar coeff(Index i, Index j) const
   {
     return coeff(Direction==Vertical ? j : i);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   const Scalar coeff(Index index) const
   {
     return m_functor(m_arg.template subVector<DirectionType(Direction)>(index));
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 5e32564..222eaf5 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -30,7 +30,7 @@
 
 template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
   template<typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   static EIGEN_ALWAYS_INLINE void run(Index, Index)
   {
   }
@@ -146,9 +146,9 @@
     EIGEN_STATIC_ASSERT((MaxColsAtCompileTime == ColsAtCompileTime || ColsAtCompileTime==Dynamic), INVALID_MATRIX_TEMPLATE_PARAMETERS)
     EIGEN_STATIC_ASSERT(((Options & (DontAlign|RowMajor)) == Options), INVALID_MATRIX_TEMPLATE_PARAMETERS)
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     Base& base() { return *static_cast<Base*>(this); }
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     const Base& base() const { return *static_cast<const Base*>(this); }
 
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
@@ -173,7 +173,7 @@
       * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
       *
       * See DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const for details. */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
     {
       return m_storage.data()[index];
@@ -183,7 +183,7 @@
       * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
       *
       * See DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index,Index) const for details. */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId)
     {
       if(Flags & RowMajorBit)
@@ -196,7 +196,7 @@
       * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
       *
       * See DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index) const for details. */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
     {
       return m_storage.data()[index];
@@ -204,7 +204,7 @@
 
     /** This is the const version of coeffRef(Index,Index) which is thus synonym of coeff(Index,Index).
       * It is provided for convenience. */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const
     {
       if(Flags & RowMajorBit)
@@ -256,11 +256,11 @@
     }
 
     /** \returns a const pointer to the data array of this matrix */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const Scalar *data() const
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const
     { return m_storage.data(); }
 
     /** \returns a pointer to the data array of this matrix */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Scalar *data()
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data()
     { return m_storage.data(); }
 
     /** Resizes \c *this to a \a rows x \a cols matrix.
@@ -279,7 +279,7 @@
       *
       * \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t)
       */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void resize(Index rows, Index cols)
     {
       eigen_assert(internal::check_implication(RowsAtCompileTime!=Dynamic, rows==RowsAtCompileTime)
@@ -362,7 +362,7 @@
       * remain row-vectors and vectors remain vectors.
       */
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
     {
       const OtherDerived& other = _other.derived();
@@ -484,7 +484,7 @@
     // by making all its constructor protected. See bug 1074.
   protected:
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE PlainObjectBase() : m_storage()
     {
 //       EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
@@ -501,7 +501,7 @@
     }
 #endif
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT
       : m_storage( std::move(other.m_storage) )
     {
@@ -515,7 +515,7 @@
     }
 
     /** Copy constructor */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other)
       : Base(), m_storage(other.m_storage) { }
     EIGEN_DEVICE_FUNC
@@ -586,7 +586,7 @@
 
     /** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase<OtherDerived> &other)
       : m_storage()
     {
@@ -619,7 +619,7 @@
       * \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
       */
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived> &other)
     {
       _resize_to_match(other);
@@ -639,9 +639,9 @@
       * \see class Map
       */
     ///@{
-    static EIGEN_CONSTEXPR ConstMapType Map(const Scalar* data)
+    static inline ConstMapType Map(const Scalar* data)
     { return ConstMapType(data); }
-    static EIGEN_CONSTEXPR MapType Map(Scalar* data)
+    static inline MapType Map(Scalar* data)
     { return MapType(data); }
     static inline ConstMapType Map(const Scalar* data, Index size)
     { return ConstMapType(data, size); }
@@ -741,7 +741,7 @@
       * remain row-vectors and vectors remain vectors.
       */
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase<OtherDerived>& other)
     {
       #ifdef EIGEN_NO_AUTOMATIC_RESIZING
@@ -771,7 +771,7 @@
     // aliasing is dealt once in internal::call_assignment
     // so at this stage we have to assume aliasing... and resising has to be done later.
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other)
     {
       internal::call_assignment(this->derived(), other.derived());
@@ -797,7 +797,7 @@
     }
 
     template<typename T0, typename T1>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, std::enable_if_t<Base::SizeAtCompileTime!=2,T0>* = 0)
     {
       const bool t0_is_integer_alike = internal::is_valid_index_type<T0>::value;
@@ -809,7 +809,7 @@
     }
 
     template<typename T0, typename T1>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, std::enable_if_t<Base::SizeAtCompileTime==2,T0>* = 0)
     {
       EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
@@ -818,7 +818,7 @@
     }
 
     template<typename T0, typename T1>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1,
                                     std::enable_if_t<    (!internal::is_same<Index,Scalar>::value)
                                                       && (internal::is_same<T0,Index>::value)
@@ -847,7 +847,7 @@
 
     // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type can be implicitly converted)
     template<typename T>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void _init1(const Scalar& val0, std::enable_if_t<Base::SizeAtCompileTime==1 && internal::is_convertible<T, Scalar>::value,T>* = 0)
     {
       EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
@@ -883,14 +883,14 @@
 
     // Initialize an arbitrary matrix from an object convertible to the Derived type.
     template<typename T>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void _init1(const Derived& other){
       this->_set_noalias(other);
     }
 
     // Initialize an arbitrary matrix from a generic Eigen expression
     template<typename T, typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void _init1(const EigenBase<OtherDerived>& other){
       this->derived() = other;
     }
@@ -947,7 +947,7 @@
       * of same type it is enough to swap the data pointers.
       */
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     void swap(DenseBase<OtherDerived> & other)
     {
       enum { SwapPointers = internal::is_same<Derived, OtherDerived>::value && Base::SizeAtCompileTime==Dynamic };
@@ -958,7 +958,7 @@
       * \brief const version forwarded to DenseBase::swap
       */
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     void swap(DenseBase<OtherDerived> const & other)
     { Base::swap(other.derived()); }
 
@@ -1083,7 +1083,7 @@
 template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
 struct matrix_swap_impl
 {
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE void run(MatrixTypeA& a, MatrixTypeB& b)
   {
     a.base().swap(b);
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 9ce0880..85842d1 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -92,7 +92,7 @@
     typedef internal::remove_all_t<LhsNested> LhsNestedCleaned;
     typedef internal::remove_all_t<RhsNested> RhsNestedCleaned;
 
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
     {
       eigen_assert(lhs.cols() == rhs.rows()
@@ -105,9 +105,9 @@
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
     Index cols() const EIGEN_NOEXCEPT { return m_rhs.cols(); }
 
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const LhsNestedCleaned& lhs() const { return m_lhs; }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const RhsNestedCleaned& rhs() const { return m_rhs; }
 
   protected:
@@ -169,7 +169,7 @@
 
   public:
 
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Scalar coeff(Index row, Index col) const
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const
     {
       EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
       eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
@@ -177,7 +177,7 @@
       return internal::evaluator<Derived>(derived()).coeff(row,col);
     }
 
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Scalar coeff(Index i) const
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index i) const
     {
       EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
       eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index c496e7c..9da2406 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -34,7 +34,7 @@
   typedef Product<Lhs, Rhs, Options> XprType;
   typedef product_evaluator<XprType> Base;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR explicit evaluator(const XprType& xpr) : Base(xpr) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
 };
 
 // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
@@ -139,7 +139,7 @@
   std::enable_if_t<(Options==DefaultProduct || Options==AliasFreeProduct)>>
 {
   typedef Product<Lhs,Rhs,Options> SrcXprType;
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
   {
     Index dstRows = src.rows();
@@ -314,7 +314,7 @@
   };
 
   template<typename Dst>
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
     internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
   }
@@ -397,7 +397,7 @@
   typedef typename Product<Lhs,Rhs>::Scalar Scalar;
 
   template<typename Dst>
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
     // Same as: dst.noalias() = lhs.lazyProduct(rhs);
     // but easier on the compiler side
@@ -497,7 +497,7 @@
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit product_evaluator(const XprType& xpr)
     : m_lhs(xpr.lhs()),
       m_rhs(xpr.rhs()),
@@ -603,7 +603,7 @@
                         && (int(InnerSize) % packet_traits<Scalar>::size == 0)
   };
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const CoeffReturnType coeff(Index row, Index col) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
   {
     return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
   }
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index 8a3bf5e..796e6c4 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -106,7 +106,7 @@
 
   typedef typename Evaluator::Scalar Scalar;
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func& func)
   {
     return func(redux_novec_unroller<Func, Evaluator, Start, HalfLength>::run(eval,func),
@@ -124,7 +124,7 @@
 
   typedef typename Evaluator::Scalar Scalar;
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func&)
   {
     return eval.coeffByOuterInner(outer, inner);
@@ -196,7 +196,7 @@
   typedef typename Evaluator::Scalar Scalar;
 
   template<typename XprType>
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
   Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
   {
     eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
@@ -217,7 +217,7 @@
   typedef redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
   typedef typename Evaluator::Scalar Scalar;
   template<typename XprType>
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
   Scalar run(const Evaluator &eval, const Func& func, const XprType& /*xpr*/)
   {
     return Base::run(eval,func);
@@ -360,7 +360,7 @@
   typedef internal::evaluator<XprType_> Base;
 public:
   typedef XprType_ XprType;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   explicit redux_evaluator(const XprType &xpr) : Base(xpr) {}
   
   typedef typename XprType::Scalar Scalar;
@@ -376,16 +376,16 @@
     SizeAtCompileTime = XprType::SizeAtCompileTime,
     InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime
   };
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
   { return Base::coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
-
+  
   template<int LoadMode, typename PacketType>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   PacketType packetByOuterInner(Index outer, Index inner) const
   { return Base::template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
-
+  
 };
 
 } // end namespace internal
@@ -406,7 +406,7 @@
   */
 template<typename Derived>
 template<typename Func>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR typename internal::traits<Derived>::Scalar
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::redux(const Func& func) const
 {
   eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
@@ -428,13 +428,13 @@
   */
 template<typename Derived>
 template<int NaNPropagation>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR typename internal::traits<Derived>::Scalar
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::minCoeff() const
 {
   return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar, NaNPropagation>());
 }
 
-/** \returns the maximum of all coefficients of \c *this.
+/** \returns the maximum of all coefficients of \c *this. 
   * In case \c *this contains NaN, NaNPropagation determines the behavior:
   *   NaNPropagation == PropagateFast : undefined
   *   NaNPropagation == PropagateNaN : result is NaN
@@ -443,7 +443,7 @@
   */
 template<typename Derived>
 template<int NaNPropagation>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR typename internal::traits<Derived>::Scalar
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::maxCoeff() const
 {
   return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar, NaNPropagation>());
@@ -456,7 +456,7 @@
   * \sa trace(), prod(), mean()
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR typename internal::traits<Derived>::Scalar
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::sum() const
 {
   if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
@@ -469,7 +469,7 @@
 * \sa trace(), prod(), sum()
 */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR typename internal::traits<Derived>::Scalar
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::mean() const
 {
 #ifdef __INTEL_COMPILER
@@ -490,7 +490,7 @@
   * \sa sum(), mean(), trace()
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR typename internal::traits<Derived>::Scalar
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::prod() const
 {
   if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
@@ -505,7 +505,7 @@
   * \sa diagonal(), sum()
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR typename internal::traits<Derived>::Scalar
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
 MatrixBase<Derived>::trace() const
 {
   return derived().diagonal().sum();
diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h
index 3948b8a..81de5f9 100644
--- a/Eigen/src/Core/Ref.h
+++ b/Eigen/src/Core/Ref.h
@@ -82,7 +82,7 @@
          : this->rows();
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR RefBase()
+  EIGEN_DEVICE_FUNC RefBase()
     : Base(0,RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime),
       // Stride<> does not allow default ctor for Dynamic strides, so let' initialize it with dummy values:
       m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime,
@@ -108,7 +108,7 @@
   // Returns true if construction is valid, false if there is a stride mismatch,
   // and fails if there is a size mismatch.
   template<typename Expression>
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool construct(Expression& expr)
+  EIGEN_DEVICE_FUNC bool construct(Expression& expr)
   {
     // Check matrix sizes.  If this is a compile-time vector, we do allow
     // implicitly transposing.
@@ -338,7 +338,7 @@
     EIGEN_DENSE_PUBLIC_INTERFACE(Ref)
 
     template<typename Derived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Ref(const DenseBase<Derived>& expr,
+    EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
                                  std::enable_if_t<bool(Traits::template match<Derived>::ScalarTypeMatch),Derived>* = 0)
     {
 //      std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n";
@@ -359,7 +359,7 @@
   protected:
 
     template<typename Expression>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR void construct(const Expression& expr,internal::true_type)
+    EIGEN_DEVICE_FUNC void construct(const Expression& expr,internal::true_type)
     {
       // Check if we can use the underlying expr's storage directly, otherwise call the copy version.
       if (!Base::construct(expr)) {
@@ -368,7 +368,7 @@
     }
 
     template<typename Expression>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR void construct(const Expression& expr, internal::false_type)
+    EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type)
     {
       internal::call_assignment_no_alias(m_object,expr,internal::assign_op<Scalar,Scalar>());
       Base::construct(m_object);
diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h
index 91f7c2f..f77eac9 100644
--- a/Eigen/src/Core/Solve.h
+++ b/Eigen/src/Core/Solve.h
@@ -67,15 +67,15 @@
   typedef typename internal::traits<Solve>::PlainObject PlainObject;
   typedef typename internal::traits<Solve>::StorageIndex StorageIndex;
 
-  EIGEN_CONSTEXPR Solve(const Decomposition &dec, const RhsType &rhs)
+  Solve(const Decomposition &dec, const RhsType &rhs)
     : m_dec(dec), m_rhs(rhs)
   {}
 
   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_dec.cols(); }
   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_rhs.cols(); }
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR const Decomposition& dec() const { return m_dec; }
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR const RhsType&       rhs() const { return m_rhs; }
+  EIGEN_DEVICE_FUNC const Decomposition& dec() const { return m_dec; }
+  EIGEN_DEVICE_FUNC const RhsType&       rhs() const { return m_rhs; }
 
 protected:
   const Decomposition &m_dec;
@@ -139,7 +139,7 @@
 struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense>
 {
   typedef Solve<DecType,RhsType> SrcXprType;
-  EIGEN_CONSTEXPR static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
   {
     Index dstRows = src.rows();
     Index dstCols = src.cols();
diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h
index a92b14b..71d6f85 100644
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -87,7 +87,7 @@
   typedef blas_traits<Lhs> LhsProductTraits;
   typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
 
-  static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR void run(const Lhs& lhs, Rhs& rhs)
+  static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)
   {
     add_const_on_value_type_t<ActualLhsType> actualLhs = LhsProductTraits::extract(lhs);
 
@@ -120,7 +120,7 @@
     DiagIndex  = IsLower ? LoopIndex : Size - LoopIndex - 1,
     StartIndex = IsLower ? 0         : DiagIndex+1
   };
-  static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR void run(const Lhs& lhs, Rhs& rhs)
+  static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)
   {
     if (LoopIndex>0)
       rhs.coeffRef(DiagIndex) -= lhs.row(DiagIndex).template segment<LoopIndex>(StartIndex).transpose()
@@ -135,12 +135,12 @@
 
 template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size>
 struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,true> {
-  static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR void run(const Lhs&, Rhs&) {}
+  static EIGEN_DEVICE_FUNC void run(const Lhs&, Rhs&) {}
 };
 
 template<typename Lhs, typename Rhs, int Mode>
 struct triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,CompleteUnrolling,1> {
-  static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR void run(const Lhs& lhs, Rhs& rhs)
+  static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)
   { triangular_solver_unroller<Lhs,Rhs,Mode,0,Rhs::SizeAtCompileTime>::run(lhs,rhs); }
 };
 
@@ -166,7 +166,7 @@
 #ifndef EIGEN_PARSED_BY_DOXYGEN
 template<typename MatrixType, unsigned int Mode>
 template<int Side, typename OtherDerived>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
+EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
 {
   OtherDerived& other = _other.const_cast_derived();
   eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) );
diff --git a/Eigen/src/Core/Stride.h b/Eigen/src/Core/Stride.h
index 3e1d85d..2832e80 100644
--- a/Eigen/src/Core/Stride.h
+++ b/Eigen/src/Core/Stride.h
@@ -61,7 +61,7 @@
     };
 
     /** Default constructor, for use when strides are fixed at compile time */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     Stride()
       : m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
     {
@@ -71,14 +71,14 @@
     }
 
     /** Constructor allowing to pass the strides at runtime */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     Stride(Index outerStride, Index innerStride)
       : m_outer(outerStride), m_inner(innerStride)
     {
     }
 
     /** Copy constructor */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     Stride(const Stride& other)
       : m_outer(other.outer()), m_inner(other.inner())
     {}
diff --git a/Eigen/src/Core/Swap.h b/Eigen/src/Core/Swap.h
index f5b5400..b2e7511 100644
--- a/Eigen/src/Core/Swap.h
+++ b/Eigen/src/Core/Swap.h
@@ -32,7 +32,7 @@
   typedef typename Base::DstXprType DstXprType;
   typedef swap_assign_op<Scalar> Functor;
   
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   generic_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)
     : Base(dst, src, func, dstExpr)
   {}
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index 45198e4..74650ef 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -62,7 +62,7 @@
     EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
     typedef internal::remove_all_t<MatrixType> NestedExpression;
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     explicit EIGEN_STRONG_INLINE Transpose(MatrixType& matrix) : m_matrix(matrix) {}
 
     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
@@ -73,12 +73,12 @@
     Index cols() const EIGEN_NOEXCEPT { return m_matrix.rows(); }
 
     /** \returns the nested expression */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const internal::remove_all_t<MatrixTypeNested>&
     nestedExpression() const { return m_matrix; }
 
     /** \returns the nested expression */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     std::remove_reference_t<MatrixTypeNested>&
     nestedExpression() { return m_matrix; }
 
@@ -127,9 +127,9 @@
     EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl)
 
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Index innerStride() const { return derived().nestedExpression().innerStride(); }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Index outerStride() const { return derived().nestedExpression().outerStride(); }
 
     typedef std::conditional_t<
@@ -138,9 +138,9 @@
               const Scalar
             > ScalarWithConstIfNotLvalue;
 
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const Scalar* data() const { return derived().nestedExpression().data(); }
 
     // FIXME: shall we keep the const version of coeffRef?
@@ -179,7 +179,7 @@
   *
   * \sa transposeInPlace(), adjoint() */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 typename DenseBase<Derived>::TransposeReturnType
 DenseBase<Derived>::transpose()
 {
@@ -192,7 +192,7 @@
   *
   * \sa transposeInPlace(), adjoint() */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 const typename DenseBase<Derived>::ConstTransposeReturnType
 DenseBase<Derived>::transpose() const
 {
@@ -402,7 +402,7 @@
 template<typename Scalar, bool DestIsTransposed, typename OtherDerived>
 struct check_transpose_aliasing_run_time_selector
 {
-  static EIGEN_CONSTEXPR bool run(const Scalar* dest, const OtherDerived& src)
+  static bool run(const Scalar* dest, const OtherDerived& src)
   {
     return (bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src));
   }
@@ -411,7 +411,7 @@
 template<typename Scalar, bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
 struct check_transpose_aliasing_run_time_selector<Scalar,DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
 {
-  static EIGEN_CONSTEXPR bool run(const Scalar* dest, const CwiseBinaryOp<BinOp,DerivedA,DerivedB>& src)
+  static bool run(const Scalar* dest, const CwiseBinaryOp<BinOp,DerivedA,DerivedB>& src)
   {
     return ((blas_traits<DerivedA>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src.lhs())))
         || ((blas_traits<DerivedB>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src.rhs())));
@@ -431,7 +431,7 @@
         >
 struct checkTransposeAliasing_impl
 {
-    static void EIGEN_CONSTEXPR run(const Derived& dst, const OtherDerived& other)
+    static void run(const Derived& dst, const OtherDerived& other)
     {
         eigen_assert((!check_transpose_aliasing_run_time_selector
                       <typename Derived::Scalar,blas_traits<Derived>::IsTransposed,OtherDerived>
@@ -445,12 +445,12 @@
 template<typename Derived, typename OtherDerived>
 struct checkTransposeAliasing_impl<Derived, OtherDerived, false>
 {
-    static EIGEN_CONSTEXPR void run(const Derived&, const OtherDerived&)
+    static void run(const Derived&, const OtherDerived&)
     {
     }
 };
 
-template<typename Dst, typename Src> EIGEN_CONSTEXPR
+template<typename Dst, typename Src>
 void check_for_aliasing(const Dst &dst, const Src &src)
 {
   if((!Dst::IsVectorAtCompileTime) && dst.rows()>1 && dst.cols()>1)
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index 4abc6f3..c1bd13a 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -53,7 +53,7 @@
     typedef DenseMatrixType DenseType;
     typedef Derived const& Nested;
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     inline TriangularBase() { eigen_assert(!((int(Mode) & int(UnitDiag)) && (int(Mode) & int(ZeroDiag)))); }
 
     EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
@@ -66,7 +66,7 @@
     inline Index innerStride() const EIGEN_NOEXCEPT { return derived().innerStride(); }
 
     // dummy resize function
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     void resize(Index rows, Index cols)
     {
       EIGEN_UNUSED_VARIABLE(rows);
@@ -102,9 +102,9 @@
     }
 
     #ifndef EIGEN_PARSED_BY_DOXYGEN
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     inline Derived& derived() { return *static_cast<Derived*>(this); }
     #endif // not EIGEN_PARSED_BY_DOXYGEN
 
@@ -216,7 +216,7 @@
       IsVectorAtCompileTime = false
     };
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix)
     {}
 
@@ -230,11 +230,11 @@
     inline Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); }
 
     /** \returns a const reference to the nested expression */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     const NestedExpression& nestedExpression() const { return m_matrix; }
 
     /** \returns a reference to the nested expression */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     NestedExpression& nestedExpression() { return m_matrix; }
 
     typedef TriangularView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
@@ -280,7 +280,7 @@
     }
 
     template<typename Other>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     inline const Solve<TriangularView, Other>
     solve(const MatrixBase<Other>& other) const
     { return Solve<TriangularView, Other>(*this, other.derived()); }
@@ -515,11 +515,11 @@
       * See TriangularView:solve() for the details.
       */
     template<int Side, typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     void solveInPlace(const MatrixBase<OtherDerived>& other) const;
 
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     void solveInPlace(const MatrixBase<OtherDerived>& other) const
     { return solveInPlace<OnTheLeft>(other); }
 
@@ -547,7 +547,7 @@
     }
 
     template<typename RhsType, typename DstType>
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void _solve_impl(const RhsType &rhs, DstType &dst) const {
       if(!internal::is_same_dense(dst,rhs))
         dst = rhs;
@@ -641,7 +641,7 @@
   */
 template<typename Derived>
 template<unsigned int Mode>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC
 typename MatrixBase<Derived>::template TriangularViewReturnType<Mode>::Type
 MatrixBase<Derived>::triangularView()
 {
@@ -651,7 +651,7 @@
 /** This is the const version of MatrixBase::triangularView() */
 template<typename Derived>
 template<unsigned int Mode>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC
 typename MatrixBase<Derived>::template ConstTriangularViewReturnType<Mode>::Type
 MatrixBase<Derived>::triangularView() const
 {
@@ -734,7 +734,7 @@
 {
   typedef TriangularView<MatrixType,Mode> XprType;
   typedef evaluator<internal::remove_all_t<MatrixType>> Base;
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   unary_evaluator(const XprType &xpr) : Base(xpr.nestedExpression()) {}
 };
 
@@ -770,8 +770,7 @@
   typedef typename Base::AssignmentTraits AssignmentTraits;
 
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
-  triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
+  EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
     : Base(dst, src, func, dstExpr)
   {}
 
@@ -785,14 +784,14 @@
   using Base::assignCoeff;
 #endif
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR void assignDiagonalCoeff(Index id)
+  EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id)
   {
          if(Mode==UnitDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(1));
     else if(Mode==ZeroDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(0));
     else if(Mode==0)                       Base::assignCoeff(id,id);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR void assignOppositeCoeff(Index row, Index col)
+  EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index row, Index col)
   {
     eigen_internal_assert(row!=col);
     if(SetOpposite)
@@ -801,7 +800,7 @@
 };
 
 template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType, typename Functor>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
 {
   typedef evaluator<DstXprType> DstEvaluatorType;
@@ -854,7 +853,7 @@
 template< typename DstXprType, typename SrcXprType, typename Functor>
 struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Dense>
 {
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
   {
     call_triangular_assignment_loop<SrcXprType::Mode, (int(SrcXprType::Mode) & int(SelfAdjoint)) == 0>(dst, src, func);
   }
@@ -863,7 +862,7 @@
 template< typename DstXprType, typename SrcXprType, typename Functor>
 struct Assignment<DstXprType, SrcXprType, Functor, Dense2Triangular>
 {
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
   {
     call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, func);
   }
@@ -884,7 +883,7 @@
 
   typedef typename Kernel::Scalar Scalar;
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   static inline void run(Kernel &kernel)
   {
     triangular_assignment_loop<Kernel, Mode, UnrollCount-1, SetOpposite>::run(kernel);
@@ -902,7 +901,7 @@
 template<typename Kernel, unsigned int Mode, bool SetOpposite>
 struct triangular_assignment_loop<Kernel, Mode, 0, SetOpposite>
 {
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   static inline void run(Kernel &) {}
 };
 
diff --git a/Eigen/src/Core/VectorBlock.h b/Eigen/src/Core/VectorBlock.h
index 77a5c33..ee28da1 100644
--- a/Eigen/src/Core/VectorBlock.h
+++ b/Eigen/src/Core/VectorBlock.h
@@ -74,7 +74,7 @@
 
     /** Dynamic-size constructor
       */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     VectorBlock(VectorType& vector, Index start, Index size)
       : Base(vector,
              IsColVector ? start : 0, IsColVector ? 0 : start,
@@ -83,7 +83,7 @@
 
     /** Fixed-size constructor
       */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     VectorBlock(VectorType& vector, Index start)
       : Base(vector, IsColVector ? start : 0, IsColVector ? 0 : start)
     { }
diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h
index 9692620..b004f76 100644
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -63,7 +63,7 @@
     typedef typename internal::dense_xpr_base<PartialReduxExpr>::type Base;
     EIGEN_DENSE_PUBLIC_INTERFACE(PartialReduxExpr)
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     explicit PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp())
       : m_matrix(mat), m_functor(func) {}
 
@@ -72,10 +72,10 @@
     EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
     Index cols() const EIGEN_NOEXCEPT { return (Direction==Horizontal ? 1 : m_matrix.cols()); }
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     typename MatrixType::Nested nestedExpression() const { return m_matrix; }
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     const MemberOp& functor() const { return m_functor; }
 
   protected:
@@ -93,7 +93,7 @@
     template<int Size> struct Cost { enum { value = COST }; };             \
     enum { Vectorizable = VECTORIZABLE };                                                   \
     template<typename XprType>                                                              \
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR                                   \
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                   \
     ResultType operator()(const XprType& mat) const                                         \
     { return mat.MEMBER(); }                                                                \
     BinaryOp binaryFunc() const { return BinaryOp(); }                                      \
@@ -265,11 +265,11 @@
     }
 
   public:
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     explicit inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {}
 
     /** \internal */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     inline const ExpressionType& _expression() const { return m_matrix; }
 
     #ifdef EIGEN_PARSED_BY_DOXYGEN
@@ -476,7 +476,7 @@
       * Output: \verbinclude PartialRedux_sum.out
       *
       * \sa DenseBase::sum() */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     const SumReturnType sum() const
     { return SumReturnType(_expression()); }
 
@@ -484,7 +484,7 @@
     * of each column (or row) of the referenced expression.
     *
     * \sa DenseBase::mean() */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     const MeanReturnType mean() const
     { return sum() / Scalar(Direction==Vertical?m_matrix.rows():m_matrix.cols()); }
 
@@ -493,7 +493,7 @@
       * This expression can be assigned to a vector with entries of type \c bool.
       *
       * \sa DenseBase::all() */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     const AllReturnType all() const
     { return AllReturnType(_expression()); }
 
@@ -502,7 +502,7 @@
       * This expression can be assigned to a vector with entries of type \c bool.
       *
       * \sa DenseBase::any() */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     const AnyReturnType any() const
     { return AnyReturnType(_expression()); }
 
@@ -515,7 +515,7 @@
       * Output: \verbinclude PartialRedux_count.out
       *
       * \sa DenseBase::count() */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     const CountReturnType count() const
     { return CountReturnType(_expression()); }
 
@@ -526,7 +526,7 @@
       * Output: \verbinclude PartialRedux_prod.out
       *
       * \sa DenseBase::prod() */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+    EIGEN_DEVICE_FUNC
     const ProdReturnType prod() const
     { return ProdReturnType(_expression()); }
 
@@ -760,7 +760,7 @@
   * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR typename DenseBase<Derived>::ColwiseReturnType
+EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::ColwiseReturnType
 DenseBase<Derived>::colwise()
 {
   return ColwiseReturnType(derived());
@@ -774,7 +774,7 @@
   * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
   */
 template<typename Derived>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR typename DenseBase<Derived>::RowwiseReturnType
+EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::RowwiseReturnType
 DenseBase<Derived>::rowwise()
 {
   return RowwiseReturnType(derived());
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 8f346f3..227e88a 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -212,6 +212,7 @@
     Vectorizable = 1,
     AlignedOnScalar = 1,
     HasCmp = 1,
+    HasDiv = 1,
     size=8
   };
 };
@@ -545,13 +546,19 @@
 
 template<> EIGEN_STRONG_INLINE Packet8f pdiv<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_div_ps(a,b); }
 template<> EIGEN_STRONG_INLINE Packet4d pdiv<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_div_pd(a,b); }
-template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, const Packet8i& /*b*/)
-{ eigen_assert(false && "packet integer division are not supported by AVX");
-  return pset1<Packet8i>(0);
+
+template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& a, const Packet8i& b)
+{
+#ifdef EIGEN_VECTORIZE_AVX512
+  return _mm512_cvttpd_epi32(_mm512_div_pd(_mm512_cvtepi32_pd(a), _mm512_cvtepi32_pd(b)));
+#else  
+  Packet4i lo = pdiv<Packet4i>(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0));
+  Packet4i hi = pdiv<Packet4i>(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1));
+  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
+#endif
 }
 
 #ifdef EIGEN_VECTORIZE_FMA
-
 template <>
 EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
   return _mm256_fmadd_ps(a, b, c);
@@ -1226,7 +1233,12 @@
 
 template<> EIGEN_STRONG_INLINE bool predux_any(const Packet8f& x)
 {
-  return _mm256_movemask_ps(x)!=0;
+  return _mm256_movemask_ps(x) != 0;
+}
+
+template<> EIGEN_STRONG_INLINE bool predux_any(const Packet8i& x)
+{
+  return _mm256_movemask_ps(_mm256_castsi256_ps(x)) != 0;
 }
 
 EIGEN_DEVICE_FUNC inline void
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 7b07149..5e9670c 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -178,6 +178,7 @@
     Vectorizable = 1,
     AlignedOnScalar = 1,
     HasCmp = 1,
+    HasDiv = 1,
     size=16
   };
 };
@@ -389,12 +390,21 @@
                                               const Packet16f& b) {
   return _mm512_div_ps(a, b);
 }
+
 template <>
 EIGEN_STRONG_INLINE Packet8d pdiv<Packet8d>(const Packet8d& a,
                                             const Packet8d& b) {
   return _mm512_div_pd(a, b);
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet16i pdiv<Packet16i>(const Packet16i& a,
+                                              const Packet16i& b) {
+  Packet8i q_lo = pdiv<Packet8i>(_mm512_extracti64x4_epi64(a, 0), _mm512_extracti64x4_epi64(b,0));
+  Packet8i q_hi = pdiv<Packet8i>(_mm512_extracti64x4_epi64(a, 1), _mm512_extracti64x4_epi64(b, 1));
+  return _mm512_inserti64x4(_mm512_castsi256_si512(q_lo), q_hi, 1);
+}
+
 #ifdef EIGEN_VECTORIZE_FMA
 template <>
 EIGEN_STRONG_INLINE Packet16f pmadd(const Packet16f& a, const Packet16f& b,
@@ -1378,7 +1388,11 @@
   return !_mm512_kortestz(tmp,tmp);
 }
 
-
+template<> EIGEN_STRONG_INLINE bool predux_any(const Packet16i& x)
+{
+  __mmask16 tmp = _mm512_test_epi32_mask(x,x);
+  return !_mm512_kortestz(tmp,tmp);
+}
 
 #define PACK_OUTPUT(OUTPUT, INPUT, INDEX, STRIDE) \
   EIGEN_INSERT_8f_INTO_16f(OUTPUT[INDEX], INPUT[INDEX], INPUT[INDEX + STRIDE]);
diff --git a/Eigen/src/Core/arch/AVX512/TrsmKernel.h b/Eigen/src/Core/arch/AVX512/TrsmKernel.h
index 94f1f5a..edd6ef3 100644
--- a/Eigen/src/Core/arch/AVX512/TrsmKernel.h
+++ b/Eigen/src/Core/arch/AVX512/TrsmKernel.h
@@ -28,12 +28,6 @@
 #define EIGEN_USE_AVX512_TRSM_L_KERNELS 0
 #endif
 
-#if defined(EIGEN_HAS_CXX17_IFCONSTEXPR)
-#define EIGEN_IF_CONSTEXPR(X) if constexpr (X)
-#else
-#define EIGEN_IF_CONSTEXPR(X) if (X)
-#endif
-
 // Need this for some std::min calls.
 #ifdef min
 #undef min
diff --git a/Eigen/src/Core/arch/Default/ConjHelper.h b/Eigen/src/Core/arch/Default/ConjHelper.h
index 81c2819..6b5afe3 100644
--- a/Eigen/src/Core/arch/Default/ConjHelper.h
+++ b/Eigen/src/Core/arch/Default/ConjHelper.h
@@ -47,16 +47,16 @@
 
 template<> struct conj_if<true> {
   template<typename T>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR T operator()(const T& x) const { return numext::conj(x); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const { return numext::conj(x); }
   template<typename T>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR T pconj(const T& x) const { return internal::pconj(x); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T pconj(const T& x) const { return internal::pconj(x); }
 };
 
 template<> struct conj_if<false> {
   template<typename T>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const T& operator()(const T& x) const { return x; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& operator()(const T& x) const { return x; }
   template<typename T>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const T& pconj(const T& x) const { return x; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& pconj(const T& x) const { return x; }
 };
 
 // Generic Implementation, assume scalars since the packet-version is
@@ -93,11 +93,11 @@
 struct conj_helper<Packet, Packet, ConjLhs, ConjRhs>
 {
   typedef Packet ResultType;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Packet pmadd(const Packet& x, const Packet& y, const Packet& c) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pmadd(const Packet& x, const Packet& y, const Packet& c) const
   { return Eigen::internal::pmadd(conj_if<ConjLhs>().pconj(x), conj_if<ConjRhs>().pconj(y), c); }
 
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Packet pmul(const Packet& x, const Packet& y) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pmul(const Packet& x, const Packet& y) const
   { return Eigen::internal::pmul(conj_if<ConjLhs>().pconj(x), conj_if<ConjRhs>().pconj(y)); }
 };
 
diff --git a/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h b/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h
index 6cd6edd..e49e394 100644
--- a/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h
@@ -43,15 +43,18 @@
 
 #if EIGEN_ARCH_ARM64
 
+#ifndef EIGEN_NEON_GEBP_NR
+#define EIGEN_NEON_GEBP_NR 8
+#endif
+
 template<>
 struct gebp_traits <float,float,false,false,Architecture::NEON,GEBPPacketFull>
  : gebp_traits<float,float,false,false,Architecture::Generic,GEBPPacketFull>
 {
   typedef float RhsPacket;
   typedef float32x4_t RhsPacketx4;
-
-  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
-  {
+  enum { nr = EIGEN_NEON_GEBP_NR };
+  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const {
     dest = *b;
   }
 
@@ -77,7 +80,6 @@
   {
     c = vfmaq_n_f32(c, a, b);
   }
-
   // NOTE: Template parameter inference failed when compiled with Android NDK:
   // "candidate template ignored: could not match 'FixedInt<N>' against 'Eigen::internal::FixedInt<0>".
 
@@ -94,9 +96,10 @@
   template<int LaneID>
   EIGEN_STRONG_INLINE void madd_helper(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c) const
   {
-    #if EIGEN_COMP_GNUC_STRICT && !(EIGEN_GNUC_AT_LEAST(9,0))
-    // workaround gcc issue https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89101
-    // vfmaq_laneq_f32 is implemented through a costly dup
+    #if EIGEN_COMP_GNUC_STRICT
+    // 1. workaround gcc issue https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89101
+    //    vfmaq_laneq_f32 is implemented through a costly dup, which was fixed in gcc9
+    // 2. workaround the gcc register split problem on arm64-neon
          if(LaneID==0)  asm("fmla %0.4s, %1.4s, %2.s[0]\n" : "+w" (c) : "w" (a), "w" (b) :  );
     else if(LaneID==1)  asm("fmla %0.4s, %1.4s, %2.s[1]\n" : "+w" (c) : "w" (a), "w" (b) :  );
     else if(LaneID==2)  asm("fmla %0.4s, %1.4s, %2.s[2]\n" : "+w" (c) : "w" (a), "w" (b) :  );
@@ -113,7 +116,7 @@
  : gebp_traits<double,double,false,false,Architecture::Generic>
 {
   typedef double RhsPacket;
-
+  enum { nr = EIGEN_NEON_GEBP_NR };
   struct RhsPacketx4 {
     float64x2_t B_0, B_1;
   };
@@ -163,9 +166,10 @@
   template <int LaneID>
   EIGEN_STRONG_INLINE void madd_helper(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c) const
   {
-    #if EIGEN_COMP_GNUC_STRICT && !(EIGEN_GNUC_AT_LEAST(9,0))
-    // workaround gcc issue https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89101
-    // vfmaq_laneq_f64 is implemented through a costly dup
+    #if EIGEN_COMP_GNUC_STRICT
+    // 1. workaround gcc issue https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89101
+    //    vfmaq_laneq_f64 is implemented through a costly dup, which was fixed in gcc9
+    // 2. workaround the gcc register split problem on arm64-neon
          if(LaneID==0)  asm("fmla %0.2d, %1.2d, %2.d[0]\n" : "+w" (c) : "w" (a), "w" (b.B_0) :  );
     else if(LaneID==1)  asm("fmla %0.2d, %1.2d, %2.d[1]\n" : "+w" (c) : "w" (a), "w" (b.B_0) :  );
     else if(LaneID==2)  asm("fmla %0.2d, %1.2d, %2.d[0]\n" : "+w" (c) : "w" (a), "w" (b.B_1) :  );
@@ -179,6 +183,76 @@
   }
 };
 
+#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+
+template<>
+struct gebp_traits <half,half,false,false,Architecture::NEON>
+ : gebp_traits<half,half,false,false,Architecture::Generic>
+{
+  typedef half RhsPacket;
+  typedef float16x4_t RhsPacketx4;
+  typedef float16x4_t PacketHalf;
+  enum { nr = EIGEN_NEON_GEBP_NR };
+
+  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
+  {
+    dest = *b;
+  }
+
+  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const
+  {
+    dest = vld1_f16((const __fp16 *)b);
+  }
+
+  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacket& dest) const
+  {
+    dest = *b;
+  }
+
+  EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const
+  {}
+
+  EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
+  {
+    loadRhs(b,dest);
+  }
+
+  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<0>&) const
+  {
+    c = vfmaq_n_f16(c, a, b);
+  }
+  EIGEN_STRONG_INLINE void madd(const PacketHalf& a, const RhsPacket& b, PacketHalf& c, RhsPacket& /*tmp*/, const FixedInt<0>&) const
+  {
+    c = vfma_n_f16(c, a, b);
+  }
+
+  // NOTE: Template parameter inference failed when compiled with Android NDK:
+  // "candidate template ignored: could not match 'FixedInt<N>' against 'Eigen::internal::FixedInt<0>".
+  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<0>&) const
+  { madd_helper<0>(a, b, c); }
+  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<1>&) const
+  { madd_helper<1>(a, b, c); }
+  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<2>&) const
+  { madd_helper<2>(a, b, c); }
+  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<3>&) const
+  { madd_helper<3>(a, b, c); }
+ private:
+  template<int LaneID>
+  EIGEN_STRONG_INLINE void madd_helper(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c) const
+  {
+    #if EIGEN_COMP_GNUC_STRICT
+    // 1. vfmaq_lane_f16 is implemented through a costly dup
+    // 2. workaround the gcc register split problem on arm64-neon
+         if(LaneID==0)  asm("fmla %0.8h, %1.8h, %2.h[0]\n" : "+w" (c) : "w" (a), "w" (b) :  );
+    else if(LaneID==1)  asm("fmla %0.8h, %1.8h, %2.h[1]\n" : "+w" (c) : "w" (a), "w" (b) :  );
+    else if(LaneID==2)  asm("fmla %0.8h, %1.8h, %2.h[2]\n" : "+w" (c) : "w" (a), "w" (b) :  );
+    else if(LaneID==3)  asm("fmla %0.8h, %1.8h, %2.h[3]\n" : "+w" (c) : "w" (a), "w" (b) :  );
+    #else
+    c = vfmaq_lane_f16(c, a, b, LaneID);
+    #endif
+  }
+};
+#endif // EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
 #endif // EIGEN_ARCH_ARM64
 
 }  // namespace internal
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 0fa4394..f942668 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -195,6 +195,7 @@
     Vectorizable = 1,
     AlignedOnScalar = 1,
     HasCmp = 1,
+    HasDiv=1,
     size=4,
 
     HasShift = 1,
@@ -369,6 +370,22 @@
 template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
 template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
 
+template <>
+EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a,
+                                            const Packet4i& b) {
+#ifdef EIGEN_VECTORIZE_AVX
+  return _mm256_cvttpd_epi32(
+      _mm256_div_pd(_mm256_cvtepi32_pd(a), _mm256_cvtepi32_pd(b)));
+#else
+  __m128i q_lo = _mm_cvttpd_epi32(_mm_div_pd(_mm_cvtepi32_pd(a), _mm_cvtepi32_pd(b)));
+  __m128i q_hi =
+      _mm_cvttpd_epi32(_mm_div_pd(_mm_cvtepi32_pd(vec4i_swizzle1(a, 2, 3, 0, 1)),
+                                 _mm_cvtepi32_pd(vec4i_swizzle1(b, 2, 3, 0, 1))));
+  return vec4i_swizzle1(_mm_unpacklo_epi32(q_lo, q_hi), 0, 2, 1, 3);
+#endif
+}
+
+
 // for some weird raisons, it has to be overloaded for packet of integers
 template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
 #ifdef EIGEN_VECTORIZE_FMA
@@ -1142,6 +1159,11 @@
   return _mm_movemask_ps(x) != 0x0;
 }
 
+template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4i& x)
+{
+  return _mm_movemask_ps(_mm_castsi128_ps(x)) != 0x0;
+}
+
 EIGEN_DEVICE_FUNC inline void
 ptranspose(PacketBlock<Packet4f,4>& kernel) {
   _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h
index 26fa251..c9d80e6 100644
--- a/Eigen/src/Core/functors/AssignmentFunctors.h
+++ b/Eigen/src/Core/functors/AssignmentFunctors.h
@@ -21,7 +21,8 @@
   *
   */
 template<typename DstScalar,typename SrcScalar> struct assign_op {
-  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; }
 
   template<int Alignment, typename Packet>
   EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
@@ -44,7 +45,8 @@
   *
   */
 template<typename DstScalar,typename SrcScalar> struct add_assign_op {
-  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a += b; }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a += b; }
 
   template<int Alignment, typename Packet>
   EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
@@ -135,7 +137,8 @@
   *
   */
 template<typename Scalar> struct swap_assign_op {
-  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
   {
 #ifdef EIGEN_GPUCC
     // FIXME is there some kind of cuda::swap?
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index 1a77a67..9b560e9 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -39,7 +39,7 @@
     EIGEN_SCALAR_BINARY_OP_PLUGIN
   }
 #endif
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; }
   template<typename Packet>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const
   { return internal::padd(a,b); }
@@ -58,7 +58,7 @@
 
 
 template<>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR bool scalar_sum_op<bool,bool>::operator() (const bool& a, const bool& b) const { return a || b; }
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool scalar_sum_op<bool,bool>::operator() (const bool& a, const bool& b) const { return a || b; }
 
 
 /** \internal
@@ -71,11 +71,11 @@
 {
   typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_product_op>::ReturnType result_type;
 #ifdef EIGEN_SCALAR_BINARY_OP_PLUGIN
-  EIGEN_CONSTEXPR scalar_product_op() {
+  scalar_product_op() {
     EIGEN_SCALAR_BINARY_OP_PLUGIN
   }
 #endif
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
   template<typename Packet>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const
   { return internal::pmul(a,b); }
@@ -93,7 +93,7 @@
 };
 
 template<>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR bool scalar_product_op<bool,bool>::operator() (const bool& a, const bool& b) const { return a && b; }
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool scalar_product_op<bool,bool>::operator() (const bool& a, const bool& b) const { return a && b; }
 
 
 /** \internal
@@ -111,7 +111,7 @@
 
   typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_conj_product_op>::ReturnType result_type;
 
-  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const
   { return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); }
 
   template<typename Packet>
@@ -135,7 +135,7 @@
 struct scalar_min_op : binary_op_base<LhsScalar,RhsScalar>
 {
   typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_min_op>::ReturnType result_type;
-  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const {
     return internal::pmin<NaNPropagation>(a, b);
   }
   template<typename Packet>
@@ -167,7 +167,7 @@
 struct scalar_max_op : binary_op_base<LhsScalar,RhsScalar>
 {
   typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_max_op>::ReturnType result_type;
-  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const {
     return internal::pmax<NaNPropagation>(a,b);
   }
   template<typename Packet>
@@ -218,7 +218,7 @@
 struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_EQ> : binary_op_base<LhsScalar,RhsScalar>
 {
   typedef bool result_type;
-  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a==b;}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a==b;}
   template<typename Packet>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const
   { return internal::pcmp_eq(a,b); }
@@ -363,7 +363,7 @@
     EIGEN_SCALAR_BINARY_OP_PLUGIN
   }
 #endif
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a - b; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a - b; }
   template<typename Packet>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
   { return internal::psub(a,b); }
@@ -376,6 +376,24 @@
   };
 };
 
+template <typename Packet, bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
+struct maybe_raise_div_by_zero {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Packet x) {
+    EIGEN_UNUSED_VARIABLE(x);
+  }
+};
+
+#ifndef EIGEN_GPU_COMPILE_PHASE
+template <typename Packet>
+struct maybe_raise_div_by_zero<Packet, true> {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Packet x) {
+    if (EIGEN_PREDICT_FALSE(predux_any(pcmp_eq(x, pzero(x))))) {
+      std::raise(SIGFPE);
+    }
+  }
+};
+#endif
+
 /** \internal
   * \brief Template functor to compute the quotient of two scalars
   *
@@ -390,10 +408,12 @@
     EIGEN_SCALAR_BINARY_OP_PLUGIN
   }
 #endif
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
   template<typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
-  { return internal::pdiv(a,b); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const {
+    maybe_raise_div_by_zero<Packet>::run(b);
+    return internal::pdiv(a,b);
+  }
 };
 template<typename LhsScalar,typename RhsScalar>
 struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h
index ec86aeb..e099d4a 100644
--- a/Eigen/src/Core/functors/NullaryFunctors.h
+++ b/Eigen/src/Core/functors/NullaryFunctors.h
@@ -18,9 +18,9 @@
 
 template<typename Scalar>
 struct scalar_constant_op {
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR scalar_constant_op(const Scalar& other) : m_other(other) { }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const Scalar operator() () const { return m_other; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() () const { return m_other; }
   template<typename PacketType>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp() const { return internal::pset1<PacketType>(m_other); }
   const Scalar m_other;
@@ -32,7 +32,7 @@
 
 template<typename Scalar> struct scalar_identity_op {
   template<typename IndexType>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const Scalar operator() (IndexType row, IndexType col) const { return row==col ? Scalar(1) : Scalar(0); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType row, IndexType col) const { return row==col ? Scalar(1) : Scalar(0); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_identity_op<Scalar> >
diff --git a/Eigen/src/Core/functors/StlFunctors.h b/Eigen/src/Core/functors/StlFunctors.h
index 7da5016..5971075 100644
--- a/Eigen/src/Core/functors/StlFunctors.h
+++ b/Eigen/src/Core/functors/StlFunctors.h
@@ -20,7 +20,7 @@
 template<typename T = void>
 struct equal_to {
   typedef bool result_type;
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool operator()(const T& lhs, const T& rhs) const {
+  EIGEN_DEVICE_FUNC bool operator()(const T& lhs, const T& rhs) const {
     return lhs == rhs;
   }
 };
@@ -28,7 +28,7 @@
 template<typename T = void>
 struct not_equal_to {
   typedef bool result_type;
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool operator()(const T& lhs, const T& rhs) const {
+  EIGEN_DEVICE_FUNC bool operator()(const T& lhs, const T& rhs) const {
     return lhs != rhs;
   }
 };
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index 39b7be7..8a84415 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -22,7 +22,7 @@
   * \sa class CwiseUnaryOp, MatrixBase::operator-
   */
 template<typename Scalar> struct scalar_opposite_op {
-  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
   template<typename Packet>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
   { return internal::pnegate(a); }
@@ -88,7 +88,7 @@
   */
 template<typename Scalar> struct scalar_abs2_op {
   typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+  EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); }
   template<typename Packet>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
@@ -154,7 +154,7 @@
 template<typename Scalar, typename NewType>
 struct scalar_cast_op {
   typedef NewType result_type;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const NewType operator() (const Scalar& a) const { return cast<Scalar, NewType>(a); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast<Scalar, NewType>(a); }
 };
 template<typename Scalar, typename NewType>
 struct functor_traits<scalar_cast_op<Scalar,NewType> >
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index b1a1277..179d41c 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -354,7 +354,7 @@
 template <typename RhsPacket, typename RhsPacketx4, int registers_taken>
 struct RhsPanelHelper {
  private:
-  static const int remaining_registers = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS - registers_taken;
+  static constexpr int remaining_registers = (std::max)(int(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS) - registers_taken, 0);
  public:
   typedef std::conditional_t<remaining_registers>=4, RhsPacketx4, RhsPacket> type;
 };
@@ -1070,6 +1070,7 @@
   typedef typename Traits::RhsPacketx4 RhsPacketx4;
 
   typedef typename RhsPanelHelper<RhsPacket, RhsPacketx4, 15>::type RhsPanel15;
+  typedef typename RhsPanelHelper<RhsPacket, RhsPacketx4, 27>::type RhsPanel27;
 
   typedef gebp_traits<RhsScalar,LhsScalar,ConjugateRhs,ConjugateLhs,Architecture::Target> SwappedTraits;
 
@@ -1215,13 +1216,140 @@
     int prefetch_res_offset, Index peeled_kc, Index pk, Index cols, Index depth, Index packet_cols4)
   {
     GEBPTraits traits;
-
+    Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
     // loops on each largest micro horizontal panel of lhs
     // (LhsProgress x depth)
     for(Index i=peelStart; i<peelEnd; i+=LhsProgress)
     {
+#if EIGEN_ARCH_ARM64
+      EIGEN_IF_CONSTEXPR(nr>=8) {
+      for(Index j2=0; j2<packet_cols8; j2+=8)
+      {
+        const LhsScalar* blA = &blockA[i*strideA+offsetA*(LhsProgress)];
+        prefetch(&blA[0]);
+
+        // gets res block as register
+        AccPacket C0, C1, C2, C3, C4, C5, C6, C7;
+        traits.initAcc(C0);
+        traits.initAcc(C1);
+        traits.initAcc(C2);
+        traits.initAcc(C3);
+        traits.initAcc(C4);
+        traits.initAcc(C5);
+        traits.initAcc(C6);
+        traits.initAcc(C7);
+
+        LinearMapper r0 = res.getLinearMapper(i, j2 + 0);
+        LinearMapper r1 = res.getLinearMapper(i, j2 + 1);
+        LinearMapper r2 = res.getLinearMapper(i, j2 + 2);
+        LinearMapper r3 = res.getLinearMapper(i, j2 + 3);
+        LinearMapper r4 = res.getLinearMapper(i, j2 + 4);
+        LinearMapper r5 = res.getLinearMapper(i, j2 + 5);
+        LinearMapper r6 = res.getLinearMapper(i, j2 + 6);
+        LinearMapper r7 = res.getLinearMapper(i, j2 + 7);
+        r0.prefetch(prefetch_res_offset);
+        r1.prefetch(prefetch_res_offset);
+        r2.prefetch(prefetch_res_offset);
+        r3.prefetch(prefetch_res_offset);
+        r4.prefetch(prefetch_res_offset);
+        r5.prefetch(prefetch_res_offset);
+        r6.prefetch(prefetch_res_offset);
+        r7.prefetch(prefetch_res_offset);
+        const RhsScalar* blB = &blockB[j2*strideB+offsetB*8];
+        prefetch(&blB[0]);
+
+        LhsPacket A0;
+        for(Index k=0; k<peeled_kc; k+=pk)
+        {
+            RhsPacketx4 rhs_panel;
+            RhsPacket T0;
+#define EIGEN_GEBGP_ONESTEP(K)                                                \
+            do {                                                              \
+                EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1pX8");    \
+                traits.loadLhs(&blA[(0 + 1 * K) * LhsProgress], A0);          \
+                traits.loadRhs(&blB[(0 + 8 * K) * RhsProgress], rhs_panel);   \
+                traits.madd(A0, rhs_panel, C0, T0, fix<0>);                   \
+                traits.updateRhs(&blB[(1 + 8 * K) * RhsProgress], rhs_panel); \
+                traits.madd(A0, rhs_panel, C1, T0, fix<1>);                   \
+                traits.updateRhs(&blB[(2 + 8 * K) * RhsProgress], rhs_panel); \
+                traits.madd(A0, rhs_panel, C2, T0, fix<2>);                   \
+                traits.updateRhs(&blB[(3 + 8 * K) * RhsProgress], rhs_panel); \
+                traits.madd(A0, rhs_panel, C3, T0, fix<3>);                   \
+                traits.loadRhs(&blB[(4 + 8 * K) * RhsProgress], rhs_panel);   \
+                traits.madd(A0, rhs_panel, C4, T0, fix<0>);                   \
+                traits.updateRhs(&blB[(5 + 8 * K) * RhsProgress], rhs_panel); \
+                traits.madd(A0, rhs_panel, C5, T0, fix<1>);                   \
+                traits.updateRhs(&blB[(6 + 8 * K) * RhsProgress], rhs_panel); \
+                traits.madd(A0, rhs_panel, C6, T0, fix<2>);                   \
+                traits.updateRhs(&blB[(7 + 8 * K) * RhsProgress], rhs_panel); \
+                traits.madd(A0, rhs_panel, C7, T0, fix<3>);                   \
+                EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX8");      \
+            } while (false)
+
+            EIGEN_ASM_COMMENT("begin gebp micro kernel 1pX8");
+
+            EIGEN_GEBGP_ONESTEP(0);
+            EIGEN_GEBGP_ONESTEP(1);
+            EIGEN_GEBGP_ONESTEP(2);
+            EIGEN_GEBGP_ONESTEP(3);
+            EIGEN_GEBGP_ONESTEP(4);
+            EIGEN_GEBGP_ONESTEP(5);
+            EIGEN_GEBGP_ONESTEP(6);
+            EIGEN_GEBGP_ONESTEP(7);
+
+            blB += pk*8*RhsProgress;
+            blA += pk*(1*LhsProgress);
+
+            EIGEN_ASM_COMMENT("end gebp micro kernel 1pX8");
+          }
+          // process remaining peeled loop
+          for(Index k=peeled_kc; k<depth; k++)
+          {
+            RhsPacketx4 rhs_panel;
+            RhsPacket T0;
+            EIGEN_GEBGP_ONESTEP(0);
+            blB += 8*RhsProgress;
+            blA += 1*LhsProgress;
+          }
+
+#undef EIGEN_GEBGP_ONESTEP
+
+          ResPacket R0, R1;
+          ResPacket alphav = pset1<ResPacket>(alpha);
+
+          R0 = r0.template loadPacket<ResPacket>(0);
+          R1 = r1.template loadPacket<ResPacket>(0);
+          traits.acc(C0, alphav, R0);
+          traits.acc(C1, alphav, R1);
+          r0.storePacket(0, R0);
+          r1.storePacket(0, R1);
+
+          R0 = r2.template loadPacket<ResPacket>(0);
+          R1 = r3.template loadPacket<ResPacket>(0);
+          traits.acc(C2,  alphav, R0);
+          traits.acc(C3,  alphav, R1);
+          r2.storePacket(0, R0);
+          r3.storePacket(0, R1);
+
+          R0 = r4.template loadPacket<ResPacket>(0);
+          R1 = r5.template loadPacket<ResPacket>(0);
+          traits.acc(C4,  alphav, R0);
+          traits.acc(C5,  alphav, R1);
+          r4.storePacket(0, R0);
+          r5.storePacket(0, R1);
+
+          R0 = r6.template loadPacket<ResPacket>(0);
+          R1 = r7.template loadPacket<ResPacket>(0);
+          traits.acc(C6, alphav, R0);
+          traits.acc(C7, alphav, R1);
+          r6.storePacket(0, R0);
+          r7.storePacket(0, R1);
+      }
+      }
+#endif
+      
       // loops on each largest micro vertical panel of rhs (depth * nr)
-      for(Index j2=0; j2<packet_cols4; j2+=nr)
+      for(Index j2=packet_cols8; j2<packet_cols4; j2+=4)
       {
         // We select a LhsProgress x nr micro block of res
         // which is entirely stored into 1 x nr registers.
@@ -1257,7 +1385,7 @@
         r3.prefetch(prefetch_res_offset);
 
         // performs "inner" products
-        const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+        const RhsScalar* blB = &blockB[j2*strideB+offsetB*4];
         prefetch(&blB[0]);
         LhsPacket A0, A1;
 
@@ -1415,6 +1543,7 @@
     if(strideB==-1) strideB = depth;
     conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
     Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
+    Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
     const Index peeled_mc3 = mr>=3*Traits::LhsProgress ? (rows/(3*LhsProgress))*(3*LhsProgress) : 0;
     const Index peeled_mc2 = mr>=2*Traits::LhsProgress ? peeled_mc3+((rows-peeled_mc3)/(2*LhsProgress))*(2*LhsProgress) : 0;
     const Index peeled_mc1 = mr>=1*Traits::LhsProgress ? peeled_mc2+((rows-peeled_mc2)/(1*LhsProgress))*(1*LhsProgress) : 0;
@@ -1443,7 +1572,220 @@
       for(Index i1=0; i1<peeled_mc3; i1+=actual_panel_rows)
       {
         const Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc3);
-        for(Index j2=0; j2<packet_cols4; j2+=nr)
+#if EIGEN_ARCH_ARM64
+        EIGEN_IF_CONSTEXPR(nr>=8) {
+        for(Index j2=0; j2<packet_cols8; j2+=8)
+        {
+          for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
+          {
+            const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*LhsProgress)];
+            prefetch(&blA[0]);
+            // gets res block as register
+            AccPacket C0, C1, C2, C3, C4, C5, C6, C7,
+                      C8, C9, C10, C11, C12, C13, C14, C15,
+                      C16, C17, C18, C19, C20, C21, C22, C23;
+            traits.initAcc(C0);  traits.initAcc(C1);  traits.initAcc(C2);  traits.initAcc(C3);
+            traits.initAcc(C4);  traits.initAcc(C5);  traits.initAcc(C6);  traits.initAcc(C7);
+            traits.initAcc(C8);  traits.initAcc(C9);  traits.initAcc(C10); traits.initAcc(C11);
+            traits.initAcc(C12);  traits.initAcc(C13);  traits.initAcc(C14);  traits.initAcc(C15);
+            traits.initAcc(C16);  traits.initAcc(C17);  traits.initAcc(C18);  traits.initAcc(C19);
+            traits.initAcc(C20);  traits.initAcc(C21);  traits.initAcc(C22); traits.initAcc(C23);
+
+            LinearMapper r0 = res.getLinearMapper(i, j2 + 0);
+            LinearMapper r1 = res.getLinearMapper(i, j2 + 1);
+            LinearMapper r2 = res.getLinearMapper(i, j2 + 2);
+            LinearMapper r3 = res.getLinearMapper(i, j2 + 3);
+            LinearMapper r4 = res.getLinearMapper(i, j2 + 4);
+            LinearMapper r5 = res.getLinearMapper(i, j2 + 5);
+            LinearMapper r6 = res.getLinearMapper(i, j2 + 6);
+            LinearMapper r7 = res.getLinearMapper(i, j2 + 7);
+
+            r0.prefetch(0);
+            r1.prefetch(0);
+            r2.prefetch(0);
+            r3.prefetch(0);
+            r4.prefetch(0);
+            r5.prefetch(0);
+            r6.prefetch(0);
+            r7.prefetch(0);
+
+            // performs "inner" products
+            const RhsScalar* blB = &blockB[j2*strideB+offsetB*8];
+            prefetch(&blB[0]);
+            LhsPacket A0, A1;
+            for(Index k=0; k<peeled_kc; k+=pk)
+            {
+              EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX8");
+              // 27 registers are taken (24 for acc, 3 for lhs).
+              RhsPanel27 rhs_panel;
+              RhsPacket T0;
+              LhsPacket A2;
+            #if EIGEN_COMP_GNUC_STRICT && EIGEN_ARCH_ARM64 && defined(EIGEN_VECTORIZE_NEON) && !(EIGEN_GNUC_AT_LEAST(9,0))
+            // see http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1633
+            // without this workaround A0, A1, and A2 are loaded in the same register,
+            // which is not good for pipelining
+            #define EIGEN_GEBP_3Px8_REGISTER_ALLOC_WORKAROUND __asm__  ("" : "+w,m" (A0), "+w,m" (A1), "+w,m" (A2));
+            #else
+            #define EIGEN_GEBP_3Px8_REGISTER_ALLOC_WORKAROUND
+            #endif
+
+#define EIGEN_GEBP_ONESTEP(K)                                                         \
+            do {                                                                      \
+                EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX8");            \
+                traits.loadLhs(&blA[(0 + 3 * K) * LhsProgress], A0);                  \
+                traits.loadLhs(&blA[(1 + 3 * K) * LhsProgress], A1);                  \
+                traits.loadLhs(&blA[(2 + 3 * K) * LhsProgress], A2);                  \
+                EIGEN_GEBP_3Px8_REGISTER_ALLOC_WORKAROUND                             \
+                traits.loadRhs(blB + (0 + 8 * K) * Traits::RhsProgress, rhs_panel);   \
+                traits.madd(A0, rhs_panel, C0, T0, fix<0>);                           \
+                traits.madd(A1, rhs_panel, C8, T0, fix<0>);                           \
+                traits.madd(A2, rhs_panel, C16, T0, fix<0>);                          \
+                traits.updateRhs(blB + (1 + 8 * K) * Traits::RhsProgress, rhs_panel); \
+                traits.madd(A0, rhs_panel, C1, T0, fix<1>);                           \
+                traits.madd(A1, rhs_panel, C9, T0, fix<1>);                           \
+                traits.madd(A2, rhs_panel, C17, T0, fix<1>);                          \
+                traits.updateRhs(blB + (2 + 8 * K) * Traits::RhsProgress, rhs_panel); \
+                traits.madd(A0, rhs_panel, C2, T0, fix<2>);                           \
+                traits.madd(A1, rhs_panel, C10, T0, fix<2>);                          \
+                traits.madd(A2, rhs_panel, C18, T0, fix<2>);                          \
+                traits.updateRhs(blB + (3 + 8 * K) * Traits::RhsProgress, rhs_panel); \
+                traits.madd(A0, rhs_panel, C3, T0, fix<3>);                           \
+                traits.madd(A1, rhs_panel, C11, T0, fix<3>);                          \
+                traits.madd(A2, rhs_panel, C19, T0, fix<3>);                          \
+                traits.loadRhs(blB + (4 + 8 * K) * Traits::RhsProgress, rhs_panel);   \
+                traits.madd(A0, rhs_panel, C4, T0, fix<0>);                           \
+                traits.madd(A1, rhs_panel, C12, T0, fix<0>);                          \
+                traits.madd(A2, rhs_panel, C20, T0, fix<0>);                          \
+                traits.updateRhs(blB + (5 + 8 * K) * Traits::RhsProgress, rhs_panel); \
+                traits.madd(A0, rhs_panel, C5, T0, fix<1>);                           \
+                traits.madd(A1, rhs_panel, C13, T0, fix<1>);                          \
+                traits.madd(A2, rhs_panel, C21, T0, fix<1>);                          \
+                traits.updateRhs(blB + (6 + 8 * K) * Traits::RhsProgress, rhs_panel); \
+                traits.madd(A0, rhs_panel, C6, T0, fix<2>);                           \
+                traits.madd(A1, rhs_panel, C14, T0, fix<2>);                          \
+                traits.madd(A2, rhs_panel, C22, T0, fix<2>);                          \
+                traits.updateRhs(blB + (7 + 8 * K) * Traits::RhsProgress, rhs_panel); \
+                traits.madd(A0, rhs_panel, C7, T0, fix<3>);                           \
+                traits.madd(A1, rhs_panel, C15, T0, fix<3>);                          \
+                traits.madd(A2, rhs_panel, C23, T0, fix<3>);                          \
+                EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX8");              \
+            } while (false)
+
+                EIGEN_GEBP_ONESTEP(0);
+                EIGEN_GEBP_ONESTEP(1);
+                EIGEN_GEBP_ONESTEP(2);
+                EIGEN_GEBP_ONESTEP(3);
+                EIGEN_GEBP_ONESTEP(4);
+                EIGEN_GEBP_ONESTEP(5);
+                EIGEN_GEBP_ONESTEP(6);
+                EIGEN_GEBP_ONESTEP(7);
+
+                blB += pk * 8 * RhsProgress;
+                blA += pk * 3 * Traits::LhsProgress;
+                EIGEN_ASM_COMMENT("end gebp micro kernel 3pX8");
+            }
+
+            // process remaining peeled loop
+            for (Index k = peeled_kc; k < depth; k++)
+            {
+
+                RhsPanel27 rhs_panel;
+                RhsPacket T0;
+                LhsPacket A2;
+                EIGEN_GEBP_ONESTEP(0);
+                blB += 8 * RhsProgress;
+                blA += 3 * Traits::LhsProgress;
+            }
+
+            #undef EIGEN_GEBP_ONESTEP
+
+            ResPacket R0, R1, R2;
+            ResPacket alphav = pset1<ResPacket>(alpha);
+
+            R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+            R1 = r0.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+            R2 = r0.template loadPacket<ResPacket>(2 * Traits::ResPacketSize);
+            traits.acc(C0, alphav, R0);
+            traits.acc(C8, alphav, R1);
+            traits.acc(C16, alphav, R2);
+            r0.storePacket(0 * Traits::ResPacketSize, R0);
+            r0.storePacket(1 * Traits::ResPacketSize, R1);
+            r0.storePacket(2 * Traits::ResPacketSize, R2);
+
+            R0 = r1.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+            R1 = r1.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+            R2 = r1.template loadPacket<ResPacket>(2 * Traits::ResPacketSize);
+            traits.acc(C1, alphav, R0);
+            traits.acc(C9, alphav, R1);
+            traits.acc(C17, alphav, R2);
+            r1.storePacket(0 * Traits::ResPacketSize, R0);
+            r1.storePacket(1 * Traits::ResPacketSize, R1);
+            r1.storePacket(2 * Traits::ResPacketSize, R2);
+
+            R0 = r2.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+            R1 = r2.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+            R2 = r2.template loadPacket<ResPacket>(2 * Traits::ResPacketSize);
+            traits.acc(C2, alphav, R0);
+            traits.acc(C10, alphav, R1);
+            traits.acc(C18, alphav, R2);
+            r2.storePacket(0 * Traits::ResPacketSize, R0);
+            r2.storePacket(1 * Traits::ResPacketSize, R1);
+            r2.storePacket(2 * Traits::ResPacketSize, R2);
+
+            R0 = r3.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+            R1 = r3.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+            R2 = r3.template loadPacket<ResPacket>(2 * Traits::ResPacketSize);
+            traits.acc(C3, alphav, R0);
+            traits.acc(C11, alphav, R1);
+            traits.acc(C19, alphav, R2);
+            r3.storePacket(0 * Traits::ResPacketSize, R0);
+            r3.storePacket(1 * Traits::ResPacketSize, R1);
+            r3.storePacket(2 * Traits::ResPacketSize, R2);
+
+            R0 = r4.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+            R1 = r4.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+            R2 = r4.template loadPacket<ResPacket>(2 * Traits::ResPacketSize);
+            traits.acc(C4, alphav, R0);
+            traits.acc(C12, alphav, R1);
+            traits.acc(C20, alphav, R2);
+            r4.storePacket(0 * Traits::ResPacketSize, R0);
+            r4.storePacket(1 * Traits::ResPacketSize, R1);
+            r4.storePacket(2 * Traits::ResPacketSize, R2);
+
+            R0 = r5.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+            R1 = r5.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+            R2 = r5.template loadPacket<ResPacket>(2 * Traits::ResPacketSize);
+            traits.acc(C5, alphav, R0);
+            traits.acc(C13, alphav, R1);
+            traits.acc(C21, alphav, R2);
+            r5.storePacket(0 * Traits::ResPacketSize, R0);
+            r5.storePacket(1 * Traits::ResPacketSize, R1);
+            r5.storePacket(2 * Traits::ResPacketSize, R2);
+
+            R0 = r6.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+            R1 = r6.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+            R2 = r6.template loadPacket<ResPacket>(2 * Traits::ResPacketSize);
+            traits.acc(C6, alphav, R0);
+            traits.acc(C14, alphav, R1);
+            traits.acc(C22, alphav, R2);
+            r6.storePacket(0 * Traits::ResPacketSize, R0);
+            r6.storePacket(1 * Traits::ResPacketSize, R1);
+            r6.storePacket(2 * Traits::ResPacketSize, R2);
+
+            R0 = r7.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+            R1 = r7.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+            R2 = r7.template loadPacket<ResPacket>(2 * Traits::ResPacketSize);
+            traits.acc(C7, alphav, R0);
+            traits.acc(C15, alphav, R1);
+            traits.acc(C23, alphav, R2);
+            r7.storePacket(0 * Traits::ResPacketSize, R0);
+            r7.storePacket(1 * Traits::ResPacketSize, R1);
+            r7.storePacket(2 * Traits::ResPacketSize, R2);
+          }
+        }
+        }
+#endif
+        for(Index j2=packet_cols8; j2<packet_cols4; j2+=4)
         {
           for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
           {
@@ -1473,14 +1815,14 @@
           r3.prefetch(0);
 
           // performs "inner" products
-          const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+          const RhsScalar* blB = &blockB[j2*strideB+offsetB*4];
           prefetch(&blB[0]);
           LhsPacket A0, A1;
 
           for(Index k=0; k<peeled_kc; k+=pk)
           {
             EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX4");
-            // 15 registers are taken (12 for acc, 2 for lhs).
+            // 15 registers are taken (12 for acc, 3 for lhs).
             RhsPanel15 rhs_panel;
             RhsPacket T0;
             LhsPacket A2;
@@ -1689,7 +2031,173 @@
       for(Index i1=peeled_mc3; i1<peeled_mc2; i1+=actual_panel_rows)
       {
         Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc2);
-        for(Index j2=0; j2<packet_cols4; j2+=nr)
+#if EIGEN_ARCH_ARM64
+        EIGEN_IF_CONSTEXPR(nr>=8) {
+        for(Index j2=0; j2<packet_cols8; j2+=8)
+        {
+          for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
+          {
+            const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
+            prefetch(&blA[0]);
+
+            AccPacket C0, C1, C2, C3, C4, C5, C6, C7,
+                      C8, C9, C10, C11, C12, C13, C14, C15;
+            traits.initAcc(C0); traits.initAcc(C1); traits.initAcc(C2); traits.initAcc(C3);
+            traits.initAcc(C4); traits.initAcc(C5); traits.initAcc(C6); traits.initAcc(C7);
+            traits.initAcc(C8); traits.initAcc(C9); traits.initAcc(C10); traits.initAcc(C11);
+            traits.initAcc(C12); traits.initAcc(C13); traits.initAcc(C14); traits.initAcc(C15);
+
+            LinearMapper r0 = res.getLinearMapper(i, j2 + 0);
+            LinearMapper r1 = res.getLinearMapper(i, j2 + 1);
+            LinearMapper r2 = res.getLinearMapper(i, j2 + 2);
+            LinearMapper r3 = res.getLinearMapper(i, j2 + 3);
+            LinearMapper r4 = res.getLinearMapper(i, j2 + 4);
+            LinearMapper r5 = res.getLinearMapper(i, j2 + 5);
+            LinearMapper r6 = res.getLinearMapper(i, j2 + 6);
+            LinearMapper r7 = res.getLinearMapper(i, j2 + 7);
+            r0.prefetch(prefetch_res_offset);
+            r1.prefetch(prefetch_res_offset);
+            r2.prefetch(prefetch_res_offset);
+            r3.prefetch(prefetch_res_offset);
+            r4.prefetch(prefetch_res_offset);
+            r5.prefetch(prefetch_res_offset);
+            r6.prefetch(prefetch_res_offset);
+            r7.prefetch(prefetch_res_offset);
+
+            const RhsScalar* blB = &blockB[j2*strideB+offsetB*8];
+            prefetch(&blB[0]);
+            LhsPacket A0, A1;
+            for(Index k=0; k<peeled_kc; k+=pk)
+            {
+              RhsPacketx4 rhs_panel;
+              RhsPacket T0;
+              // NOTE: the begin/end asm comments below work around bug 935!
+              // but they are not enough for gcc>=6 without FMA (bug 1637)
+              #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE)
+                #define EIGEN_GEBP_2Px8_SPILLING_WORKAROUND __asm__  ("" : [a0] "+x,m" (A0),[a1] "+x,m" (A1));
+              #else
+                #define EIGEN_GEBP_2Px8_SPILLING_WORKAROUND
+              #endif
+#define EIGEN_GEBGP_ONESTEP(K)                                                \
+            do {                                                              \
+              EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX8");      \
+              traits.loadLhs(&blA[(0 + 2 * K) * LhsProgress], A0);            \
+              traits.loadLhs(&blA[(1 + 2 * K) * LhsProgress], A1);            \
+              traits.loadRhs(&blB[(0 + 8 * K) * RhsProgress], rhs_panel);     \
+              traits.madd(A0, rhs_panel, C0, T0, fix<0>);                     \
+              traits.madd(A1, rhs_panel, C8, T0, fix<0>);                     \
+              traits.updateRhs(&blB[(1 + 8 * K) * RhsProgress], rhs_panel);   \
+              traits.madd(A0, rhs_panel, C1, T0, fix<1>);                     \
+              traits.madd(A1, rhs_panel, C9, T0, fix<1>);                     \
+              traits.updateRhs(&blB[(2 + 8 * K) * RhsProgress], rhs_panel);   \
+              traits.madd(A0, rhs_panel, C2, T0, fix<2>);                     \
+              traits.madd(A1, rhs_panel, C10, T0, fix<2>);                    \
+              traits.updateRhs(&blB[(3 + 8 * K) * RhsProgress], rhs_panel);   \
+              traits.madd(A0, rhs_panel, C3, T0, fix<3>);                     \
+              traits.madd(A1, rhs_panel, C11, T0, fix<3>);                    \
+              traits.loadRhs(&blB[(4 + 8 * K) * RhsProgress], rhs_panel);     \
+              traits.madd(A0, rhs_panel, C4, T0, fix<0>);                     \
+              traits.madd(A1, rhs_panel, C12, T0, fix<0>);                    \
+              traits.updateRhs(&blB[(5 + 8 * K) * RhsProgress], rhs_panel);   \
+              traits.madd(A0, rhs_panel, C5, T0, fix<1>);                     \
+              traits.madd(A1, rhs_panel, C13, T0, fix<1>);                    \
+              traits.updateRhs(&blB[(6 + 8 * K) * RhsProgress], rhs_panel);   \
+              traits.madd(A0, rhs_panel, C6, T0, fix<2>);                     \
+              traits.madd(A1, rhs_panel, C14, T0, fix<2>);                    \
+              traits.updateRhs(&blB[(7 + 8 * K) * RhsProgress], rhs_panel);   \
+              traits.madd(A0, rhs_panel, C7, T0, fix<3>);                     \
+              traits.madd(A1, rhs_panel, C15, T0, fix<3>);                    \
+              EIGEN_GEBP_2Px8_SPILLING_WORKAROUND                             \
+              EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX8");        \
+            } while (false)
+
+              EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX8");
+
+              EIGEN_GEBGP_ONESTEP(0);
+              EIGEN_GEBGP_ONESTEP(1);
+              EIGEN_GEBGP_ONESTEP(2);
+              EIGEN_GEBGP_ONESTEP(3);
+              EIGEN_GEBGP_ONESTEP(4);
+              EIGEN_GEBGP_ONESTEP(5);
+              EIGEN_GEBGP_ONESTEP(6);
+              EIGEN_GEBGP_ONESTEP(7);
+
+              blB += pk*8*RhsProgress;
+              blA += pk*(2*Traits::LhsProgress);
+
+              EIGEN_ASM_COMMENT("end gebp micro kernel 2pX8");
+            }
+            // process remaining peeled loop
+            for(Index k=peeled_kc; k<depth; k++)
+            {
+              RhsPacketx4 rhs_panel;
+              RhsPacket T0;
+              EIGEN_GEBGP_ONESTEP(0);
+              blB += 8*RhsProgress;
+              blA += 2*Traits::LhsProgress;
+            }
+
+#undef EIGEN_GEBGP_ONESTEP
+
+            ResPacket R0, R1, R2, R3;
+            ResPacket alphav = pset1<ResPacket>(alpha);
+
+            R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+            R1 = r0.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+            R2 = r1.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+            R3 = r1.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+            traits.acc(C0, alphav, R0);
+            traits.acc(C8, alphav, R1);
+            traits.acc(C1, alphav, R2);
+            traits.acc(C9, alphav, R3);
+            r0.storePacket(0 * Traits::ResPacketSize, R0);
+            r0.storePacket(1 * Traits::ResPacketSize, R1);
+            r1.storePacket(0 * Traits::ResPacketSize, R2);
+            r1.storePacket(1 * Traits::ResPacketSize, R3);
+
+            R0 = r2.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+            R1 = r2.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+            R2 = r3.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+            R3 = r3.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+            traits.acc(C2,  alphav, R0);
+            traits.acc(C10,  alphav, R1);
+            traits.acc(C3,  alphav, R2);
+            traits.acc(C11,  alphav, R3);
+            r2.storePacket(0 * Traits::ResPacketSize, R0);
+            r2.storePacket(1 * Traits::ResPacketSize, R1);
+            r3.storePacket(0 * Traits::ResPacketSize, R2);
+            r3.storePacket(1 * Traits::ResPacketSize, R3);
+
+            R0 = r4.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+            R1 = r4.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+            R2 = r5.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+            R3 = r5.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+            traits.acc(C4,  alphav, R0);
+            traits.acc(C12,  alphav, R1);
+            traits.acc(C5,  alphav, R2);
+            traits.acc(C13,  alphav, R3);
+            r4.storePacket(0 * Traits::ResPacketSize, R0);
+            r4.storePacket(1 * Traits::ResPacketSize, R1);
+            r5.storePacket(0 * Traits::ResPacketSize, R2);
+            r5.storePacket(1 * Traits::ResPacketSize, R3);
+
+            R0 = r6.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+            R1 = r6.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+            R2 = r7.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+            R3 = r7.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+            traits.acc(C6,  alphav, R0);
+            traits.acc(C14,  alphav, R1);
+            traits.acc(C7,  alphav, R2);
+            traits.acc(C15,  alphav, R3);
+            r6.storePacket(0 * Traits::ResPacketSize, R0);
+            r6.storePacket(1 * Traits::ResPacketSize, R1);
+            r7.storePacket(0 * Traits::ResPacketSize, R2);
+            r7.storePacket(1 * Traits::ResPacketSize, R3);
+          }
+        }
+        }
+#endif
+        for(Index j2=packet_cols8; j2<packet_cols4; j2+=4)
         {
           for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
           {
@@ -1717,7 +2225,7 @@
           r3.prefetch(prefetch_res_offset);
 
           // performs "inner" products
-          const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+          const RhsScalar* blB = &blockB[j2*strideB+offsetB*4];
           prefetch(&blB[0]);
           LhsPacket A0, A1;
 
@@ -1906,15 +2414,71 @@
     //---------- Process remaining rows, 1 at once ----------
     if(peeled_mc_quarter<rows)
     {
+#if EIGEN_ARCH_ARM64
+      EIGEN_IF_CONSTEXPR(nr>=8) {
       // loop on each panel of the rhs
-      for(Index j2=0; j2<packet_cols4; j2+=nr)
+      for(Index j2=0; j2<packet_cols8; j2+=8)
       {
         // loop on each row of the lhs (1*LhsProgress x depth)
         for(Index i=peeled_mc_quarter; i<rows; i+=1)
         {
           const LhsScalar* blA = &blockA[i*strideA+offsetA];
           prefetch(&blA[0]);
-          const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+          // gets a 1 x 1 res block as registers
+          ResScalar C0(0),C1(0),C2(0),C3(0),C4(0),C5(0),C6(0),C7(0);
+          const RhsScalar* blB = &blockB[j2*strideB+offsetB*8];
+          for(Index k=0; k<depth; k++)
+          {
+            LhsScalar A0 = blA[k];
+            RhsScalar B_0;
+
+            B_0 = blB[0];
+            C0 = cj.pmadd(A0, B_0, C0);
+
+            B_0 = blB[1];
+            C1 = cj.pmadd(A0, B_0, C1);
+
+            B_0 = blB[2];
+            C2 = cj.pmadd(A0, B_0, C2);
+
+            B_0 = blB[3];
+            C3 = cj.pmadd(A0, B_0, C3);
+
+            B_0 = blB[4];
+            C4 = cj.pmadd(A0, B_0, C4);
+
+            B_0 = blB[5];
+            C5 = cj.pmadd(A0, B_0, C5);
+
+            B_0 = blB[6];
+            C6 = cj.pmadd(A0, B_0, C6);
+
+            B_0 = blB[7];
+            C7 = cj.pmadd(A0, B_0, C7);
+
+            blB += 8;
+          }
+          res(i, j2 + 0) += alpha * C0;
+          res(i, j2 + 1) += alpha * C1;
+          res(i, j2 + 2) += alpha * C2;
+          res(i, j2 + 3) += alpha * C3;
+          res(i, j2 + 4) += alpha * C4;
+          res(i, j2 + 5) += alpha * C5;
+          res(i, j2 + 6) += alpha * C6;
+          res(i, j2 + 7) += alpha * C7;
+        }
+      }
+      }
+#endif
+
+      for(Index j2=packet_cols8; j2<packet_cols4; j2+=4)
+      {
+        // loop on each row of the lhs (1*LhsProgress x depth)
+        for(Index i=peeled_mc_quarter; i<rows; i+=1)
+        {
+          const LhsScalar* blA = &blockA[i*strideA+offsetA];
+          prefetch(&blA[0]);
+          const RhsScalar* blB = &blockB[j2*strideB+offsetB*4];
 
           // If LhsProgress is 8 or 16, it assumes that there is a
           // half or quarter packet, respectively, of the same size as
@@ -2397,53 +2961,125 @@
   Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
   Index count = 0;
   const Index peeled_k = (depth/PacketSize)*PacketSize;
-//   if(nr>=8)
-//   {
-//     for(Index j2=0; j2<packet_cols8; j2+=8)
-//     {
-//       // skip what we have before
-//       if(PanelMode) count += 8 * offset;
-//       const Scalar* b0 = &rhs[(j2+0)*rhsStride];
-//       const Scalar* b1 = &rhs[(j2+1)*rhsStride];
-//       const Scalar* b2 = &rhs[(j2+2)*rhsStride];
-//       const Scalar* b3 = &rhs[(j2+3)*rhsStride];
-//       const Scalar* b4 = &rhs[(j2+4)*rhsStride];
-//       const Scalar* b5 = &rhs[(j2+5)*rhsStride];
-//       const Scalar* b6 = &rhs[(j2+6)*rhsStride];
-//       const Scalar* b7 = &rhs[(j2+7)*rhsStride];
-//       Index k=0;
-//       if(PacketSize==8) // TODO enable vectorized transposition for PacketSize==4
-//       {
-//         for(; k<peeled_k; k+=PacketSize) {
-//           PacketBlock<Packet> kernel;
-//           for (int p = 0; p < PacketSize; ++p) {
-//             kernel.packet[p] = ploadu<Packet>(&rhs[(j2+p)*rhsStride+k]);
-//           }
-//           ptranspose(kernel);
-//           for (int p = 0; p < PacketSize; ++p) {
-//             pstoreu(blockB+count, cj.pconj(kernel.packet[p]));
-//             count+=PacketSize;
-//           }
-//         }
-//       }
-//       for(; k<depth; k++)
-//       {
-//         blockB[count+0] = cj(b0[k]);
-//         blockB[count+1] = cj(b1[k]);
-//         blockB[count+2] = cj(b2[k]);
-//         blockB[count+3] = cj(b3[k]);
-//         blockB[count+4] = cj(b4[k]);
-//         blockB[count+5] = cj(b5[k]);
-//         blockB[count+6] = cj(b6[k]);
-//         blockB[count+7] = cj(b7[k]);
-//         count += 8;
-//       }
-//       // skip what we have after
-//       if(PanelMode) count += 8 * (stride-offset-depth);
-//     }
-//   }
 
-  if(nr>=4)
+#if EIGEN_ARCH_ARM64
+  EIGEN_IF_CONSTEXPR(nr>=8)
+  {
+    for(Index j2=0; j2<packet_cols8; j2+=8)
+    {
+      // skip what we have before
+      if(PanelMode) count += 8 * offset;
+      const LinearMapper dm0 = rhs.getLinearMapper(0, j2 + 0);
+      const LinearMapper dm1 = rhs.getLinearMapper(0, j2 + 1);
+      const LinearMapper dm2 = rhs.getLinearMapper(0, j2 + 2);
+      const LinearMapper dm3 = rhs.getLinearMapper(0, j2 + 3);
+      const LinearMapper dm4 = rhs.getLinearMapper(0, j2 + 4);
+      const LinearMapper dm5 = rhs.getLinearMapper(0, j2 + 5);
+      const LinearMapper dm6 = rhs.getLinearMapper(0, j2 + 6);
+      const LinearMapper dm7 = rhs.getLinearMapper(0, j2 + 7);
+      Index k = 0;
+      if (PacketSize % 2 == 0 && PacketSize <= 8) // 2 4 8
+      {
+        for (; k < peeled_k; k += PacketSize)
+        {
+          if (PacketSize == 2)
+          {
+            PacketBlock<Packet, PacketSize==2 ?2:PacketSize> kernel0, kernel1, kernel2, kernel3;
+            kernel0.packet[0%PacketSize] = dm0.template loadPacket<Packet>(k);
+            kernel0.packet[1%PacketSize] = dm1.template loadPacket<Packet>(k);
+            kernel1.packet[0%PacketSize] = dm2.template loadPacket<Packet>(k);
+            kernel1.packet[1%PacketSize] = dm3.template loadPacket<Packet>(k);
+            kernel2.packet[0%PacketSize] = dm4.template loadPacket<Packet>(k);
+            kernel2.packet[1%PacketSize] = dm5.template loadPacket<Packet>(k);
+            kernel3.packet[0%PacketSize] = dm6.template loadPacket<Packet>(k);
+            kernel3.packet[1%PacketSize] = dm7.template loadPacket<Packet>(k);
+            ptranspose(kernel0);
+            ptranspose(kernel1);
+            ptranspose(kernel2);
+            ptranspose(kernel3);
+
+            pstoreu(blockB + count + 0 * PacketSize, cj.pconj(kernel0.packet[0 % PacketSize]));
+            pstoreu(blockB + count + 1 * PacketSize, cj.pconj(kernel1.packet[0 % PacketSize]));
+            pstoreu(blockB + count + 2 * PacketSize, cj.pconj(kernel2.packet[0 % PacketSize]));
+            pstoreu(blockB + count + 3 * PacketSize, cj.pconj(kernel3.packet[0 % PacketSize]));
+
+            pstoreu(blockB + count + 4 * PacketSize, cj.pconj(kernel0.packet[1 % PacketSize]));
+            pstoreu(blockB + count + 5 * PacketSize, cj.pconj(kernel1.packet[1 % PacketSize]));
+            pstoreu(blockB + count + 6 * PacketSize, cj.pconj(kernel2.packet[1 % PacketSize]));
+            pstoreu(blockB + count + 7 * PacketSize, cj.pconj(kernel3.packet[1 % PacketSize]));
+            count+=8*PacketSize;
+          }
+          else if (PacketSize == 4)
+          {
+            PacketBlock<Packet, PacketSize == 4?4:PacketSize> kernel0, kernel1;
+
+            kernel0.packet[0%PacketSize] = dm0.template loadPacket<Packet>(k);
+            kernel0.packet[1%PacketSize] = dm1.template loadPacket<Packet>(k);
+            kernel0.packet[2%PacketSize] = dm2.template loadPacket<Packet>(k);
+            kernel0.packet[3%PacketSize] = dm3.template loadPacket<Packet>(k);
+            kernel1.packet[0%PacketSize] = dm4.template loadPacket<Packet>(k);
+            kernel1.packet[1%PacketSize] = dm5.template loadPacket<Packet>(k);
+            kernel1.packet[2%PacketSize] = dm6.template loadPacket<Packet>(k);
+            kernel1.packet[3%PacketSize] = dm7.template loadPacket<Packet>(k);
+            ptranspose(kernel0);
+            ptranspose(kernel1);
+
+            pstoreu(blockB+count+0*PacketSize, cj.pconj(kernel0.packet[0%PacketSize]));
+            pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel1.packet[0%PacketSize]));
+            pstoreu(blockB+count+2*PacketSize, cj.pconj(kernel0.packet[1%PacketSize]));
+            pstoreu(blockB+count+3*PacketSize, cj.pconj(kernel1.packet[1%PacketSize]));
+            pstoreu(blockB+count+4*PacketSize, cj.pconj(kernel0.packet[2%PacketSize]));
+            pstoreu(blockB+count+5*PacketSize, cj.pconj(kernel1.packet[2%PacketSize]));
+            pstoreu(blockB+count+6*PacketSize, cj.pconj(kernel0.packet[3%PacketSize]));
+            pstoreu(blockB+count+7*PacketSize, cj.pconj(kernel1.packet[3%PacketSize]));
+            count+=8*PacketSize;
+          }
+          else if (PacketSize == 8)
+          {
+            PacketBlock<Packet, PacketSize==8?8:PacketSize> kernel0;
+
+            kernel0.packet[0%PacketSize] = dm0.template loadPacket<Packet>(k);
+            kernel0.packet[1%PacketSize] = dm1.template loadPacket<Packet>(k);
+            kernel0.packet[2%PacketSize] = dm2.template loadPacket<Packet>(k);
+            kernel0.packet[3%PacketSize] = dm3.template loadPacket<Packet>(k);
+            kernel0.packet[4%PacketSize] = dm4.template loadPacket<Packet>(k);
+            kernel0.packet[5%PacketSize] = dm5.template loadPacket<Packet>(k);
+            kernel0.packet[6%PacketSize] = dm6.template loadPacket<Packet>(k);
+            kernel0.packet[7%PacketSize] = dm7.template loadPacket<Packet>(k);
+            ptranspose(kernel0);
+
+            pstoreu(blockB+count+0*PacketSize, cj.pconj(kernel0.packet[0%PacketSize]));
+            pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel0.packet[1%PacketSize]));
+            pstoreu(blockB+count+2*PacketSize, cj.pconj(kernel0.packet[2%PacketSize]));
+            pstoreu(blockB+count+3*PacketSize, cj.pconj(kernel0.packet[3%PacketSize]));
+            pstoreu(blockB+count+4*PacketSize, cj.pconj(kernel0.packet[4%PacketSize]));
+            pstoreu(blockB+count+5*PacketSize, cj.pconj(kernel0.packet[5%PacketSize]));
+            pstoreu(blockB+count+6*PacketSize, cj.pconj(kernel0.packet[6%PacketSize]));
+            pstoreu(blockB+count+7*PacketSize, cj.pconj(kernel0.packet[7%PacketSize]));
+            count+=8*PacketSize;
+          }
+        }
+      }
+
+      for(; k<depth; k++)
+      {
+        blockB[count+0] = cj(dm0(k));
+        blockB[count+1] = cj(dm1(k));
+        blockB[count+2] = cj(dm2(k));
+        blockB[count+3] = cj(dm3(k));
+        blockB[count+4] = cj(dm4(k));
+        blockB[count+5] = cj(dm5(k));
+        blockB[count+6] = cj(dm6(k));
+        blockB[count+7] = cj(dm7(k));
+        count += 8;
+      }
+      // skip what we have after
+      if(PanelMode) count += 8 * (stride-offset-depth);
+    }
+  }
+#endif
+  
+  EIGEN_IF_CONSTEXPR(nr>=4)
   {
     for(Index j2=packet_cols8; j2<packet_cols4; j2+=4)
     {
@@ -2522,39 +3158,44 @@
     Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
     Index count = 0;
 
-  //   if(nr>=8)
-  //   {
-  //     for(Index j2=0; j2<packet_cols8; j2+=8)
-  //     {
-  //       // skip what we have before
-  //       if(PanelMode) count += 8 * offset;
-  //       for(Index k=0; k<depth; k++)
-  //       {
-  //         if (PacketSize==8) {
-  //           Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]);
-  //           pstoreu(blockB+count, cj.pconj(A));
-  //         } else if (PacketSize==4) {
-  //           Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]);
-  //           Packet B = ploadu<Packet>(&rhs[k*rhsStride + j2 + PacketSize]);
-  //           pstoreu(blockB+count, cj.pconj(A));
-  //           pstoreu(blockB+count+PacketSize, cj.pconj(B));
-  //         } else {
-  //           const Scalar* b0 = &rhs[k*rhsStride + j2];
-  //           blockB[count+0] = cj(b0[0]);
-  //           blockB[count+1] = cj(b0[1]);
-  //           blockB[count+2] = cj(b0[2]);
-  //           blockB[count+3] = cj(b0[3]);
-  //           blockB[count+4] = cj(b0[4]);
-  //           blockB[count+5] = cj(b0[5]);
-  //           blockB[count+6] = cj(b0[6]);
-  //           blockB[count+7] = cj(b0[7]);
-  //         }
-  //         count += 8;
-  //       }
-  //       // skip what we have after
-  //       if(PanelMode) count += 8 * (stride-offset-depth);
-  //     }
-  //   }
+#if EIGEN_ARCH_ARM64
+    EIGEN_IF_CONSTEXPR(nr>=8)
+    {
+      for(Index j2=0; j2<packet_cols8; j2+=8)
+      {
+        // skip what we have before
+        if(PanelMode) count += 8 * offset;
+        for(Index k=0; k<depth; k++)
+        {
+          if (PacketSize==8) {
+            Packet A = rhs.template loadPacket<Packet>(k, j2);
+            pstoreu(blockB+count, cj.pconj(A));
+            count += PacketSize;
+          } else if (PacketSize==4) {
+            Packet A = rhs.template loadPacket<Packet>(k, j2);
+            Packet B = rhs.template loadPacket<Packet>(k, j2 + 4);
+            pstoreu(blockB+count, cj.pconj(A));
+            pstoreu(blockB+count+PacketSize, cj.pconj(B));
+            count += 2*PacketSize;
+          } else {
+            const LinearMapper dm0 = rhs.getLinearMapper(k, j2);
+            blockB[count+0] = cj(dm0(0));
+            blockB[count+1] = cj(dm0(1));
+            blockB[count+2] = cj(dm0(2));
+            blockB[count+3] = cj(dm0(3));
+            blockB[count+4] = cj(dm0(4));
+            blockB[count+5] = cj(dm0(5));
+            blockB[count+6] = cj(dm0(6));
+            blockB[count+7] = cj(dm0(7));
+            count += 8;
+          }
+        }
+        // skip what we have after
+        if(PanelMode) count += 8 * (stride-offset-depth);
+      }
+    }
+#endif
+    
     if(nr>=4)
     {
       for(Index j2=packet_cols8; j2<packet_cols4; j2+=4)
@@ -2623,7 +3264,7 @@
 }
 
 /** \returns the currently set level 3 cpu cache size (in bytes) used to estimate the ideal blocking size paramete\
-rs.                                                                                                                
+rs.
 * \sa setCpuCacheSize */
 inline std::ptrdiff_t l3CacheSize()
 {
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index d023b3b..3f00077 100644
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -487,8 +487,8 @@
     ExtractType,
     typename ExtractType_::PlainObject
     > DirectLinearAccessType;
-  static inline EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR ExtractType extract(const XprType& x) { return x; }
-  static inline EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
+  static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return x; }
+  static inline EIGEN_DEVICE_FUNC const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
 };
 
 // pop conjugate
@@ -576,7 +576,7 @@
   enum {
     IsTransposed = Base::IsTransposed ? 0 : 1
   };
-  static inline EIGEN_CONSTEXPR ExtractType extract(const XprType& x) { return ExtractType(Base::extract(x.nestedExpression())); }
+  static inline ExtractType extract(const XprType& x) { return ExtractType(Base::extract(x.nestedExpression())); }
   static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
 };
 
@@ -587,7 +587,7 @@
 
 template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectAccess>
 struct extract_data_selector {
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE EIGEN_CONSTEXPR static const typename T::Scalar* run(const T& m)
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static const typename T::Scalar* run(const T& m)
   {
     return blas_traits<T>::extract(m).data();
   }
@@ -599,7 +599,7 @@
 };
 
 template<typename T>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE EIGEN_CONSTEXPR const typename T::Scalar* extract_data(const T& m)
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const typename T::Scalar* extract_data(const T& m)
 {
   return extract_data_selector<T>::run(m);
 }
diff --git a/Eigen/src/Core/util/IntegralConstant.h b/Eigen/src/Core/util/IntegralConstant.h
index b00fb9c..ea275bd 100644
--- a/Eigen/src/Core/util/IntegralConstant.h
+++ b/Eigen/src/Core/util/IntegralConstant.h
@@ -161,7 +161,7 @@
   static const int value = N;
 };
 
-template<typename T> EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index get_runtime_value(const T &x) { return x; }
+template<typename T> EIGEN_DEVICE_FUNC Index get_runtime_value(const T &x) { return x; }
 
 // Cleanup integer/FixedInt/VariableAndFixedInt/etc types:
 
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 7261921..570d033 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -1068,13 +1068,13 @@
 #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
   #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
     using Base::operator =; \
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Derived& operator=(const Derived& other) { Base::operator=(other); return *this; } \
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { Base::operator=(other); return *this; } \
     template <typename OtherDerived> \
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Derived& operator=(const DenseBase<OtherDerived>& other) { Base::operator=(other.derived()); return *this; }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other) { Base::operator=(other.derived()); return *this; }
 #else
   #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
     using Base::operator =; \
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Derived& operator=(const Derived& other) \
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) \
     { \
       Base::operator=(other); \
       return *this; \
@@ -1107,14 +1107,11 @@
  *
  * Hiding the default destructor lead to problems in C++03 mode together with boost::multiprecision
  */
-#if defined(EIGEN_GPUCC)
 #define EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(Derived)  \
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Derived() = default; \
+    EIGEN_DEVICE_FUNC Derived() = default; \
     EIGEN_DEVICE_FUNC ~Derived() = default;
-#else
-#define EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(Derived)  \
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Derived() = default;
-#endif
+
+
 
 
 
@@ -1171,7 +1168,7 @@
 
 #define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,OPNAME) \
   template<typename OtherDerived> \
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME) \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME) \
   (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
   { \
     return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME)(derived(), other.derived()); \
@@ -1189,7 +1186,7 @@
                 const typename internal::plain_constant_type<EXPR,SCALAR>::type, const EXPR>
 
 #define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \
-  template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR \
+  template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
   const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type,OPNAME)\
   (METHOD)(const T& scalar) const { \
     typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type PromotedT; \
@@ -1198,7 +1195,7 @@
   }
 
 #define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \
-  template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR friend \
+  template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend \
   const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type,Derived,OPNAME) \
   (METHOD)(const T& scalar, const StorageBaseType& matrix) { \
     typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type PromotedT; \
@@ -1270,4 +1267,12 @@
   #define EIGEN_UNROLL_LOOP
 #endif
 
+// Notice: Use this macro with caution. The code in the if body should still
+// compile with C++14.
+#if defined(EIGEN_HAS_CXX17_IFCONSTEXPR)
+#define EIGEN_IF_CONSTEXPR(X) if constexpr (X)
+#else
+#define EIGEN_IF_CONSTEXPR(X) if (X)
+#endif
+
 #endif // EIGEN_MACROS_H
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 1aa41ab..b5f91bf 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -113,7 +113,7 @@
 {
   public:
     EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(variable_if_dynamic)
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
     EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
     T value() { return T(Value); }
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
@@ -126,10 +126,10 @@
 {
     T m_value;
   public:
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR explicit variable_if_dynamic(T value = 0) EIGEN_NO_THROW : m_value(value) {}
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR T value() const { return m_value; }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR operator T() const { return m_value; }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void setValue(T value) { m_value = value; }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value = 0) EIGEN_NO_THROW : m_value(value) {}
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator T() const { return m_value; }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; }
 };
 
 /** \internal like variable_if_dynamic but for DynamicIndex
@@ -137,7 +137,7 @@
 template<typename T, int Value> class variable_if_dynamicindex
 {
   public:
-    EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
     EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
     T value() { return T(Value); }
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -675,7 +675,7 @@
 };
 
 template<typename T1, typename T2>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC
 bool is_same_dense(const T1 &mat1, const T2 &mat2, std::enable_if_t<possibly_same_dense<T1,T2>::value> * = 0)
 {
   return (mat1.data()==mat2.data()) && (mat1.innerStride()==mat2.innerStride()) && (mat1.outerStride()==mat2.outerStride());
diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h
index c140831..dc6a762 100644
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -29,7 +29,7 @@
 template<typename Derived>
 template<typename OtherDerived>
 #ifndef EIGEN_PARSED_BY_DOXYGEN
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 typename MatrixBase<Derived>::template cross_product_return_type<OtherDerived>::type
 #else
 typename MatrixBase<Derived>::PlainObject
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index 7394dea..0aca4c4 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -65,34 +65,34 @@
 
 
   /** \returns the \c x coefficient */
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline CoeffReturnType x() const { return this->derived().coeffs().coeff(0); }
+  EIGEN_DEVICE_FUNC inline CoeffReturnType x() const { return this->derived().coeffs().coeff(0); }
   /** \returns the \c y coefficient */
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline CoeffReturnType y() const { return this->derived().coeffs().coeff(1); }
+  EIGEN_DEVICE_FUNC inline CoeffReturnType y() const { return this->derived().coeffs().coeff(1); }
   /** \returns the \c z coefficient */
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline CoeffReturnType z() const { return this->derived().coeffs().coeff(2); }
+  EIGEN_DEVICE_FUNC inline CoeffReturnType z() const { return this->derived().coeffs().coeff(2); }
   /** \returns the \c w coefficient */
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline CoeffReturnType w() const { return this->derived().coeffs().coeff(3); }
+  EIGEN_DEVICE_FUNC inline CoeffReturnType w() const { return this->derived().coeffs().coeff(3); }
 
   /** \returns a reference to the \c x coefficient (if Derived is a non-const lvalue) */
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline NonConstCoeffReturnType x() { return this->derived().coeffs().x(); }
+  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType x() { return this->derived().coeffs().x(); }
   /** \returns a reference to the \c y coefficient (if Derived is a non-const lvalue) */
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline NonConstCoeffReturnType y() { return this->derived().coeffs().y(); }
+  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType y() { return this->derived().coeffs().y(); }
   /** \returns a reference to the \c z coefficient (if Derived is a non-const lvalue) */
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline NonConstCoeffReturnType z() { return this->derived().coeffs().z(); }
+  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType z() { return this->derived().coeffs().z(); }
   /** \returns a reference to the \c w coefficient (if Derived is a non-const lvalue) */
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline NonConstCoeffReturnType w() { return this->derived().coeffs().w(); }
+  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType w() { return this->derived().coeffs().w(); }
 
   /** \returns a read-only vector expression of the imaginary part (x,y,z) */
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); }
+  EIGEN_DEVICE_FUNC inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); }
 
   /** \returns a vector expression of the imaginary part (x,y,z) */
   EIGEN_DEVICE_FUNC inline VectorBlock<Coefficients,3> vec() { return coeffs().template head<3>(); }
 
   /** \returns a read-only vector expression of the coefficients (x,y,z,w) */
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline const typename internal::traits<Derived>::Coefficients& coeffs() const { return derived().coeffs(); }
+  EIGEN_DEVICE_FUNC inline const typename internal::traits<Derived>::Coefficients& coeffs() const { return derived().coeffs(); }
 
   /** \returns a vector expression of the coefficients (x,y,z,w) */
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline typename internal::traits<Derived>::Coefficients& coeffs() { return derived().coeffs(); }
+  EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Coefficients& coeffs() { return derived().coeffs(); }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE QuaternionBase<Derived>& operator=(const QuaternionBase<Derived>& other);
   template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const QuaternionBase<OtherDerived>& other);
@@ -105,12 +105,12 @@
 //  { return operator=<Derived>(other); }
 
   EIGEN_DEVICE_FUNC Derived& operator=(const AngleAxisType& aa);
-  template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Derived& operator=(const MatrixBase<OtherDerived>& m);
+  template<class OtherDerived> EIGEN_DEVICE_FUNC Derived& operator=(const MatrixBase<OtherDerived>& m);
 
   /** \returns a quaternion representing an identity rotation
     * \sa MatrixBase::Identity()
     */
-  EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR inline Quaternion<Scalar> Identity() { return Quaternion<Scalar>(Scalar(1), Scalar(0), Scalar(0), Scalar(0)); }
+  EIGEN_DEVICE_FUNC static inline Quaternion<Scalar> Identity() { return Quaternion<Scalar>(Scalar(1), Scalar(0), Scalar(0), Scalar(0)); }
 
   /** \sa QuaternionBase::Identity(), MatrixBase::setIdentity()
     */
@@ -119,7 +119,7 @@
   /** \returns the squared norm of the quaternion's coefficients
     * \sa QuaternionBase::norm(), MatrixBase::squaredNorm()
     */
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Scalar squaredNorm() const { return coeffs().squaredNorm(); }
+  EIGEN_DEVICE_FUNC inline Scalar squaredNorm() const { return coeffs().squaredNorm(); }
 
   /** \returns the norm of the quaternion's coefficients
     * \sa QuaternionBase::squaredNorm(), MatrixBase::norm()
@@ -138,12 +138,12 @@
     * corresponds to the cosine of half the angle between the two rotations.
     * \sa angularDistance()
     */
-  template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Scalar dot(const QuaternionBase<OtherDerived>& other) const { return coeffs().dot(other.coeffs()); }
+  template<class OtherDerived> EIGEN_DEVICE_FUNC inline Scalar dot(const QuaternionBase<OtherDerived>& other) const { return coeffs().dot(other.coeffs()); }
 
   template<class OtherDerived> EIGEN_DEVICE_FUNC Scalar angularDistance(const QuaternionBase<OtherDerived>& other) const;
 
   /** \returns an equivalent 3x3 rotation matrix */
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Matrix3 toRotationMatrix() const;
+  EIGEN_DEVICE_FUNC inline Matrix3 toRotationMatrix() const;
 
   /** \returns the quaternion which transform \a a into \a b through a rotation */
   template<typename Derived1, typename Derived2>
@@ -153,7 +153,7 @@
   template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*= (const QuaternionBase<OtherDerived>& q);
 
   /** \returns the quaternion describing the inverse rotation */
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Quaternion<Scalar> inverse() const;
+  EIGEN_DEVICE_FUNC Quaternion<Scalar> inverse() const;
 
   /** \returns the conjugated quaternion */
   EIGEN_DEVICE_FUNC Quaternion<Scalar> conjugate() const;
@@ -165,7 +165,7 @@
     *          fuzzy comparison such as isApprox()
     * \sa isApprox(), operator!= */
   template<class OtherDerived>
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline bool operator==(const QuaternionBase<OtherDerived>& other) const
+  EIGEN_DEVICE_FUNC inline bool operator==(const QuaternionBase<OtherDerived>& other) const
   { return coeffs() == other.coeffs(); }
 
   /** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other.
@@ -173,7 +173,7 @@
     *          fuzzy comparison such as isApprox()
     * \sa isApprox(), operator== */
   template<class OtherDerived>
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline bool operator!=(const QuaternionBase<OtherDerived>& other) const
+  EIGEN_DEVICE_FUNC inline bool operator!=(const QuaternionBase<OtherDerived>& other) const
   { return coeffs() != other.coeffs(); }
 
   /** \returns \c true if \c *this is approximately equal to \a other, within the precision
@@ -185,7 +185,7 @@
   { return coeffs().isApprox(other.coeffs(), prec); }
 
   /** return the result vector of \a v through the rotation*/
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Vector3 _transformVector(const Vector3& v) const;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const;
 
   #ifdef EIGEN_PARSED_BY_DOXYGEN
   /** \returns \c *this with scalar type casted to \a NewScalarType
@@ -199,14 +199,14 @@
   #else
 
   template<typename NewScalarType>
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline
+  EIGEN_DEVICE_FUNC inline
   std::enable_if_t<internal::is_same<Scalar,NewScalarType>::value,const Derived&> cast() const
   {
     return derived();
   }
 
   template<typename NewScalarType>
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline
+  EIGEN_DEVICE_FUNC inline
   std::enable_if_t<!internal::is_same<Scalar,NewScalarType>::value,Quaternion<NewScalarType> > cast() const
   {
     return Quaternion<NewScalarType>(coeffs().template cast<NewScalarType>());
@@ -296,10 +296,10 @@
     * while internally the coefficients are stored in the following order:
     * [\c x, \c y, \c z, \c w]
     */
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar& z) : m_coeffs(x, y, z, w){}
+  EIGEN_DEVICE_FUNC inline Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar& z) : m_coeffs(x, y, z, w){}
 
   /** Constructs and initialize a quaternion from the array data */
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline explicit Quaternion(const Scalar* data) : m_coeffs(data) {}
+  EIGEN_DEVICE_FUNC explicit inline Quaternion(const Scalar* data) : m_coeffs(data) {}
 
   /** Copy constructor */
   template<class Derived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion(const QuaternionBase<Derived>& other) { this->Base::operator=(other); }
@@ -312,11 +312,11 @@
     *  - a 4D vector expression representing quaternion coefficients in the order [\c x, \c y, \c z, \c w].
     */
   template<typename Derived>
-  EIGEN_DEVICE_FUNC explicit EIGEN_CONSTEXPR inline Quaternion(const MatrixBase<Derived>& other) { *this = other; }
+  EIGEN_DEVICE_FUNC explicit inline Quaternion(const MatrixBase<Derived>& other) { *this = other; }
 
   /** Explicit copy constructor with scalar conversion */
   template<typename OtherScalar, int OtherOptions>
-  EIGEN_DEVICE_FUNC explicit EIGEN_CONSTEXPR Quaternion(const Quaternion<OtherScalar, OtherOptions>& other)
+  EIGEN_DEVICE_FUNC explicit inline Quaternion(const Quaternion<OtherScalar, OtherOptions>& other)
   { m_coeffs = other.coeffs().template cast<Scalar>(); }
 
   // We define a copy constructor, which means we don't get an implicit move constructor or assignment operator.
@@ -337,8 +337,8 @@
   template<typename Derived1, typename Derived2>
   EIGEN_DEVICE_FUNC static Quaternion FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Coefficients& coeffs() { return m_coeffs;}
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline const Coefficients& coeffs() const { return m_coeffs;}
+  EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs;}
+  EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs;}
 
   EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(NeedsAlignment))
 
@@ -415,9 +415,9 @@
       * \code *coeffs == {x, y, z, w} \endcode
       *
       * If the template parameter Options_ is set to #Aligned, then the pointer coeffs must be aligned. */
-    EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Map(const Scalar* coeffs) : m_coeffs(coeffs) {}
+    EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {}
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline const Coefficients& coeffs() const { return m_coeffs;}
+    EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs;}
 
   protected:
     const Coefficients m_coeffs;
@@ -452,10 +452,10 @@
       * \code *coeffs == {x, y, z, w} \endcode
       *
       * If the template parameter Options_ is set to #Aligned, then the pointer coeffs must be aligned. */
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR explicit EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {}
+    EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {}
 
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Coefficients& coeffs() { return m_coeffs; }
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline const Coefficients& coeffs() const { return m_coeffs; }
+    EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs; }
+    EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs; }
 
   protected:
     Coefficients m_coeffs;
@@ -483,7 +483,7 @@
 namespace internal {
 template<int Arch, class Derived1, class Derived2, typename Scalar> struct quat_product
 {
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){
     return Quaternion<Scalar>
     (
       a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(),
@@ -524,7 +524,7 @@
   *   - Via a Matrix3: 24 + 15n
   */
 template <class Derived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR typename QuaternionBase<Derived>::Vector3
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename QuaternionBase<Derived>::Vector3
 QuaternionBase<Derived>::_transformVector(const Vector3& v) const
 {
     // Note that this algorithm comes from the optimization by hand
@@ -573,7 +573,7 @@
 
 template<class Derived>
 template<class MatrixDerived>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Derived& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerived>& xpr)
+EIGEN_DEVICE_FUNC inline Derived& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerived>& xpr)
 {
   EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename MatrixDerived::Scalar>::value),
    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
@@ -585,7 +585,7 @@
   * be normalized, otherwise the result is undefined.
   */
 template<class Derived>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline typename QuaternionBase<Derived>::Matrix3
+EIGEN_DEVICE_FUNC inline typename QuaternionBase<Derived>::Matrix3
 QuaternionBase<Derived>::toRotationMatrix(void) const
 {
   // NOTE if inlined, then gcc 4.2 and 4.4 get rid of the temporary (not gcc 4.3 !!)
@@ -714,7 +714,7 @@
   * \sa QuaternionBase::conjugate()
   */
 template <class Derived>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Derived>::inverse() const
+EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Derived>::inverse() const
 {
   // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite()  ??
   Scalar n2 = this->squaredNorm();
@@ -731,12 +731,12 @@
 namespace internal {
 template<int Arch, class Derived, typename Scalar> struct quat_conj
 {
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Quaternion<Scalar> run(const QuaternionBase<Derived>& q){
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){
     return Quaternion<Scalar>(q.w(),-q.x(),-q.y(),-q.z());
   }
 };
 }
-
+                         
 /** \returns the conjugate of the \c *this which is equal to the multiplicative inverse
   * if the quaternion is normalized.
   * The conjugate of a quaternion represents the opposite rotation.
@@ -854,7 +854,7 @@
 struct quaternionbase_assign_impl<Other,4,1>
 {
   typedef typename Other::Scalar Scalar;
-  template<class Derived> EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR inline void run(QuaternionBase<Derived>& q, const Other& vec)
+  template<class Derived> EIGEN_DEVICE_FUNC static inline void run(QuaternionBase<Derived>& q, const Other& vec)
   {
     q.coeffs() = vec;
   }
diff --git a/Eigen/src/Geometry/RotationBase.h b/Eigen/src/Geometry/RotationBase.h
index 73fcf7f..f21277f 100644
--- a/Eigen/src/Geometry/RotationBase.h
+++ b/Eigen/src/Geometry/RotationBase.h
@@ -40,8 +40,8 @@
     typedef Matrix<Scalar,Dim,1> VectorType;
 
   public:
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
-    EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Derived& derived() { return *static_cast<Derived*>(this); }
+    EIGEN_DEVICE_FUNC inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
+    EIGEN_DEVICE_FUNC inline Derived& derived() { return *static_cast<Derived*>(this); }
 
     /** \returns an equivalent rotation matrix */
     EIGEN_DEVICE_FUNC inline RotationMatrixType toRotationMatrix() const { return derived().toRotationMatrix(); }
@@ -69,7 +69,7 @@
       *  - a vector of size Dim
       */
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR typename internal::rotation_base_generic_product_selector<Derived,OtherDerived,OtherDerived::IsVectorAtCompileTime>::ReturnType
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::rotation_base_generic_product_selector<Derived,OtherDerived,OtherDerived::IsVectorAtCompileTime>::ReturnType
     operator*(const EigenBase<OtherDerived>& e) const
     { return internal::rotation_base_generic_product_selector<Derived,OtherDerived>::run(derived(), e.derived()); }
 
@@ -125,7 +125,7 @@
 {
   enum { Dim = RotationDerived::Dim };
   typedef Matrix<typename RotationDerived::Scalar,Dim,1> ReturnType;
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR ReturnType run(const RotationDerived& r, const OtherVectorType& v)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE ReturnType run(const RotationDerived& r, const OtherVectorType& v)
   {
     return r._transformVector(v);
   }
diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h
index ce9bcf6..5f1e844 100644
--- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h
+++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h
@@ -137,18 +137,18 @@
 // TODO code generating macros could be moved to Macros.h and could include generation of documentation
 #define EIGEN_MAKE_CWISE_COMP_OP(OP, COMPARATOR) \
 template<typename OtherDerived> \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived> \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived> \
 OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
 { \
   return CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived>(derived(), other.derived()); \
 }\
 typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> > Cmp ## COMPARATOR ## ReturnType; \
 typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar, internal::cmp_ ## COMPARATOR>, const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject>, const Derived > RCmp ## COMPARATOR ## ReturnType; \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const Cmp ## COMPARATOR ## ReturnType \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Cmp ## COMPARATOR ## ReturnType \
 OP(const Scalar& s) const { \
   return this->OP(Derived::PlainObject::Constant(rows(), cols(), s)); \
 } \
-EIGEN_DEVICE_FUNC friend EIGEN_STRONG_INLINE EIGEN_CONSTEXPR const RCmp ## COMPARATOR ## ReturnType \
+EIGEN_DEVICE_FUNC friend EIGEN_STRONG_INLINE const RCmp ## COMPARATOR ## ReturnType \
 OP(const Scalar& s, const EIGEN_CURRENT_STORAGE_BASE_CLASS<Derived>& d) { \
   return Derived::PlainObject::Constant(d.rows(), d.cols(), s).OP(d); \
 }
diff --git a/Eigen/src/plugins/BlockMethods.h b/Eigen/src/plugins/BlockMethods.h
index a93fbec..68b9413 100644
--- a/Eigen/src/plugins/BlockMethods.h
+++ b/Eigen/src/plugins/BlockMethods.h
@@ -87,7 +87,7 @@
 /// \sa class Block, fix, fix<N>(int)
 ///
 template<typename NRowsType, typename NColsType>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 #ifndef EIGEN_PARSED_BY_DOXYGEN
 typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type
 #else
@@ -101,7 +101,7 @@
 
 /// This is the const version of block(Index,Index,NRowsType,NColsType)
 template<typename NRowsType, typename NColsType>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 #ifndef EIGEN_PARSED_BY_DOXYGEN
 const typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type
 #else
@@ -133,7 +133,7 @@
 /// \sa block(Index,Index,NRowsType,NColsType), class Block
 ///
 template<typename NRowsType, typename NColsType>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 #ifndef EIGEN_PARSED_BY_DOXYGEN
 typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type
 #else
@@ -147,7 +147,7 @@
 
 /// This is the const version of topRightCorner(NRowsType, NColsType).
 template<typename NRowsType, typename NColsType>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 #ifndef EIGEN_PARSED_BY_DOXYGEN
 const typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type
 #else
@@ -1023,7 +1023,7 @@
 /// \sa block(Index,Index,NRowsType,NColsType), class Block
 ///
 template<int NRows, int NCols>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol)
 {
   return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol);
@@ -1031,7 +1031,7 @@
 
 /// This is the const version of block<>(Index, Index). */
 template<int NRows, int NCols>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol) const
 {
   return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol);
@@ -1093,14 +1093,14 @@
 EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
 /**
   * \sa row(), class Block */
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 ColXpr col(Index i)
 {
   return ColXpr(derived(), i);
 }
 
 /// This is the const version of col().
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 ConstColXpr col(Index i) const
 {
   return ConstColXpr(derived(), i);
@@ -1114,14 +1114,14 @@
 EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
 /**
   * \sa col(), class Block */
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 RowXpr row(Index i)
 {
   return RowXpr(derived(), i);
 }
 
 /// This is the const version of row(). */
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 ConstRowXpr row(Index i) const
 {
   return ConstRowXpr(derived(), i);
@@ -1293,7 +1293,7 @@
 /// \sa segment(Index,NType), class Block
 ///
 template<int N>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 typename FixedSegmentReturnType<N>::Type segment(Index start, Index n = N)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@@ -1302,7 +1302,7 @@
 
 /// This is the const version of segment<int>(Index).
 template<int N>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 typename ConstFixedSegmentReturnType<N>::Type segment(Index start, Index n = N) const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@@ -1334,7 +1334,7 @@
 
 /// This is the const version of head<int>().
 template<int N>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 typename ConstFixedSegmentReturnType<N>::Type head(Index n = N) const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@@ -1426,7 +1426,7 @@
 
 /** This is the const version of subVector(Index) */
 template<DirectionType Direction>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 std::conditional_t<Direction==Vertical,ConstColXpr,ConstRowXpr>
 subVector(Index i) const
 {
diff --git a/Eigen/src/plugins/CommonCwiseBinaryOps.h b/Eigen/src/plugins/CommonCwiseBinaryOps.h
index 591902e..2f50329 100644
--- a/Eigen/src/plugins/CommonCwiseBinaryOps.h
+++ b/Eigen/src/plugins/CommonCwiseBinaryOps.h
@@ -38,7 +38,7 @@
   * \sa class CwiseBinaryOp, operator+(), operator-(), cwiseProduct()
   */
 template<typename CustomBinaryOp, typename OtherDerived>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE const CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>
 binaryExpr(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other, const CustomBinaryOp& func = CustomBinaryOp()) const
 {
diff --git a/Eigen/src/plugins/CommonCwiseUnaryOps.h b/Eigen/src/plugins/CommonCwiseUnaryOps.h
index 3d52535..390759c 100644
--- a/Eigen/src/plugins/CommonCwiseUnaryOps.h
+++ b/Eigen/src/plugins/CommonCwiseUnaryOps.h
@@ -40,7 +40,7 @@
 ///
 EIGEN_DOC_UNARY_ADDONS(operator-,opposite)
 ///
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC
 inline const NegativeReturnType
 operator-() const { return NegativeReturnType(derived()); }
 
@@ -57,7 +57,7 @@
 /// \sa class CwiseUnaryOp
 ///
 template<typename NewType>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC
 typename CastXpr<NewType>::Type
 cast() const
 {
diff --git a/Eigen/src/plugins/MatrixCwiseBinaryOps.h b/Eigen/src/plugins/MatrixCwiseBinaryOps.h
index 533383e..46fe08c 100644
--- a/Eigen/src/plugins/MatrixCwiseBinaryOps.h
+++ b/Eigen/src/plugins/MatrixCwiseBinaryOps.h
@@ -18,7 +18,7 @@
   * \sa class CwiseBinaryOp, cwiseAbs2
   */
 template<typename OtherDerived>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)
 cwiseProduct(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
 {
@@ -38,7 +38,7 @@
   * \sa cwiseNotEqual(), isApprox(), isMuchSmallerThan()
   */
 template<typename OtherDerived>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC
 inline const CwiseBinaryOp<numext::equal_to<Scalar>, const Derived, const OtherDerived>
 cwiseEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
 {
@@ -58,7 +58,7 @@
   * \sa cwiseEqual(), isApprox(), isMuchSmallerThan()
   */
 template<typename OtherDerived>
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC
 inline const CwiseBinaryOp<numext::not_equal_to<Scalar>, const Derived, const OtherDerived>
 cwiseNotEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
 {
diff --git a/Eigen/src/plugins/MatrixCwiseUnaryOps.h b/Eigen/src/plugins/MatrixCwiseUnaryOps.h
index dc578f6..98d925d 100644
--- a/Eigen/src/plugins/MatrixCwiseUnaryOps.h
+++ b/Eigen/src/plugins/MatrixCwiseUnaryOps.h
@@ -41,7 +41,7 @@
 ///
 /// \sa cwiseAbs()
 ///
-EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE const CwiseAbs2ReturnType
 cwiseAbs2() const { return CwiseAbs2ReturnType(derived()); }
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 7d6f978..8ffa002 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -186,7 +186,6 @@
 ei_add_test(vectorization_logic)
 ei_add_test(basicstuff)
 ei_add_test(constructor)
-ei_add_test(compile_time_evaluation)
 ei_add_test(linearstructure)
 ei_add_test(integer_types)
 ei_add_test(unalignedcount)
diff --git a/test/compile_time_evaluation.cpp b/test/compile_time_evaluation.cpp
deleted file mode 100644
index 6ed0a4a..0000000
--- a/test/compile_time_evaluation.cpp
+++ /dev/null
@@ -1,288 +0,0 @@
-// main.h adds instrumentation which breaks constexpr so we do not run any tests in here,
-// this is strictly compile-time.
-#include <Eigen/Dense>
-#include <Eigen/Geometry>
-
-using namespace Eigen;
-
-template<int Blah>
-struct AssertConstexpr {};
-#define assert_constexpr(expr)            \
-  do {                                    \
-    (void)  AssertConstexpr<(expr, 1)>(); \
-  } while (false)
-
-constexpr bool zeroSized()
-{
-  constexpr Matrix<float, 0, 0> m0;
-  static_assert(m0.size() == 0, "");
-
-  constexpr Matrix<float, 0, 0> m1;
-  static_assert(m0 == m1, "");
-  static_assert(!(m0 != m1), "");
-
-  constexpr Array<int, 0, 0> a0;
-  static_assert(a0.size() == 0, "");
-
-  constexpr Array<int, 0, 0> a1;
-  static_assert((a0 == a1).all(), "");
-  static_assert((a0 != a1).count() == 0, "");
-
-  constexpr Array<float, 0, 0> af;
-  static_assert(m0 == af.matrix(), "");
-  static_assert((m0.array() == af).all(), "");
-  static_assert(m0.array().matrix() == m0, "");
-
-  return true;
-}
-
-static_assert(zeroSized(), "");
-
-static constexpr double static_data[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
-
-constexpr bool maps()
-{
-  constexpr Map<const Vector4d> m(static_data);
-  static_assert(m(0) == 1, "");
-  constexpr Map<const Array<double, 4, 1>> a(static_data);
-  static_assert(m == a.matrix(), "");
-  static_assert(m.size() == 4, "");
-  static_assert(a.size() == 4, "");
-  static_assert(m.rows() == 4 && m.cols() == 1, "");
-  return true;
-}
-
-static_assert(maps(), "");
-
-constexpr bool nc_maps()
-{
-  bool result = true;
-
-  double d[] = {1, 2, 3, 4};
-  Map<Vector4d> m(d);
-  result = result && (m.x() == 1 && m.y() == 2 && m.z() == 3 && m.w() == 4);
-
-  float array[3] = {};
-  auto v = Vector3f::Map(array);
-  v.fill(10);
-  result = result && (v.array() == 10).all();
-
-  return result;
-}
-
-constexpr bool blocks()
-{
-  constexpr Map<const Matrix2d> m(static_data);
-  constexpr auto block = m.block<2,1>(0, 1);
-
-  constexpr Map<const Vector2d> v(static_data + 2);
-  static_assert(block == v, "");
-
-  return true;
-}
-
-static_assert(blocks(), "");
-
-constexpr bool diagonal_row_columns()
-{
-  constexpr Map<const Matrix2d> m(static_data);
-  static_assert(m.block<2,1>(0, 1) == m.col(1), "");
-  static_assert(m.block<1,2>(1, 0) == m.row(1), "");
-  static_assert(m.diagonal()(0) == 1 && m.diagonal()(1) == 4, "");
-  return true;
-}
-
-static_assert(diagonal_row_columns(), "");
-
-static constexpr int static_data_antisym[] = {
-  0, 1, -1,
-  -1, 0, 1,
-  1, -1, 0 };
-
-constexpr bool transpose_unaryminus()
-{
-  constexpr Map<const Matrix<int, 3, 3>> m(static_data_antisym);
-
-  static_assert(m.transpose() == -m, "");
-  static_assert(-m.transpose() == m, "");
-  static_assert((-m).transpose() == m, "");
-
-  static_assert(m.transpose() != m, "");
-  static_assert(-m.transpose() != -m, "");
-  static_assert((-m).transpose() != -m, "");
-
-  return true;
-}
-
-static_assert(transpose_unaryminus(), "");
-
-constexpr bool reductions()
-{
-  constexpr Map<const Matrix<int, 3, 3>> m(static_data_antisym);
-  static_assert(m.size() == 9, "");
-
-  return true;
-}
-
-static_assert(reductions(), "");
-
-constexpr bool scalar_mult_div()
-{
-  constexpr Map<const Matrix2d> m(static_data);
-
-  static_assert((m * 2)(0,0) == 2, "");
-  static_assert((m / 2)(1,1) == 2*m(0,0), "");
-
-  constexpr double c = 8;
-  static_assert((m * c)(0,0) == 8, "");
-  static_assert((m.array() / c).matrix() == 1/c * m, "");
-  return true;
-}
-
-static_assert(scalar_mult_div(), "");
-
-constexpr bool constant_identity()
-{
-  static_assert(Matrix3f::Zero()(0,0) == 0, "");
-  static_assert(Matrix4d::Ones()(3,3) == 1, "");
-  static_assert(Matrix2i::Identity()(0,0) == 1 && Matrix2i::Identity()(1,0) == 0, "");
-  static_assert(Matrix<float, Dynamic, Dynamic>::Ones(2,3).size() == 6, "");
-  static_assert(Matrix<float, Dynamic, 1>::Zero(10).rows() == 10, "");
-
-  return true;
-}
-
-static_assert(constant_identity(), "");
-
-constexpr bool dynamic_basics()
-{
-  // This verifies that we only calculate the entry that we need.
-  static_assert(Matrix<double, Dynamic, Dynamic>::Identity(50000,50000).array()(25,25) == 1, "");
-
-  static_assert(Matrix4d::Identity().block(1,1,2,2)(0,1) == 0, "");
-  static_assert(MatrixXf::Identity(50,50).transpose() == MatrixXf::Identity(50, 50), "");
-
-  constexpr Map<const MatrixXi> dynMap(static_data_antisym, 3, 3);
-  constexpr Map<const Matrix3i> staticMap(static_data_antisym);
-  static_assert(dynMap == staticMap, "");
-  static_assert(dynMap.transpose() != staticMap, "");
-  // e.g. this hits an assertion at compile-time that would otherwise fail at runtime.
-  //static_assert(dynMap != staticMap.block(1,2,0,0));
-
-  return true;
-}
-
-static_assert(dynamic_basics(), "");
-
-constexpr bool sums()
-{
-  constexpr Map<const Matrix<double, 4, 4>> m(static_data);
-  constexpr auto b(m.block<2,2>(0,0)); // 1 2 5 6
-  constexpr Map<const Matrix2d> m2(static_data); // 1 2 3 4
-
-  static_assert((b + m2).col(0) == 2*Map<const Vector2d>(static_data), "");
-  static_assert(b + m2 == m2 + b, "");
-
-  static_assert((b - m2).col(0) == Vector2d::Zero(), "");
-  static_assert((b - m2).col(1) == 2*Vector2d::Ones(), "");
-
-  static_assert((2*b - m2).col(0) == b.col(0), "");
-  static_assert((b - 2*m2).col(0) == -b.col(0), "");
-
-  static_assert((b - m2 + b + m2 - 2*b) == Matrix2d::Zero(), "");
-
-  return true;
-}
-
-static_assert(sums(), "");
-
-constexpr bool unit_vectors()
-{
-  static_assert(Vector4d::UnitX()(0) == 1, "");
-  static_assert(Vector4d::UnitY()(1) == 1, "");
-  static_assert(Vector4d::UnitZ()(2) == 1, "");
-  static_assert(Vector4d::UnitW()(3) == 1, "");
-
-  static_assert(Vector4d::UnitX().dot(Vector4d::UnitX()) == 1, "");
-  static_assert(Vector4d::UnitX().dot(Vector4d::UnitY()) == 0, "");
-  static_assert((Vector4d::UnitX() + Vector4d::UnitZ()).dot(Vector4d::UnitY() + Vector4d::UnitW()) == 0, "");
-
-  return true;
-}
-
-static_assert(unit_vectors(), "");
-
-constexpr bool construct_from_other()
-{
-  return true;
-}
-
-static_assert(construct_from_other(), "");
-
-constexpr bool construct_from_values()
-{
-  return true;
-}
-
-static_assert(construct_from_values(), "");
-
-constexpr bool triangular()
-{
-  bool result = true;
-
-  return result;
-}
-
-static_assert(triangular(), "");
-
-constexpr bool nc_construct_from_values()
-{
-  bool result = true;
-
-  return result;
-}
-
-constexpr bool nc_crossproduct()
-{
-  bool result = true;
-  return result;
-}
-
-constexpr bool nc_cast()
-{
-  bool result = true;
-
-  return result;
-}
-
-constexpr bool nc_product()
-{
-  bool result = true;
-  return result;
-}
-
-static constexpr double static_data_quat[] = { 0, 1, 1, 0 };
-
-constexpr bool nc_quat_mult()
-{
-  bool result = static_data_quat[3] == 0; // Silence warning about unused with C++14
-
-  return result;
-}
-
-// Run tests that aren't explicitly constexpr.
-constexpr bool test_nc()
-{
-  assert_constexpr(nc_maps());
-  assert_constexpr(nc_construct_from_values());
-  assert_constexpr(nc_crossproduct());
-  assert_constexpr(nc_cast());
-  assert_constexpr(nc_product());
-  assert_constexpr(nc_quat_mult());
-  return true;
-}
-
-int main()
-{
-  return !test_nc();
-}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h
index 3b09a0f..8ea1bf0 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h
@@ -130,7 +130,13 @@
  public:
   // Use the default stream on the current device
   GpuStreamDevice() : stream_(&default_stream), scratch_(NULL), semaphore_(NULL) {
-    gpuGetDevice(&device_);
+    gpuError_t status = gpuGetDevice(&device_);
+    if (status != gpuSuccess) {
+      std::cerr << "Failed to get the GPU devices "
+                << gpuGetErrorString(status)
+                << std::endl;
+      gpu_assert(status == gpuSuccess);
+    }
   }
   // Use the default stream on the specified device
   GpuStreamDevice(int device) : stream_(&default_stream), device_(device), scratch_(NULL), semaphore_(NULL) {}
@@ -141,7 +147,13 @@
   GpuStreamDevice(const gpuStream_t* stream, int device = -1)
       : stream_(stream), device_(device), scratch_(NULL), semaphore_(NULL) {
     if (device < 0) {
-      gpuGetDevice(&device_);
+      gpuError_t status = gpuGetDevice(&device_);
+      if (status != gpuSuccess) {
+        std::cerr << "Failed to get the GPU devices "
+                  << gpuGetErrorString(status)
+                  << std::endl;
+        gpu_assert(status == gpuSuccess);
+      }
     } else {
       int num_devices;
       gpuError_t err = gpuGetDeviceCount(&num_devices);
diff --git a/unsupported/test/sparse_extra.cpp b/unsupported/test/sparse_extra.cpp
index 5512871..4a1f938 100644
--- a/unsupported/test/sparse_extra.cpp
+++ b/unsupported/test/sparse_extra.cpp
@@ -167,7 +167,7 @@
 template <typename Scalar>
 void check_sparse_inverse() {
   typedef SparseMatrix<Scalar> MatrixType;
-
+ 
   Matrix<Scalar, -1, -1> A;
   A.resize(1000, 1000);
   A.fill(0);