Update Eigen to: https://gitlab.com/libeigen/eigen/-/commit/5ad8b9bfe2bf75620bc89467c5cc051fc2a597df

PiperOrigin-RevId: 389724736
Change-Id: Id345049614ea97804c219ed9c676ab69e85bc3e8
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index f8c87d0..7d76f0c 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -785,6 +785,16 @@
   dense_assignment_loop<Kernel>::run(kernel);
 }
 
+// Specialization for filling the destination with a constant value.
+#ifndef EIGEN_GPU_COMPILE_PHASE
+template<typename DstXprType>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const Eigen::CwiseNullaryOp<Eigen::internal::scalar_constant_op<typename DstXprType::Scalar>, DstXprType>& src, const internal::assign_op<typename DstXprType::Scalar,typename DstXprType::Scalar>& func)
+{
+  resize_if_allowed(dst, src, func);
+  std::fill_n(dst.data(), dst.size(), src.functor()());
+}
+#endif
+
 template<typename DstXprType, typename SrcXprType>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
 {
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 53800a0..cf677a1 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -129,6 +129,22 @@
 
 template<typename T> struct packet_traits<const T> : packet_traits<T> { };
 
+template<typename T> struct unpacket_traits
+{
+  typedef T type;
+  typedef T half;
+  enum
+  {
+    size = 1,
+    alignment = 1,
+    vectorizable = false,
+    masked_load_available=false,
+    masked_store_available=false
+  };
+};
+
+template<typename T> struct unpacket_traits<const T> : unpacket_traits<T> { };
+
 template <typename Src, typename Tgt> struct type_casting_traits {
   enum {
     VectorizedCast = 0,
@@ -154,6 +170,18 @@
   T m_val;
 };
 
+
+/** \internal A convenience utility for determining if the type is a scalar.
+ * This is used to enable some generic packet implementations.
+ */
+template<typename Packet>
+struct is_scalar {
+  typedef typename unpacket_traits<Packet>::type Scalar;
+  enum {
+    value = internal::is_same<Packet, Scalar>::value
+  };
+};
+
 /** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
 template <typename SrcPacket, typename TgtPacket>
 EIGEN_DEVICE_FUNC inline TgtPacket
@@ -215,13 +243,59 @@
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pdiv(const Packet& a, const Packet& b) { return a/b; }
 
-/** \internal \returns one bits */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-ptrue(const Packet& /*a*/) { Packet b; memset((void*)&b, 0xff, sizeof(b)); return b;}
+// In the generic case, memset to all one bits.
+template<typename Packet, typename EnableIf = void>
+struct ptrue_impl {
+  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/){
+    Packet b;
+    memset(static_cast<void*>(&b), 0xff, sizeof(Packet));
+    return b;
+  }
+};
 
-/** \internal \returns zero bits */
+// For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value).
+// Although this is technically not a valid bitmask, the scalar path for pselect
+// uses a comparison to zero, so this should still work in most cases. We don't
+// have another option, since the scalar type requires initialization.
+template<typename T>
+struct ptrue_impl<T,
+    typename internal::enable_if<is_scalar<T>::value && NumTraits<T>::RequireInitialization>::type > {
+  static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/){
+    return T(1);
+  }
+};
+
+/** \internal \returns one bits. */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pzero(const Packet& /*a*/) { Packet b; memset((void*)&b, 0, sizeof(b)); return b;}
+ptrue(const Packet& a) {
+  return ptrue_impl<Packet>::run(a);
+}
+
+// In the general case, memset to zero.
+template<typename Packet, typename EnableIf = void>
+struct pzero_impl {
+  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
+    Packet b;
+    memset(static_cast<void*>(&b), 0x00, sizeof(Packet));
+    return b;
+  }
+};
+
+// For scalars, explicitly set to Scalar(0), since the underlying representation
+// for zero may not consist of all-zero bits.
+template<typename T>
+struct pzero_impl<T,
+    typename internal::enable_if<is_scalar<T>::value>::type> {
+  static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) {
+    return T(0);
+  }
+};
+
+/** \internal \returns packet of zeros */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pzero(const Packet& a) {
+  return pzero_impl<Packet>::run(a);
+}
 
 /** \internal \returns a <= b as a bit mask */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
@@ -238,33 +312,6 @@
 /** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? pzero(a) : ptrue(a); }
-template<> EIGEN_DEVICE_FUNC inline float pzero<float>(const float& a) {
-  EIGEN_UNUSED_VARIABLE(a)
-  return 0.f;
-}
-
-template<> EIGEN_DEVICE_FUNC inline double pzero<double>(const double& a) {
-  EIGEN_UNUSED_VARIABLE(a)
-  return 0.;
-}
-
-template <typename RealScalar>
-EIGEN_DEVICE_FUNC inline std::complex<RealScalar> ptrue(const std::complex<RealScalar>& /*a*/) {
-  RealScalar b = ptrue(RealScalar(0));
-  return std::complex<RealScalar>(b, b);
-}
-
-template <typename Packet, typename Op>
-EIGEN_DEVICE_FUNC inline Packet bitwise_helper(const Packet& a, const Packet& b, Op op) {
-  const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
-  const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
-  Packet c;
-  unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
-  for (size_t i = 0; i < sizeof(Packet); ++i) {
-    *c_ptr++ = op(*a_ptr++, *b_ptr++);
-  }
-  return c;
-}
 
 template<typename T>
 struct bit_and {
@@ -287,42 +334,123 @@
   }
 };
 
+template<typename T>
+struct bit_not {
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a) const {
+    return ~a;
+  }
+};
+
+// Use operators &, |, ^, ~.
+template<typename T>
+struct operator_bitwise_helper {
+  EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }
+  EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }
+  EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor<T>()(a, b); }
+  EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not<T>()(a); }
+};
+
+// Apply binary operations byte-by-byte
+template<typename T>
+struct bytewise_bitwise_helper {
+  EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
+    return binary(a, b, bit_and<unsigned char>());
+  }
+  EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) {
+    return binary(a, b, bit_or<unsigned char>());
+  }
+  EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
+    return binary(a, b, bit_xor<unsigned char>());
+  }
+  EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) {
+    return unary(a, bit_not<unsigned char>());
+  }
+
+ private:
+  template<typename Op>
+  EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
+    const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
+    T c;
+    unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
+    for (size_t i = 0; i < sizeof(T); ++i) {
+      *c_ptr++ = op(*a_ptr++);
+    }
+    return c;
+  }
+
+  template<typename Op>
+  EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
+    const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
+    const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
+    T c;
+    unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
+    for (size_t i = 0; i < sizeof(T); ++i) {
+      *c_ptr++ = op(*a_ptr++, *b_ptr++);
+    }
+    return c;
+  }
+};
+
+// In the general case, use byte-by-byte manipulation.
+template<typename T, typename EnableIf = void>
+struct bitwise_helper : public bytewise_bitwise_helper<T> {};
+
+// For integers or non-trivial scalars, use binary operators.
+template<typename T>
+struct bitwise_helper<T,
+  typename internal::enable_if<
+    is_scalar<T>::value && (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>::type
+  > : public operator_bitwise_helper<T> {};
+
 /** \internal \returns the bitwise and of \a a and \a b */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pand(const Packet& a, const Packet& b) {
-  return bitwise_helper(a, b, bit_and<unsigned char>());
+  return bitwise_helper<Packet>::bitwise_and(a, b);
 }
 
 /** \internal \returns the bitwise or of \a a and \a b */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 por(const Packet& a, const Packet& b) {
-  return bitwise_helper(a ,b, bit_or<unsigned char>());
+  return bitwise_helper<Packet>::bitwise_or(a, b);
 }
 
 /** \internal \returns the bitwise xor of \a a and \a b */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pxor(const Packet& a, const Packet& b) {
-  return bitwise_helper(a ,b, bit_xor<unsigned char>());
+  return bitwise_helper<Packet>::bitwise_xor(a, b);
+}
+
+/** \internal \returns the bitwise not of \a a */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pnot(const Packet& a) {
+  return bitwise_helper<Packet>::bitwise_not(a);
 }
 
 /** \internal \returns the bitwise and of \a a and not \a b */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pandnot(const Packet& a, const Packet& b) { return pand(a, pxor(ptrue(b), b)); }
+pandnot(const Packet& a, const Packet& b) { return pand(a, pnot(b)); }
+
+// In the general case, use bitwise select.
+template<typename Packet, typename EnableIf = void>
+struct pselect_impl {
+  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
+    return por(pand(a,mask),pandnot(b,mask));
+  }
+};
+
+// For scalars, use ternary select.
+template<typename Packet>
+struct pselect_impl<Packet,
+    typename internal::enable_if<is_scalar<Packet>::value>::type > {
+  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
+    return numext::equal_strict(mask, Packet(0)) ? b : a;
+  }
+};
 
 /** \internal \returns \a or \b for each field in packet according to \mask */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pselect(const Packet& mask, const Packet& a, const Packet& b) {
-  return por(pand(a,mask),pandnot(b,mask));
-}
-
-template<> EIGEN_DEVICE_FUNC inline float pselect<float>(
-    const float& cond, const float& a, const float&b) {
-  return numext::equal_strict(cond,0.f) ? b : a;
-}
-
-template<> EIGEN_DEVICE_FUNC inline double pselect<double>(
-    const double& cond, const double& a, const double& b) {
-  return numext::equal_strict(cond,0.) ? b : a;
+  return pselect_impl<Packet>::run(mask, a, b);
 }
 
 template<> EIGEN_DEVICE_FUNC inline bool pselect<bool>(
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index d2aeef4..8cac1be 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -219,6 +219,7 @@
     size = 16,
     HasHalfPacket = 1,
 
+    HasCmp       = 1,
     HasAdd       = 1,
     HasSub       = 1,
     HasShift     = 1,
@@ -248,6 +249,7 @@
     size = 16,
     HasHalfPacket = 1,
 
+    HasCmp       = 1,
     HasAdd       = 1,
     HasSub       = 1,
     HasShift     = 1,
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index 63f09ab..fc87815 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -110,13 +110,13 @@
   enum {
     Conj = NumTraits<LhsScalar>::IsComplex
   };
-  
+
   typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_conj_product_op>::ReturnType result_type;
-  
+
   EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const
   { return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); }
-  
+
   template<typename Packet>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const
   { return conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }
@@ -205,7 +205,11 @@
 struct functor_traits<scalar_cmp_op<LhsScalar,RhsScalar, cmp> > {
   enum {
     Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
-    PacketAccess = false
+    PacketAccess = is_same<LhsScalar, RhsScalar>::value &&
+        packet_traits<LhsScalar>::HasCmp &&
+        // Since return type is bool, we currently require the inputs
+        // to be bool to enable packet access.
+        is_same<LhsScalar, bool>::value
   };
 };
 
@@ -221,6 +225,9 @@
   typedef bool result_type;
   EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a==b;}
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::pcmp_eq(a,b); }
 };
 template<typename LhsScalar, typename RhsScalar>
 struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_LT> : binary_op_base<LhsScalar,RhsScalar>
@@ -228,6 +235,9 @@
   typedef bool result_type;
   EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<b;}
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::pcmp_lt(a,b); }
 };
 template<typename LhsScalar, typename RhsScalar>
 struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_LE> : binary_op_base<LhsScalar,RhsScalar>
@@ -235,6 +245,9 @@
   typedef bool result_type;
   EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<=b;}
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::pcmp_le(a,b); }
 };
 template<typename LhsScalar, typename RhsScalar>
 struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_GT> : binary_op_base<LhsScalar,RhsScalar>
@@ -242,6 +255,9 @@
   typedef bool result_type;
   EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>b;}
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::pcmp_lt(b,a); }
 };
 template<typename LhsScalar, typename RhsScalar>
 struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_GE> : binary_op_base<LhsScalar,RhsScalar>
@@ -249,6 +265,9 @@
   typedef bool result_type;
   EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>=b;}
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::pcmp_le(b,a); }
 };
 template<typename LhsScalar, typename RhsScalar>
 struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_UNORD> : binary_op_base<LhsScalar,RhsScalar>
@@ -256,6 +275,9 @@
   typedef bool result_type;
   EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return !(a<=b || b<=a);}
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::pcmp_eq(internal::por(internal::pcmp_le(a, b), internal::pcmp_le(b, a)), internal::pzero(a)); }
 };
 template<typename LhsScalar, typename RhsScalar>
 struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_NEQ> : binary_op_base<LhsScalar,RhsScalar>
@@ -263,6 +285,9 @@
   typedef bool result_type;
   EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a!=b;}
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::pcmp_eq(internal::pcmp_eq(a, b), internal::pzero(a)); }
 };
 
 /** \internal
diff --git a/Eigen/src/Core/functors/StlFunctors.h b/Eigen/src/Core/functors/StlFunctors.h
index d2e7b5b..4570c9b 100644
--- a/Eigen/src/Core/functors/StlFunctors.h
+++ b/Eigen/src/Core/functors/StlFunctors.h
@@ -12,6 +12,28 @@
 
 namespace Eigen {
 
+// Portable replacements for certain functors.
+namespace numext {
+
+template<typename T = void>
+struct equal_to {
+  typedef bool result_type;
+  EIGEN_DEVICE_FUNC bool operator()(const T& lhs, const T& rhs) const {
+    return lhs == rhs;
+  }
+};
+
+template<typename T = void>
+struct not_equal_to {
+  typedef bool result_type;
+  EIGEN_DEVICE_FUNC bool operator()(const T& lhs, const T& rhs) const {
+    return lhs != rhs;
+  }
+};
+
+}
+
+
 namespace internal {
 
 // default functor traits for STL functors:
@@ -69,9 +91,17 @@
 { enum { Cost = 1, PacketAccess = false }; };
 
 template<typename T>
+struct functor_traits<numext::equal_to<T> >
+  : functor_traits<std::equal_to<T> > {};
+
+template<typename T>
 struct functor_traits<std::not_equal_to<T> >
 { enum { Cost = 1, PacketAccess = false }; };
 
+template<typename T>
+struct functor_traits<numext::not_equal_to<T> >
+  : functor_traits<std::not_equal_to<T> > {};
+
 #if (EIGEN_COMP_CXXVER < 11)
 // std::binder* are deprecated since c++11 and will be removed in c++17
 template<typename T>
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 1116321..f35b760 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -44,7 +44,7 @@
 #endif // defined(EIGEN_DEFAULT_L2_CACHE_SIZE)
 
 #if defined(EIGEN_DEFAULT_L3_CACHE_SIZE)
-#define EIGEN_SET_DEFAULT_L3_CACHE_SIZE(val) EIGEN_SET_DEFAULT_L3_CACHE_SIZE
+#define EIGEN_SET_DEFAULT_L3_CACHE_SIZE(val) EIGEN_DEFAULT_L3_CACHE_SIZE
 #else
 #define EIGEN_SET_DEFAULT_L3_CACHE_SIZE(val) val
 #endif // defined(EIGEN_DEFAULT_L3_CACHE_SIZE)
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index 4420632..e16a564 100755
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -518,7 +518,7 @@
 
 template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectAccess>
 struct extract_data_selector {
-  static const typename T::Scalar* run(const T& m)
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static const typename T::Scalar* run(const T& m)
   {
     return blas_traits<T>::extract(m).data();
   }
@@ -529,7 +529,8 @@
   static typename T::Scalar* run(const T&) { return 0; }
 };
 
-template<typename T> const typename T::Scalar* extract_data(const T& m)
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const typename T::Scalar* extract_data(const T& m)
 {
   return extract_data_selector<T>::run(m);
 }
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index f7f907a..35dcaa7 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -157,7 +157,7 @@
 /** \deprecated \ingroup flags
   *
   * means the first coefficient packet is guaranteed to be aligned.
-  * An expression cannot has the AlignedBit without the PacketAccessBit flag.
+  * An expression cannot have the AlignedBit without the PacketAccessBit flag.
   * In other words, this means we are allow to perform an aligned packet access to the first element regardless
   * of the expression kind:
   * \code
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index fda253d..fd79c14 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -1189,8 +1189,12 @@
   #define EIGEN_USING_STD(FUNC) using std::FUNC;
 #endif
 
-#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_COMP_NVCC)
-  // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
+#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || (EIGEN_COMP_MSVC == 1900 && EIGEN_COMP_NVCC))
+  // For older MSVC versions, as well as 1900 && CUDA 8, using the base operator is necessary,
+  //   otherwise we get duplicate definition errors
+  // For later MSVC versions, we require explicit operator= definition, otherwise we get
+  //   use of implicitly deleted operator errors.
+  // (cf Bugs 920, 1000, 1324, 2291)
   #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
     using Base::operator =;
 #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index f232317..71c32b8 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -184,19 +184,7 @@
 
 template<typename T> struct packet_traits;
 
-template<typename T> struct unpacket_traits
-{
-  typedef T type;
-  typedef T half;
-  enum
-  {
-    size = 1,
-    alignment = 1,
-    vectorizable = false,
-    masked_load_available=false,
-    masked_store_available=false
-  };
-};
+template<typename T> struct unpacket_traits;
 
 template<int Size, typename PacketType,
          bool Stop = Size==Dynamic || (Size%unpacket_traits<PacketType>::size)==0 || is_same<PacketType,typename unpacket_traits<PacketType>::half>::value>
diff --git a/Eigen/src/LU/InverseImpl.h b/Eigen/src/LU/InverseImpl.h
index 1bab00c..a40cefa 100644
--- a/Eigen/src/LU/InverseImpl.h
+++ b/Eigen/src/LU/InverseImpl.h
@@ -77,10 +77,11 @@
     const MatrixType& matrix, const typename ResultType::Scalar& invdet,
     ResultType& result)
 {
+  typename ResultType::Scalar temp = matrix.coeff(0,0);
   result.coeffRef(0,0) =  matrix.coeff(1,1) * invdet;
   result.coeffRef(1,0) = -matrix.coeff(1,0) * invdet;
   result.coeffRef(0,1) = -matrix.coeff(0,1) * invdet;
-  result.coeffRef(1,1) =  matrix.coeff(0,0) * invdet;
+  result.coeffRef(1,1) =  temp * invdet;
 }
 
 template<typename MatrixType, typename ResultType>
@@ -143,13 +144,18 @@
     const Matrix<typename ResultType::Scalar,3,1>& cofactors_col0,
     ResultType& result)
 {
-  result.row(0) = cofactors_col0 * invdet;
-  result.coeffRef(1,0) =  cofactor_3x3<MatrixType,0,1>(matrix) * invdet;
-  result.coeffRef(1,1) =  cofactor_3x3<MatrixType,1,1>(matrix) * invdet;
+  // Compute cofactors in a way that avoids aliasing issues.
+  typedef typename ResultType::Scalar Scalar;
+  const Scalar c01 = cofactor_3x3<MatrixType,0,1>(matrix) * invdet;
+  const Scalar c11 = cofactor_3x3<MatrixType,1,1>(matrix) * invdet;
+  const Scalar c02 = cofactor_3x3<MatrixType,0,2>(matrix) * invdet;
   result.coeffRef(1,2) =  cofactor_3x3<MatrixType,2,1>(matrix) * invdet;
-  result.coeffRef(2,0) =  cofactor_3x3<MatrixType,0,2>(matrix) * invdet;
   result.coeffRef(2,1) =  cofactor_3x3<MatrixType,1,2>(matrix) * invdet;
   result.coeffRef(2,2) =  cofactor_3x3<MatrixType,2,2>(matrix) * invdet;
+  result.coeffRef(1,0) =  c01;
+  result.coeffRef(1,1) =  c11;
+  result.coeffRef(2,0) =  c02;
+  result.row(0) = cofactors_col0 * invdet;
 }
 
 template<typename MatrixType, typename ResultType>
@@ -181,14 +187,13 @@
     bool& invertible
   )
   {
-    using std::abs;
     typedef typename ResultType::Scalar Scalar;
     Matrix<Scalar,3,1> cofactors_col0;
     cofactors_col0.coeffRef(0) =  cofactor_3x3<MatrixType,0,0>(matrix);
     cofactors_col0.coeffRef(1) =  cofactor_3x3<MatrixType,1,0>(matrix);
     cofactors_col0.coeffRef(2) =  cofactor_3x3<MatrixType,2,0>(matrix);
     determinant = (cofactors_col0.cwiseProduct(matrix.col(0))).sum();
-    invertible = abs(determinant) > absDeterminantThreshold;
+    invertible = Eigen::numext::abs(determinant) > absDeterminantThreshold;
     if(!invertible) return;
     const Scalar invdet = Scalar(1) / determinant;
     compute_inverse_size3_helper(matrix, invdet, cofactors_col0, inverse);
@@ -273,7 +278,13 @@
     using std::abs;
     determinant = matrix.determinant();
     invertible = abs(determinant) > absDeterminantThreshold;
-    if(invertible) compute_inverse<MatrixType, ResultType>::run(matrix, inverse);
+    if(invertible && extract_data(matrix) != extract_data(inverse)) {
+      compute_inverse<MatrixType, ResultType>::run(matrix, inverse);
+    }
+    else if(invertible) {
+      MatrixType matrix_t = matrix;
+      compute_inverse<MatrixType, ResultType>::run(matrix_t, inverse);
+    }
   }
 };
 
@@ -347,6 +358,8 @@
   *
   * This is only for fixed-size square matrices of size up to 4x4.
   *
+  * Note that this will trigger a copy of the input matrix when computing the inverse in place.
+  *
   * \param inverse Reference to the matrix in which to store the inverse.
   * \param determinant Reference to the variable in which to store the determinant.
   * \param invertible Reference to the bool variable in which to store whether the matrix is invertible.
@@ -387,6 +400,8 @@
   *
   * This is only for fixed-size square matrices of size up to 4x4.
   *
+  * Note that this will trigger a copy of the input matrix when computing the inverse in place.
+  *
   * \param inverse Reference to the matrix in which to store the inverse.
   * \param invertible Reference to the bool variable in which to store whether the matrix is invertible.
   * \param absDeterminantThreshold Optional parameter controlling the invertibility check.
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h
index 8551a06..9d95acd 100644
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -112,12 +112,12 @@
     ColsAtCompileTime = MatrixType::ColsAtCompileTime,
     MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-    TrOptions = RowsAtCompileTime==1 ? (int(MatrixType::Options) & ~(int(RowMajor)))
-              : ColsAtCompileTime==1 ? (int(MatrixType::Options) | int(RowMajor))
-              : MatrixType::Options
+    Options = MatrixType::Options
   };
-  typedef Matrix<Scalar, ColsAtCompileTime, RowsAtCompileTime, TrOptions, MaxColsAtCompileTime, MaxRowsAtCompileTime>
-          TransposeTypeWithSameStorageOrder;
+
+  typedef typename internal::make_proper_matrix_type<
+    Scalar, ColsAtCompileTime, RowsAtCompileTime, Options, MaxColsAtCompileTime, MaxRowsAtCompileTime
+  >::type TransposeTypeWithSameStorageOrder;
 
   void allocate(const JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner>& svd)
   {
@@ -202,13 +202,12 @@
     ColsAtCompileTime = MatrixType::ColsAtCompileTime,
     MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-    TrOptions = RowsAtCompileTime==1 ? (int(MatrixType::Options) & ~(int(RowMajor)))
-              : ColsAtCompileTime==1 ? (int(MatrixType::Options) | int(RowMajor))
-              : MatrixType::Options
+    Options = MatrixType::Options
   };
 
-  typedef Matrix<Scalar, ColsAtCompileTime, RowsAtCompileTime, TrOptions, MaxColsAtCompileTime, MaxRowsAtCompileTime>
-          TransposeTypeWithSameStorageOrder;
+  typedef typename internal::make_proper_matrix_type<
+    Scalar, ColsAtCompileTime, RowsAtCompileTime, Options, MaxColsAtCompileTime, MaxRowsAtCompileTime
+  >::type TransposeTypeWithSameStorageOrder;
 
   void allocate(const JacobiSVD<MatrixType, ColPivHouseholderQRPreconditioner>& svd)
   {
@@ -303,8 +302,9 @@
     Options = MatrixType::Options
   };
 
-  typedef Matrix<Scalar, ColsAtCompileTime, RowsAtCompileTime, Options, MaxColsAtCompileTime, MaxRowsAtCompileTime>
-          TransposeTypeWithSameStorageOrder;
+  typedef typename internal::make_proper_matrix_type<
+    Scalar, ColsAtCompileTime, RowsAtCompileTime, Options, MaxColsAtCompileTime, MaxRowsAtCompileTime
+  >::type TransposeTypeWithSameStorageOrder;
 
   void allocate(const JacobiSVD<MatrixType, HouseholderQRPreconditioner>& svd)
   {
diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h
index 616b4a0..1db906d 100644
--- a/Eigen/src/SparseCore/SparseMatrix.h
+++ b/Eigen/src/SparseCore/SparseMatrix.h
@@ -253,9 +253,10 @@
     inline void setZero()
     {
       m_data.clear();
-      memset(m_outerIndex, 0, (m_outerSize+1)*sizeof(StorageIndex));
-      if(m_innerNonZeros)
-        memset(m_innerNonZeros, 0, (m_outerSize)*sizeof(StorageIndex));
+      std::fill_n(m_outerIndex, m_outerSize + 1, StorageIndex(0));
+      if(m_innerNonZeros) {
+        std::fill_n(m_innerNonZeros, m_outerSize, StorageIndex(0));
+      }
     }
 
     /** Preallocates \a reserveSize non zeros.
@@ -641,7 +642,7 @@
         std::free(m_innerNonZeros);
         m_innerNonZeros = 0;
       }
-      memset(m_outerIndex, 0, (m_outerSize+1)*sizeof(StorageIndex));
+      std::fill_n(m_outerIndex, m_outerSize + 1, StorageIndex(0));
     }
 
     /** \internal
@@ -1260,7 +1261,7 @@
       m_innerNonZeros = static_cast<StorageIndex*>(std::malloc(m_outerSize * sizeof(StorageIndex)));
       if(!m_innerNonZeros) internal::throw_std_bad_alloc();
       
-      memset(m_innerNonZeros, 0, (m_outerSize)*sizeof(StorageIndex));
+      std::fill(m_innerNonZeros, m_innerNonZeros + m_outerSize, StorageIndex(0));
       
       // pack all inner-vectors to the end of the pre-allocated space
       // and allocate the entire free-space to the first inner-vector
diff --git a/Eigen/src/plugins/MatrixCwiseBinaryOps.h b/Eigen/src/plugins/MatrixCwiseBinaryOps.h
index f1084ab..a0feef8 100644
--- a/Eigen/src/plugins/MatrixCwiseBinaryOps.h
+++ b/Eigen/src/plugins/MatrixCwiseBinaryOps.h
@@ -39,10 +39,10 @@
   */
 template<typename OtherDerived>
 EIGEN_DEVICE_FUNC
-inline const CwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived>
+inline const CwiseBinaryOp<numext::equal_to<Scalar>, const Derived, const OtherDerived>
 cwiseEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
 {
-  return CwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+  return CwiseBinaryOp<numext::equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
 }
 
 /** \returns an expression of the coefficient-wise != operator of *this and \a other
@@ -59,10 +59,10 @@
   */
 template<typename OtherDerived>
 EIGEN_DEVICE_FUNC
-inline const CwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived>
+inline const CwiseBinaryOp<numext::not_equal_to<Scalar>, const Derived, const OtherDerived>
 cwiseNotEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
 {
-  return CwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+  return CwiseBinaryOp<numext::not_equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
 }
 
 /** \returns an expression of the coefficient-wise min of *this and \a other
diff --git a/bench/tensors/tensor_benchmarks.h b/bench/tensors/tensor_benchmarks.h
index 0825e15..0e8339e 100644
--- a/bench/tensors/tensor_benchmarks.h
+++ b/bench/tensors/tensor_benchmarks.h
@@ -564,9 +564,9 @@
 
     // Initialize the content of the memory pools to prevent asan from
     // complaining.
-    device_.memset(a_, 12, m_ * k_ * sizeof(T));
-    device_.memset(b_, 23, k_ * n_ * sizeof(T));
-    device_.memset(c_, 31, m_ * n_ * sizeof(T));
+    device_.fill(a_, a_ + m_ * k_, T(12));
+    device_.fill(b_, b_ + k_ * n_, T(23));
+    device_.fill(c_, c_ + m_ * n_, T(31));
 
   }
 
diff --git a/cmake/FindBLAS.cmake b/cmake/FindBLAS.cmake
index 7d1f81b..1bb8f19 100644
--- a/cmake/FindBLAS.cmake
+++ b/cmake/FindBLAS.cmake
@@ -147,6 +147,7 @@
 
 include(CheckFunctionExists)
 include(CheckFortranFunctionExists)
+include(CMakeFindDependencyMacro)
 
 set(_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
 
@@ -509,9 +510,9 @@
 
     if (_LANGUAGES_ MATCHES C OR _LANGUAGES_ MATCHES CXX)
       if(BLAS_FIND_QUIETLY OR NOT BLAS_FIND_REQUIRED)
-	find_package(Threads)
+	find_dependency(Threads)
       else()
-	find_package(Threads REQUIRED)
+	find_dependency(Threads REQUIRED)
       endif()
 
       set(BLAS_SEARCH_LIBS "")
diff --git a/cmake/FindBLASEXT.cmake b/cmake/FindBLASEXT.cmake
index 0fe7fb8..69a9418 100644
--- a/cmake/FindBLASEXT.cmake
+++ b/cmake/FindBLASEXT.cmake
@@ -41,18 +41,19 @@
 #  License text for the above reference.)
 
 # macro to factorize this call
+include(CMakeFindDependencyMacro)
 macro(find_package_blas)
   if(BLASEXT_FIND_REQUIRED)
     if(BLASEXT_FIND_QUIETLY)
-      find_package(BLAS REQUIRED QUIET)
+      find_dependency(BLAS REQUIRED QUIET)
     else()
-      find_package(BLAS REQUIRED)
+      find_dependency(BLAS REQUIRED)
     endif()
   else()
     if(BLASEXT_FIND_QUIETLY)
-      find_package(BLAS QUIET)
+      find_dependency(BLAS QUIET)
     else()
-      find_package(BLAS)
+      find_dependency(BLAS)
     endif()
   endif()
 endmacro()
@@ -316,7 +317,7 @@
 	"\n   (see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)")
       message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES")
     endif()
-    find_package_handle_standard_args(BLAS DEFAULT_MSG
+    find_package_handle_standard_args(BLASEXT DEFAULT_MSG
       BLAS_SEQ_LIBRARIES
       BLAS_LIBRARY_DIRS
       BLAS_INCLUDE_DIRS)
@@ -324,14 +325,14 @@
       if(NOT BLASEXT_FIND_QUIETLY)
 	message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES")
       endif()
-      find_package_handle_standard_args(BLAS DEFAULT_MSG
+      find_package_handle_standard_args(BLASEXT DEFAULT_MSG
 	BLAS_PAR_LIBRARIES)
     endif()
   else()
     if(NOT BLASEXT_FIND_QUIETLY)
       message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES")
     endif()
-    find_package_handle_standard_args(BLAS DEFAULT_MSG
+    find_package_handle_standard_args(BLASEXT DEFAULT_MSG
       BLAS_SEQ_LIBRARIES
       BLAS_LIBRARY_DIRS
       BLAS_INCLUDE_DIRS)
@@ -343,14 +344,14 @@
       "\n   (see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)")
     message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES")
   endif()
-  find_package_handle_standard_args(BLAS DEFAULT_MSG
+  find_package_handle_standard_args(BLASEXT DEFAULT_MSG
     BLAS_SEQ_LIBRARIES
     BLAS_LIBRARY_DIRS)
   if(BLAS_PAR_LIBRARIES)
     if(NOT BLASEXT_FIND_QUIETLY)
       message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES")
     endif()
-    find_package_handle_standard_args(BLAS DEFAULT_MSG
+    find_package_handle_standard_args(BLASEXT DEFAULT_MSG
       BLAS_PAR_LIBRARIES)
   endif()
 elseif(BLA_VENDOR MATCHES "IBMESSL*")
@@ -360,21 +361,24 @@
       "\n   (see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)")
     message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES")
   endif()
-  find_package_handle_standard_args(BLAS DEFAULT_MSG
+  find_package_handle_standard_args(BLASEXT DEFAULT_MSG
     BLAS_SEQ_LIBRARIES
     BLAS_LIBRARY_DIRS)
   if(BLAS_PAR_LIBRARIES)
     if(NOT BLASEXT_FIND_QUIETLY)
       message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES")
     endif()
-    find_package_handle_standard_args(BLAS DEFAULT_MSG
+    find_package_handle_standard_args(BLASEXT DEFAULT_MSG
       BLAS_PAR_LIBRARIES)
   endif()
 else()
   if(NOT BLASEXT_FIND_QUIETLY)
     message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES")
   endif()
-  find_package_handle_standard_args(BLAS DEFAULT_MSG
+  find_package_handle_standard_args(BLASEXT DEFAULT_MSG
     BLAS_SEQ_LIBRARIES
     BLAS_LIBRARY_DIRS)
 endif()
+
+# Callers expect BLAS_FOUND to be set as well.
+set(BLAS_FOUND ${BLASEXT_FOUND})
diff --git a/cmake/FindComputeCpp.cmake b/cmake/FindComputeCpp.cmake
index 3cca515..1c271f0 100644
--- a/cmake/FindComputeCpp.cmake
+++ b/cmake/FindComputeCpp.cmake
@@ -41,7 +41,8 @@
   "Bitcode type to use as SYCL target in compute++")
 mark_as_advanced(COMPUTECPP_BITCODE)
 
-find_package(OpenCL REQUIRED)
+include(CMakeFindDependencyMacro)
+find_dependency(OpenCL REQUIRED)
 
 # Find ComputeCpp package
 
diff --git a/cmake/FindFFTW.cmake b/cmake/FindFFTW.cmake
index fad476d..ed55c5f 100644
--- a/cmake/FindFFTW.cmake
+++ b/cmake/FindFFTW.cmake
@@ -22,7 +22,8 @@
 endif()
 
 # Check if we can use PkgConfig
-find_package(PkgConfig)
+include(CMakeFindDependencyMacro)
+find_dependency(PkgConfig)
 
 #Determine from PKG
 if( PKG_CONFIG_FOUND AND NOT FFTW_ROOT )
diff --git a/cmake/FindHWLOC.cmake b/cmake/FindHWLOC.cmake
index 4832915..522f521 100644
--- a/cmake/FindHWLOC.cmake
+++ b/cmake/FindHWLOC.cmake
@@ -65,8 +65,9 @@
 
 # Optionally use pkg-config to detect include/library dirs (if pkg-config is available)
 # -------------------------------------------------------------------------------------
-include(FindPkgConfig)
-find_package(PkgConfig QUIET)
+include(CMakeFindDependencyMacro)
+# include(FindPkgConfig)
+find_dependency(PkgConfig QUIET)
 if( PKG_CONFIG_EXECUTABLE AND NOT HWLOC_GIVEN_BY_USER )
 
   pkg_search_module(HWLOC hwloc)
diff --git a/cmake/FindLAPACK.cmake b/cmake/FindLAPACK.cmake
index 284a452..3fd7388 100644
--- a/cmake/FindLAPACK.cmake
+++ b/cmake/FindLAPACK.cmake
@@ -26,6 +26,7 @@
 
 
 include(CheckFunctionExists)
+include(CMakeFindDependencyMacro)
 
 # This macro checks for the existence of the combination of fortran libraries
 # given by _list.  If the combination is found, this macro checks (using the
@@ -88,7 +89,7 @@
     set(${LIBRARIES}    ${_libraries_found})
     # Some C++ linkers require the f2c library to link with Fortran libraries.
     # I do not know which ones, thus I just add the f2c library if it is available.
-    find_package( F2C QUIET )
+    find_dependency( F2C QUIET )
     if ( F2C_FOUND )
       set(${DEFINITIONS}  ${${DEFINITIONS}} ${F2C_DEFINITIONS})
       set(${LIBRARIES}    ${${LIBRARIES}} ${F2C_LIBRARIES})
@@ -135,9 +136,9 @@
 
 # LAPACK requires BLAS
 if(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED)
-  find_package(BLAS)
+  find_dependency(BLAS)
 else()
-  find_package(BLAS REQUIRED)
+  find_dependency(BLAS REQUIRED)
 endif()
 
 if (NOT BLAS_FOUND)
diff --git a/cmake/FindMPREAL.cmake b/cmake/FindMPREAL.cmake
new file mode 100644
index 0000000..947a1ce
--- /dev/null
+++ b/cmake/FindMPREAL.cmake
@@ -0,0 +1,103 @@
+# Try to find the MPFR C++ (MPREAL) library
+# See http://www.holoborodko.com/pavel/mpreal/
+#
+# This module supports requiring a minimum version, e.g. you can do
+#   find_package(MPREAL 1.8.6)
+# to require version 1.8.6 or newer of MPREAL C++.
+#
+# Once done this will define
+#
+#  MPREAL_FOUND - system has MPREAL lib with correct version
+#  MPREAL_INCLUDES - MPREAL required include directories
+#  MPREAL_LIBRARIES - MPREAL required libraries
+#  MPREAL_VERSION - MPREAL version
+
+# Copyright (c) 2020 The Eigen Authors.
+# Redistribution and use is allowed according to the terms of the BSD license.
+
+include(CMakeFindDependencyMacro)
+find_dependency(MPFR)
+find_dependency(GMP)
+
+# Set MPREAL_INCLUDES
+find_path(MPREAL_INCLUDES
+  NAMES
+  mpreal.h
+  PATHS
+  $ENV{GMPDIR}
+  ${INCLUDE_INSTALL_DIR}
+)
+
+# Set MPREAL_FIND_VERSION to 1.0.0 if no minimum version is specified
+
+if(NOT MPREAL_FIND_VERSION)
+  if(NOT MPREAL_FIND_VERSION_MAJOR)
+    set(MPREAL_FIND_VERSION_MAJOR 1)
+  endif()
+  if(NOT MPREAL_FIND_VERSION_MINOR)
+    set(MPREAL_FIND_VERSION_MINOR 0)
+  endif()
+  if(NOT MPREAL_FIND_VERSION_PATCH)
+    set(MPREAL_FIND_VERSION_PATCH 0)
+  endif()
+
+  set(MPREAL_FIND_VERSION "${MPREAL_FIND_VERSION_MAJOR}.${MPREAL_FIND_VERSION_MINOR}.${MPREAL_FIND_VERSION_PATCH}")
+endif()
+
+# Check bugs
+# - https://github.com/advanpix/mpreal/issues/7
+# - https://github.com/advanpix/mpreal/issues/9
+set(MPREAL_TEST_PROGRAM "
+#include <mpreal.h>
+#include <algorithm>
+int main(int argc, char** argv) {
+  const mpfr::mpreal one  =    1.0;
+  const mpfr::mpreal zero =    0.0;
+  using namespace std;
+  const mpfr::mpreal smaller = min(one, zero);
+  return 0;
+}")
+
+if(MPREAL_INCLUDES)
+
+  # Set MPREAL_VERSION
+  
+  file(READ "${MPREAL_INCLUDES}/mpreal.h" _mpreal_version_header)
+  
+  string(REGEX MATCH "define[ \t]+MPREAL_VERSION_MAJOR[ \t]+([0-9]+)" _mpreal_major_version_match "${_mpreal_version_header}")
+  set(MPREAL_MAJOR_VERSION "${CMAKE_MATCH_1}")
+  string(REGEX MATCH "define[ \t]+MPREAL_VERSION_MINOR[ \t]+([0-9]+)" _mpreal_minor_version_match "${_mpreal_version_header}")
+  set(MPREAL_MINOR_VERSION "${CMAKE_MATCH_1}")
+  string(REGEX MATCH "define[ \t]+MPREAL_VERSION_PATCHLEVEL[ \t]+([0-9]+)" _mpreal_patchlevel_version_match "${_mpreal_version_header}")
+  set(MPREAL_PATCHLEVEL_VERSION "${CMAKE_MATCH_1}")
+  
+  set(MPREAL_VERSION ${MPREAL_MAJOR_VERSION}.${MPREAL_MINOR_VERSION}.${MPREAL_PATCHLEVEL_VERSION})
+  
+  # Check whether found version exceeds minimum version
+  
+  if(${MPREAL_VERSION} VERSION_LESS ${MPREAL_FIND_VERSION})
+    set(MPREAL_VERSION_OK FALSE)
+    message(STATUS "MPREAL version ${MPREAL_VERSION} found in ${MPREAL_INCLUDES}, "
+                   "but at least version ${MPREAL_FIND_VERSION} is required")
+  else()
+    set(MPREAL_VERSION_OK TRUE)
+    
+    list(APPEND MPREAL_INCLUDES "${MPFR_INCLUDES}" "${GMP_INCLUDES}")
+    list(REMOVE_DUPLICATES MPREAL_INCLUDES)
+    
+    list(APPEND MPREAL_LIBRARIES "${MPFR_LIBRARIES}" "${GMP_LIBRARIES}")
+    list(REMOVE_DUPLICATES MPREAL_LIBRARIES)
+    
+    # Make sure it compiles with the current compiler.
+    unset(MPREAL_WORKS CACHE)
+    include(CheckCXXSourceCompiles)
+    set(CMAKE_REQUIRED_INCLUDES "${MPREAL_INCLUDES}")
+    set(CMAKE_REQUIRED_LIBRARIES "${MPREAL_LIBRARIES}")
+    check_cxx_source_compiles("${MPREAL_TEST_PROGRAM}" MPREAL_WORKS)
+  endif()
+endif()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(MPREAL DEFAULT_MSG
+                                  MPREAL_INCLUDES MPREAL_VERSION_OK MPREAL_WORKS)
+mark_as_advanced(MPREAL_INCLUDES)
diff --git a/cmake/FindPastix.cmake b/cmake/FindPASTIX.cmake
similarity index 96%
rename from cmake/FindPastix.cmake
rename to cmake/FindPASTIX.cmake
index 3b47d5c..db1427b 100644
--- a/cmake/FindPastix.cmake
+++ b/cmake/FindPASTIX.cmake
@@ -118,7 +118,7 @@
     if (${component} STREQUAL "SCOTCH")
       set(PASTIX_LOOK_FOR_SCOTCH ON)
     endif()
-    if (${component} STREQUAL "SCOTCH")
+    if (${component} STREQUAL "PTSCOTCH")
       set(PASTIX_LOOK_FOR_PTSCOTCH ON)
     endif()
     if (${component} STREQUAL "METIS")
@@ -133,14 +133,14 @@
 
 # Required dependencies
 # ---------------------
-
+include(CMakeFindDependencyMacro)
 if (NOT PASTIX_FIND_QUIETLY)
   message(STATUS "Looking for PASTIX - Try to detect pthread")
 endif()
 if (PASTIX_FIND_REQUIRED)
-  find_package(Threads REQUIRED QUIET)
+  find_dependency(Threads REQUIRED QUIET)
 else()
-  find_package(Threads QUIET)
+  find_dependency(Threads QUIET)
 endif()
 set(PASTIX_EXTRA_LIBRARIES "")
 if( THREADS_FOUND )
@@ -198,9 +198,9 @@
   message(STATUS "Looking for PASTIX - Try to detect HWLOC")
 endif()
 if (PASTIX_FIND_REQUIRED)
-  find_package(HWLOC REQUIRED QUIET)
+  find_dependency(HWLOC REQUIRED QUIET)
 else()
-  find_package(HWLOC QUIET)
+  find_dependency(HWLOC QUIET)
 endif()
 
 # PASTIX depends on BLAS
@@ -209,9 +209,9 @@
   message(STATUS "Looking for PASTIX - Try to detect BLAS")
 endif()
 if (PASTIX_FIND_REQUIRED)
-  find_package(BLASEXT REQUIRED QUIET)
+  find_dependency(BLASEXT REQUIRED QUIET)
 else()
-  find_package(BLASEXT QUIET)
+  find_dependency(BLASEXT QUIET)
 endif()
 
 # Optional dependencies
@@ -230,9 +230,9 @@
     set(MPI_C_COMPILER mpicc)
   endif()
   if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_MPI)
-    find_package(MPI REQUIRED QUIET)
+    find_dependency(MPI REQUIRED QUIET)
   else()
-    find_package(MPI QUIET)
+    find_dependency(MPI QUIET)
   endif()
   if (MPI_FOUND)
     mark_as_advanced(MPI_LIBRARY)
@@ -272,10 +272,10 @@
   endif()
   # set the list of optional dependencies we may discover
   if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_STARPU)
-    find_package(STARPU ${PASTIX_STARPU_VERSION} REQUIRED
+    find_dependency(STARPU ${PASTIX_STARPU_VERSION} REQUIRED
       COMPONENTS ${STARPU_COMPONENT_LIST})
   else()
-    find_package(STARPU ${PASTIX_STARPU_VERSION}
+    find_dependency(STARPU ${PASTIX_STARPU_VERSION}
       COMPONENTS ${STARPU_COMPONENT_LIST})
   endif()
 
@@ -288,9 +288,9 @@
     message(STATUS "Looking for PASTIX - Try to detect SCOTCH")
   endif()
   if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_SCOTCH)
-    find_package(SCOTCH REQUIRED QUIET)
+    find_dependency(SCOTCH REQUIRED QUIET)
   else()
-    find_package(SCOTCH QUIET)
+    find_dependency(SCOTCH QUIET)
   endif()
 endif()
 
@@ -301,9 +301,9 @@
     message(STATUS "Looking for PASTIX - Try to detect PTSCOTCH")
   endif()
   if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_PTSCOTCH)
-    find_package(PTSCOTCH REQUIRED QUIET)
+    find_dependency(PTSCOTCH REQUIRED QUIET)
   else()
-    find_package(PTSCOTCH QUIET)
+    find_dependency(PTSCOTCH QUIET)
   endif()
 endif()
 
@@ -314,9 +314,9 @@
     message(STATUS "Looking for PASTIX - Try to detect METIS")
   endif()
   if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_METIS)
-    find_package(METIS REQUIRED QUIET)
+    find_dependency(METIS REQUIRED QUIET)
   else()
-    find_package(METIS QUIET)
+    find_dependency(METIS QUIET)
   endif()
 endif()
 
diff --git a/cmake/FindPTSCOTCH.cmake b/cmake/FindPTSCOTCH.cmake
index 51eecf1..6ccc743 100644
--- a/cmake/FindPTSCOTCH.cmake
+++ b/cmake/FindPTSCOTCH.cmake
@@ -79,20 +79,21 @@
 endif()
 
 # PTSCOTCH depends on Threads, try to find it
+include(CMakeFindDependencyMacro)
 if (NOT THREADS_FOUND)
   if (PTSCOTCH_FIND_REQUIRED)
-    find_package(Threads REQUIRED)
+    find_dependency(Threads REQUIRED)
   else()
-    find_package(Threads)
+    find_dependency(Threads)
   endif()
 endif()
 
 # PTSCOTCH depends on MPI, try to find it
 if (NOT MPI_FOUND)
   if (PTSCOTCH_FIND_REQUIRED)
-    find_package(MPI REQUIRED)
+    find_dependency(MPI REQUIRED)
   else()
-    find_package(MPI)
+    find_dependency(MPI)
   endif()
 endif()
 
@@ -148,18 +149,18 @@
     foreach(ptscotch_hdr ${PTSCOTCH_hdrs_to_find})
       set(PTSCOTCH_${ptscotch_hdr}_DIRS "PTSCOTCH_${ptscotch_hdr}_DIRS-NOTFOUND")
       find_path(PTSCOTCH_${ptscotch_hdr}_DIRS
-	NAMES ${ptscotch_hdr}
-	HINTS ${PTSCOTCH_DIR}
-	PATH_SUFFIXES "include" "include/scotch")
+        NAMES ${ptscotch_hdr}
+        HINTS ${PTSCOTCH_DIR}
+        PATH_SUFFIXES "include" "include/scotch")
       mark_as_advanced(PTSCOTCH_${ptscotch_hdr}_DIRS)
     endforeach()
   else()
     foreach(ptscotch_hdr ${PTSCOTCH_hdrs_to_find})
       set(PTSCOTCH_${ptscotch_hdr}_DIRS "PTSCOTCH_${ptscotch_hdr}_DIRS-NOTFOUND")
       find_path(PTSCOTCH_${ptscotch_hdr}_DIRS
-	NAMES ${ptscotch_hdr}
-	HINTS ${_inc_env}
-	PATH_SUFFIXES "scotch")
+        NAMES ${ptscotch_hdr}
+        HINTS ${_inc_env}
+        PATH_SUFFIXES "scotch")
       mark_as_advanced(PTSCOTCH_${ptscotch_hdr}_DIRS)
     endforeach()
   endif()
@@ -171,7 +172,6 @@
   if (PTSCOTCH_${ptscotch_hdr}_DIRS)
     list(APPEND PTSCOTCH_INCLUDE_DIRS "${PTSCOTCH_${ptscotch_hdr}_DIRS}")
   else ()
-    set(PTSCOTCH_INCLUDE_DIRS "PTSCOTCH_INCLUDE_DIRS-NOTFOUND")
     if (NOT PTSCOTCH_FIND_QUIETLY)
       message(STATUS "Looking for ptscotch -- ${ptscotch_hdr} not found")
     endif()
@@ -229,16 +229,16 @@
     foreach(ptscotch_lib ${PTSCOTCH_libs_to_find})
       set(PTSCOTCH_${ptscotch_lib}_LIBRARY "PTSCOTCH_${ptscotch_lib}_LIBRARY-NOTFOUND")
       find_library(PTSCOTCH_${ptscotch_lib}_LIBRARY
-	NAMES ${ptscotch_lib}
-	HINTS ${PTSCOTCH_DIR}
-	PATH_SUFFIXES lib lib32 lib64)
+        NAMES ${ptscotch_lib}
+        HINTS ${PTSCOTCH_DIR}
+        PATH_SUFFIXES lib lib32 lib64)
     endforeach()
   else()
     foreach(ptscotch_lib ${PTSCOTCH_libs_to_find})
       set(PTSCOTCH_${ptscotch_lib}_LIBRARY "PTSCOTCH_${ptscotch_lib}_LIBRARY-NOTFOUND")
       find_library(PTSCOTCH_${ptscotch_lib}_LIBRARY
-	NAMES ${ptscotch_lib}
-	HINTS ${_lib_env})
+        NAMES ${ptscotch_lib}
+        HINTS ${_lib_env})
     endforeach()
   endif()
 endif()
@@ -255,7 +255,6 @@
     list(APPEND PTSCOTCH_LIBRARIES "${PTSCOTCH_${ptscotch_lib}_LIBRARY}")
     list(APPEND PTSCOTCH_LIBRARY_DIRS "${${ptscotch_lib}_lib_path}")
   else ()
-    list(APPEND PTSCOTCH_LIBRARIES "${PTSCOTCH_${ptscotch_lib}_LIBRARY}")
     if (NOT PTSCOTCH_FIND_QUIETLY)
       message(STATUS "Looking for ptscotch -- lib ${ptscotch_lib} not found")
     endif()
diff --git a/cmake/FindScotch.cmake b/cmake/FindSCOTCH.cmake
similarity index 98%
rename from cmake/FindScotch.cmake
rename to cmake/FindSCOTCH.cmake
index af00eb0..11b971a 100644
--- a/cmake/FindScotch.cmake
+++ b/cmake/FindSCOTCH.cmake
@@ -71,11 +71,12 @@
 endif()
 
 # SCOTCH may depend on Threads, try to find it
+include(CMakeFindDependencyMacro)
 if (NOT THREADS_FOUND)
   if (SCOTCH_FIND_REQUIRED)
-    find_package(Threads REQUIRED)
+    find_dependency(Threads REQUIRED)
   else()
-    find_package(Threads)
+    find_dependency(Threads)
   endif()
 endif()
 
diff --git a/cmake/FindTriSYCL.cmake b/cmake/FindTriSYCL.cmake
index 41bc2fa..8104239 100644
--- a/cmake/FindTriSYCL.cmake
+++ b/cmake/FindTriSYCL.cmake
@@ -57,18 +57,19 @@
 mark_as_advanced(TRISYCL_TRACE_KERNEL)
 
 #triSYCL definitions
-set(CL_SYCL_LANGUAGE_VERSION 220 CACHE VERSION
+set(CL_SYCL_LANGUAGE_VERSION 220 CACHE STRING
   "Host language version to be used by trisYCL (default is: 220)")
-set(TRISYCL_CL_LANGUAGE_VERSION 220 CACHE VERSION
+set(TRISYCL_CL_LANGUAGE_VERSION 220 CACHE STRING
   "Device language version to be used by trisYCL (default is: 220)")
-#set(TRISYCL_COMPILE_OPTIONS "-std=c++1z -Wall -Wextra")
-set(CMAKE_CXX_STANDARD 14)
+# triSYCL now requires C++17
+set(CMAKE_CXX_STANDARD 17)
 set(CXX_STANDARD_REQUIRED ON)
 
 
 # Find OpenCL package
+include(CMakeFindDependencyMacro)
 if(TRISYCL_OPENCL)
-  find_package(OpenCL REQUIRED)
+  find_dependency(OpenCL REQUIRED)
   if(UNIX)
     set(BOOST_COMPUTE_INCPATH /usr/include/compute CACHE PATH
       "Path to Boost.Compute headers (default is: /usr/include/compute)")
@@ -77,11 +78,11 @@
 
 # Find OpenMP package
 if(TRISYCL_OPENMP)
-  find_package(OpenMP REQUIRED)
+  find_dependency(OpenMP REQUIRED)
 endif()
 
 # Find Boost
-find_package(Boost 1.58 REQUIRED COMPONENTS chrono log)
+find_dependency(Boost 1.58 REQUIRED COMPONENTS chrono log)
 
 # If debug or trace we need boost log
 if(TRISYCL_DEBUG OR TRISYCL_DEBUG_STRUCTORS OR TRISYCL_TRACE_KERNEL)
@@ -90,9 +91,23 @@
   set(LOG_NEEDED OFF)
 endif()
 
-find_package(Threads REQUIRED)
+find_dependency(Threads REQUIRED)
 
 # Find triSYCL directory
+if (TRISYCL_INCLUDES AND TRISYCL_LIBRARIES)
+  set(TRISYCL_FIND_QUIETLY TRUE)
+endif ()
+
+find_path(TRISYCL_INCLUDE_DIR
+  NAMES sycl.hpp
+  PATHS $ENV{TRISYCLDIR} $ENV{TRISYCLDIR}/include ${INCLUDE_INSTALL_DIR}
+  PATH_SUFFIXES triSYCL
+)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(TriSYCL DEFAULT_MSG
+                                  TRISYCL_INCLUDE_DIR)
+
 if(NOT TRISYCL_INCLUDE_DIR)
   message(FATAL_ERROR
     "triSYCL include directory - Not found! (please set TRISYCL_INCLUDE_DIR")
@@ -100,36 +115,42 @@
   message(STATUS "triSYCL include directory - Found ${TRISYCL_INCLUDE_DIR}")
 endif()
 
+include(CMakeParseArguments)
 #######################
 #  add_sycl_to_target
 #######################
-#
-#  Sets the proper flags and includes for the target compilation.
-#
-#  targetName : Name of the target to add a SYCL to.
-#  sourceFile : Source file to be compiled for SYCL.
-#  binaryDir : Intermediate directory to output the integration header.
-#
-function(add_sycl_to_target targetName sourceFile binaryDir)
+function(add_sycl_to_target)
+  set(options)
+  set(one_value_args
+    TARGET
+  )
+  set(multi_value_args
+    SOURCES
+  )
+  cmake_parse_arguments(ADD_SYCL_ARGS
+    "${options}"
+    "${one_value_args}"
+    "${multi_value_args}"
+    ${ARGN}
+  )
 
   # Add include directories to the "#include <>" paths
-  target_include_directories (${targetName} PUBLIC
+  target_include_directories (${ADD_SYCL_ARGS_TARGET} PUBLIC
     ${TRISYCL_INCLUDE_DIR}
     ${Boost_INCLUDE_DIRS}
     $<$<BOOL:${TRISYCL_OPENCL}>:${OpenCL_INCLUDE_DIRS}>
     $<$<BOOL:${TRISYCL_OPENCL}>:${BOOST_COMPUTE_INCPATH}>)
 
-
   # Link dependencies
-  target_link_libraries(${targetName} PUBLIC
+  target_link_libraries(${ADD_SYCL_ARGS_TARGET}
     $<$<BOOL:${TRISYCL_OPENCL}>:${OpenCL_LIBRARIES}>
     Threads::Threads
     $<$<BOOL:${LOG_NEEDED}>:Boost::log>
     Boost::chrono)
 
-
   # Compile definitions
-  target_compile_definitions(${targetName} PUBLIC
+  target_compile_definitions(${ADD_SYCL_ARGS_TARGET} PUBLIC
+    EIGEN_SYCL_TRISYCL
     $<$<BOOL:${TRISYCL_NO_ASYNC}>:TRISYCL_NO_ASYNC>
     $<$<BOOL:${TRISYCL_OPENCL}>:TRISYCL_OPENCL>
     $<$<BOOL:${TRISYCL_DEBUG}>:TRISYCL_DEBUG>
@@ -138,13 +159,13 @@
     $<$<BOOL:${LOG_NEEDED}>:BOOST_LOG_DYN_LINK>)
 
   # C++ and OpenMP requirements
-  target_compile_options(${targetName} PUBLIC
+  target_compile_options(${ADD_SYCL_ARGS_TARGET} PUBLIC
     ${TRISYCL_COMPILE_OPTIONS}
     $<$<BOOL:${TRISYCL_OPENMP}>:${OpenMP_CXX_FLAGS}>)
 
   if(${TRISYCL_OPENMP} AND (NOT WIN32))
     # Does not support generator expressions
-    set_target_properties(${targetName}
+    set_target_properties(${ADD_SYCL_ARGS_TARGET}
       PROPERTIES
       LINK_FLAGS ${OpenMP_CXX_FLAGS})
   endif()
diff --git a/debug/msvc/eigen.natvis b/debug/msvc/eigen.natvis
index 22cf346..da89857 100644
--- a/debug/msvc/eigen.natvis
+++ b/debug/msvc/eigen.natvis
@@ -1,235 +1,235 @@
-<?xml version="1.0" encoding="utf-8"?>
-
-<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
-
-  <!-- Fixed x Fixed Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,*,*,*,*,*&gt;">      
-      <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>
-      <DisplayString>[{$T2}, {$T3}] (fixed matrix)</DisplayString>
-      <Expand>
-        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
-          <Rank>2</Rank>
-          <Size>$i==0 ? $T2 : $T3</Size>
-          <ValuePointer>m_storage.m_data.array</ValuePointer>
-        </ArrayItems>
-        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
-          <Direction>Backward</Direction>
-          <Rank>2</Rank>
-          <Size>$i==0 ? $T2 : $T3</Size>
-          <ValuePointer>m_storage.m_data.array</ValuePointer>
-        </ArrayItems>
-      </Expand>
-  </Type>
-  
-  <!-- 2 x 2 Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,2,2,*,*,*&gt;">      
-      <AlternativeType Name="Eigen::Array&lt;*,2,2,*,*,*&gt;"/>
-      <DisplayString>[2, 2] (fixed matrix)</DisplayString>
-      <Expand>
-        <Synthetic Name="[row 0]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 0]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 1]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 1]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]})</DisplayString>
-        </Synthetic>        
-      </Expand>
-  </Type>
-  
-  <!-- 3 x 3 Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,3,3,*,*,*&gt;">      
-      <AlternativeType Name="Eigen::Array&lt;*,3,3,*,*,*&gt;"/>
-      <DisplayString>[3, 3] (fixed matrix)</DisplayString>
-      <Expand>
-        <Synthetic Name="[row 0]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 0]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 1]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 1]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 2]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 2]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]})</DisplayString>
-        </Synthetic>        
-      </Expand>
-  </Type>
-  
-  <!-- 4 x 4 Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,4,4,*,*,*&gt;">      
-      <AlternativeType Name="Eigen::Array&lt;*,4,4,*,*,*&gt;"/>
-      <DisplayString>[4, 4] (fixed matrix)</DisplayString>
-      <Expand>
-        <Synthetic Name="[row 0]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 0]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 1]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 1]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 2]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 2]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 3]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 3]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]})</DisplayString>
-        </Synthetic>        
-      </Expand>
-  </Type>  
-  
-  <!-- Dynamic x Dynamic Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,-1,-1,*,*,*&gt;">      
-      <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>
-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
-      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix)</DisplayString>
-      <Expand>
-        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
-          <Rank>2</Rank>
-          <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
-          <Direction>Backward</Direction>
-          <Rank>2</Rank>
-          <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-      </Expand>
-  </Type>
-  
-  <!-- Fixed x Dynamic Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,*,-1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Array&lt;*,*,-1,*,*,*&gt;"/>
-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
-      <DisplayString Condition="m_storage.m_data != 0">[{$T2}, {m_storage.m_cols}] (dynamic column matrix)</DisplayString>
-      <Expand>
-        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
-          <Rank>2</Rank>
-          <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
-          <Direction>Backward</Direction>
-          <Rank>2</Rank>
-          <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-      </Expand>
-  </Type>
-  
-  <!-- Dynamic x Fixed Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,-1,*,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Array&lt;*,-1,*,*,*,*&gt;"/>
-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
-      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {$T2}] (dynamic row matrix)</DisplayString>
-      <Expand>
-        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
-          <Rank>2</Rank>
-          <Size>$i==0 ? m_storage.m_rows : $T2</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
-          <Direction>Backward</Direction>
-          <Rank>2</Rank>
-          <Size>$i==0 ? m_storage.m_rows : $T2</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-      </Expand>
-  </Type>
-  
-  <!-- Dynamic Column Vector -->
-  <Type Name="Eigen::Matrix&lt;*,1,-1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Array&lt;*,1,-1,*,*,*&gt;"/>
-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
-      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_cols}] (dynamic column vector)</DisplayString>
-      <Expand>
-        <Item Name="[size]">m_storage.m_cols</Item>
-        <ArrayItems>
-          <Size>m_storage.m_cols</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-      </Expand>
-  </Type>
-  
-  <!-- Dynamic Row Vector -->
-  <Type Name="Eigen::Matrix&lt;*,-1,1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Array&lt;*,-1,1,*,*,*&gt;"/>
-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
-      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}] (dynamic row vector)</DisplayString>
-      <Expand>
-        <Item Name="[size]">m_storage.m_rows</Item>
-        <ArrayItems>
-          <Size>m_storage.m_rows</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-      </Expand>
-  </Type>
-  
-  <!-- Fixed Vector -->
-  <Type Name="Eigen::Matrix&lt;*,1,1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Array&lt;*,1,1,*,*,*&gt;"/>
-      <DisplayString>[1] ({m_storage.m_data.array[0]})</DisplayString>
-      <Expand>
-        <Item Name="[x]">m_storage.m_data.array[0]</Item>
-      </Expand>
-  </Type>
-  
-  <Type Name="Eigen::Matrix&lt;*,2,1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Matrix&lt;*,1,2,*,*,*&gt;"/>
-      <AlternativeType Name="Eigen::Array&lt;*,2,1,*,*,*&gt;"/>
-      <AlternativeType Name="Eigen::Array&lt;*,1,2,*,*,*&gt;"/>
-      <DisplayString>[2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
-      <Expand>
-        <Item Name="[x]">m_storage.m_data.array[0]</Item>
-        <Item Name="[y]">m_storage.m_data.array[1]</Item>
-      </Expand>
-  </Type>
-  
-  <Type Name="Eigen::Matrix&lt;*,3,1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Matrix&lt;*,1,3,*,*,*&gt;"/>
-      <AlternativeType Name="Eigen::Array&lt;*,3,1,*,*,*&gt;"/>
-      <AlternativeType Name="Eigen::Array&lt;*,1,3,*,*,*&gt;"/>
-      <DisplayString>[3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
-      <Expand>
-        <Item Name="[x]">m_storage.m_data.array[0]</Item>
-        <Item Name="[y]">m_storage.m_data.array[1]</Item>
-        <Item Name="[z]">m_storage.m_data.array[2]</Item>
-      </Expand>
-  </Type>
-  
-    <Type Name="Eigen::Matrix&lt;*,4,1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Matrix&lt;*,1,4,*,*,*&gt;"/>
-      <AlternativeType Name="Eigen::Array&lt;*,4,1,*,*,*&gt;"/>
-      <AlternativeType Name="Eigen::Array&lt;*,1,4,*,*,*&gt;"/>
-      <DisplayString>[4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
-      <Expand>
-        <Item Name="[x]">m_storage.m_data.array[0]</Item>
-        <Item Name="[y]">m_storage.m_data.array[1]</Item>
-        <Item Name="[z]">m_storage.m_data.array[2]</Item>
-        <Item Name="[w]">m_storage.m_data.array[3]</Item>
-      </Expand>
-  </Type>
-
-</AutoVisualizer>
+<?xml version="1.0" encoding="utf-8"?>

+

+<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">

+

+  <!-- Fixed x Fixed Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,*,*,*,*,*&gt;">      

+      <AlternativeType Name="Eigen::Array&lt;*,*,*,*,*,*&gt;"/>

+      <DisplayString>[{$T2}, {$T3}] (fixed matrix)</DisplayString>

+      <Expand>

+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->

+          <Rank>2</Rank>

+          <Size>$i==0 ? $T2 : $T3</Size>

+          <ValuePointer>m_storage.m_data.array</ValuePointer>

+        </ArrayItems>

+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->

+          <Direction>Backward</Direction>

+          <Rank>2</Rank>

+          <Size>$i==0 ? $T2 : $T3</Size>

+          <ValuePointer>m_storage.m_data.array</ValuePointer>

+        </ArrayItems>

+      </Expand>

+  </Type>

+  

+  <!-- 2 x 2 Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,2,2,*,*,*&gt;">      

+      <AlternativeType Name="Eigen::Array&lt;*,2,2,*,*,*&gt;"/>

+      <DisplayString>[2, 2] (fixed matrix)</DisplayString>

+      <Expand>

+        <Synthetic Name="[row 0]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 0]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 1]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 1]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]})</DisplayString>

+        </Synthetic>        

+      </Expand>

+  </Type>

+  

+  <!-- 3 x 3 Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,3,3,*,*,*&gt;">      

+      <AlternativeType Name="Eigen::Array&lt;*,3,3,*,*,*&gt;"/>

+      <DisplayString>[3, 3] (fixed matrix)</DisplayString>

+      <Expand>

+        <Synthetic Name="[row 0]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 0]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 1]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 1]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 2]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 2]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]})</DisplayString>

+        </Synthetic>        

+      </Expand>

+  </Type>

+  

+  <!-- 4 x 4 Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,4,4,*,*,*&gt;">      

+      <AlternativeType Name="Eigen::Array&lt;*,4,4,*,*,*&gt;"/>

+      <DisplayString>[4, 4] (fixed matrix)</DisplayString>

+      <Expand>

+        <Synthetic Name="[row 0]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 0]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 1]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 1]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 2]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 2]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 3]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 3]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]})</DisplayString>

+        </Synthetic>        

+      </Expand>

+  </Type>  

+  

+  <!-- Dynamic x Dynamic Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,-1,-1,*,*,*&gt;">      

+      <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>

+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix)</DisplayString>

+      <Expand>

+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->

+          <Rank>2</Rank>

+          <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->

+          <Direction>Backward</Direction>

+          <Rank>2</Rank>

+          <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+      </Expand>

+  </Type>

+  

+  <!-- Fixed x Dynamic Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,*,-1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Array&lt;*,*,-1,*,*,*&gt;"/>

+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

+      <DisplayString Condition="m_storage.m_data != 0">[{$T2}, {m_storage.m_cols}] (dynamic column matrix)</DisplayString>

+      <Expand>

+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->

+          <Rank>2</Rank>

+          <Size>$i==0 ? $T2 : m_storage.m_cols</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->

+          <Direction>Backward</Direction>

+          <Rank>2</Rank>

+          <Size>$i==0 ? $T2 : m_storage.m_cols</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+      </Expand>

+  </Type>

+  

+  <!-- Dynamic x Fixed Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,-1,*,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Array&lt;*,-1,*,*,*,*&gt;"/>

+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {$T2}] (dynamic row matrix)</DisplayString>

+      <Expand>

+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->

+          <Rank>2</Rank>

+          <Size>$i==0 ? m_storage.m_rows : $T2</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->

+          <Direction>Backward</Direction>

+          <Rank>2</Rank>

+          <Size>$i==0 ? m_storage.m_rows : $T2</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+      </Expand>

+  </Type>

+  

+  <!-- Dynamic Row Vector (1 x Dynamic) -->

+  <Type Name="Eigen::Matrix&lt;*,1,-1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Array&lt;*,1,-1,*,*,*&gt;"/>

+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_cols}] (dynamic row vector)</DisplayString>

+      <Expand>

+        <Item Name="[size]">m_storage.m_cols</Item>

+        <ArrayItems>

+          <Size>m_storage.m_cols</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+      </Expand>

+  </Type>

+  

+  <!-- Dynamic Column Vector (Dynamic x 1) -->

+  <Type Name="Eigen::Matrix&lt;*,-1,1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Array&lt;*,-1,1,*,*,*&gt;"/>

+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}] (dynamic column vector)</DisplayString>

+      <Expand>

+        <Item Name="[size]">m_storage.m_rows</Item>

+        <ArrayItems>

+          <Size>m_storage.m_rows</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+      </Expand>

+  </Type>

+  

+  <!-- Fixed Vector -->

+  <Type Name="Eigen::Matrix&lt;*,1,1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Array&lt;*,1,1,*,*,*&gt;"/>

+      <DisplayString>[1] ({m_storage.m_data.array[0]})</DisplayString>

+      <Expand>

+        <Item Name="[x]">m_storage.m_data.array[0]</Item>

+      </Expand>

+  </Type>

+  

+  <Type Name="Eigen::Matrix&lt;*,2,1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Matrix&lt;*,1,2,*,*,*&gt;"/>

+      <AlternativeType Name="Eigen::Array&lt;*,2,1,*,*,*&gt;"/>

+      <AlternativeType Name="Eigen::Array&lt;*,1,2,*,*,*&gt;"/>

+      <DisplayString>[2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>

+      <Expand>

+        <Item Name="[x]">m_storage.m_data.array[0]</Item>

+        <Item Name="[y]">m_storage.m_data.array[1]</Item>

+      </Expand>

+  </Type>

+  

+  <Type Name="Eigen::Matrix&lt;*,3,1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Matrix&lt;*,1,3,*,*,*&gt;"/>

+      <AlternativeType Name="Eigen::Array&lt;*,3,1,*,*,*&gt;"/>

+      <AlternativeType Name="Eigen::Array&lt;*,1,3,*,*,*&gt;"/>

+      <DisplayString>[3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>

+      <Expand>

+        <Item Name="[x]">m_storage.m_data.array[0]</Item>

+        <Item Name="[y]">m_storage.m_data.array[1]</Item>

+        <Item Name="[z]">m_storage.m_data.array[2]</Item>

+      </Expand>

+  </Type>

+  

+    <Type Name="Eigen::Matrix&lt;*,4,1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Matrix&lt;*,1,4,*,*,*&gt;"/>

+      <AlternativeType Name="Eigen::Array&lt;*,4,1,*,*,*&gt;"/>

+      <AlternativeType Name="Eigen::Array&lt;*,1,4,*,*,*&gt;"/>

+      <DisplayString>[4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>

+      <Expand>

+        <Item Name="[x]">m_storage.m_data.array[0]</Item>

+        <Item Name="[y]">m_storage.m_data.array[1]</Item>

+        <Item Name="[z]">m_storage.m_data.array[2]</Item>

+        <Item Name="[w]">m_storage.m_data.array[3]</Item>

+      </Expand>

+  </Type>

+

+</AutoVisualizer>

diff --git a/debug/msvc/eigen_autoexp_part.dat b/debug/msvc/eigen_autoexp_part.dat
index 273c10d..35ef580 100644
--- a/debug/msvc/eigen_autoexp_part.dat
+++ b/debug/msvc/eigen_autoexp_part.dat
@@ -1,295 +1,295 @@
-; ***************************************************************
-; * Eigen Visualizer
-; *
-; * Author: Hauke Heibel <hauke.heibel@gmail.com>
-; *
-; * Support the enhanced debugging of the following Eigen
-; * types (*: any, +:fixed dimension) :
-; *
-; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*>
-; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*>
-; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*>
-; * - Eigen::Matrix<*,-1,-1,*,*,*>
-; * - Eigen::Matrix<*,+,-1,*,*,*>
-; * - Eigen::Matrix<*,-1,+,*,*,*>
-; * - Eigen::Matrix<*,+,+,*,*,*>
-; *
-; * Matrices are displayed properly independently of the memory
-; * alignment (RowMajor vs. ColMajor).
-; *
-; * This file is distributed WITHOUT ANY WARRANTY. Please ensure
-; * that your original autoexp.dat file is copied to a safe 
-; * place before proceeding with its modification.
-; ***************************************************************
-
-[Visualizer]
-
-; Fixed size 4-vectors
-Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{
-   children
-   (
-      #(
-        [internals]: [$c,!],
-         x : ($c.m_storage.m_data.array)[0],
-         y : ($c.m_storage.m_data.array)[1],
-         z : ($c.m_storage.m_data.array)[2],
-         w : ($c.m_storage.m_data.array)[3]
-      )
-   )
-
-   preview
-   (
-      #(
-        "[",
-        4,
-        "](",
-        #array(expr: $e.m_storage.m_data.array[$i], size: 4),
-        ")"
-      )
-   )
-}
-
-; Fixed size 3-vectors
-Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{
-   children
-   (
-      #(
-        [internals]: [$c,!],
-         x : ($c.m_storage.m_data.array)[0],
-         y : ($c.m_storage.m_data.array)[1],
-         z : ($c.m_storage.m_data.array)[2]
-      )
-   )
-
-   preview
-   (
-      #(
-        "[",
-        3,
-        "](",
-        #array(expr: $e.m_storage.m_data.array[$i], size: 3),
-        ")"
-      )
-   )
-}
-
-; Fixed size 2-vectors
-Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{
-   children
-   (
-      #(
-        [internals]: [$c,!],
-         x : ($c.m_storage.m_data.array)[0],
-         y : ($c.m_storage.m_data.array)[1]
-      )
-   )
-
-   preview
-   (
-      #(
-        "[",
-        2,
-        "](",
-        #array(expr: $e.m_storage.m_data.array[$i], size: 2),
-        ")"
-      )
-   )
-}
-
-; Fixed size 1-vectors
-Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{
-   children
-   (
-      #(
-        [internals]: [$c,!],
-         x : ($c.m_storage.m_data.array)[0]
-      )
-   )
-
-   preview
-   (
-      #(
-        "[",
-        1,
-        "](",
-        #array(expr: $e.m_storage.m_data.array[$i], size: 1),
-        ")"
-      )
-   )
-}
-
-; Dynamic matrices (ColMajor and RowMajor support)
-Eigen::Matrix<*,-1,-1,*,*,*>{
-  children
-   (
-      #(
-         [internals]: [$c,!],
-         rows: $c.m_storage.m_rows,
-         cols: $c.m_storage.m_cols,
-         ; Check for RowMajorBit
-         #if ($c.Flags & 0x1) (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 
-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
-             )
-         ) #else (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[$i],
-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
-             )
-         )
-      )
-   )
-
-   preview
-   (
-     #(
-         "[",
-           $c.m_storage.m_rows,
-         ",",
-           $c.m_storage.m_cols,
-         "](",
-           #array(
-            expr :    [($c.m_storage.m_data)[$i],g],
-            size :    $c.m_storage.m_rows*$c.m_storage.m_cols
-           ),
-         ")"
-      )
-   )
-}
-
-; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support)
-Eigen::Matrix<*,*,-1,*,*,*>{
-  children
-   (
-      #(
-         [internals]: [$c,!],
-         rows: $c.RowsAtCompileTime,
-         cols: $c.m_storage.m_cols,
-         ; Check for RowMajorBit
-         #if ($c.Flags & 0x1) (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)],
-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
-             )
-         ) #else (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[$i],
-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
-             )
-         )
-      )
-   )
-
-   preview
-   (
-     #(
-         "[",
-           $c.RowsAtCompileTime,
-         ",",
-           $c.m_storage.m_cols,
-         "](",
-           #array(
-            expr :    [($c.m_storage.m_data)[$i],g],
-            size :    $c.RowsAtCompileTime*$c.m_storage.m_cols
-           ),
-         ")"
-      )
-   )
-}
-
-; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support)
-Eigen::Matrix<*,-1,*,*,*,*>{
-  children
-   (
-      #(
-         [internals]: [$c,!],
-         rows: $c.m_storage.m_rows,
-         cols: $c.ColsAtCompileTime,
-         ; Check for RowMajorBit
-         #if ($c.Flags & 0x1) (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 
-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
-             )
-         ) #else (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[$i],
-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
-             )
-         )
-      )
-   )
-
-   preview
-   (
-     #(
-         "[",
-           $c.m_storage.m_rows,
-         ",",
-           $c.ColsAtCompileTime,
-         "](",
-           #array(
-            expr :    [($c.m_storage.m_data)[$i],g],
-            size :    $c.m_storage.m_rows*$c.ColsAtCompileTime
-           ),
-         ")"
-      )
-   )
-}
-
-; Fixed size matrix (ColMajor and RowMajor support)
-Eigen::Matrix<*,*,*,*,*,*>{
-  children
-   (
-      #(
-         [internals]: [$c,!],
-         rows: $c.RowsAtCompileTime,
-         cols: $c.ColsAtCompileTime,
-         ; Check for RowMajorBit
-         #if ($c.Flags & 0x1) (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], 
-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
-             )
-         ) #else (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data.array)[$i],
-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
-             )
-         )
-      )
-   )
-
-   preview
-   (
-     #(
-         "[",
-           $c.RowsAtCompileTime,
-         ",",
-           $c.ColsAtCompileTime,
-         "](",
-           #array(
-            expr :    [($c.m_storage.m_data.array)[$i],g],
-            size :    $c.RowsAtCompileTime*$c.ColsAtCompileTime
-           ),
-         ")"
-      )
-   )
-}
+; ***************************************************************

+; * Eigen Visualizer

+; *

+; * Author: Hauke Heibel <hauke.heibel@gmail.com>

+; *

+; * Support the enhanced debugging of the following Eigen

+; * types (*: any, +:fixed dimension) :

+; *

+; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*>

+; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*>

+; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*>

+; * - Eigen::Matrix<*,-1,-1,*,*,*>

+; * - Eigen::Matrix<*,+,-1,*,*,*>

+; * - Eigen::Matrix<*,-1,+,*,*,*>

+; * - Eigen::Matrix<*,+,+,*,*,*>

+; *

+; * Matrices are displayed properly independently of the memory

+; * alignment (RowMajor vs. ColMajor).

+; *

+; * This file is distributed WITHOUT ANY WARRANTY. Please ensure

+; * that your original autoexp.dat file is copied to a safe 

+; * place before proceeding with its modification.

+; ***************************************************************

+

+[Visualizer]

+

+; Fixed size 4-vectors

+Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{

+   children

+   (

+      #(

+        [internals]: [$c,!],

+         x : ($c.m_storage.m_data.array)[0],

+         y : ($c.m_storage.m_data.array)[1],

+         z : ($c.m_storage.m_data.array)[2],

+         w : ($c.m_storage.m_data.array)[3]

+      )

+   )

+

+   preview

+   (

+      #(

+        "[",

+        4,

+        "](",

+        #array(expr: $e.m_storage.m_data.array[$i], size: 4),

+        ")"

+      )

+   )

+}

+

+; Fixed size 3-vectors

+Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{

+   children

+   (

+      #(

+        [internals]: [$c,!],

+         x : ($c.m_storage.m_data.array)[0],

+         y : ($c.m_storage.m_data.array)[1],

+         z : ($c.m_storage.m_data.array)[2]

+      )

+   )

+

+   preview

+   (

+      #(

+        "[",

+        3,

+        "](",

+        #array(expr: $e.m_storage.m_data.array[$i], size: 3),

+        ")"

+      )

+   )

+}

+

+; Fixed size 2-vectors

+Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{

+   children

+   (

+      #(

+        [internals]: [$c,!],

+         x : ($c.m_storage.m_data.array)[0],

+         y : ($c.m_storage.m_data.array)[1]

+      )

+   )

+

+   preview

+   (

+      #(

+        "[",

+        2,

+        "](",

+        #array(expr: $e.m_storage.m_data.array[$i], size: 2),

+        ")"

+      )

+   )

+}

+

+; Fixed size 1-vectors

+Eigen::Matrix<*,1,1,*,*,*>{

+   children

+   (

+      #(

+        [internals]: [$c,!],

+         x : ($c.m_storage.m_data.array)[0]

+      )

+   )

+

+   preview

+   (

+      #(

+        "[",

+        1,

+        "](",

+        #array(expr: $e.m_storage.m_data.array[$i], size: 1),

+        ")"

+      )

+   )

+}

+

+; Dynamic matrices (ColMajor and RowMajor support)

+Eigen::Matrix<*,-1,-1,*,*,*>{

+  children

+   (

+      #(

+         [internals]: [$c,!],

+         rows: $c.m_storage.m_rows,

+         cols: $c.m_storage.m_cols,

+         ; Check for RowMajorBit

+         #if ($c.Flags & 0x1) (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 

+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols

+             )

+         ) #else (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data)[$i],

+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols

+             )

+         )

+      )

+   )

+

+   preview

+   (

+     #(

+         "[",

+           $c.m_storage.m_rows,

+         ",",

+           $c.m_storage.m_cols,

+         "](",

+           #array(

+            expr :    [($c.m_storage.m_data)[$i],g],

+            size :    $c.m_storage.m_rows*$c.m_storage.m_cols

+           ),

+         ")"

+      )

+   )

+}

+

+; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support)

+Eigen::Matrix<*,*,-1,*,*,*>{

+  children

+   (

+      #(

+         [internals]: [$c,!],

+         rows: $c.RowsAtCompileTime,

+         cols: $c.m_storage.m_cols,

+         ; Check for RowMajorBit

+         #if ($c.Flags & 0x1) (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)],

+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols

+             )

+         ) #else (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data)[$i],

+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols

+             )

+         )

+      )

+   )

+

+   preview

+   (

+     #(

+         "[",

+           $c.RowsAtCompileTime,

+         ",",

+           $c.m_storage.m_cols,

+         "](",

+           #array(

+            expr :    [($c.m_storage.m_data)[$i],g],

+            size :    $c.RowsAtCompileTime*$c.m_storage.m_cols

+           ),

+         ")"

+      )

+   )

+}

+

+; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support)

+Eigen::Matrix<*,-1,*,*,*,*>{

+  children

+   (

+      #(

+         [internals]: [$c,!],

+         rows: $c.m_storage.m_rows,

+         cols: $c.ColsAtCompileTime,

+         ; Check for RowMajorBit

+         #if ($c.Flags & 0x1) (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 

+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime

+             )

+         ) #else (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data)[$i],

+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime

+             )

+         )

+      )

+   )

+

+   preview

+   (

+     #(

+         "[",

+           $c.m_storage.m_rows,

+         ",",

+           $c.ColsAtCompileTime,

+         "](",

+           #array(

+            expr :    [($c.m_storage.m_data)[$i],g],

+            size :    $c.m_storage.m_rows*$c.ColsAtCompileTime

+           ),

+         ")"

+      )

+   )

+}

+

+; Fixed size matrix (ColMajor and RowMajor support)

+Eigen::Matrix<*,*,*,*,*,*>{

+  children

+   (

+      #(

+         [internals]: [$c,!],

+         rows: $c.RowsAtCompileTime,

+         cols: $c.ColsAtCompileTime,

+         ; Check for RowMajorBit

+         #if ($c.Flags & 0x1) (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], 

+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime

+             )

+         ) #else (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data.array)[$i],

+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime

+             )

+         )

+      )

+   )

+

+   preview

+   (

+     #(

+         "[",

+           $c.RowsAtCompileTime,

+         ",",

+           $c.ColsAtCompileTime,

+         "](",

+           #array(

+            expr :    [($c.m_storage.m_data.array)[$i],g],

+            size :    $c.RowsAtCompileTime*$c.ColsAtCompileTime

+           ),

+         ")"

+      )

+   )

+}

diff --git a/doc/QuickStartGuide.dox b/doc/QuickStartGuide.dox
index 4192b28..0372694 100644
--- a/doc/QuickStartGuide.dox
+++ b/doc/QuickStartGuide.dox
@@ -22,11 +22,11 @@
 
 \section GettingStartedCompiling Compiling and running your first program
 
-There is no library to link to. The only thing that you need to keep in mind when compiling the above program is that the compiler must be able to find the Eigen header files. The directory in which you placed Eigen's source code must be in the include path. With GCC you use the -I option to achieve this, so you can compile the program with a command like this:
+There is no library to link to. The only thing that you need to keep in mind when compiling the above program is that the compiler must be able to find the Eigen header files. The directory in which you placed Eigen's source code must be in the include path. With GCC you use the \c -I option to achieve this, so you can compile the program with a command like this:
 
 \code g++ -I /path/to/eigen/ my_program.cpp -o my_program \endcode
 
-On Linux or Mac OS X, another option is to symlink or copy the Eigen folder into /usr/local/include/. This way, you can compile the program with:
+On Linux or Mac OS X, another option is to symlink or copy the Eigen folder into \c /usr/local/include/. This way, you can compile the program with:
 
 \code g++ my_program.cpp -o my_program \endcode
 
diff --git a/doc/SparseLinearSystems.dox b/doc/SparseLinearSystems.dox
index 38754e4..1f7db6f 100644
--- a/doc/SparseLinearSystems.dox
+++ b/doc/SparseLinearSystems.dox
@@ -137,7 +137,7 @@
 x2 = solver.solve(b2);
 ...
 \endcode
-The compute() method is equivalent to calling both analyzePattern() and factorize().
+The `compute()` method is equivalent to calling both `analyzePattern()` and `factorize()`.
 
 Each solver provides some specific features, such as determinant, access to the factors, controls of the iterations, and so on.
 More details are available in the documentations of the respective classes.
@@ -145,9 +145,9 @@
 Finally, most of the iterative solvers, can also be used in a \b matrix-free context, see the following \link MatrixfreeSolverExample example \endlink.
 
 \section TheSparseCompute The Compute Step
-In the compute() function, the matrix is generally factorized: LLT for self-adjoint matrices, LDLT for general hermitian matrices, LU for non hermitian matrices and QR for rectangular matrices. These are the results of using direct solvers. For this class of solvers precisely, the compute step is further subdivided into analyzePattern() and factorize(). 
+In the `compute()` function, the matrix is generally factorized: LLT for self-adjoint matrices, LDLT for general hermitian matrices, LU for non hermitian matrices and QR for rectangular matrices. These are the results of using direct solvers. For this class of solvers precisely, the compute step is further subdivided into `analyzePattern()` and `factorize()`. 
 
-The goal of analyzePattern() is to reorder the nonzero elements of the matrix, such that the factorization step creates less fill-in. This step exploits only the structure of the matrix. Hence, the results of this step can be used for other linear systems where the matrix has the same structure. Note however that sometimes, some external solvers (like SuperLU) require that the values of the matrix are set in this step, for instance to equilibrate the rows and columns of the matrix. In this situation, the results of this step should not be used with other matrices.
+The goal of `analyzePattern()` is to reorder the nonzero elements of the matrix, such that the factorization step creates less fill-in. This step exploits only the structure of the matrix. Hence, the results of this step can be used for other linear systems where the matrix has the same structure. Note however that sometimes, some external solvers (like SuperLU) require that the values of the matrix are set in this step, for instance to equilibrate the rows and columns of the matrix. In this situation, the results of this step should not be used with other matrices.
 
 Eigen provides a limited set of methods to reorder the matrix in this step, either built-in (COLAMD, AMD) or external (METIS). These methods are set in template parameter list of the solver :
 \code
@@ -156,21 +156,21 @@
 
 See the \link OrderingMethods_Module OrderingMethods module \endlink for the list of available methods and the associated options. 
 
-In factorize(), the factors of the coefficient matrix are computed. This step should be called each time the values of the matrix change. However, the structural pattern of the matrix should not change between multiple calls. 
+In `factorize()`, the factors of the coefficient matrix are computed. This step should be called each time the values of the matrix change. However, the structural pattern of the matrix should not change between multiple calls. 
 
 For iterative solvers, the compute step is used to eventually setup a preconditioner. For instance, with the ILUT preconditioner, the incomplete factors L and U are computed in this step. Remember that, basically, the goal of the preconditioner is to speedup the convergence of an iterative method by solving a modified linear system where the coefficient matrix has more clustered eigenvalues. For real problems, an iterative solver should always be used with a preconditioner. In Eigen, a preconditioner is  selected by simply adding it as a template parameter to the iterative solver object. 
 \code
 IterativeSolverClassName<SparseMatrix<double>, PreconditionerName<SparseMatrix<double> > solver; 
 \endcode
-The member function preconditioner() returns a read-write reference to the preconditioner 
+The member function `preconditioner()` returns a read-write reference to the preconditioner 
  to directly interact with it. See the \link IterativeLinearSolvers_Module Iterative solvers module \endlink and the documentation of each class for the list of available methods.
 
 \section TheSparseSolve The Solve step
-The solve() function computes the solution of the linear systems with one or many right hand sides.
+The `solve()` function computes the solution of the linear systems with one or many right hand sides.
 \code
 X = solver.solve(B);
 \endcode 
-Here, B  can be a vector or a matrix where the columns form the different right hand sides. The solve() function can be called several times as well, for instance when all the right hand sides are not available at once. 
+Here, B can be a vector or a matrix where the columns form the different right hand sides. The `solve()` function can be called several times as well, for instance when all the right hand sides are not available at once. 
 \code
 x1 = solver.solve(b1);
 // Get the second right hand side b2
@@ -180,7 +180,7 @@
 For direct methods, the solution are computed at the machine precision. Sometimes, the solution need not be too accurate. In this case, the iterative methods are more suitable and the desired accuracy can be set before the solve step using \b setTolerance(). For all the available functions, please, refer to the documentation of the \link IterativeLinearSolvers_Module Iterative solvers module \endlink. 
 
 \section BenchmarkRoutine
-Most of the time, all you need is to know how much time it will take to solve your system, and hopefully, what is the most suitable solver. In Eigen, we provide a benchmark routine that can be used for this purpose. It is very easy to use. In the build directory, navigate to bench/spbench and compile the routine by typing \b make \e spbenchsolver. Run it with --help option to get the list of all available options. Basically, the matrices to test should be in <a href="http://math.nist.gov/MatrixMarket/formats.html">MatrixMarket Coordinate format</a>, and the routine returns the statistics from all available solvers in Eigen.
+Most of the time, all you need is to know how much time it will take to solve your system, and hopefully, what is the most suitable solver. In Eigen, we provide a benchmark routine that can be used for this purpose. It is very easy to use. In the build directory, navigate to `bench/spbench` and compile the routine by typing `make spbenchsolver`. Run it with `--help` option to get the list of all available options. Basically, the matrices to test should be in <a href="http://math.nist.gov/MatrixMarket/formats.html">MatrixMarket Coordinate format</a>, and the routine returns the statistics from all available solvers in Eigen.
 
 To export your matrices and right-hand-side vectors in the matrix-market format, you can the the unsupported SparseExtra module:
 \code
diff --git a/doc/SparseQuickReference.dox b/doc/SparseQuickReference.dox
index 9779f3f..b8264a4 100644
--- a/doc/SparseQuickReference.dox
+++ b/doc/SparseQuickReference.dox
@@ -249,7 +249,7 @@
 \endcode
 </td>
 <td>
-If the matrix is not in compressed form, makeCompressed() should be called before.\n
+If the matrix is not in compressed form, `makeCompressed()` should be called before.\n
 Note that these functions are mostly provided for interoperability purposes with external libraries.\n
 A better access to the values of the matrix is done by using the InnerIterator class as described in \link TutorialSparse the Tutorial Sparse \endlink section</td>
 </tr>
diff --git a/doc/TutorialMatrixClass.dox b/doc/TutorialMatrixClass.dox
index 2c45222..3e0785a 100644
--- a/doc/TutorialMatrixClass.dox
+++ b/doc/TutorialMatrixClass.dox
@@ -151,14 +151,14 @@
 \verbinclude tut_matrix_coefficient_accessors.out
 </td></tr></table>
 
-Note that the syntax <tt> m(index) </tt>
+Note that the syntax `m(index)` 
 is not restricted to vectors, it is also available for general matrices, meaning index-based access
 in the array of coefficients. This however depends on the matrix's storage order. All Eigen matrices default to
 column-major storage order, but this can be changed to row-major, see \ref TopicStorageOrders "Storage orders".
 
-The operator[] is also overloaded for index-based access in vectors, but keep in mind that C++ doesn't allow operator[] to
-take more than one argument. We restrict operator[] to vectors, because an awkwardness in the C++ language
-would make matrix[i,j] compile to the same thing as matrix[j] !
+The `operator[]` is also overloaded for index-based access in vectors, but keep in mind that C++ doesn't allow `operator[]` to
+take more than one argument. We restrict `operator[]` to vectors, because an awkwardness in the C++ language
+would make `matrix[i,j]` compile to the same thing as `matrix[j]`!
 
 \section TutorialMatrixCommaInitializer Comma-initialization
 
@@ -186,8 +186,8 @@
 <td>\verbinclude tut_matrix_resize.out </td>
 </tr></table>
 
-The resize() method is a no-operation if the actual matrix size doesn't change; otherwise it is destructive: the values of the coefficients may change.
-If you want a conservative variant of resize() which does not change the coefficients, use \link PlainObjectBase::conservativeResize() conservativeResize()\endlink, see \ref TopicResizing "this page" for more details.
+The `resize()` method is a no-operation if the actual matrix size doesn't change; otherwise it is destructive: the values of the coefficients may change.
+If you want a conservative variant of `resize()` which does not change the coefficients, use \link PlainObjectBase::conservativeResize() conservativeResize()\endlink, see \ref TopicResizing "this page" for more details.
 
 All these methods are still available on fixed-size matrices, for the sake of API uniformity. Of course, you can't actually
 resize a fixed-size matrix. Trying to change a fixed size to an actually different value will trigger an assertion failure;
@@ -234,7 +234,7 @@
 \code MatrixXf mymatrix(rows,columns); \endcode
 amounts to doing
 \code float *mymatrix = new float[rows*columns]; \endcode
-and in addition to that, the MatrixXf object stores its number of rows and columns as
+and in addition to that, the \c MatrixXf object stores its number of rows and columns as
 member variables.
 
 The limitation of using fixed sizes, of course, is that this is only possible
@@ -276,14 +276,14 @@
 \section TutorialMatrixTypedefs Convenience typedefs
 
 Eigen defines the following Matrix typedefs:
-\li MatrixNt for Matrix<type, N, N>. For example, MatrixXi for Matrix<int, Dynamic, Dynamic>.
-\li VectorNt for Matrix<type, N, 1>. For example, Vector2f for Matrix<float, 2, 1>.
-\li RowVectorNt for Matrix<type, 1, N>. For example, RowVector3d for Matrix<double, 1, 3>.
+\li \c MatrixNt for `Matrix<type, N, N>`. For example, \c MatrixXi for `Matrix<int, Dynamic, Dynamic>`.
+\li \c VectorNt for `Matrix<type, N, 1>`. For example, \c Vector2f for `Matrix<float, 2, 1>`.
+\li \c RowVectorNt for `Matrix<type, 1, N>`. For example, \c RowVector3d for `Matrix<double, 1, 3>`.
 
 Where:
-\li N can be any one of \c 2, \c 3, \c 4, or \c X (meaning \c Dynamic).
-\li t can be any one of \c i (meaning int), \c f (meaning float), \c d (meaning double),
-      \c cf (meaning complex<float>), or \c cd (meaning complex<double>). The fact that typedefs are only
+\li \c N can be any one of \c 2, \c 3, \c 4, or \c X (meaning \c Dynamic).
+\li \c t can be any one of \c i (meaning \c int), \c f (meaning \c float), \c d (meaning \c double),
+      \c cf (meaning `complex<float>`), or \c cd (meaning `complex<double>`). The fact that `typedef`s are only
     defined for these five types doesn't mean that they are the only supported scalar types. For example,
     all standard integer types are supported, see \ref TopicScalarTypes "Scalar types".
 
diff --git a/doc/TutorialReshape.dox b/doc/TutorialReshape.dox
index 5b4022a..07e5c3c 100644
--- a/doc/TutorialReshape.dox
+++ b/doc/TutorialReshape.dox
@@ -3,7 +3,7 @@
 /** \eigenManualPage TutorialReshape Reshape
 
 Since the version 3.4, %Eigen exposes convenient methods to reshape a matrix to another matrix of different sizes or vector.
-All cases are handled via the DenseBase::reshaped(NRowsType,NColsType) and DenseBase::reshaped() functions.
+All cases are handled via the `DenseBase::reshaped(NRowsType,NColsType)` and `DenseBase::reshaped()` functions.
 Those functions do not perform in-place reshaping, but instead return a <i> view </i> on the input expression.
 
 \eigenAutoToc
@@ -23,7 +23,7 @@
 </td></tr></table>
 
 By default, the input coefficients are always interpreted in column-major order regardless of the storage order of the input expression.
-For more control on ordering, compile-time sizes, and automatic size deduction, please see de documentation of DenseBase::reshaped(NRowsType,NColsType) that contains all the details with many examples.
+For more control on ordering, compile-time sizes, and automatic size deduction, please see the documentation of `DenseBase::reshaped(NRowsType,NColsType)` that contains all the details with many examples.
 
 
 \section TutorialReshapeMat2Vec 1D linear views
diff --git a/doc/TutorialSparse.dox b/doc/TutorialSparse.dox
index 350ea11..77a08da 100644
--- a/doc/TutorialSparse.dox
+++ b/doc/TutorialSparse.dox
@@ -54,13 +54,13 @@
 
 Currently the elements of a given inner vector are guaranteed to be always sorted by increasing inner indices.
 The \c "_" indicates available free space to quickly insert new elements.
-Assuming no reallocation is needed, the insertion of a random element is therefore in O(nnz_j) where nnz_j is the number of nonzeros of the respective inner vector.
-On the other hand, inserting elements with increasing inner indices in a given inner vector is much more efficient since this only requires to increase the respective \c InnerNNZs entry that is a O(1) operation.
+Assuming no reallocation is needed, the insertion of a random element is therefore in `O(nnz_j)` where `nnz_j` is the number of nonzeros of the respective inner vector.
+On the other hand, inserting elements with increasing inner indices in a given inner vector is much more efficient since this only requires to increase the respective \c InnerNNZs entry that is a `O(1)` operation.
 
 The case where no empty space is available is a special case, and is referred as the \em compressed mode.
 It corresponds to the widely used Compressed Column (or Row) Storage schemes (CCS or CRS).
 Any SparseMatrix can be turned to this form by calling the SparseMatrix::makeCompressed() function.
-In this case, one can remark that the \c InnerNNZs array is redundant with \c OuterStarts because we the equality: \c InnerNNZs[j] = \c OuterStarts[j+1]-\c OuterStarts[j].
+In this case, one can remark that the \c InnerNNZs array is redundant with \c OuterStarts because we have the equality: `InnerNNZs[j] == OuterStarts[j+1] - OuterStarts[j]`.
 Therefore, in practice a call to SparseMatrix::makeCompressed() frees this buffer.
 
 It is worth noting that most of our wrappers to external libraries requires compressed matrices as inputs.
@@ -221,9 +221,9 @@
 5: mat.makeCompressed();                        // optional
 \endcode
 
-- The key ingredient here is the line 2 where we reserve room for 6 non-zeros per column. In many cases, the number of non-zeros per column or row can easily be known in advance. If it varies significantly for each inner vector, then it is possible to specify a reserve size for each inner vector by providing a vector object with an operator[](int j) returning the reserve size of the \c j-th inner vector (e.g., via a VectorXi or std::vector<int>). If only a rought estimate of the number of nonzeros per inner-vector can be obtained, it is highly recommended to overestimate it rather than the opposite. If this line is omitted, then the first insertion of a new element will reserve room for 2 elements per inner vector.
+- The key ingredient here is the line 2 where we reserve room for 6 non-zeros per column. In many cases, the number of non-zeros per column or row can easily be known in advance. If it varies significantly for each inner vector, then it is possible to specify a reserve size for each inner vector by providing a vector object with an `operator[](int j)` returning the reserve size of the \c j-th inner vector (e.g., via a `VectorXi` or `std::vector<int>`). If only a rough estimate of the number of nonzeros per inner-vector can be obtained, it is highly recommended to overestimate it rather than the opposite. If this line is omitted, then the first insertion of a new element will reserve room for 2 elements per inner vector.
 - The line 4 performs a sorted insertion. In this example, the ideal case is when the \c j-th column is not full and contains non-zeros whose inner-indices are smaller than \c i. In this case, this operation boils down to trivial O(1) operation.
-- When calling insert(i,j) the element \c i \c ,j must not already exists, otherwise use the coeffRef(i,j) method that will allow to, e.g., accumulate values. This method first performs a binary search and finally calls insert(i,j) if the element does not already exist. It is more flexible than insert() but also more costly.
+- When calling `insert(i,j)` the element `i`, `j` must not already exist, otherwise use the `coeffRef(i,j)` method that will allow to, e.g., accumulate values. This method first performs a binary search and finally calls `insert(i,j)` if the element does not already exist. It is more flexible than `insert()` but also more costly.
 - The line 5 suppresses the remaining empty space and transforms the matrix into a compressed column storage.
 
 
@@ -259,7 +259,7 @@
 dm2 = sm1 + dm1;
 dm2 = dm1 - sm1;
 \endcode
-Performance-wise, the adding/subtracting sparse and dense matrices is better performed in two steps. For instance, instead of doing <tt>dm2 = sm1 + dm1</tt>, better write:
+Performance-wise, the adding/subtracting sparse and dense matrices is better performed in two steps. For instance, instead of doing `dm2 = sm1 + dm1`, better write:
 \code
 dm2 = dm1;
 dm2 += sm1;
@@ -272,7 +272,7 @@
 sm1 = sm2.transpose();
 sm1 = sm2.adjoint();
 \endcode
-However, there is no transposeInPlace() method.
+However, there is no `transposeInPlace()` method.
 
 
 \subsection TutorialSparse_Products Matrix products
@@ -284,18 +284,18 @@
 dm2 = dm1 * sm1.adjoint();
 dm2 = 2. * sm1 * dm1;
     \endcode
-  - \b symmetric \b sparse-dense. The product of a sparse symmetric matrix with a dense matrix (or vector) can also be optimized by specifying the symmetry with selfadjointView():
+  - \b symmetric \b sparse-dense. The product of a sparse symmetric matrix with a dense matrix (or vector) can also be optimized by specifying the symmetry with `selfadjointView()`:
     \code
-dm2 = sm1.selfadjointView<>() * dm1;        // if all coefficients of A are stored
-dm2 = A.selfadjointView<Upper>() * dm1;     // if only the upper part of A is stored
-dm2 = A.selfadjointView<Lower>() * dm1;     // if only the lower part of A is stored
+dm2 = sm1.selfadjointView<>() * dm1;          // if all coefficients of sm1 are stored
+dm2 = sm1.selfadjointView<Upper>() * dm1;     // if only the upper part of sm1 is stored
+dm2 = sm1.selfadjointView<Lower>() * dm1;     // if only the lower part of sm1 is stored
     \endcode
   - \b sparse-sparse. For sparse-sparse products, two different algorithms are available. The default one is conservative and preserve the explicit zeros that might appear:
     \code
 sm3 = sm1 * sm2;
 sm3 = 4 * sm1.adjoint() * sm2;
     \endcode
-    The second algorithm prunes on the fly the explicit zeros, or the values smaller than a given threshold. It is enabled and controlled through the prune() functions:
+    The second algorithm prunes on the fly the explicit zeros, or the values smaller than a given threshold. It is enabled and controlled through the `prune()` functions:
     \code
 sm3 = (sm1 * sm2).pruned();                  // removes numerical zeros
 sm3 = (sm1 * sm2).pruned(ref);               // removes elements much smaller than ref
@@ -314,7 +314,7 @@
 \subsection TutorialSparse_SubMatrices Block operations
 
 Regarding read-access, sparse matrices expose the same API than for dense matrices to access to sub-matrices such as blocks, columns, and rows. See \ref TutorialBlockOperations for a detailed introduction.
-However, for performance reasons, writing to a sub-sparse-matrix is much more limited, and currently only contiguous sets of columns (resp. rows) of a column-major (resp. row-major) SparseMatrix are writable. Moreover, this information has to be known at compile-time, leaving out methods such as <tt>block(...)</tt> and <tt>corner*(...)</tt>. The available API for write-access to a SparseMatrix are summarized below:
+However, for performance reasons, writing to a sub-sparse-matrix is much more limited, and currently only contiguous sets of columns (resp. rows) of a column-major (resp. row-major) SparseMatrix are writable. Moreover, this information has to be known at compile-time, leaving out methods such as `block(...)` and `corner*(...)`. The available API for write-access to a SparseMatrix are summarized below:
 \code
 SparseMatrix<double,ColMajor> sm1;
 sm1.col(j) = ...;
@@ -329,22 +329,22 @@
 sm2.bottomRows(nrows) = ...;
 \endcode
 
-In addition, sparse matrices expose the SparseMatrixBase::innerVector() and SparseMatrixBase::innerVectors() methods, which are aliases to the col/middleCols methods for a column-major storage, and to the row/middleRows methods for a row-major storage.
+In addition, sparse matrices expose the `SparseMatrixBase::innerVector()` and `SparseMatrixBase::innerVectors()` methods, which are aliases to the `col`/`middleCols` methods for a column-major storage, and to the `row`/`middleRows` methods for a row-major storage.
 
 \subsection TutorialSparse_TriangularSelfadjoint Triangular and selfadjoint views
 
-Just as with dense matrices, the triangularView() function can be used to address a triangular part of the matrix, and perform triangular solves with a dense right hand side:
+Just as with dense matrices, the `triangularView()` function can be used to address a triangular part of the matrix, and perform triangular solves with a dense right hand side:
 \code
 dm2 = sm1.triangularView<Lower>(dm1);
 dv2 = sm1.transpose().triangularView<Upper>(dv1);
 \endcode
 
-The selfadjointView() function permits various operations:
+The `selfadjointView()` function permits various operations:
  - optimized sparse-dense matrix products:
     \code
-dm2 = sm1.selfadjointView<>() * dm1;        // if all coefficients of A are stored
-dm2 = A.selfadjointView<Upper>() * dm1;     // if only the upper part of A is stored
-dm2 = A.selfadjointView<Lower>() * dm1;     // if only the lower part of A is stored
+dm2 = sm1.selfadjointView<>() * dm1;          // if all coefficients of sm1 are stored
+dm2 = sm1.selfadjointView<Upper>() * dm1;     // if only the upper part of sm1 is stored
+dm2 = sm1.selfadjointView<Lower>() * dm1;     // if only the lower part of sm1 is stored
     \endcode
  - copy of triangular parts:
     \code
diff --git a/test/AnnoyingScalar.h b/test/AnnoyingScalar.h
index 0f8e70d..7ace083 100644
--- a/test/AnnoyingScalar.h
+++ b/test/AnnoyingScalar.h
@@ -126,7 +126,7 @@
 struct NumTraits<AnnoyingScalar> : NumTraits<float>
 {
   enum {
-    RequireInitialization = true
+    RequireInitialization = 1,
   };
   typedef AnnoyingScalar Real;
   typedef AnnoyingScalar Nested;
@@ -145,10 +145,6 @@
 }
 
 namespace internal {
-  template<> EIGEN_STRONG_INLINE AnnoyingScalar pcmp_eq(const AnnoyingScalar& a, const AnnoyingScalar& b)
-  { return AnnoyingScalar(pcmp_eq(*a.v, *b.v)); }
-  template<> EIGEN_STRONG_INLINE AnnoyingScalar pselect(const AnnoyingScalar& mask, const AnnoyingScalar& a, const AnnoyingScalar& b)
-  { return numext::equal_strict(*mask.v, 0.f) ? b : a; }
   template<> EIGEN_STRONG_INLINE double cast(const AnnoyingScalar& x) { return double(*x.v); }
   template<> EIGEN_STRONG_INLINE float  cast(const AnnoyingScalar& x) { return *x.v; }
 }
diff --git a/test/OffByOneScalar.h b/test/OffByOneScalar.h
new file mode 100644
index 0000000..c0371a6
--- /dev/null
+++ b/test/OffByOneScalar.h
@@ -0,0 +1,28 @@
+
+// A Scalar with internal representation T+1 so that zero is internally
+// represented by T(1). This is used to test memory fill.
+//
+template<typename T>
+class OffByOneScalar {
+ public:
+  OffByOneScalar() : val_(1) {}
+  OffByOneScalar(const OffByOneScalar& other) {
+    *this = other;
+  }
+  OffByOneScalar& operator=(const OffByOneScalar& other) {
+    val_ = other.val_;
+    return *this;
+  }
+  OffByOneScalar(T val) : val_(val + 1) {}
+  OffByOneScalar& operator=(T val) {
+    val_ = val + 1;
+    // Returning *this is required: falling off the end of a value-returning
+    // function is undefined behavior.
+    return *this;
+  }
+
+  operator T() const {
+    return val_ - 1;
+  }
+
+ private:
+  T val_;
+};
diff --git a/test/gpu_basic.cu b/test/gpu_basic.cu
index bf8dcac..4298da3 100644
--- a/test/gpu_basic.cu
+++ b/test/gpu_basic.cu
@@ -197,18 +197,17 @@
     res.segment(block_idx, size) = x1; res.segment(block_idx, size).array() /= x2.array();
     block_idx += size;
 
-    // Equality comparisons currently not functional on device
-    //   (std::equal_to<T> is host-only).
-    // const T true_vector = T::Constant(true_value);
-    // const T false_vector = T::Constant(false_value);
-    // res.segment(block_idx, size) = (x1 == x2 ? true_vector : false_vector);
-    // block_idx += size;
+    const T true_vector = T::Constant(true_value);
+    const T false_vector = T::Constant(false_value);
+    res.segment(block_idx, size) = (x1 == x2 ? true_vector : false_vector);
+    block_idx += size;
+    // Mixing types in equality comparison does not work.
     // res.segment(block_idx, size) = (x1 == x2.real() ? true_vector : false_vector);
     // block_idx += size;
     // res.segment(block_idx, size) = (x1.real() == x2 ? true_vector : false_vector);
     // block_idx += size;
-    // res.segment(block_idx, size) = (x1 != x2 ? true_vector : false_vector);
-    // block_idx += size;
+    res.segment(block_idx, size) = (x1 != x2 ? true_vector : false_vector);
+    block_idx += size;
     // res.segment(block_idx, size) = (x1 != x2.real() ? true_vector : false_vector);
     // block_idx += size;
     // res.segment(block_idx, size) = (x1.real() != x2 ? true_vector : false_vector);
diff --git a/test/jacobisvd.cpp b/test/jacobisvd.cpp
index 89484d9..5b15c5a 100644
--- a/test/jacobisvd.cpp
+++ b/test/jacobisvd.cpp
@@ -36,6 +36,9 @@
 template<typename MatrixType> void jacobisvd_verify_assert(const MatrixType& m)
 {
   svd_verify_assert<JacobiSVD<MatrixType> >(m);
+  svd_verify_assert<JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner> >(m, true);
+  svd_verify_assert<JacobiSVD<MatrixType, ColPivHouseholderQRPreconditioner> >(m);
+  svd_verify_assert<JacobiSVD<MatrixType, HouseholderQRPreconditioner> >(m);
   Index rows = m.rows();
   Index cols = m.cols();
 
diff --git a/test/main.h b/test/main.h
index 07f3794..786673d 100644
--- a/test/main.h
+++ b/test/main.h
@@ -641,14 +641,21 @@
     return false;
 }
 
-/** Creates a random Partial Isometry matrix of given rank.
-  *
-  * A partial isometry is a matrix all of whose singular values are either 0 or 1.
-  * This is very useful to test rank-revealing algorithms.
-  */
 // Forward declaration to avoid ICC warning
 template<typename MatrixType>
 void createRandomPIMatrixOfRank(Index desired_rank, Index rows, Index cols, MatrixType& m);
+/**
+ * Creates a random partial isometry matrix of given rank.
+ *
+ * A partial isometry is a matrix all of whose singular values are either 0 or 1.
+ * This is very useful to test rank-revealing algorithms.
+ *
+ * @tparam MatrixType type of random partial isometry matrix
+ * @param desired_rank rank requested for the random partial isometry matrix
+ * @param rows row dimension of requested random partial isometry matrix
+ * @param cols column dimension of requested random partial isometry matrix
+ * @param m random partial isometry matrix
+ */
 template<typename MatrixType>
 void createRandomPIMatrixOfRank(Index desired_rank, Index rows, Index cols, MatrixType& m)
 {
@@ -689,6 +696,13 @@
 // Forward declaration to avoid ICC warning
 template<typename PermutationVectorType>
 void randomPermutationVector(PermutationVectorType& v, Index size);
+/**
+ * Generate random permutation vector.
+ *
+ * @tparam PermutationVectorType type of vector used to store permutation
+ * @param v permutation vector
+ * @param size length of permutation vector
+ */
 template<typename PermutationVectorType>
 void randomPermutationVector(PermutationVectorType& v, Index size)
 {
@@ -705,16 +719,37 @@
   }
 }
 
+/**
+ * Check if number is "not a number" (NaN).
+ *
+ * @tparam T input type
+ * @param x input value
+ * @return true, if input value is "not a number" (NaN)
+ */
 template<typename T> bool isNotNaN(const T& x)
 {
   return x==x;
 }
 
+/**
+ * Check if number is plus infinity.
+ *
+ * @tparam T input type
+ * @param x input value
+ * @return true, if input value is plus infinity
+ */
 template<typename T> bool isPlusInf(const T& x)
 {
   return x > NumTraits<T>::highest();
 }
 
+/**
+ * Check if number is minus infinity.
+ *
+ * @tparam T input type
+ * @param x input value
+ * @return true, if input value is minus infinity
+ */
 template<typename T> bool isMinusInf(const T& x)
 {
   return x < NumTraits<T>::lowest();
@@ -743,6 +778,11 @@
 
 using namespace Eigen;
 
+/**
+ * Set number of repetitions for unit test from input string.
+ *
+ * @param str input string
+ */
 inline void set_repeat_from_string(const char *str)
 {
   errno = 0;
@@ -755,6 +795,11 @@
   g_has_set_repeat = true;
 }
 
+/**
+ * Set seed for randomized unit tests from input string.
+ *
+ * @param str input string
+ */
 inline void set_seed_from_string(const char *str)
 {
   errno = 0;
diff --git a/test/svd_common.h b/test/svd_common.h
index bd62edc..eae4c0b 100644
--- a/test/svd_common.h
+++ b/test/svd_common.h
@@ -462,7 +462,7 @@
 }
 
 template<typename SvdType,typename MatrixType> 
-void svd_verify_assert(const MatrixType& m)
+void svd_verify_assert(const MatrixType& m, bool fullOnly = false)
 {
   typedef typename MatrixType::Scalar Scalar;
   Index rows = m.rows();
@@ -489,8 +489,17 @@
   VERIFY_RAISES_ASSERT(svd.matrixV())
   svd.singularValues();
   VERIFY_RAISES_ASSERT(svd.solve(rhs))
-    
-  if (ColsAtCompileTime == Dynamic)
+
+  svd.compute(a, ComputeFullU);
+  svd.matrixU();
+  VERIFY_RAISES_ASSERT(svd.matrixV())
+  VERIFY_RAISES_ASSERT(svd.solve(rhs))
+  svd.compute(a, ComputeFullV);
+  svd.matrixV();
+  VERIFY_RAISES_ASSERT(svd.matrixU())
+  VERIFY_RAISES_ASSERT(svd.solve(rhs))
+
+  if (!fullOnly && ColsAtCompileTime == Dynamic)
   {
     svd.compute(a, ComputeThinU);
     svd.matrixU();
diff --git a/unsupported/Eigen/CXX11/src/Tensor/README.md b/unsupported/Eigen/CXX11/src/Tensor/README.md
index 0ef212a..d4d3d59 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/README.md
+++ b/unsupported/Eigen/CXX11/src/Tensor/README.md
@@ -3,8 +3,6 @@
 Tensors are multidimensional arrays of elements. Elements are typically scalars,
 but more complex types such as strings are also supported.
 
-[TOC]
-
 ## Tensor Classes
 
 You can manipulate a tensor with one of the following classes.  They all are in
@@ -21,7 +19,7 @@
 Tensors of this class are resizable.  For example, if you assign a tensor of a
 different size to a Tensor, that tensor is resized to match its new value.
 
-#### Constructor `Tensor<data_type, rank>(size0, size1, ...)`
+#### Constructor Tensor<data_type, rank>(size0, size1, ...)
 
 Constructor for a Tensor.  The constructor must be passed `rank` integers
 indicating the sizes of the instance along each of the the `rank`
@@ -34,7 +32,7 @@
     // Resize t_3d by assigning a tensor of different sizes, but same rank.
     t_3d = Tensor<float, 3>(3, 4, 3);
 
-#### Constructor `Tensor<data_type, rank>(size_array)`
+#### Constructor Tensor<data_type, rank>(size_array)
 
 Constructor where the sizes for the constructor are specified as an array of
 values instead of an explicitly list of parameters.  The array type to use is
@@ -45,7 +43,7 @@
     Tensor<string, 2> t_2d({5, 7});
 
 
-### Class `TensorFixedSize<data_type, Sizes<size0, size1, ...>>`
+### Class TensorFixedSize<data_type, Sizes<size0, size1, ...>>
 
 Class to use for tensors of fixed size, where the size is known at compile
 time.  Fixed sized tensors can provide very fast computations because all their
@@ -57,7 +55,7 @@
     // Create a 4 x 3 tensor of floats.
     TensorFixedSize<float, Sizes<4, 3>> t_4x3;
 
-### Class `TensorMap<Tensor<data_type, rank>>`
+### Class TensorMap<Tensor<data_type, rank>>
 
 This is the class to use to create a tensor on top of memory allocated and
 owned by another part of your code.  It allows to view any piece of allocated
@@ -67,7 +65,7 @@
 A TensorMap is not resizable because it does not own the memory where its data
 are stored.
 
-#### Constructor `TensorMap<Tensor<data_type, rank>>(data, size0, size1, ...)`
+#### Constructor TensorMap<Tensor<data_type, rank>>(data, size0, size1, ...)
 
 Constructor for a Tensor.  The constructor must be passed a pointer to the
 storage for the data, and "rank" size attributes.  The storage has to be
@@ -87,13 +85,13 @@
     TensorMap<Tensor<float, 1>> t_12(t_4x3.data(), 12);
 
 
-#### Class `TensorRef`
+#### Class TensorRef
 
 See Assigning to a TensorRef below.
 
 ## Accessing Tensor Elements
 
-#### `<data_type> tensor(index0, index1...)`
+#### <data_type> tensor(index0, index1...)
 
 Return the element at position `(index0, index1...)` in tensor
 `tensor`.  You must pass as many parameters as the rank of `tensor`.
@@ -278,7 +276,7 @@
 tensors of type TensorFixedSize, TensorMaps cannot be resized so they have to
 have the rank and sizes of the expression that are assigned to them.
 
-#### Calling `eval()`.
+#### Calling eval().
 
 When you compute large composite expressions, you sometimes want to tell Eigen
 that an intermediate value in the expression tree is worth evaluating ahead of
@@ -355,7 +353,7 @@
         (Y / (Y.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast))).eval();
 
 
-#### Assigning to a `TensorRef`.
+#### Assigning to a TensorRef.
 
 If you need to access only a few elements from the value of an expression you
 can avoid materializing the value in a full tensor by using a TensorRef.
@@ -455,24 +453,24 @@
 In the documentation of the tensor methods and Operation we mention datatypes
 that are tensor-type specific:
 
-#### `<Tensor-Type>::``Dimensions`
+#### <Tensor-Type>::Dimensions
 
 Acts like an array of ints.  Has an `int size` attribute, and can be
 indexed like an array to access individual values.  Used to represent the
 dimensions of a tensor.  See `dimensions()`.
 
-#### `<Tensor-Type>::``Index`
+#### <Tensor-Type>::Index
 
 Acts like an `int`.  Used for indexing tensors along their dimensions.  See
 `operator()`, `dimension()`, and `size()`.
 
-#### `<Tensor-Type>::``Scalar`
+#### <Tensor-Type>::Scalar
 
 Represents the datatype of individual tensor elements.  For example, for a
 `Tensor<float>`, `Scalar` is the type `float`.  See
 `setConstant()`.
 
-#### `<Operation>`
+#### <Operation>
 
 We use this pseudo type to indicate that a tensor Operation is returned by a
 method.  We indicate in the text the type and dimensions of the tensor that the
@@ -492,7 +490,7 @@
 
 ## Metadata
 
-### `int NumDimensions`
+### int NumDimensions
 
 Constant value indicating the number of dimensions of a Tensor.  This is also
 known as the tensor "rank".
@@ -501,7 +499,7 @@
       cout << "Dims " << a.NumDimensions;
       => Dims 2
 
-### `Dimensions dimensions()`
+### Dimensions dimensions()
 
 Returns an array-like object representing the dimensions of the tensor.
 The actual type of the `dimensions()` result is `<Tensor-Type>::``Dimensions`.
@@ -519,7 +517,7 @@
          << ", dim 1: " << d[1];
     => Dim size: 2, dim 0: 3, dim 1: 4
 
-### `Index dimension(Index n)`
+### Index dimension(Index n)
 
 Returns the n-th dimension of the tensor.  The actual type of the
 `dimension()` result is `<Tensor-Type>::``Index`, but you can
@@ -530,7 +528,7 @@
       cout << "Dim 1: " << dim1;
       => Dim 1: 4
 
-### `Index size()`
+### Index size()
 
 Returns the total number of elements in the tensor.  This is the product of all
 the tensor dimensions.  The actual type of the `size()` result is
@@ -605,7 +603,7 @@
 have an immediate effect on the tensor and return the tensor itself as a
 result.  These are not tensor Operations which delay evaluation.
 
-### `<Tensor-Type> setConstant(const Scalar& val)`
+### <Tensor-Type> setConstant(const Scalar& val)
 
 Sets all elements of the tensor to the constant value `val`.  `Scalar`
 is the type of data stored in the tensor.  You can pass any value that is
@@ -633,7 +631,7 @@
     yolo yolo yolo
 
 
-### `<Tensor-Type> setZero()`
+### <Tensor-Type> setZero()
 
 Fills the tensor with zeros.  Equivalent to `setConstant(Scalar(0))`.
 Returns the tensor itself in case you want to chain another call.
@@ -647,7 +645,7 @@
     0 0 0 0
 
 
-### `<Tensor-Type> setValues({..initializer_list})`
+### <Tensor-Type> setValues({..initializer_list})
 
 Fills the tensor with explicit values specified in a std::initializer_list.
 The type of the initializer list depends on the type and rank of the tensor.
@@ -683,7 +681,7 @@
     10   20   30
     1000 1000 1000
 
-### `<Tensor-Type> setRandom()`
+### <Tensor-Type> setRandom()
 
 Fills the tensor with random values.  Returns the tensor itself in case you
 want to chain another call.
@@ -750,7 +748,7 @@
 wrapped in a TensorRef.
 
 
-### `Scalar* data()` and `const Scalar* data() const`
+### Scalar* data() and const Scalar* data() const
 
 Returns a pointer to the storage for the tensor.  The pointer is const if the
 tensor was const.  This allows direct access to the data.  The layout of the
@@ -778,7 +776,7 @@
 tensor.  See "Controlling when Expression are Evaluated" for more details about
 their evaluation.
 
-### `<Operation> constant(const Scalar& val)`
+### <Operation> constant(const Scalar& val)
 
 Returns a tensor of the same type and dimensions as the original tensor but
 where all elements have the value `val`.
@@ -806,7 +804,7 @@
     0.6 0.6 0.6
     0.6 0.6 0.6
 
-### `<Operation> random()`
+### <Operation> random()
 
 Returns a tensor of the same type and dimensions as the current tensor
 but where all elements have random values.
@@ -836,7 +834,7 @@
 of the same type and dimensions as the tensor to which they are applied.  The
 requested operations are applied to each element independently.
 
-### `<Operation> operator-()`
+### <Operation> operator-()
 
 Returns a tensor of the same type and dimensions as the original tensor
 containing the opposite values of the original tensor.
@@ -855,42 +853,42 @@
     -1 -1 -1
     -1 -1 -1
 
-### `<Operation> sqrt()`
+### <Operation> sqrt()
 
 Returns a tensor of the same type and dimensions as the original tensor
 containing the square roots of the original tensor.
 
-### `<Operation> rsqrt()`
+### <Operation> rsqrt()
 
 Returns a tensor of the same type and dimensions as the original tensor
 containing the inverse square roots of the original tensor.
 
-### `<Operation> square()`
+### <Operation> square()
 
 Returns a tensor of the same type and dimensions as the original tensor
 containing the squares of the original tensor values.
 
-### `<Operation> inverse()`
+### <Operation> inverse()
 
 Returns a tensor of the same type and dimensions as the original tensor
 containing the inverse of the original tensor values.
 
-### `<Operation> exp()`
+### <Operation> exp()
 
 Returns a tensor of the same type and dimensions as the original tensor
 containing the exponential of the original tensor.
 
-### `<Operation> log()`
+### <Operation> log()
 
 Returns a tensor of the same type and dimensions as the original tensor
 containing the natural logarithms of the original tensor.
 
-### `<Operation> abs()`
+### <Operation> abs()
 
 Returns a tensor of the same type and dimensions as the original tensor
 containing the absolute values of the original tensor.
 
-### `<Operation> pow(Scalar exponent)`
+### <Operation> pow(Scalar exponent)
 
 Returns a tensor of the same type and dimensions as the original tensor
 containing the coefficients of the original tensor to the power of the
@@ -917,17 +915,17 @@
     0 1 2
     3 4 5
 
-### `<Operation>  operator * (Scalar scale)`
+### <Operation>  operator * (Scalar scale)
 
 Multiplies all the coefficients of the input tensor by the provided scale.
 
-### `<Operation>  cwiseMax(Scalar threshold)`
+### <Operation>  cwiseMax(Scalar threshold)
 TODO
 
-### `<Operation>  cwiseMin(Scalar threshold)`
+### <Operation>  cwiseMin(Scalar threshold)
 TODO
 
-### `<Operation>  unaryExpr(const CustomUnaryOp& func)`
+### <Operation>  unaryExpr(const CustomUnaryOp& func)
 TODO
 
 
@@ -939,39 +937,39 @@
 specified it is also of the same type. The requested operations are applied to
 each pair of elements independently.
 
-### `<Operation> operator+(const OtherDerived& other)`
+### <Operation> operator+(const OtherDerived& other)
 
 Returns a tensor of the same type and dimensions as the input tensors
 containing the coefficient wise sums of the inputs.
 
-### `<Operation> operator-(const OtherDerived& other)`
+### <Operation> operator-(const OtherDerived& other)
 
 Returns a tensor of the same type and dimensions as the input tensors
 containing the coefficient wise differences of the inputs.
 
-### `<Operation> operator*(const OtherDerived& other)`
+### <Operation> operator*(const OtherDerived& other)
 
 Returns a tensor of the same type and dimensions as the input tensors
 containing the coefficient wise products of the inputs.
 
-### `<Operation> operator/(const OtherDerived& other)`
+### <Operation> operator/(const OtherDerived& other)
 
 Returns a tensor of the same type and dimensions as the input tensors
 containing the coefficient wise quotients of the inputs.
 
 This operator is not supported for integer types.
 
-### `<Operation> cwiseMax(const OtherDerived& other)`
+### <Operation> cwiseMax(const OtherDerived& other)
 
 Returns a tensor of the same type and dimensions as the input tensors
 containing the coefficient wise maximums of the inputs.
 
-### `<Operation> cwiseMin(const OtherDerived& other)`
+### <Operation> cwiseMin(const OtherDerived& other)
 
 Returns a tensor of the same type and dimensions as the input tensors
 containing the coefficient wise mimimums of the inputs.
 
-### `<Operation> Logical operators`
+### <Operation> Logical operators
 
 The following logical operators are supported as well:
 
@@ -1129,50 +1127,50 @@
     276
 
 
-### `<Operation> sum(const Dimensions& new_dims)`
-### `<Operation> sum()`
+### <Operation> sum(const Dimensions& new_dims)
+### <Operation> sum()
 
 Reduce a tensor using the sum() operator.  The resulting values
 are the sum of the reduced values.
 
-### `<Operation> mean(const Dimensions& new_dims)`
-### `<Operation> mean()`
+### <Operation> mean(const Dimensions& new_dims)
+### <Operation> mean()
 
 Reduce a tensor using the mean() operator.  The resulting values
 are the mean of the reduced values.
 
-### `<Operation> maximum(const Dimensions& new_dims)`
-### `<Operation> maximum()`
+### <Operation> maximum(const Dimensions& new_dims)
+### <Operation> maximum()
 
 Reduce a tensor using the maximum() operator.  The resulting values are the
 largest of the reduced values.
 
-### `<Operation> minimum(const Dimensions& new_dims)`
-### `<Operation> minimum()`
+### <Operation> minimum(const Dimensions& new_dims)
+### <Operation> minimum()
 
 Reduce a tensor using the minimum() operator.  The resulting values
 are the smallest of the reduced values.
 
-### `<Operation> prod(const Dimensions& new_dims)`
-### `<Operation> prod()`
+### <Operation> prod(const Dimensions& new_dims)
+### <Operation> prod()
 
 Reduce a tensor using the prod() operator.  The resulting values
 are the product of the reduced values.
 
-### `<Operation> all(const Dimensions& new_dims)`
-### `<Operation> all()`
+### <Operation> all(const Dimensions& new_dims)
+### <Operation> all()
 Reduce a tensor using the all() operator.  Casts tensor to bool and then checks
 whether all elements are true.  Runs through all elements rather than
 short-circuiting, so may be significantly inefficient.
 
-### `<Operation> any(const Dimensions& new_dims)`
-### `<Operation> any()`
+### <Operation> any(const Dimensions& new_dims)
+### <Operation> any()
 Reduce a tensor using the any() operator.  Casts tensor to bool and then checks
 whether any element is true.  Runs through all elements rather than
 short-circuiting, so may be significantly inefficient.
 
 
-### `<Operation> reduce(const Dimensions& new_dims, const Reducer& reducer)`
+### <Operation> reduce(const Dimensions& new_dims, const Reducer& reducer)
 
 Reduce a tensor using a user-defined reduction operator.  See `SumReducer`
 in TensorFunctors.h for information on how to implement a reduction operator.
@@ -1208,8 +1206,8 @@
     15
 
 
-### `<Operation> trace(const Dimensions& new_dims)`
-### `<Operation> trace()`
+### <Operation> trace(const Dimensions& new_dims)
+### <Operation> trace()
 
 As a special case, if no parameter is passed to the operation, trace is computed
 along *all* dimensions of the input tensor.
@@ -1259,18 +1257,18 @@
     1  3  6
     4  9 15
 
-### `<Operation> cumsum(const Index& axis)`
+### <Operation> cumsum(const Index& axis)
 
 Perform a scan by summing consecutive entries.
 
-### `<Operation> cumprod(const Index& axis)`
+### <Operation> cumprod(const Index& axis)
 
 Perform a scan by multiplying consecutive entries.
 
 
 ## Convolutions
 
-### `<Operation> convolve(const Kernel& kernel, const Dimensions& dims)`
+### <Operation> convolve(const Kernel& kernel, const Dimensions& dims)
 
 Returns a tensor that is the output of the convolution of the input tensor with the kernel,
 along the specified dimensions of the input tensor. The dimension size for dimensions of the output tensor
@@ -1313,7 +1311,7 @@
 Tensor.  They can be used to access slices of tensors, see them with different
 dimensions, or pad tensors with additional data.
 
-### `<Operation> reshape(const Dimensions& new_dims)`
+### <Operation> reshape(const Dimensions& new_dims)
 
 Returns a view of the input tensor that has been reshaped to the specified
 new dimensions.  The argument new_dims is an array of Index values.  The
@@ -1392,7 +1390,7 @@
 the reshape view of b.
 
 
-### `<Operation> shuffle(const Shuffle& shuffle)`
+### <Operation> shuffle(const Shuffle& shuffle)
 
 Returns a copy of the input tensor whose dimensions have been
 reordered according to the specified permutation. The argument shuffle
@@ -1433,7 +1431,7 @@
     output.shuffle({2, 0, 1}) = input;
 
 
-### `<Operation> stride(const Strides& strides)`
+### <Operation> stride(const Strides& strides)
 
 Returns a view of the input tensor that strides (skips stride-1
 elements) along each of the dimensions.  The argument strides is an
@@ -1459,7 +1457,7 @@
     output.stride({2, 3, 4}) = input;
 
 
-### `<Operation> slice(const StartIndices& offsets, const Sizes& extents)`
+### <Operation> slice(const StartIndices& offsets, const Sizes& extents)
 
 Returns a sub-tensor of the given tensor. For each dimension i, the slice is
 made of the coefficients stored between offset[i] and offset[i] + extents[i] in
@@ -1485,7 +1483,7 @@
      600   700
 
 
-### `<Operation> chip(const Index offset, const Index dim)`
+### <Operation> chip(const Index offset, const Index dim)
 
 A chip is a special kind of slice. It is the subtensor at the given offset in
 the dimension dim. The returned tensor has one fewer dimension than the input
@@ -1536,7 +1534,7 @@
          0     0     0
 
 
-### `<Operation> reverse(const ReverseDimensions& reverse)`
+### <Operation> reverse(const ReverseDimensions& reverse)
 
 Returns a view of the input tensor that reverses the order of the coefficients
 along a subset of the dimensions.  The argument reverse is an array of boolean
@@ -1566,7 +1564,7 @@
        0   100   200
 
 
-### `<Operation> broadcast(const Broadcast& broadcast)`
+### <Operation> broadcast(const Broadcast& broadcast)
 
 Returns a view of the input tensor in which the input is replicated one to many
 times.
@@ -1590,11 +1588,11 @@
        0   100   200    0   100   200
      300   400   500  300   400   500
 
-### `<Operation> concatenate(const OtherDerived& other, Axis axis)`
+### <Operation> concatenate(const OtherDerived& other, Axis axis)
 
 TODO
 
-### `<Operation>  pad(const PaddingDimensions& padding)`
+### <Operation>  pad(const PaddingDimensions& padding)
 
 Returns a view of the input tensor in which the input is padded with zeros.
 
@@ -1619,7 +1617,7 @@
        0     0     0    0
 
 
-### `<Operation>  extract_patches(const PatchDims& patch_dims)`
+### <Operation>  extract_patches(const PatchDims& patch_dims)
 
 Returns a tensor of coefficient patches extracted from the input tensor, where
 each patch is of dimension specified by 'patch_dims'. The returned tensor has
@@ -1706,7 +1704,7 @@
     6 7
     10 11
 
-### `<Operation>  extract_image_patches(const Index patch_rows, const Index patch_cols, const Index row_stride, const Index col_stride, const PaddingType padding_type)`
+### <Operation>  extract_image_patches(const Index patch_rows, const Index patch_cols, const Index row_stride, const Index col_stride, const PaddingType padding_type)
 
 Returns a tensor of coefficient image patches extracted from the input tensor,
 which is expected to have dimensions ordered as follows (depending on the data
@@ -1763,7 +1761,7 @@
 
 ## Special Operations
 
-### `<Operation> cast<T>()`
+### <Operation> cast<T>()
 
 Returns a tensor of type T with the same dimensions as the original tensor.
 The returned tensor contains the values of the original tensor converted to
@@ -1792,7 +1790,7 @@
     1 2 2
 
 
-### `<Operation>     eval()`
+### <Operation>     eval()
 
 TODO
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index 8b35f79..cdd8840 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -762,7 +762,7 @@
     const Index resIncr(1);
 
     // zero out the result buffer (which must be of size at least rows * sizeof(Scalar)
-    m_device.memset(buffer, 0, rows * sizeof(Scalar));
+    m_device.fill(buffer, buffer + rows, Scalar(0));
 
     internal::general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,false,RhsScalar,RhsMapper,false>::run(
         rows, cols, lhs, rhs,
@@ -869,7 +869,7 @@
     // If a contraction kernel does not support beta, explicitly initialize
     // output buffer with zeroes.
     if (!TensorContractionKernel::HasBeta) {
-      this->m_device.memset(buffer, 0, m * n * sizeof(Scalar));
+      this->m_device.fill(buffer, buffer + m * n, Scalar(0));
     }
 
     for(Index i2=0; i2<m; i2+=mc)
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
index c818038..1e17e8b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
@@ -1370,8 +1370,8 @@
     // columns in right side
     const Index n = this->m_j_size;
 
-    // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar)
-    this->m_device.memset(buffer, 0, m * n * sizeof(Scalar));
+    // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar))
+    this->m_device.fill(buffer, buffer + m * n, Scalar(0));
 
     typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs,
                                                    LeftEvaluator, left_nocontract_t,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
index 21be6ea..84fac2a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
@@ -912,9 +912,9 @@
           // On 10000x2x10000 mm zeroing can easily take half of time. Zero (bn
           // x m) row. Safe to do here because all kernels that will write to
           // this memory depend on completion of this task. Note: don't call
-          // device_.memset() here. device_.memset() blocks on thread pool
+          // device_.fill() here. device_.fill() blocks on thread pool
           // worker thread, which can lead to underutilization and deadlocks.
-          memset(buffer_ + n1 * bn_ * m_, 0, bn(n1) * m_ * sizeof(Scalar));
+          std::fill_n(buffer_ + n1 * bn_ * m_, bn(n1) * m_, Scalar(0));
         }
         kernel_.packRhs(&packed_rhs(n, k, n1, use_thread_local),
                         rhs_.getSubMapper(k * bk_, n1 * bn_), bk(k), bn(n1));
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
index 46b9d3a..5bde4d6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
@@ -39,6 +39,17 @@
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
     ::memset(buffer, c, n);
   }
+  template<typename T>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void fill(T* begin, T* end, const T& value) const {
+#ifdef EIGEN_GPU_COMPILE_PHASE
+    // std::fill is not a device function, so resort to simple loop.
+    for (T* it = begin; it != end; ++it) {
+      *it = value;
+    }
+#else
+    std::fill(begin, end, value);
+#endif
+  }
   template<typename Type>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Type get(Type data) const { 
     return data;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h
index ec2e3cb..8ee4478 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h
@@ -281,10 +281,49 @@
     EIGEN_UNUSED_VARIABLE(err)
     gpu_assert(err == gpuSuccess);
 #else
+  EIGEN_UNUSED_VARIABLE(buffer)
+  EIGEN_UNUSED_VARIABLE(c)
+  EIGEN_UNUSED_VARIABLE(n)
   eigen_assert(false && "The default device should be used instead to generate kernel code");
 #endif
   }
 
+  template<typename T>
+  EIGEN_STRONG_INLINE void fill(T* begin, T* end, const T& value) const {
+#ifndef EIGEN_GPU_COMPILE_PHASE
+    const size_t count = end - begin;
+    // Split value into bytes and run memset with stride.
+    const int value_size = sizeof(value);
+    char* buffer = (char*)begin;
+    char* value_bytes = (char*)(&value);
+    gpuError_t err;
+    EIGEN_UNUSED_VARIABLE(err)
+    
+    // If all value bytes are equal, then a single memset can be much faster.
+    bool use_single_memset = true;
+    for (int i=1; i<value_size; ++i) {
+      if (value_bytes[i] != value_bytes[0]) {
+        use_single_memset = false;
+      } 
+    }
+    
+    if (use_single_memset) {
+      err = gpuMemsetAsync(buffer, value_bytes[0], count * sizeof(T), stream_->stream());
+      gpu_assert(err == gpuSuccess);
+    } else {
+      for (int b=0; b<value_size; ++b) {
+        err = gpuMemset2DAsync(buffer+b, value_size, value_bytes[b], 1, count, stream_->stream());
+        gpu_assert(err == gpuSuccess);
+      }
+    }
+#else
+    EIGEN_UNUSED_VARIABLE(begin)
+    EIGEN_UNUSED_VARIABLE(end)
+    EIGEN_UNUSED_VARIABLE(value)
+    eigen_assert(false && "The default device should be used instead to generate kernel code");
+#endif
+  }
+
   EIGEN_STRONG_INLINE size_t numThreads() const {
     // FIXME
     return 32;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
index df591c2..844c093 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
@@ -327,13 +327,27 @@
     if (n == 0) {
       return;
     }
-    n /= sizeof(buffer_scalar_t);
     auto f = [&](cl::sycl::handler &cgh) {
-      auto dst_acc = get_range_accessor<write_mode>(cgh, data, n);
-      // The cast to uint8_t is here to match the behaviour of the standard
-      // memset. The cast to buffer_scalar_t is needed to match the type of the
-      // accessor (in case buffer_scalar_t is not uint8_t)
-      cgh.fill(dst_acc, static_cast<buffer_scalar_t>(static_cast<uint8_t>(c)));
+      // Get a typed range accessor to ensure we fill each byte, in case
+      // `buffer_scalar_t` is not (u)int8_t.
+      auto dst_acc = get_typed_range_accessor<write_mode, uint8_t>(cgh, data, n);
+      cgh.fill(dst_acc, static_cast<uint8_t>(c));
+    };
+    cl::sycl::event e;
+    EIGEN_SYCL_TRY_CATCH(e = m_queue.submit(f));
+    async_synchronize(e);
+  }
+
+  template<typename T>
+  EIGEN_STRONG_INLINE void fill(T* begin, T* end, const T& value) const {
+    static const auto write_mode = cl::sycl::access::mode::discard_write;
+    if (begin == end) {
+      return;
+    }
+    const ptrdiff_t count = end - begin;
+    auto f = [&](cl::sycl::handler &cgh) {
+      auto dst_acc = get_typed_range_accessor<write_mode, T>(cgh, begin, count);
+      cgh.fill(dst_acc, value);
     };
     cl::sycl::event e;
     EIGEN_SYCL_TRY_CATCH(e = m_queue.submit(f));
@@ -359,6 +373,8 @@
 
     auto original_buffer = pMapper.get_buffer(ptr);
     const ptrdiff_t offset = pMapper.get_offset(ptr);
+    eigen_assert(offset % sizeof(T) == 0 && "The offset must be a multiple of sizeof(T)");
+    eigen_assert(original_buffer.get_size() % sizeof(T) == 0 && "The buffer size must be a multiple of sizeof(T)");
     const ptrdiff_t typed_offset = offset / sizeof(T);
     eigen_assert(typed_offset >= 0);
     const auto typed_size = original_buffer.get_size() / sizeof(T);
@@ -395,6 +411,40 @@
         cgh, cl::sycl::range<1>(n_bytes), cl::sycl::id<1>(offset));
   }
 
+  /// Get a range accessor to the virtual pointer's device memory with a
+  /// specified type and count.
+  template <cl::sycl::access::mode AcMd, typename T, typename Index>
+  EIGEN_STRONG_INLINE cl::sycl::accessor<
+      T, 1, AcMd, cl::sycl::access::target::global_buffer>
+  get_typed_range_accessor(cl::sycl::handler &cgh, const void *ptr,
+                     const Index count) const {
+    static const auto global_access = cl::sycl::access::target::global_buffer;
+    eigen_assert(count >= 0);
+    std::lock_guard<std::mutex> lock(pmapper_mutex_);
+    auto buffer = pMapper.get_buffer(ptr);
+    const ptrdiff_t offset = pMapper.get_offset(ptr);
+    eigen_assert(offset >= 0);
+
+    // Technically we should create a subbuffer for the desired range,
+    // then reinterpret that.  However, I was not able to get changes to reflect
+    // in the original buffer (only the subbuffer and reinterpreted buffer).
+    // This current implementation now has the restriction that the buffer
+    // offset and original buffer size must be a multiple of sizeof(T).
+    // Note that get_range_accessor(void*) currently has the same restriction.
+    //
+    // auto subbuffer = cl::sycl::buffer<buffer_scalar_t, 1>(buffer, 
+    //     cl::sycl::id<1>(offset), cl::sycl::range<1>(n_bytes));
+    eigen_assert(offset % sizeof(T) == 0 && "The offset must be a multiple of sizeof(T)");
+    eigen_assert(buffer.get_size() % sizeof(T) == 0 && "The buffer size must be a multiple of sizeof(T)");
+    const ptrdiff_t typed_offset = offset / sizeof(T);
+    const size_t typed_size = buffer.get_size() / sizeof(T);
+    auto reint = buffer.template reinterpret<
+        typename Eigen::internal::remove_const<T>::type>(
+        cl::sycl::range<1>(typed_size));
+    return reint.template get_access<AcMd, global_access>(
+        cgh, cl::sycl::range<1>(count), cl::sycl::id<1>(typed_offset));
+  }
+
   /// Creation of sycl accessor for a buffer. This function first tries to find
   /// the buffer in the buffer_map. If found it gets the accessor from it, if
   /// not, the function then adds an entry by creating a sycl buffer for that
@@ -951,6 +1001,11 @@
   EIGEN_STRONG_INLINE void memset(void *data, int c, size_t n) const {
     queue_stream()->memset(data, c, n);
   }
+  /// the fill function
+  template<typename T>
+  EIGEN_STRONG_INLINE void fill(T* begin, T* end, const T& value) const {
+    queue_stream()->fill(begin, end, value);
+  }
   /// returning the sycl queue
   EIGEN_STRONG_INLINE cl::sycl::queue &sycl_queue() const {
     return queue_stream()->sycl_queue();
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index e524b53..18cc79a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -122,6 +122,11 @@
     ::memset(buffer, c, n);
   }
 
+  template<typename T>
+  EIGEN_STRONG_INLINE void fill(T* begin, T* end, const T& value) const {
+    std::fill(begin, end, value);
+  }
+
   EIGEN_STRONG_INLINE int numThreads() const {
     return num_threads_;
   }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
index cb53ce2..82ca999 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
@@ -41,6 +41,7 @@
 #define gpuMalloc hipMalloc
 #define gpuFree hipFree
 #define gpuMemsetAsync hipMemsetAsync
+#define gpuMemset2DAsync hipMemset2DAsync
 #define gpuMemcpyAsync hipMemcpyAsync
 #define gpuMemcpyDeviceToDevice hipMemcpyDeviceToDevice
 #define gpuMemcpyDeviceToHost hipMemcpyDeviceToHost
@@ -71,6 +72,7 @@
 #define gpuMalloc cudaMalloc
 #define gpuFree cudaFree
 #define gpuMemsetAsync cudaMemsetAsync
+#define gpuMemset2DAsync cudaMemset2DAsync
 #define gpuMemcpyAsync cudaMemcpyAsync
 #define gpuMemcpyDeviceToDevice cudaMemcpyDeviceToDevice
 #define gpuMemcpyDeviceToHost cudaMemcpyDeviceToHost
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h
index 1d142f2..e4d4bd5 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h
@@ -26,6 +26,7 @@
 #undef gpuMalloc
 #undef gpuFree
 #undef gpuMemsetAsync
+#undef gpuMemset2DAsync
 #undef gpuMemcpyAsync
 #undef gpuMemcpyDeviceToDevice
 #undef gpuMemcpyDeviceToHost
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index ea97cf1..b3f00f7 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -446,12 +446,7 @@
   EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
       : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
   {
-    for (Index i = 0; i < internal::array_size<Dimensions>::value; ++i) {
-      eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]);
-    }
-
     m_is_identity = true;
-    bool degenerate = false;
     for (int i = 0; i < internal::array_size<Dimensions>::value; ++i) {
       eigen_assert(m_impl.dimensions()[i] >=
                    op.sizes()[i] + op.startIndices()[i]);
@@ -459,9 +454,6 @@
           op.startIndices()[i] != 0) {
         m_is_identity = false;
       }
-      if (op.sizes()[i] == 0) { // we have an empty size
-        degenerate = true;
-      }
     }
 
     // No strides for scalars.
@@ -479,8 +471,8 @@
       m_outputStrides[0] = 1;
       for (int i = 1; i < NumDims; ++i) {
         m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
-        // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
-        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);      }
+        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
+      }
     } else {
       m_inputStrides[NumDims-1] = 1;
       for (int i = NumDims - 2; i >= 0; --i) {
@@ -491,8 +483,8 @@
       m_outputStrides[NumDims-1] = 1;
       for (int i = NumDims - 2; i >= 0; --i) {
         m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
-        // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
-        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);      }
+        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
+      }
     }
   }
 
@@ -933,14 +925,12 @@
     typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
     const InputDimensions& input_dims = m_impl.dimensions();
 
-    // check for degenerate intervals and compute output tensor shape
-    bool degenerate = false;
+    // compute output tensor shape
     m_is_identity = true;
     for (int i = 0; i < NumDims; i++) {
       Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
       if (interval == 0 || ((interval < 0) != (m_strides[i] < 0))) {
         m_dimensions[i] = 0;
-        degenerate = true;
       } else {
         m_dimensions[i] =
             (interval / m_strides[i]) + (interval % m_strides[i] != 0 ? 1 : 0);
@@ -967,8 +957,7 @@
       m_outputStrides[0] = 1;
       for (int i = 1; i < NumDims; ++i) {
         m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
-        // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
-        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
+        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
       }
     } else {
       m_inputStrides[NumDims-1] = m_strides[NumDims-1];
@@ -983,8 +972,7 @@
       m_outputStrides[NumDims-1] = 1;
       for (int i = NumDims - 2; i >= 0; --i) {
         m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
-        // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
-        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
+        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
       }
     }
   }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
index 0999815..e5e5efd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
@@ -142,7 +142,8 @@
         m_unshuffledInputStrides[i] =
             m_unshuffledInputStrides[i - 1] * input_dims[i - 1];
         m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
-        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
+        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(
+                  m_outputStrides[i] > 0 ? m_outputStrides[i] : Index(1));
       }
     } else {
       m_unshuffledInputStrides[NumDims - 1] = 1;
@@ -151,7 +152,8 @@
         m_unshuffledInputStrides[i] =
             m_unshuffledInputStrides[i + 1] * input_dims[i + 1];
         m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
-        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
+        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(
+                  m_outputStrides[i] > 0 ? m_outputStrides[i] : Index(1));
       }
     }
 
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
index 0ef159e..0f166e3 100755
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
@@ -26,11 +26,11 @@
   make_coherent_impl<A,B>::run(a.const_cast_derived(), b.const_cast_derived());
 }
 
-template<typename _DerType, bool Enable> struct auto_diff_special_op;
+template<typename DerivativeType, bool Enable> struct auto_diff_special_op;
 
 } // end namespace internal
 
-template<typename _DerType> class AutoDiffScalar;
+template<typename DerivativeType> class AutoDiffScalar;
 
 template<typename NewDerType>
 inline AutoDiffScalar<NewDerType> MakeAutoDiffScalar(const typename NewDerType::Scalar& value, const NewDerType &der) {
@@ -38,16 +38,16 @@
 }
 
 /** \class AutoDiffScalar
-  * \brief A scalar type replacement with automatic differentation capability
+  * \brief A scalar type replacement with automatic differentiation capability
   *
-  * \param _DerType the vector type used to store/represent the derivatives. The base scalar type
+  * \param DerivativeType the vector type used to store/represent the derivatives. The base scalar type
   *                 as well as the number of derivatives to compute are determined from this type.
   *                 Typical choices include, e.g., \c Vector4f for 4 derivatives, or \c VectorXf
   *                 if the number of derivatives is not known at compile time, and/or, the number
   *                 of derivatives is large.
-  *                 Note that _DerType can also be a reference (e.g., \c VectorXf&) to wrap a
+  *                 Note that DerivativeType can also be a reference (e.g., \c VectorXf&) to wrap a
   *                 existing vector into an AutoDiffScalar.
-  *                 Finally, _DerType can also be any Eigen compatible expression.
+  *                 Finally, DerivativeType can also be any Eigen compatible expression.
   *
   * This class represents a scalar value while tracking its respective derivatives using Eigen's expression
   * template mechanism.
@@ -63,17 +63,17 @@
   *
   */
 
-template<typename _DerType>
+template<typename DerivativeType>
 class AutoDiffScalar
   : public internal::auto_diff_special_op
-            <_DerType, !internal::is_same<typename internal::traits<typename internal::remove_all<_DerType>::type>::Scalar,
-                                          typename NumTraits<typename internal::traits<typename internal::remove_all<_DerType>::type>::Scalar>::Real>::value>
+            <DerivativeType, !internal::is_same<typename internal::traits<typename internal::remove_all<DerivativeType>::type>::Scalar,
+                                          typename NumTraits<typename internal::traits<typename internal::remove_all<DerivativeType>::type>::Scalar>::Real>::value>
 {
   public:
     typedef internal::auto_diff_special_op
-            <_DerType, !internal::is_same<typename internal::traits<typename internal::remove_all<_DerType>::type>::Scalar,
-                       typename NumTraits<typename internal::traits<typename internal::remove_all<_DerType>::type>::Scalar>::Real>::value> Base;
-    typedef typename internal::remove_all<_DerType>::type DerType;
+            <DerivativeType, !internal::is_same<typename internal::traits<typename internal::remove_all<DerivativeType>::type>::Scalar,
+                       typename NumTraits<typename internal::traits<typename internal::remove_all<DerivativeType>::type>::Scalar>::Real>::value> Base;
+    typedef typename internal::remove_all<DerivativeType>::type DerType;
     typedef typename internal::traits<DerType>::Scalar Scalar;
     typedef typename NumTraits<Scalar>::Real Real;
 
@@ -382,16 +382,16 @@
 
 namespace internal {
 
-template<typename _DerType>
-struct auto_diff_special_op<_DerType, true>
-//   : auto_diff_scalar_op<_DerType, typename NumTraits<Scalar>::Real,
+template<typename DerivativeType>
+struct auto_diff_special_op<DerivativeType, true>
+//   : auto_diff_scalar_op<DerivativeType, typename NumTraits<Scalar>::Real,
 //                            is_same<Scalar,typename NumTraits<Scalar>::Real>::value>
 {
-  typedef typename remove_all<_DerType>::type DerType;
+  typedef typename remove_all<DerivativeType>::type DerType;
   typedef typename traits<DerType>::Scalar Scalar;
   typedef typename NumTraits<Scalar>::Real Real;
 
-//   typedef auto_diff_scalar_op<_DerType, typename NumTraits<Scalar>::Real,
+//   typedef auto_diff_scalar_op<DerivativeType, typename NumTraits<Scalar>::Real,
 //                            is_same<Scalar,typename NumTraits<Scalar>::Real>::value> Base;
 
 //   using Base::operator+;
@@ -401,8 +401,8 @@
 //   using Base::operator*;
 //   using Base::operator*=;
 
-  const AutoDiffScalar<_DerType>& derived() const { return *static_cast<const AutoDiffScalar<_DerType>*>(this); }
-  AutoDiffScalar<_DerType>& derived() { return *static_cast<AutoDiffScalar<_DerType>*>(this); }
+  const AutoDiffScalar<DerivativeType>& derived() const { return *static_cast<const AutoDiffScalar<DerivativeType>*>(this); }
+  AutoDiffScalar<DerivativeType>& derived() { return *static_cast<AutoDiffScalar<DerivativeType>*>(this); }
 
 
   inline const AutoDiffScalar<DerType&> operator+(const Real& other) const
@@ -410,12 +410,12 @@
     return AutoDiffScalar<DerType&>(derived().value() + other, derived().derivatives());
   }
 
-  friend inline const AutoDiffScalar<DerType&> operator+(const Real& a, const AutoDiffScalar<_DerType>& b)
+  friend inline const AutoDiffScalar<DerType&> operator+(const Real& a, const AutoDiffScalar<DerivativeType>& b)
   {
     return AutoDiffScalar<DerType&>(a + b.value(), b.derivatives());
   }
 
-  inline AutoDiffScalar<_DerType>& operator+=(const Real& other)
+  inline AutoDiffScalar<DerivativeType>& operator+=(const Real& other)
   {
     derived().value() += other;
     return derived();
@@ -431,22 +431,22 @@
   }
 
   friend inline const AutoDiffScalar<typename CwiseUnaryOp<bind1st_op<scalar_product_op<Real,Scalar> >, DerType>::Type >
-  operator*(const Real& other, const AutoDiffScalar<_DerType>& a)
+  operator*(const Real& other, const AutoDiffScalar<DerivativeType>& a)
   {
     return AutoDiffScalar<typename CwiseUnaryOp<bind1st_op<scalar_product_op<Real,Scalar> >, DerType>::Type >(
       a.value() * other,
       a.derivatives() * other);
   }
 
-  inline AutoDiffScalar<_DerType>& operator*=(const Scalar& other)
+  inline AutoDiffScalar<DerivativeType>& operator*=(const Scalar& other)
   {
     *this = *this * other;
     return derived();
   }
 };
 
-template<typename _DerType>
-struct auto_diff_special_op<_DerType, false>
+template<typename DerivativeType>
+struct auto_diff_special_op<DerivativeType, false>
 {
   void operator*() const;
   void operator-() const;
diff --git a/unsupported/Eigen/src/Skyline/SkylineMatrix.h b/unsupported/Eigen/src/Skyline/SkylineMatrix.h
index 7c7eace..664a97f 100644
--- a/unsupported/Eigen/src/Skyline/SkylineMatrix.h
+++ b/unsupported/Eigen/src/Skyline/SkylineMatrix.h
@@ -375,8 +375,8 @@
     /** Removes all non zeros */
     inline void setZero() {
         m_data.clear();
-        memset(m_colStartIndex, 0, (m_outerSize + 1) * sizeof (Index));
-        memset(m_rowStartIndex, 0, (m_outerSize + 1) * sizeof (Index));
+        std::fill_n(m_colStartIndex, m_outerSize + 1, Index(0));
+        std::fill_n(m_rowStartIndex, m_outerSize + 1, Index(0));
     }
 
     /** \returns the number of non zero coefficients */
@@ -435,7 +435,7 @@
                     }
 
                     //zeros new data
-                    memset(this->_upperPtr() + start, 0, (bandIncrement - 1) * sizeof (Scalar));
+                    std::fill_n(this->_upperPtr() + start, bandIncrement - 1, Scalar(0));
 
                     return m_data.upper(m_colStartIndex[inner]);
                 } else {
@@ -466,7 +466,7 @@
                     }
 
                     //zeros new data
-                    memset(this->_lowerPtr() + start, 0, (bandIncrement - 1) * sizeof (Scalar));
+                    std::fill_n(this->_lowerPtr() + start, bandIncrement - 1, Scalar(0));
                     return m_data.lower(m_rowStartIndex[outer]);
                 } else {
                     return m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer)));
@@ -493,7 +493,7 @@
                     for (Index innerIdx = inner + 1; innerIdx < outerSize() + 1; innerIdx++) {
                         m_rowStartIndex[innerIdx] += bandIncrement;
                     }
-                    memset(this->_upperPtr() + m_rowStartIndex[inner] + previousProfile + 1, 0, (bandIncrement - 1) * sizeof (Scalar));
+                    std::fill_n(this->_upperPtr() + m_rowStartIndex[inner] + previousProfile + 1, bandIncrement - 1, Scalar(0));
                     return m_data.upper(m_rowStartIndex[inner] + m_data.upperProfile(inner));
                 } else {
                     return m_data.upper(m_rowStartIndex[inner] + (outer - inner));
@@ -520,7 +520,7 @@
                     for (Index innerIdx = outer + 1; innerIdx < outerSize() + 1; innerIdx++) {
                         m_colStartIndex[innerIdx] += bandIncrement;
                     }
-                    memset(this->_lowerPtr() + m_colStartIndex[outer] + previousProfile + 1, 0, (bandIncrement - 1) * sizeof (Scalar));
+                    std::fill_n(this->_lowerPtr() + m_colStartIndex[outer] + previousProfile + 1, bandIncrement - 1, Scalar(0));
                     return m_data.lower(m_colStartIndex[outer] + m_data.lowerProfile(outer));
                 } else {
                     return m_data.lower(m_colStartIndex[outer] + (inner - outer));
@@ -619,8 +619,8 @@
         m_data.clear();
 
         m_outerSize = diagSize;
-        memset(m_colStartIndex, 0, (cols + 1) * sizeof (Index));
-        memset(m_rowStartIndex, 0, (rows + 1) * sizeof (Index));
+        std::fill_n(m_colStartIndex, cols + 1, Index(0));
+        std::fill_n(m_rowStartIndex, rows + 1, Index(0));
     }
 
     void resizeNonZeros(Index size) {
diff --git a/unsupported/Eigen/src/Skyline/SkylineStorage.h b/unsupported/Eigen/src/Skyline/SkylineStorage.h
index cc7514f..9c55f29 100644
--- a/unsupported/Eigen/src/Skyline/SkylineStorage.h
+++ b/unsupported/Eigen/src/Skyline/SkylineStorage.h
@@ -187,11 +187,11 @@
     }
 
     inline void reset() {
-        memset(m_diag, 0, m_diagSize * sizeof (Scalar));
-        memset(m_upper, 0, m_upperSize * sizeof (Scalar));
-        memset(m_lower, 0, m_lowerSize * sizeof (Scalar));
-        memset(m_upperProfile, 0, m_diagSize * sizeof (Index));
-        memset(m_lowerProfile, 0, m_diagSize * sizeof (Index));
+        std::fill_n(m_diag, m_diagSize, Scalar(0));
+        std::fill_n(m_upper, m_upperSize, Scalar(0));
+        std::fill_n(m_lower, m_lowerSize, Scalar(0));
+        std::fill_n(m_upperProfile, m_diagSize, Index(0));
+        std::fill_n(m_lowerProfile, m_diagSize, Index(0));
     }
 
     void prune(Scalar reference, RealScalar epsilon = dummy_precision<RealScalar>()) {
diff --git a/unsupported/doc/examples/SYCL/CMakeLists.txt b/unsupported/doc/examples/SYCL/CMakeLists.txt
index bef4f19..1d0f721 100644
--- a/unsupported/doc/examples/SYCL/CMakeLists.txt
+++ b/unsupported/doc/examples/SYCL/CMakeLists.txt
@@ -3,8 +3,7 @@
 set(EIGEN_SYCL ON)
 list(APPEND CMAKE_EXE_LINKER_FLAGS -pthread)
 if(EIGEN_SYCL_TRISYCL)
-  set(CMAKE_CXX_STANDARD 14)
-  set(STD_CXX_FLAG "-std=c++1z")
+  set(CMAKE_CXX_STANDARD 17)
 else(EIGEN_SYCL_TRISYCL)
   if(MSVC)
     # Set the host and device compilers C++ standard to C++14. On Windows setting this to C++11
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 1819193..d30fa62 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -55,13 +55,11 @@
 
 ei_add_test(EulerAngles)
 
-find_package(MPFR 2.3.0)
-find_package(GMP)
-if(MPFR_FOUND AND EIGEN_COMPILER_SUPPORT_CPP11)
-  include_directories(${MPFR_INCLUDES} ./mpreal)
+find_package(MPREAL)
+if(MPREAL_FOUND AND EIGEN_COMPILER_SUPPORT_CPP11)
   ei_add_property(EIGEN_TESTED_BACKENDS "MPFR C++, ")
-  set(EIGEN_MPFR_TEST_LIBRARIES ${MPFR_LIBRARIES} ${GMP_LIBRARIES})
- ei_add_test(mpreal_support "-std=c++11" "${EIGEN_MPFR_TEST_LIBRARIES}" )
+  include_directories(${MPREAL_INCLUDES})
+  ei_add_test(mpreal_support "-std=c++11" "${MPREAL_LIBRARIES}" )
 else()
   ei_add_property(EIGEN_MISSING_BACKENDS "MPFR C++, ")
 endif()
@@ -165,8 +163,8 @@
     endif()
 
     if(EIGEN_SYCL_TRISYCL)
-      set(CMAKE_CXX_STANDARD 14)
-      set(STD_CXX_FLAG "-std=c++1z")
+      # triSYCL now requires c++17.
+      set(CMAKE_CXX_STANDARD 17)
     else()
       if(MSVC)
         # Set the host and device compilers C++ standard to C++14. On Windows setting this to C++11
diff --git a/unsupported/test/cxx11_tensor_assign.cpp b/unsupported/test/cxx11_tensor_assign.cpp
index ce9d243..8e3ca0f 100644
--- a/unsupported/test/cxx11_tensor_assign.cpp
+++ b/unsupported/test/cxx11_tensor_assign.cpp
@@ -25,10 +25,8 @@
   vec1(4) = 23; vec2(4) = 4;
   vec1(5) = 42; vec2(5) = 5;
 
-  int col_major[6];
-  int row_major[6];
-  memset(col_major, 0, 6*sizeof(int));
-  memset(row_major, 0, 6*sizeof(int));
+  int col_major[6] = {0};
+  int row_major[6] = {0};
   TensorMap<Tensor<int, 1> > vec3(col_major, 6);
   TensorMap<Tensor<int, 1, RowMajor> > vec4(row_major, 6);
 
@@ -88,10 +86,8 @@
   mat2(1,1) = 4;
   mat2(1,2) = 5;
 
-  int col_major[6];
-  int row_major[6];
-  memset(col_major, 0, 6*sizeof(int));
-  memset(row_major, 0, 6*sizeof(int));
+  int col_major[6] = {0};
+  int row_major[6] = {0};
   TensorMap<Tensor<int, 2> > mat3(row_major, 2, 3);
   TensorMap<Tensor<int, 2, RowMajor> > mat4(col_major, 2, 3);
 
@@ -148,10 +144,8 @@
     }
   }
 
-  int col_major[2*3*7];
-  int row_major[2*3*7];
-  memset(col_major, 0, 2*3*7*sizeof(int));
-  memset(row_major, 0, 2*3*7*sizeof(int));
+  int col_major[2*3*7] = {0};
+  int row_major[2*3*7] = {0};
   TensorMap<Tensor<int, 3> > mat3(col_major, 2, 3, 7);
   TensorMap<Tensor<int, 3, RowMajor> > mat4(row_major, 2, 3, 7);
 
diff --git a/unsupported/test/cxx11_tensor_device.cu b/unsupported/test/cxx11_tensor_device.cu
index c9f78d2..58cfc01 100644
--- a/unsupported/test/cxx11_tensor_device.cu
+++ b/unsupported/test/cxx11_tensor_device.cu
@@ -14,6 +14,7 @@
 #define EIGEN_USE_GPU
 
 #include "main.h"
+#include "OffByOneScalar.h"
 #include <unsupported/Eigen/CXX11/Tensor>
 
 #include <unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>
@@ -175,6 +176,44 @@
   context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel3d(), dims);
 }
 
+// Helper method to synchronize device.
+template<typename Device>
+void synchronize(Device& /*device*/) { /*nothing*/ }
+template<>
+void synchronize(Eigen::GpuDevice& device) {
+  device.synchronize();
+}
+
+template <typename DataType, typename TensorDevice>
+void test_device_memory(const TensorDevice& device) {
+  int count = 100;
+  Eigen::array<int, 1> tensorRange = {{count}};
+  Eigen::Tensor<DataType, 1> host(tensorRange);
+  Eigen::Tensor<DataType, 1> expected(tensorRange);
+  DataType* device_data  = static_cast<DataType*>(device.allocate(count * sizeof(DataType)));
+  
+  // memset
+  const char byte_value = static_cast<char>(0xAB);
+  device.memset(device_data, byte_value, count * sizeof(DataType));
+  device.memcpyDeviceToHost(host.data(), device_data, count * sizeof(DataType));
+  synchronize(device);
+  memset(expected.data(), byte_value, count * sizeof(DataType));
+  for (int i=0; i<count; i++) {
+    VERIFY_IS_EQUAL(host(i), expected(i));
+  }
+  
+  // fill
+  DataType fill_value = DataType(7);
+  std::fill_n(expected.data(), count, fill_value);
+  device.fill(device_data, device_data + count, fill_value);
+  device.memcpyDeviceToHost(host.data(), device_data, count * sizeof(DataType));
+  synchronize(device);
+  for (int i=0; i<count; i++) {
+    VERIFY_IS_EQUAL(host(i), expected(i));
+  }
+  
+  device.deallocate(device_data);
+}
 
 void test_cpu() {
   Eigen::Tensor<float, 3> in1(40,50,70);
@@ -266,6 +305,9 @@
       }
     }
   }
+  
+  test_device_memory<float>(context.device());
+  test_device_memory<OffByOneScalar<int>>(context.device());
 }
 
 void test_gpu() {
@@ -386,6 +428,8 @@
 
 #endif
  
+  test_device_memory<float>(context.device());
+  test_device_memory<OffByOneScalar<int>>(context.device());
 }
 
 
diff --git a/unsupported/test/cxx11_tensor_device_sycl.cpp b/unsupported/test/cxx11_tensor_device_sycl.cpp
index 5095cb0..d7ff38d 100644
--- a/unsupported/test/cxx11_tensor_device_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_device_sycl.cpp
@@ -18,26 +18,36 @@
 #define EIGEN_USE_SYCL
 
 #include "main.h"
+#include "OffByOneScalar.h"
 #include <unsupported/Eigen/CXX11/Tensor>
 #include <stdint.h>
 #include <iostream>
 
 template <typename DataType, int DataLayout, typename IndexType>
 void test_device_memory(const Eigen::SyclDevice &sycl_device) {
-  std::cout << "Running on : "
-            << sycl_device.sycl_queue().get_device(). template get_info<cl::sycl::info::device::name>()
-            <<std::endl;
   IndexType sizeDim1 = 100;
   array<IndexType, 1> tensorRange = {{sizeDim1}};
   Tensor<DataType, 1, DataLayout,IndexType> in(tensorRange);
   Tensor<DataType, 1, DataLayout,IndexType> in1(tensorRange);
-  memset(in1.data(), 1, in1.size() * sizeof(DataType));
   DataType* gpu_in_data  = static_cast<DataType*>(sycl_device.allocate(in.size()*sizeof(DataType)));
+  
+  // memset
+  memset(in1.data(), 1, in1.size() * sizeof(DataType));
   sycl_device.memset(gpu_in_data, 1, in.size()*sizeof(DataType));
   sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.size()*sizeof(DataType));
   for (IndexType i=0; i<in.size(); i++) {
     VERIFY_IS_EQUAL(in(i), in1(i));
   }
+  
+  // fill
+  DataType value = DataType(7);
+  std::fill_n(in1.data(), in1.size(), value);
+  sycl_device.fill(gpu_in_data, gpu_in_data + in.size(), value);
+  sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.size()*sizeof(DataType));
+  for (IndexType i=0; i<in.size(); i++) {
+    VERIFY_IS_EQUAL(in(i), in1(i));
+  }
+
   sycl_device.deallocate(gpu_in_data);
 }
 
@@ -58,6 +68,31 @@
   sycl_device.deallocate(gpu_data);
 }
 
+template<typename DataType, int DataLayout, typename IndexType>
+void test_device_attach_buffer(const Eigen::SyclDevice &sycl_device) {
+  IndexType sizeDim1 = 100;
+  
+  array<IndexType, 1> tensorRange = {{sizeDim1}};
+  Tensor<DataType, 1, DataLayout, IndexType> in(tensorRange);
+  
+  cl::sycl::buffer<buffer_scalar_t, 1> buffer(cl::sycl::range<1>(sizeDim1 * sizeof(DataType)));
+  DataType* gpu_in_data = static_cast<DataType*>(sycl_device.attach_buffer(buffer));
+  
+  // fill
+  DataType value = DataType(7);
+  std::fill_n(in.data(), in.size(), value);
+  sycl_device.fill(gpu_in_data, gpu_in_data + in.size(), value);
+  
+  // Check that buffer is filled with the correct value.
+  auto reint = buffer.reinterpret<DataType>(cl::sycl::range<1>(sizeDim1));
+  auto access = reint.template get_access<cl::sycl::access::mode::read>();
+  for (IndexType i=0; i<in.size(); i++) {
+    VERIFY_IS_EQUAL(in(i), access[i]);
+  }
+  
+  sycl_device.detach_buffer(gpu_in_data);
+}
+
 template<typename DataType> void sycl_device_test_per_device(const cl::sycl::device& d){
   std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl;
   QueueInterface queueInterface(d);
@@ -68,10 +103,12 @@
   //test_device_exceptions<DataType, RowMajor>(sycl_device);
   /// this test throw an exception. enable it if you want to see the exception
   //test_device_exceptions<DataType, ColMajor>(sycl_device);
+  test_device_attach_buffer<DataType, ColMajor, int64_t>(sycl_device);
 }
 
 EIGEN_DECLARE_TEST(cxx11_tensor_device_sycl) {
   for (const auto& device :Eigen::get_sycl_supported_devices()) {
     CALL_SUBTEST(sycl_device_test_per_device<float>(device));
+    CALL_SUBTEST(sycl_device_test_per_device<OffByOneScalar<int>>(device));
   }
 }
diff --git a/unsupported/test/cxx11_tensor_shuffling.cpp b/unsupported/test/cxx11_tensor_shuffling.cpp
index 2ec85d2..89a64c0 100644
--- a/unsupported/test/cxx11_tensor_shuffling.cpp
+++ b/unsupported/test/cxx11_tensor_shuffling.cpp
@@ -215,6 +215,59 @@
 }
 
 
+template <int DataLayout>
+static void test_empty_shuffling()
+{
+  Tensor<float, 4, DataLayout> tensor(2,3,0,7);
+  tensor.setRandom();
+  array<ptrdiff_t, 4> shuffles;
+  shuffles[0] = 0;
+  shuffles[1] = 1;
+  shuffles[2] = 2;
+  shuffles[3] = 3;
+
+  Tensor<float, 4, DataLayout> no_shuffle;
+  no_shuffle = tensor.shuffle(shuffles);
+
+  VERIFY_IS_EQUAL(no_shuffle.dimension(0), 2);
+  VERIFY_IS_EQUAL(no_shuffle.dimension(1), 3);
+  VERIFY_IS_EQUAL(no_shuffle.dimension(2), 0);
+  VERIFY_IS_EQUAL(no_shuffle.dimension(3), 7);
+
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 0; ++k) {
+        for (int l = 0; l < 7; ++l) {
+          VERIFY_IS_EQUAL(tensor(i,j,k,l), no_shuffle(i,j,k,l));
+        }
+      }
+    }
+  }
+
+  shuffles[0] = 2;
+  shuffles[1] = 3;
+  shuffles[2] = 1;
+  shuffles[3] = 0;
+  Tensor<float, 4, DataLayout> shuffle;
+  shuffle = tensor.shuffle(shuffles);
+
+  VERIFY_IS_EQUAL(shuffle.dimension(0), 0);
+  VERIFY_IS_EQUAL(shuffle.dimension(1), 7);
+  VERIFY_IS_EQUAL(shuffle.dimension(2), 3);
+  VERIFY_IS_EQUAL(shuffle.dimension(3), 2);
+
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 0; ++k) {
+        for (int l = 0; l < 7; ++l) {
+          VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,l,j,i));
+        }
+      }
+    }
+  }
+}
+
+
 EIGEN_DECLARE_TEST(cxx11_tensor_shuffling)
 {
   CALL_SUBTEST(test_simple_shuffling<ColMajor>());
@@ -225,4 +278,6 @@
   CALL_SUBTEST(test_shuffling_as_value<RowMajor>());
   CALL_SUBTEST(test_shuffle_unshuffle<ColMajor>());
   CALL_SUBTEST(test_shuffle_unshuffle<RowMajor>());
+  CALL_SUBTEST(test_empty_shuffling<ColMajor>());
+  CALL_SUBTEST(test_empty_shuffling<RowMajor>());
 }
diff --git a/unsupported/test/mpreal_support.cpp b/unsupported/test/mpreal_support.cpp
index 4a25e99..10beb07 100644
--- a/unsupported/test/mpreal_support.cpp
+++ b/unsupported/test/mpreal_support.cpp
@@ -1,3 +1,4 @@
+#include <mpreal.h>  // Must be included before main.h.
 #include "main.h"
 #include <Eigen/MPRealSupport>
 #include <Eigen/LU>