Update Eigen to commit:33e206f714d16f03cd31fd7bf1aa9d3a0ef66a5e

CHANGELOG
=========
33e206f71 - Remove unused declarations of BLAS/LAPACK routines
d57a79e51 - Optimize float->bool cast for AVX2, based on Charles Schlosser's comments.
a5ae83277 - Fix reversal of arguments to _mm256_set_m128() in pcast<Packet4d, Packet8f>.
09945f2cc - Optimize casting for x86_64.
8f9b8e363 - Replaced all instances of internal::(U)IntPtr with std::(u)intptr_t. Remove ICC workaround.
2c8011c2d - Fix arm builds.
fd8f410bb - Fix 2624 2625
e887196d9 - Undo cmake pools changes
81cb6a51d - Fix some typos
555cec17e - Fix parsing of command-line arguments when already specified as a cmake list.
7db19baab - Remove pools if cmake is less than 3.11
0488b708b - Vectorize tensor.isnan() by using typed predicates.

PiperOrigin-RevId: 520150826
Change-Id: Ia128c3a323b976025026e5d19f5b9209c1fd1bfd
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 2b3a1c7..d3be192 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -550,7 +550,7 @@
                                : int(Kernel::AssignmentTraits::DstAlignment)
     };
     const Scalar *dst_ptr = kernel.dstDataPtr();
-    if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
+    if((!bool(dstIsAligned)) && (std::uintptr_t(dst_ptr) % sizeof(Scalar))>0)
     {
       // the pointer is not aligned-on scalar, so alignment is not possible
       return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 1729507..e233efb 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -1225,7 +1225,7 @@
   explicit block_evaluator(const XprType& block)
     : mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
   {
-    eigen_internal_assert((internal::is_constant_evaluated() || (internal::UIntPtr(block.data()) % plain_enum_max(1,evaluator<XprType>::Alignment)) == 0) \
+    eigen_internal_assert((internal::is_constant_evaluated() || (std::uintptr_t(block.data()) % plain_enum_max(1,evaluator<XprType>::Alignment)) == 0) \
       && "data is not aligned");
   }
 };
diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h
index b409a39..11205fa 100644
--- a/Eigen/src/Core/DenseStorage.h
+++ b/Eigen/src/Core/DenseStorage.h
@@ -56,7 +56,7 @@
   #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask)
 #else
   #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
-    eigen_assert((internal::is_constant_evaluated() || (internal::UIntPtr(array) & (sizemask)) == 0) \
+    eigen_assert((internal::is_constant_evaluated() || (std::uintptr_t(array) & (sizemask)) == 0) \
               && "this assertion is explained here: " \
               "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
               " **** READ THIS WEB PAGE !!! ****");
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index 661a3c4..9f693ba 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -191,7 +191,7 @@
   internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data;
   EIGEN_STRONG_INLINE Scalar* data() {
     return ForceAlignment
-            ? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
+            ? reinterpret_cast<Scalar*>((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
             : m_data.array;
   }
   #endif
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 8b5f030..8322b38 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -443,6 +443,12 @@
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pandnot(const Packet& a, const Packet& b) { return pand(a, pnot(b)); }
 
+/** \internal \returns isnan(a) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pisnan(const Packet& a) {
+  return pandnot(ptrue(a), pcmp_eq(a, a));
+}
+
 // In the general case, use bitwise select.
 template<typename Packet, typename EnableIf = void>
 struct pselect_impl {
diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h
index bf8c163..4dda24d 100644
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -197,7 +197,7 @@
       // innerStride() is not set yet when this function is called, so we optimistically assume the lowest plausible value:
       const Index minInnerStride = InnerStrideAtCompileTime == Dynamic ? 1 : Index(InnerStrideAtCompileTime);
       EIGEN_ONLY_USED_FOR_DEBUG(minInnerStride);
-      eigen_assert((   ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0)
+      eigen_assert((   ((std::uintptr_t(m_data) % internal::traits<Derived>::Alignment) == 0)
                     || (cols() * rows() * minInnerStride * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
 #endif
     }
diff --git a/Eigen/src/Core/Reshaped.h b/Eigen/src/Core/Reshaped.h
index 81355ac..508bc25 100644
--- a/Eigen/src/Core/Reshaped.h
+++ b/Eigen/src/Core/Reshaped.h
@@ -444,7 +444,7 @@
     : mapbase_evaluator<XprType, typename XprType::PlainObject>(xpr)
   {
     // TODO: for the 3.4 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime
-    eigen_assert(((internal::UIntPtr(xpr.data()) % plain_enum_max(1, evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
+    eigen_assert(((std::uintptr_t(xpr.data()) % plain_enum_max(1, evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
   }
 };
 
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 3eb4394..c5e1cc0 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -634,6 +634,7 @@
 template<> EIGEN_STRONG_INLINE Packet8f pcmp_lt(const Packet8f& a, const Packet8f& b) { return _mm256_cmp_ps(a,b,_CMP_LT_OQ); }
 template<> EIGEN_STRONG_INLINE Packet8f pcmp_lt_or_nan(const Packet8f& a, const Packet8f& b) { return _mm256_cmp_ps(a, b, _CMP_NGE_UQ); }
 template<> EIGEN_STRONG_INLINE Packet8f pcmp_eq(const Packet8f& a, const Packet8f& b) { return _mm256_cmp_ps(a,b,_CMP_EQ_OQ); }
+template<> EIGEN_STRONG_INLINE Packet8f pisnan(const Packet8f& a) { return _mm256_cmp_ps(a,a,_CMP_UNORD_Q); }
 
 template<> EIGEN_STRONG_INLINE Packet4d pcmp_le(const Packet4d& a, const Packet4d& b) { return _mm256_cmp_pd(a,b,_CMP_LE_OQ); }
 template<> EIGEN_STRONG_INLINE Packet4d pcmp_lt(const Packet4d& a, const Packet4d& b) { return _mm256_cmp_pd(a,b,_CMP_LT_OQ); }
diff --git a/Eigen/src/Core/arch/AVX/TypeCasting.h b/Eigen/src/Core/arch/AVX/TypeCasting.h
index 320479b..db19b56 100644
--- a/Eigen/src/Core/arch/AVX/TypeCasting.h
+++ b/Eigen/src/Core/arch/AVX/TypeCasting.h
@@ -16,29 +16,7 @@
 
 namespace internal {
 
-// For now we use SSE to handle integers, so we can't use AVX instructions to cast
-// from int to float
-template <>
-struct type_casting_traits<float, int> {
-  enum {
-    VectorizedCast = 0,
-    SrcCoeffRatio = 1,
-    TgtCoeffRatio = 1
-  };
-};
-
-template <>
-struct type_casting_traits<int, float> {
-  enum {
-    VectorizedCast = 0,
-    SrcCoeffRatio = 1,
-    TgtCoeffRatio = 1
-  };
-};
-
-
 #ifndef EIGEN_VECTORIZE_AVX512
-
 template <>
 struct type_casting_traits<Eigen::half, float> {
   enum {
@@ -76,8 +54,17 @@
   };
 };
 
+template <>
+struct type_casting_traits<float, bool> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 2,
+    TgtCoeffRatio = 1
+  };
+};
 #endif  // EIGEN_VECTORIZE_AVX512
 
+
 template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
   return _mm256_cvttps_epi32(a);
 }
@@ -86,6 +73,40 @@
   return _mm256_cvtepi32_ps(a);
 }
 
+template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet4d, Packet8f>(const Packet4d& a, const Packet4d& b) {
+  return _mm256_set_m128(_mm256_cvtpd_ps(b), _mm256_cvtpd_ps(a));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16b pcast<Packet8f, Packet16b>(const Packet8f& a,
+                                                         const Packet8f& b) {
+  __m256 nonzero_a = _mm256_cmp_ps(a, pzero(a), _CMP_NEQ_UQ);
+  __m256 nonzero_b = _mm256_cmp_ps(b, pzero(b), _CMP_NEQ_UQ);
+  constexpr char kFF = '\255';
+#ifndef EIGEN_VECTORIZE_AVX2
+  __m128i shuffle_mask128_a_lo = _mm_set_epi8(kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, 12, 8, 4, 0);
+  __m128i shuffle_mask128_a_hi = _mm_set_epi8(kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, 12, 8, 4, 0, kFF, kFF, kFF, kFF);
+  __m128i shuffle_mask128_b_lo = _mm_set_epi8(kFF, kFF, kFF, kFF, 12, 8, 4, 0, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF);
+  __m128i shuffle_mask128_b_hi = _mm_set_epi8(12, 8, 4, 0, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF);
+  __m128i a_hi = _mm_shuffle_epi8(_mm256_extractf128_si256(_mm256_castps_si256(nonzero_a), 1), shuffle_mask128_a_hi);
+  __m128i a_lo = _mm_shuffle_epi8(_mm256_extractf128_si256(_mm256_castps_si256(nonzero_a), 0), shuffle_mask128_a_lo);
+  __m128i b_hi = _mm_shuffle_epi8(_mm256_extractf128_si256(_mm256_castps_si256(nonzero_b), 1), shuffle_mask128_b_hi);
+  __m128i b_lo = _mm_shuffle_epi8(_mm256_extractf128_si256(_mm256_castps_si256(nonzero_b), 0), shuffle_mask128_b_lo);
+  __m128i merged = _mm_or_si128(_mm_or_si128(b_lo, b_hi), _mm_or_si128(a_lo, a_hi));
+  return _mm_and_si128(merged, _mm_set1_epi8(1));
+ #else
+  __m256i a_shuffle_mask = _mm256_set_epi8(kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF,  12,   8,   4,   0, kFF, kFF, kFF, kFF,
+                                           kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF,  12,   8,   4,   0);
+  __m256i b_shuffle_mask = _mm256_set_epi8( 12,   8,   4,   0, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF,
+                                           kFF, kFF, kFF, kFF,  12,   8,   4,   0, kFF, kFF, kFF, kFF, kFF, kFF, kFF, kFF);
+  __m256i a_shuff = _mm256_shuffle_epi8(_mm256_castps_si256(nonzero_a), a_shuffle_mask);
+  __m256i b_shuff = _mm256_shuffle_epi8(_mm256_castps_si256(nonzero_b), b_shuffle_mask);
+  __m256i a_or_b = _mm256_or_si256(a_shuff, b_shuff);
+  __m256i merged = _mm256_or_si256(a_or_b, _mm256_castsi128_si256(_mm256_extractf128_si256(a_or_b, 1)));
+  return _mm256_castsi256_si128(_mm256_and_si256(merged, _mm256_set1_epi8(1)));
+#endif
+}
+
 template<> EIGEN_STRONG_INLINE Packet8i preinterpret<Packet8i,Packet8f>(const Packet8f& a) {
   return _mm256_castps_si256(a);
 }
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 98b55ea..543f424 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -353,7 +353,7 @@
 }
 template <>
 EIGEN_STRONG_INLINE Packet16i pnegate(const Packet16i& a) {
-  return _mm512_sub_epi32(_mm512_set1_epi32(0), a);
+  return _mm512_sub_epi32(_mm512_setzero_si512(), a);
 }
 
 template <>
@@ -581,65 +581,71 @@
 }
 
 template <>
+EIGEN_STRONG_INLINE Packet16f pisnan(const Packet16f& a) {
+  __mmask16 mask = _mm512_cmp_ps_mask(a, a, _CMP_UNORD_Q);
+  return _mm512_castsi512_ps(_mm512_maskz_set1_epi32(mask, 0xffffffffu));
+}
+
+template <>
 EIGEN_STRONG_INLINE Packet16f pcmp_eq(const Packet16f& a, const Packet16f& b) {
   __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ);
   return _mm512_castsi512_ps(
-      _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu));
+      _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu));
 }
 template<> EIGEN_STRONG_INLINE Packet16f pcmp_le(const Packet16f& a, const Packet16f& b) {
   __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_LE_OQ);
   return _mm512_castsi512_ps(
-      _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu));
+      _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu));
 }
 
 template<> EIGEN_STRONG_INLINE Packet16f pcmp_lt(const Packet16f& a, const Packet16f& b) {
   __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ);
   return _mm512_castsi512_ps(
-      _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu));
+      _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu));
 }
 
 template<> EIGEN_STRONG_INLINE Packet16f pcmp_lt_or_nan(const Packet16f& a, const Packet16f& b) {
   __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_NGE_UQ);
   return _mm512_castsi512_ps(
-      _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu));
+      _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu));
 }
 
 template<> EIGEN_STRONG_INLINE Packet16i pcmp_eq(const Packet16i& a, const Packet16i& b) {
   __mmask16 mask = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_EQ);
-  return _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu);
+  return _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu);
 }
 template<> EIGEN_STRONG_INLINE Packet16i pcmp_le(const Packet16i& a, const Packet16i& b) {
   __mmask16 mask = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LE);
-  return _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu);
+  return _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu);
 }
 template<> EIGEN_STRONG_INLINE Packet16i pcmp_lt(const Packet16i& a, const Packet16i& b) {
   __mmask16 mask = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
-  return _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu);
+  return _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu);
 }
 
 template <>
 EIGEN_STRONG_INLINE Packet8d pcmp_eq(const Packet8d& a, const Packet8d& b) {
   __mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ);
   return _mm512_castsi512_pd(
-      _mm512_mask_set1_epi64(_mm512_set1_epi64(0), mask, 0xffffffffffffffffu));
+      _mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, 0xffffffffffffffffu));
 }
 template <>
 EIGEN_STRONG_INLINE Packet8d pcmp_le(const Packet8d& a, const Packet8d& b) {
   __mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_LE_OQ);
   return _mm512_castsi512_pd(
-      _mm512_mask_set1_epi64(_mm512_set1_epi64(0), mask, 0xffffffffffffffffu));
+      _mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, 0xffffffffffffffffu));
 }
 template <>
 EIGEN_STRONG_INLINE Packet8d pcmp_lt(const Packet8d& a, const Packet8d& b) {
   __mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_LT_OQ);
   return _mm512_castsi512_pd(
-      _mm512_mask_set1_epi64(_mm512_set1_epi64(0), mask, 0xffffffffffffffffu));
+      _mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, 0xffffffffffffffffu));
 }
 template <>
 EIGEN_STRONG_INLINE Packet8d pcmp_lt_or_nan(const Packet8d& a, const Packet8d& b) {
   __mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_NGE_UQ);
   return _mm512_castsi512_pd(
-      _mm512_mask_set1_epi64(_mm512_set1_epi64(0), mask, 0xffffffffffffffffu));
+      _mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, 0xffffffffffffffffu));
 }
 
 template<> EIGEN_STRONG_INLINE Packet16f print<Packet16f>(const Packet16f& a) { return _mm512_roundscale_ps(a, _MM_FROUND_CUR_DIRECTION); }
diff --git a/Eigen/src/Core/arch/AVX512/TypeCasting.h b/Eigen/src/Core/arch/AVX512/TypeCasting.h
index 62a7429..60f49a3 100644
--- a/Eigen/src/Core/arch/AVX512/TypeCasting.h
+++ b/Eigen/src/Core/arch/AVX512/TypeCasting.h
@@ -16,6 +16,33 @@
 
 namespace internal {
 
+template <>
+struct type_casting_traits<float, bool> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+template <>
+struct type_casting_traits<bool, float> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+template<> EIGEN_STRONG_INLINE Packet16b pcast<Packet16f, Packet16b>(const Packet16f& a) {
+  __mmask16 mask = _mm512_cmpneq_ps_mask(a, pzero(a));
+  return _mm512_maskz_cvtepi32_epi8(mask, _mm512_set1_epi32(1));
+}
+
+template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16b, Packet16f>(const Packet16b& a) {
+  return _mm512_cvtepi32_ps(_mm512_and_si512(_mm512_cvtepi8_epi32(a), _mm512_set1_epi32(1)));
+}
+
 template<> EIGEN_STRONG_INLINE Packet16i pcast<Packet16f, Packet16i>(const Packet16f& a) {
   return _mm512_cvttps_epi32(a);
 }
@@ -24,6 +51,10 @@
   return _mm512_cvtepi32_ps(a);
 }
 
+template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet8d, Packet16f>(const Packet8d& a, const Packet8d& b) {
+  return  cat256(_mm512_cvtpd_ps(a), _mm512_cvtpd_ps(b));
+}
+
 template<> EIGEN_STRONG_INLINE Packet16i preinterpret<Packet16i, Packet16f>(const Packet16f& a) {
   return _mm512_castps_si512(a);
 }
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 21de0ac..be21532 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -1124,7 +1124,7 @@
   Packet imag_inf_result;
   imag_inf_result.v = por(pand(cst_pos_inf, real_mask), pandnot(a.v, real_mask));
   // unless otherwise specified, if either the real or imaginary component is nan, the entire result is nan
-  Packet result_is_nan = pandnot(ptrue(result), pcmp_eq(result, result));
+  Packet result_is_nan = pisnan(result);
   result = por(result_is_nan, result);
 
   return pselect(is_imag_inf, imag_inf_result, pselect(is_real_inf, real_inf_result, result));
@@ -1796,7 +1796,7 @@
   const Packet abs_x_is_lt_one = pcmp_lt(abs_x, cst_one);
   const Packet x_is_one = pandnot(abs_x_is_one, x_is_neg);
   const Packet x_is_neg_one = pand(abs_x_is_one, x_is_neg);
-  const Packet x_is_nan = pandnot(ptrue(x), pcmp_eq(x, x));
+  const Packet x_is_nan = pisnan(x);
 
   // Predicates for sign and magnitude of y.
   const Packet abs_y = pabs(y);
@@ -1804,7 +1804,7 @@
   const Packet abs_y_is_zero = pcmp_eq(abs_y, cst_zero);
   const Packet y_is_neg = pcmp_lt(y, cst_zero);
   const Packet y_is_pos = pandnot(ptrue(y), por(abs_y_is_zero, y_is_neg));
-  const Packet y_is_nan = pandnot(ptrue(y), pcmp_eq(y, y));
+  const Packet y_is_nan = pisnan(y);
   const Packet abs_y_is_inf = pcmp_eq(abs_y, cst_pos_inf);
   EIGEN_CONSTEXPR Scalar huge_exponent =
       (NumTraits<Scalar>::max_exponent() * Scalar(EIGEN_LN2)) / NumTraits<Scalar>::epsilon();
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index e31f717..fac0219 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -150,7 +150,7 @@
   return shuffle2<true>(a,b,eigen_neon_shuffle_mask(2, 2, 3, 3));
 }
 #define vec4f_duplane(a, p) \
-  vdupq_lane_f32(vget_low_f32(a), p)
+  Packet4f(vdupq_lane_f32(vget_low_f32(a), p))
 
 #define EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
   const Packet4f p4f_##NAME = pset1<Packet4f>(X)
@@ -3773,7 +3773,7 @@
   return shuffle(a, b, 3);
 }
 #define vec2d_duplane(a, p) \
-  vdupq_laneq_f64(a, p)
+  Packet2d(vdupq_laneq_f64(a, p))
 
 template<> struct packet_traits<double>  : default_packet_traits
 {
diff --git a/Eigen/src/Core/arch/SSE/TypeCasting.h b/Eigen/src/Core/arch/SSE/TypeCasting.h
index a6346ea..2ab0943 100644
--- a/Eigen/src/Core/arch/SSE/TypeCasting.h
+++ b/Eigen/src/Core/arch/SSE/TypeCasting.h
@@ -18,6 +18,15 @@
 
 #ifndef EIGEN_VECTORIZE_AVX
 template <>
+struct type_casting_traits<float, bool> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 4,
+    TgtCoeffRatio = 1
+  };
+};
+
+template <>
 struct type_casting_traits<float, int> {
   enum {
     VectorizedCast = 1,
@@ -36,6 +45,16 @@
 };
 
 template <>
+struct type_casting_traits<float, double> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 2
+  };
+};
+#endif
+
+template <>
 struct type_casting_traits<double, float> {
   enum {
     VectorizedCast = 1,
@@ -45,14 +64,20 @@
 };
 
 template <>
-struct type_casting_traits<float, double> {
-  enum {
-    VectorizedCast = 1,
-    SrcCoeffRatio = 1,
-    TgtCoeffRatio = 2
-  };
-};
-#endif
+EIGEN_STRONG_INLINE Packet16b pcast<Packet4f, Packet16b>(const Packet4f& a,
+                                                         const Packet4f& b,
+                                                         const Packet4f& c,
+                                                         const Packet4f& d) {
+  __m128 zero = pzero(a);
+  __m128 nonzero_a = _mm_cmpneq_ps(a, zero);
+  __m128 nonzero_b = _mm_cmpneq_ps(b, zero);
+  __m128 nonzero_c = _mm_cmpneq_ps(c, zero);
+  __m128 nonzero_d = _mm_cmpneq_ps(d, zero);
+  __m128i ab_bytes = _mm_packs_epi32(_mm_castps_si128(nonzero_a), _mm_castps_si128(nonzero_b));
+  __m128i cd_bytes = _mm_packs_epi32(_mm_castps_si128(nonzero_c), _mm_castps_si128(nonzero_d));
+  __m128i merged = _mm_packs_epi16(ab_bytes, cd_bytes);
+  return _mm_and_si128(merged, _mm_set1_epi8(1));
+}
 
 template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
   return _mm_cvttps_epi32(a);
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index e909f13..8354c0a 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -859,22 +859,39 @@
   * \brief Template functor to compute whether a scalar is NaN
   * \sa class CwiseUnaryOp, ArrayBase::isnan()
   */
-template<typename Scalar> struct scalar_isnan_op {
-  typedef bool result_type;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const {
+template<typename Scalar, bool UseTypedPredicate=false>
+struct scalar_isnan_op {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const Scalar& a) const {
 #if defined(SYCL_DEVICE_ONLY)
     return numext::isnan(a);
 #else
-    return (numext::isnan)(a);
+    return numext::isnan EIGEN_NOT_A_MACRO (a);
 #endif
   }
 };
+
+
 template<typename Scalar>
-struct functor_traits<scalar_isnan_op<Scalar> >
+struct scalar_isnan_op<Scalar, true> {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const {
+#if defined(SYCL_DEVICE_ONLY)
+    return (numext::isnan(a) ? ptrue(a) : pzero(a));
+#else
+    return (numext::isnan EIGEN_NOT_A_MACRO (a)  ? ptrue(a) : pzero(a));
+#endif
+  }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const {
+    return pisnan(a);
+  }
+};
+
+template<typename Scalar, bool UseTypedPredicate>
+struct functor_traits<scalar_isnan_op<Scalar, UseTypedPredicate> >
 {
   enum {
     Cost = NumTraits<Scalar>::MulCost,
-    PacketAccess = false
+    PacketAccess = packet_traits<Scalar>::HasCmp && UseTypedPredicate
   };
 };
 
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index 38bddac..9f2a53d 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -311,8 +311,8 @@
       this->m_blockA = m_staticA;
       this->m_blockB = m_staticB;
 #else
-      this->m_blockA = reinterpret_cast<LhsScalar*>((internal::UIntPtr(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
-      this->m_blockB = reinterpret_cast<RhsScalar*>((internal::UIntPtr(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
+      this->m_blockA = reinterpret_cast<LhsScalar*>((std::uintptr_t(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
+      this->m_blockB = reinterpret_cast<RhsScalar*>((std::uintptr_t(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
 #endif
     }
 
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index 979d974..41fbe15 100644
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -66,7 +66,7 @@
 
   template <typename Packet>
   EIGEN_DEVICE_FUNC bool aligned(Index i) const {
-    return (UIntPtr(m_data+i)%sizeof(Packet))==0;
+    return (std::uintptr_t(m_data+i)%sizeof(Packet))==0;
   }
 
   protected:
@@ -253,7 +253,7 @@
   EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
 
   EIGEN_DEVICE_FUNC Index firstAligned(Index size) const {
-    if (UIntPtr(m_data)%sizeof(Scalar)) {
+    if (std::uintptr_t(m_data)%sizeof(Scalar)) {
       return -1;
     }
     return internal::first_default_aligned(m_data, size);
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index 0175087..67a6976 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -136,7 +136,7 @@
   * Means the expression has a coeffRef() method, i.e. is writable as its individual coefficients are directly addressable.
   * This rules out read-only expressions.
   *
-  * Note that DirectAccessBit and LvalueBit are mutually orthogonal, as there are examples of expression having one but note
+  * Note that DirectAccessBit and LvalueBit are mutually orthogonal, as there are examples of expression having one but not
   * the other:
   *   \li writable expressions that don't have a very simple memory layout as a strided array, have LvalueBit but not DirectAccessBit
   *   \li Map-to-const expressions, for example Map<const Matrix>, have DirectAccessBit but not LvalueBit
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index dd94f88..6a176de 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -351,7 +351,7 @@
 
 /// \internal EIGEN_HAS_ARM64_FP16 set to 1 if the architecture provides an IEEE
 /// compliant Arm fp16 type
-#if EIGEN_ARCH_ARM64
+#if EIGEN_ARCH_ARM_OR_ARM64
   #ifndef EIGEN_HAS_ARM64_FP16
     #if defined(__ARM_FP16_FORMAT_IEEE)
       #define EIGEN_HAS_ARM64_FP16 1
@@ -363,7 +363,7 @@
 
 /// \internal EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC set to 1 if the architecture
 /// supports Neon vector intrinsics for fp16.
-#if EIGEN_ARCH_ARM64
+#if EIGEN_ARCH_ARM_OR_ARM64
   #ifndef EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
     #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
       #define EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC 1
@@ -375,7 +375,7 @@
 
 /// \internal EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC set to 1 if the architecture
 /// supports Neon scalar intrinsics for fp16.
-#if EIGEN_ARCH_ARM64
+#if EIGEN_ARCH_ARM_OR_ARM64
   #ifndef EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC
     #if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)
       #define EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC 1
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index a39c718..13d029c 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -541,7 +541,7 @@
     // so that all elements of the array have the same alignment.
     return 0;
   }
-  else if( (UIntPtr(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0)
+  else if( (std::uintptr_t(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0)
   {
     // The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size.
     // Consequently, no element of the array is well aligned.
@@ -549,7 +549,7 @@
   }
   else
   {
-    Index first = (AlignmentSize - (Index((UIntPtr(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
+    Index first = (AlignmentSize - (Index((std::uintptr_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
     return (first < size) ? first : size;
   }
 }
@@ -583,7 +583,7 @@
 template<typename T> struct smart_copy_helper<T,true> {
   EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
   {
-    IntPtr size = IntPtr(end)-IntPtr(start);
+    std::intptr_t size = std::intptr_t(end)-std::intptr_t(start);
     if(size==0) return;
     eigen_internal_assert(start!=0 && end!=0 && target!=0);
     EIGEN_USING_STD(memcpy)
@@ -607,7 +607,7 @@
 template<typename T> struct smart_memmove_helper<T,true> {
   static inline void run(const T* start, const T* end, T* target)
   {
-    IntPtr size = IntPtr(end)-IntPtr(start);
+    std::intptr_t size = std::intptr_t(end)-std::intptr_t(start);
     if(size==0) return;
     eigen_internal_assert(start!=0 && end!=0 && target!=0);
     std::memmove(target, start, size);
@@ -617,7 +617,7 @@
 template<typename T> struct smart_memmove_helper<T,false> {
   static inline void run(const T* start, const T* end, T* target)
   {
-    if (UIntPtr(target) < UIntPtr(start))
+    if (std::uintptr_t(target) < std::uintptr_t(start))
     {
       std::copy(start, end, target);
     }
@@ -799,7 +799,7 @@
   #if EIGEN_DEFAULT_ALIGN_BYTES>0
     // We always manually re-align the result of EIGEN_ALLOCA.
     // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
-    #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((internal::UIntPtr(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)))
+    #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((std::uintptr_t(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)))
   #else
     #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
   #endif
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 740b10f..a192aac 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -27,9 +27,6 @@
 
 #endif
 
-// Recent versions of ICC require <cstdint> for pointer types below.
-#define EIGEN_ICC_NEEDS_CSTDINT (EIGEN_COMP_ICC>=1600)
-
 // Define portable (u)int{32,64} types
 #include <cstdint>
 
@@ -93,17 +90,6 @@
   * we however don't want to add a dependency to Boost.
   */
 
-// Only recent versions of ICC complain about using ptrdiff_t to hold pointers,
-// and older versions do not provide *intptr_t types.
-#if EIGEN_ICC_NEEDS_CSTDINT
-typedef std::intptr_t  IntPtr;
-typedef std::uintptr_t UIntPtr;
-#else
-typedef std::ptrdiff_t IntPtr;
-typedef std::size_t UIntPtr;
-#endif
-#undef EIGEN_ICC_NEEDS_CSTDINT
-
 struct true_type {  enum { value = 1 }; };
 struct false_type { enum { value = 0 }; };
 
diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h
index 0d6e114..bea09cb 100644
--- a/Eigen/src/SparseCore/SparseMatrix.h
+++ b/Eigen/src/SparseCore/SparseMatrix.h
@@ -135,7 +135,7 @@
     using Base::operator+=;
     using Base::operator-=;
 
-    typedef Eigen::Map<SparseMatrix<Scalar,Flags,StorageIndex>> Map;
+    typedef Eigen::Map<SparseMatrix<Scalar,Options_,StorageIndex>> Map;
     typedef Diagonal<SparseMatrix> DiagonalReturnType;
     typedef Diagonal<const SparseMatrix> ConstDiagonalReturnType;
     typedef typename Base::InnerIterator InnerIterator;
@@ -1107,15 +1107,18 @@
   ei_declare_aligned_stack_constructed_variable(StorageIndex, tmp, numext::maxi(mat.innerSize(), mat.outerSize()), 0);
   // scan triplets to determine allocation size before constructing matrix
   IndexMap outerIndexMap(mat.outerIndexPtr(), mat.outerSize() + 1);
+  Index nonZeros = 0;
   for (InputIterator it(begin); it != end; ++it) {
     eigen_assert(it->row() >= 0 && it->row() < mat.rows() && it->col() >= 0 && it->col() < mat.cols());
-    StorageIndex j = static_cast<StorageIndex>(IsRowMajor ? it->row() : it->col());
+    StorageIndex j = convert_index<StorageIndex>(IsRowMajor ? it->row() : it->col());
     outerIndexMap.coeffRef(j + 1)++;
+    if (nonZeros == NumTraits<StorageIndex>::highest()) internal::throw_std_bad_alloc();
+    nonZeros++;
   }
 
   // finalize outer indices and allocate memory
   std::partial_sum(outerIndexMap.begin(), outerIndexMap.end(), outerIndexMap.begin());
-  Index nonZeros = mat.outerIndexPtr()[mat.outerSize()];
+  eigen_assert(nonZeros == mat.outerIndexPtr()[mat.outerSize()]);
   mat.resizeNonZeros(nonZeros);
 
   // use tmp to track nonzero insertions
@@ -1124,8 +1127,8 @@
 
   // push triplets to back of each inner vector
   for (InputIterator it(begin); it != end; ++it) {
-    StorageIndex j = static_cast<StorageIndex>(IsRowMajor ? it->row() : it->col());
-    StorageIndex i = static_cast<StorageIndex>(IsRowMajor ? it->col() : it->row());
+    StorageIndex j = convert_index<StorageIndex>(IsRowMajor ? it->row() : it->col());
+    StorageIndex i = convert_index<StorageIndex>(IsRowMajor ? it->col() : it->row());
     mat.data().index(back.coeff(j)) = i;
     mat.data().value(back.coeff(j)) = it->value();
     back.coeffRef(j)++;
@@ -1154,29 +1157,34 @@
   StorageIndex previous_i = kEmptyIndexValue;
   // scan triplets to determine allocation size before constructing matrix
   IndexMap outerIndexMap(mat.outerIndexPtr(), mat.outerSize() + 1);
+  Index nonZeros = 0;
   for (InputIterator it(begin); it != end; ++it) {
     eigen_assert(it->row() >= 0 && it->row() < mat.rows() && it->col() >= 0 && it->col() < mat.cols());
-    StorageIndex j = IsRowMajor ? it->row() : it->col();
-    StorageIndex i = IsRowMajor ? it->col() : it->row();
+    StorageIndex j = convert_index<StorageIndex>(IsRowMajor ? it->row() : it->col());
+    StorageIndex i = convert_index<StorageIndex>(IsRowMajor ? it->col() : it->row());
     eigen_assert(j > previous_j || (j == previous_j && i >= previous_i));
     // identify duplicates by examining previous location
     bool duplicate = (previous_j == j) && (previous_i == i);
-    if (!duplicate) outerIndexMap.coeffRef(j + 1)++;
+    if (!duplicate) {
+      outerIndexMap.coeffRef(j + 1)++;
+      if (nonZeros == NumTraits<StorageIndex>::highest()) internal::throw_std_bad_alloc();
+      nonZeros++;
+    }
     previous_j = j;
     previous_i = i;
   }
   
   // finalize outer indices and allocate memory
   std::partial_sum(outerIndexMap.begin(), outerIndexMap.end(), outerIndexMap.begin());
-  Index nonZeros = mat.outerIndexPtr()[mat.outerSize()];
+  eigen_assert(nonZeros == mat.outerIndexPtr()[mat.outerSize()]);
   mat.resizeNonZeros(nonZeros);
 
   previous_i = kEmptyIndexValue;
   previous_j = kEmptyIndexValue;
   Index back = 0;
   for (InputIterator it(begin); it != end; ++it) {
-    StorageIndex j = IsRowMajor ? it->row() : it->col();
-    StorageIndex i = IsRowMajor ? it->col() : it->row();
+    StorageIndex j = convert_index<StorageIndex>(IsRowMajor ? it->row() : it->col());
+    StorageIndex i = convert_index<StorageIndex>(IsRowMajor ? it->col() : it->row());
     bool duplicate = (previous_j == j) && (previous_i == i);
     if (duplicate) {
       mat.data().value(back - 1) = dup_func(mat.data().value(back - 1), it->value());
diff --git a/Eigen/src/misc/blas.h b/Eigen/src/misc/blas.h
index 25215b1..0170eef 100644
--- a/Eigen/src/misc/blas.h
+++ b/Eigen/src/misc/blas.h
@@ -1,440 +1,66 @@
-#ifndef BLAS_H
-#define BLAS_H
+#ifndef EIGEN_MISC_BLAS_H
+#define EIGEN_MISC_BLAS_H
 
-#ifdef __cplusplus
-extern "C"
-{
-#endif
+extern "C" {
 
 #define BLASFUNC(FUNC) FUNC##_
 
-#ifdef __WIN64__
-typedef long long BLASLONG;
-typedef unsigned long long BLASULONG;
-#else
-typedef long BLASLONG;
-typedef unsigned long BLASULONG;
-#endif
+/* Level 1 routines */
 
-int    BLASFUNC(xerbla)(const char *, int *info, int);
-
-float  BLASFUNC(sdot)  (int *, float  *, int *, float  *, int *);
-float  BLASFUNC(sdsdot)(int *, float  *,        float  *, int *, float  *, int *);
-
-double BLASFUNC(dsdot) (int *, float  *, int *, float  *, int *);
-double BLASFUNC(ddot)  (int *, double *, int *, double *, int *);
-double BLASFUNC(qdot)  (int *, double *, int *, double *, int *);
-
-int  BLASFUNC(cdotuw)  (int *, float  *, int *, float  *, int *, float*);
-int  BLASFUNC(cdotcw)  (int *, float  *, int *, float  *, int *, float*);
-int  BLASFUNC(zdotuw)  (int *, double  *, int *, double  *, int *, double*);
-int  BLASFUNC(zdotcw)  (int *, double  *, int *, double  *, int *, double*);
-
-int    BLASFUNC(saxpy) (const int *, const float  *, const float  *, const int *, float  *, const int *);
-int    BLASFUNC(daxpy) (const int *, const double *, const double *, const int *, double *, const int *);
-int    BLASFUNC(qaxpy) (const int *, const double *, const double *, const int *, double *, const int *);
-int    BLASFUNC(caxpy) (const int *, const float  *, const float  *, const int *, float  *, const int *);
-int    BLASFUNC(zaxpy) (const int *, const double *, const double *, const int *, double *, const int *);
-int    BLASFUNC(xaxpy) (const int *, const double *, const double *, const int *, double *, const int *);
-int    BLASFUNC(caxpyc)(const int *, const float  *, const float  *, const int *, float  *, const int *);
-int    BLASFUNC(zaxpyc)(const int *, const double *, const double *, const int *, double *, const int *);
-int    BLASFUNC(xaxpyc)(const int *, const double *, const double *, const int *, double *, const int *);
-
-int    BLASFUNC(scopy) (int *, float  *, int *, float  *, int *);
-int    BLASFUNC(dcopy) (int *, double *, int *, double *, int *);
-int    BLASFUNC(qcopy) (int *, double *, int *, double *, int *);
-int    BLASFUNC(ccopy) (int *, float  *, int *, float  *, int *);
-int    BLASFUNC(zcopy) (int *, double *, int *, double *, int *);
-int    BLASFUNC(xcopy) (int *, double *, int *, double *, int *);
-
-int    BLASFUNC(sswap) (int *, float  *, int *, float  *, int *);
-int    BLASFUNC(dswap) (int *, double *, int *, double *, int *);
-int    BLASFUNC(qswap) (int *, double *, int *, double *, int *);
-int    BLASFUNC(cswap) (int *, float  *, int *, float  *, int *);
-int    BLASFUNC(zswap) (int *, double *, int *, double *, int *);
-int    BLASFUNC(xswap) (int *, double *, int *, double *, int *);
-
-float  BLASFUNC(sasum) (int *, float  *, int *);
-float  BLASFUNC(scasum)(int *, float  *, int *);
-double BLASFUNC(dasum) (int *, double *, int *);
-double BLASFUNC(qasum) (int *, double *, int *);
-double BLASFUNC(dzasum)(int *, double *, int *);
-double BLASFUNC(qxasum)(int *, double *, int *);
-
-int    BLASFUNC(isamax)(int *, float  *, int *);
-int    BLASFUNC(idamax)(int *, double *, int *);
-int    BLASFUNC(iqamax)(int *, double *, int *);
-int    BLASFUNC(icamax)(int *, float  *, int *);
-int    BLASFUNC(izamax)(int *, double *, int *);
-int    BLASFUNC(ixamax)(int *, double *, int *);
-
-int    BLASFUNC(ismax) (int *, float  *, int *);
-int    BLASFUNC(idmax) (int *, double *, int *);
-int    BLASFUNC(iqmax) (int *, double *, int *);
-int    BLASFUNC(icmax) (int *, float  *, int *);
-int    BLASFUNC(izmax) (int *, double *, int *);
-int    BLASFUNC(ixmax) (int *, double *, int *);
-
-int    BLASFUNC(isamin)(int *, float  *, int *);
-int    BLASFUNC(idamin)(int *, double *, int *);
-int    BLASFUNC(iqamin)(int *, double *, int *);
-int    BLASFUNC(icamin)(int *, float  *, int *);
-int    BLASFUNC(izamin)(int *, double *, int *);
-int    BLASFUNC(ixamin)(int *, double *, int *);
-
-int    BLASFUNC(ismin)(int *, float  *, int *);
-int    BLASFUNC(idmin)(int *, double *, int *);
-int    BLASFUNC(iqmin)(int *, double *, int *);
-int    BLASFUNC(icmin)(int *, float  *, int *);
-int    BLASFUNC(izmin)(int *, double *, int *);
-int    BLASFUNC(ixmin)(int *, double *, int *);
-
-float  BLASFUNC(samax) (int *, float  *, int *);
-double BLASFUNC(damax) (int *, double *, int *);
-double BLASFUNC(qamax) (int *, double *, int *);
-float  BLASFUNC(scamax)(int *, float  *, int *);
-double BLASFUNC(dzamax)(int *, double *, int *);
-double BLASFUNC(qxamax)(int *, double *, int *);
-
-float  BLASFUNC(samin) (int *, float  *, int *);
-double BLASFUNC(damin) (int *, double *, int *);
-double BLASFUNC(qamin) (int *, double *, int *);
-float  BLASFUNC(scamin)(int *, float  *, int *);
-double BLASFUNC(dzamin)(int *, double *, int *);
-double BLASFUNC(qxamin)(int *, double *, int *);
-
-float  BLASFUNC(smax)  (int *, float  *, int *);
-double BLASFUNC(dmax)  (int *, double *, int *);
-double BLASFUNC(qmax)  (int *, double *, int *);
-float  BLASFUNC(scmax) (int *, float  *, int *);
-double BLASFUNC(dzmax) (int *, double *, int *);
-double BLASFUNC(qxmax) (int *, double *, int *);
-
-float  BLASFUNC(smin)  (int *, float  *, int *);
-double BLASFUNC(dmin)  (int *, double *, int *);
-double BLASFUNC(qmin)  (int *, double *, int *);
-float  BLASFUNC(scmin) (int *, float  *, int *);
-double BLASFUNC(dzmin) (int *, double *, int *);
-double BLASFUNC(qxmin) (int *, double *, int *);
-
-int    BLASFUNC(sscal) (int *,  float  *, float  *, int *);
-int    BLASFUNC(dscal) (int *,  double *, double *, int *);
-int    BLASFUNC(qscal) (int *,  double *, double *, int *);
-int    BLASFUNC(cscal) (int *,  float  *, float  *, int *);
-int    BLASFUNC(zscal) (int *,  double *, double *, int *);
-int    BLASFUNC(xscal) (int *,  double *, double *, int *);
-int    BLASFUNC(csscal)(int *,  float  *, float  *, int *);
-int    BLASFUNC(zdscal)(int *,  double *, double *, int *);
-int    BLASFUNC(xqscal)(int *,  double *, double *, int *);
-
-float  BLASFUNC(snrm2) (int *, float  *, int *);
-float  BLASFUNC(scnrm2)(int *, float  *, int *);
-
-double BLASFUNC(dnrm2) (int *, double *, int *);
-double BLASFUNC(qnrm2) (int *, double *, int *);
-double BLASFUNC(dznrm2)(int *, double *, int *);
-double BLASFUNC(qxnrm2)(int *, double *, int *);
-
-int    BLASFUNC(srot)  (int *, float  *, int *, float  *, int *, float  *, float  *);
-int    BLASFUNC(drot)  (int *, double *, int *, double *, int *, double *, double *);
-int    BLASFUNC(qrot)  (int *, double *, int *, double *, int *, double *, double *);
-int    BLASFUNC(csrot) (int *, float  *, int *, float  *, int *, float  *, float  *);
-int    BLASFUNC(zdrot) (int *, double *, int *, double *, int *, double *, double *);
-int    BLASFUNC(xqrot) (int *, double *, int *, double *, int *, double *, double *);
-
-int    BLASFUNC(srotg) (float  *, float  *, float  *, float  *);
-int    BLASFUNC(drotg) (double *, double *, double *, double *);
-int    BLASFUNC(qrotg) (double *, double *, double *, double *);
-int    BLASFUNC(crotg) (float  *, float  *, float  *, float  *);
-int    BLASFUNC(zrotg) (double *, double *, double *, double *);
-int    BLASFUNC(xrotg) (double *, double *, double *, double *);
-
-int    BLASFUNC(srotmg)(float  *, float  *, float  *, float  *, float  *);
-int    BLASFUNC(drotmg)(double *, double *, double *, double *, double *);
-
-int    BLASFUNC(srotm) (int *, float  *, int *, float  *, int *, float  *);
-int    BLASFUNC(drotm) (int *, double *, int *, double *, int *, double *);
-int    BLASFUNC(qrotm) (int *, double *, int *, double *, int *, double *);
+int BLASFUNC(saxpy)(const int *, const float  *, const float  *, const int *, float  *, const int *);
+int BLASFUNC(daxpy)(const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(caxpy)(const int *, const float  *, const float  *, const int *, float  *, const int *);
+int BLASFUNC(zaxpy)(const int *, const double *, const double *, const int *, double *, const int *);
 
 /* Level 2 routines */
 
-int BLASFUNC(sger)(int *,    int *, float *,  float *, int *,
-		   float *,  int *, float *,  int *);
-int BLASFUNC(dger)(int *,    int *, double *, double *, int *,
-		   double *, int *, double *, int *);
-int BLASFUNC(qger)(int *,    int *, double *, double *, int *,
-		   double *, int *, double *, int *);
-int BLASFUNC(cgeru)(int *,    int *, float *,  float *, int *,
-		    float *,  int *, float *,  int *);
-int BLASFUNC(cgerc)(int *,    int *, float *,  float *, int *,
-		    float *,  int *, float *,  int *);
-int BLASFUNC(zgeru)(int *,    int *, double *, double *, int *,
-		    double *, int *, double *, int *);
-int BLASFUNC(zgerc)(int *,    int *, double *, double *, int *,
-		    double *, int *, double *, int *);
-int BLASFUNC(xgeru)(int *,    int *, double *, double *, int *,
-		    double *, int *, double *, int *);
-int BLASFUNC(xgerc)(int *,    int *, double *, double *, int *,
-		    double *, int *, double *, int *);
-
 int BLASFUNC(sgemv)(const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
 int BLASFUNC(dgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(qgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
 int BLASFUNC(cgemv)(const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
 int BLASFUNC(zgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(xgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
 
-int BLASFUNC(strsv) (const char *, const char *, const char *, const int *, const float  *, const int *, float  *, const int *);
-int BLASFUNC(dtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
-int BLASFUNC(qtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
-int BLASFUNC(ctrsv) (const char *, const char *, const char *, const int *, const float  *, const int *, float  *, const int *);
-int BLASFUNC(ztrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
-int BLASFUNC(xtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(strmv)(const char *, const char *, const char *, const int *, const float  *, const int *, float  *, const int *);
+int BLASFUNC(dtrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(ctrmv)(const char *, const char *, const char *, const int *, const float  *, const int *, float  *, const int *);
+int BLASFUNC(ztrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
 
-int BLASFUNC(stpsv) (char *, char *, char *, int *, float  *, float  *, int *);
-int BLASFUNC(dtpsv) (char *, char *, char *, int *, double *, double *, int *);
-int BLASFUNC(qtpsv) (char *, char *, char *, int *, double *, double *, int *);
-int BLASFUNC(ctpsv) (char *, char *, char *, int *, float  *, float  *, int *);
-int BLASFUNC(ztpsv) (char *, char *, char *, int *, double *, double *, int *);
-int BLASFUNC(xtpsv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(ssymv)(const char *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(dsymv)(const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
 
-int BLASFUNC(strmv) (const char *, const char *, const char *, const int *, const float  *, const int *, float  *, const int *);
-int BLASFUNC(dtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
-int BLASFUNC(qtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
-int BLASFUNC(ctrmv) (const char *, const char *, const char *, const int *, const float  *, const int *, float  *, const int *);
-int BLASFUNC(ztrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
-int BLASFUNC(xtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
-
-int BLASFUNC(stpmv) (char *, char *, char *, int *, float  *, float  *, int *);
-int BLASFUNC(dtpmv) (char *, char *, char *, int *, double *, double *, int *);
-int BLASFUNC(qtpmv) (char *, char *, char *, int *, double *, double *, int *);
-int BLASFUNC(ctpmv) (char *, char *, char *, int *, float  *, float  *, int *);
-int BLASFUNC(ztpmv) (char *, char *, char *, int *, double *, double *, int *);
-int BLASFUNC(xtpmv) (char *, char *, char *, int *, double *, double *, int *);
-
-int BLASFUNC(stbmv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);
-int BLASFUNC(dtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
-int BLASFUNC(qtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
-int BLASFUNC(ctbmv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);
-int BLASFUNC(ztbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
-int BLASFUNC(xtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
-
-int BLASFUNC(stbsv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);
-int BLASFUNC(dtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
-int BLASFUNC(qtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
-int BLASFUNC(ctbsv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);
-int BLASFUNC(ztbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
-int BLASFUNC(xtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
-
-int BLASFUNC(ssymv) (const char *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
-int BLASFUNC(dsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(qsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-
-int BLASFUNC(sspmv) (char *, int *, float  *, float *,
-		     float  *, int *, float *, float *, int *);
-int BLASFUNC(dspmv) (char *, int *, double  *, double *,
-		     double  *, int *, double *, double *, int *);
-int BLASFUNC(qspmv) (char *, int *, double  *, double *,
-		     double  *, int *, double *, double *, int *);
-
-int BLASFUNC(ssyr) (const char *, const int *, const float   *, const float  *, const int *, float  *, const int *);
-int BLASFUNC(dsyr) (const char *, const int *, const double  *, const double *, const int *, double *, const int *);
-int BLASFUNC(qsyr) (const char *, const int *, const double  *, const double *, const int *, double *, const int *);
-
-int BLASFUNC(ssyr2) (const char *, const int *, const float   *, const float  *, const int *, const float  *, const int *, float  *, const int *);
-int BLASFUNC(dsyr2) (const char *, const int *, const double  *, const double *, const int *, const double *, const int *, double *, const int *);
-int BLASFUNC(qsyr2) (const char *, const int *, const double  *, const double *, const int *, const double *, const int *, double *, const int *);
-int BLASFUNC(csyr2) (const char *, const int *, const float   *, const float  *, const int *, const float  *, const int *, float  *, const int *);
-int BLASFUNC(zsyr2) (const char *, const int *, const double  *, const double *, const int *, const double *, const int *, double *, const int *);
-int BLASFUNC(xsyr2) (const char *, const int *, const double  *, const double *, const int *, const double *, const int *, double *, const int *);
-
-int BLASFUNC(sspr) (char *, int *, float   *, float  *, int *,
-		    float  *);
-int BLASFUNC(dspr) (char *, int *, double  *, double *, int *,
-		    double *);
-int BLASFUNC(qspr) (char *, int *, double  *, double *, int *,
-		    double *);
-
-int BLASFUNC(sspr2) (char *, int *, float   *,
-		     float  *, int *, float  *, int *, float  *);
-int BLASFUNC(dspr2) (char *, int *, double  *,
-		     double *, int *, double *, int *, double *);
-int BLASFUNC(qspr2) (char *, int *, double  *,
-		     double *, int *, double *, int *, double *);
-int BLASFUNC(cspr2) (char *, int *, float   *,
-		     float  *, int *, float  *, int *, float  *);
-int BLASFUNC(zspr2) (char *, int *, double  *,
-		     double *, int *, double *, int *, double *);
-int BLASFUNC(xspr2) (char *, int *, double  *,
-		     double *, int *, double *, int *, double *);
-
-int BLASFUNC(cher) (char *, int *, float   *, float  *, int *,
-		    float  *, int *);
-int BLASFUNC(zher) (char *, int *, double  *, double *, int *,
-		    double *, int *);
-int BLASFUNC(xher) (char *, int *, double  *, double *, int *,
-		    double *, int *);
-
-int BLASFUNC(chpr) (char *, int *, float   *, float  *, int *, float  *);
-int BLASFUNC(zhpr) (char *, int *, double  *, double *, int *, double *);
-int BLASFUNC(xhpr) (char *, int *, double  *, double *, int *, double *);
-
-int BLASFUNC(cher2) (char *, int *, float   *,
-		     float  *, int *, float  *, int *, float  *, int *);
-int BLASFUNC(zher2) (char *, int *, double  *,
-		     double *, int *, double *, int *, double *, int *);
-int BLASFUNC(xher2) (char *, int *, double  *,
-		     double *, int *, double *, int *, double *, int *);
-
-int BLASFUNC(chpr2) (char *, int *, float   *,
-		     float  *, int *, float  *, int *, float  *);
-int BLASFUNC(zhpr2) (char *, int *, double  *,
-		     double *, int *, double *, int *, double *);
-int BLASFUNC(xhpr2) (char *, int *, double  *,
-		     double *, int *, double *, int *, double *);
-
-int BLASFUNC(chemv) (const char *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
-int BLASFUNC(zhemv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(xhemv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-
-int BLASFUNC(chpmv) (char *, int *, float  *, float *,
-		     float  *, int *, float *, float *, int *);
-int BLASFUNC(zhpmv) (char *, int *, double  *, double *,
-		     double  *, int *, double *, double *, int *);
-int BLASFUNC(xhpmv) (char *, int *, double  *, double *,
-		     double  *, int *, double *, double *, int *);
-
-int BLASFUNC(snorm)(char *, int *, int *, float  *, int *);
-int BLASFUNC(dnorm)(char *, int *, int *, double *, int *);
-int BLASFUNC(cnorm)(char *, int *, int *, float  *, int *);
-int BLASFUNC(znorm)(char *, int *, int *, double *, int *);
-
-int BLASFUNC(sgbmv)(char *, int *, int *, int *, int *, float  *, float  *, int *,
-		    float  *, int *, float  *, float  *, int *);
-int BLASFUNC(dgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
-		    double *, int *, double *, double *, int *);
-int BLASFUNC(qgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
-		    double *, int *, double *, double *, int *);
-int BLASFUNC(cgbmv)(char *, int *, int *, int *, int *, float  *, float  *, int *,
-		    float  *, int *, float  *, float  *, int *);
-int BLASFUNC(zgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
-		    double *, int *, double *, double *, int *);
-int BLASFUNC(xgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
-		    double *, int *, double *, double *, int *);
-
-int BLASFUNC(ssbmv)(char *, int *, int *, float  *, float  *, int *,
-		    float  *, int *, float  *, float  *, int *);
-int BLASFUNC(dsbmv)(char *, int *, int *, double *, double *, int *,
-		    double *, int *, double *, double *, int *);
-int BLASFUNC(qsbmv)(char *, int *, int *, double *, double *, int *,
-		    double *, int *, double *, double *, int *);
-int BLASFUNC(csbmv)(char *, int *, int *, float  *, float  *, int *,
-		    float  *, int *, float  *, float  *, int *);
-int BLASFUNC(zsbmv)(char *, int *, int *, double *, double *, int *,
-		    double *, int *, double *, double *, int *);
-int BLASFUNC(xsbmv)(char *, int *, int *, double *, double *, int *,
-		    double *, int *, double *, double *, int *);
-
-int BLASFUNC(chbmv)(char *, int *, int *, float  *, float  *, int *,
-		    float  *, int *, float  *, float  *, int *);
-int BLASFUNC(zhbmv)(char *, int *, int *, double *, double *, int *,
-		    double *, int *, double *, double *, int *);
-int BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *,
-		    double *, int *, double *, double *, int *);
+int BLASFUNC(chemv)(const char *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zhemv)(const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
 
 /* Level 3 routines */
 
 int BLASFUNC(sgemm)(const char *, const char *, const int *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
 int BLASFUNC(dgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(qgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
 int BLASFUNC(cgemm)(const char *, const char *, const int *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
 int BLASFUNC(zgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(xgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-
-int BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *,
-	   float  *, int *, float  *, int *, float  *, float  *, int *);
-int BLASFUNC(zgemm3m)(char *, char *, int *, int *, int *, double *,
-	   double *, int *, double *, int *, double *, double *, int *);
-int BLASFUNC(xgemm3m)(char *, char *, int *, int *, int *, double *,
-	   double *, int *, double *, int *, double *, double *, int *);
-
-int BLASFUNC(sge2mm)(char *, char *, char *, int *, int *,
-		     float *, float  *, int *, float  *, int *,
-		     float *, float  *, int *);
-int BLASFUNC(dge2mm)(char *, char *, char *, int *, int *,
-		     double *, double  *, int *, double  *, int *,
-		     double *, double  *, int *);
-int BLASFUNC(cge2mm)(char *, char *, char *, int *, int *,
-		     float *, float  *, int *, float  *, int *,
-		     float *, float  *, int *);
-int BLASFUNC(zge2mm)(char *, char *, char *, int *, int *,
-		     double *, double  *, int *, double  *, int *,
-		     double *, double  *, int *);
 
 int BLASFUNC(strsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,  const float *,  const int *, float *,  const int *);
 int BLASFUNC(dtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
-int BLASFUNC(qtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
 int BLASFUNC(ctrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,  const float *,  const int *, float *,  const int *);
 int BLASFUNC(ztrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
-int BLASFUNC(xtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
 
 int BLASFUNC(strmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,  const float *,  const int *, float *,  const int *);
 int BLASFUNC(dtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
-int BLASFUNC(qtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
 int BLASFUNC(ctrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,  const float *,  const int *, float *,  const int *);
 int BLASFUNC(ztrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
-int BLASFUNC(xtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
 
 int BLASFUNC(ssymm)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
 int BLASFUNC(dsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(qsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(csymm)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
-int BLASFUNC(zsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(xsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-
-int BLASFUNC(csymm3m)(char *, char *, int *, int *, float  *, float  *, int *, float  *, int *, float  *, float  *, int *);
-int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
-int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
 
 int BLASFUNC(ssyrk)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, float  *, const int *);
 int BLASFUNC(dsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(qsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(csyrk)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, float  *, const int *);
-int BLASFUNC(zsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(xsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
-
-int BLASFUNC(ssyr2k)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float *, const int *, const float  *, float  *, const int *);
-int BLASFUNC(dsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
-int BLASFUNC(qsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
-int BLASFUNC(csyr2k)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float *, const int *, const float  *, float  *, const int *);
-int BLASFUNC(zsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
-int BLASFUNC(xsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
 
 int BLASFUNC(chemm)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
 int BLASFUNC(zhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(xhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-
-int BLASFUNC(chemm3m)(char *, char *, int *, int *, float  *, float  *, int *,
-	   float  *, int *, float  *, float  *, int *);
-int BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *,
-	   double *, int *, double *, double *, int *);
-int BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *,
-	   double *, int *, double *, double *, int *);
 
 int BLASFUNC(cherk)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, float  *, const int *);
 int BLASFUNC(zherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(xherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
 
-int BLASFUNC(cher2k)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
-int BLASFUNC(zher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(xher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(cher2m)(const char *, const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float *, const int *, const float  *, float  *, const int *);
-int BLASFUNC(zher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
-int BLASFUNC(xher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+#undef BLASFUNC
 
-
-#ifdef __cplusplus
 }
-#endif
 
 #endif
diff --git a/blas/blas.h b/blas/blas.h
new file mode 100644
index 0000000..25215b1
--- /dev/null
+++ b/blas/blas.h
@@ -0,0 +1,440 @@
+#ifndef BLAS_H
+#define BLAS_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#define BLASFUNC(FUNC) FUNC##_
+
+#ifdef __WIN64__
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+int    BLASFUNC(xerbla)(const char *, int *info, int);
+
+float  BLASFUNC(sdot)  (int *, float  *, int *, float  *, int *);
+float  BLASFUNC(sdsdot)(int *, float  *,        float  *, int *, float  *, int *);
+
+double BLASFUNC(dsdot) (int *, float  *, int *, float  *, int *);
+double BLASFUNC(ddot)  (int *, double *, int *, double *, int *);
+double BLASFUNC(qdot)  (int *, double *, int *, double *, int *);
+
+int  BLASFUNC(cdotuw)  (int *, float  *, int *, float  *, int *, float*);
+int  BLASFUNC(cdotcw)  (int *, float  *, int *, float  *, int *, float*);
+int  BLASFUNC(zdotuw)  (int *, double  *, int *, double  *, int *, double*);
+int  BLASFUNC(zdotcw)  (int *, double  *, int *, double  *, int *, double*);
+
+int    BLASFUNC(saxpy) (const int *, const float  *, const float  *, const int *, float  *, const int *);
+int    BLASFUNC(daxpy) (const int *, const double *, const double *, const int *, double *, const int *);
+int    BLASFUNC(qaxpy) (const int *, const double *, const double *, const int *, double *, const int *);
+int    BLASFUNC(caxpy) (const int *, const float  *, const float  *, const int *, float  *, const int *);
+int    BLASFUNC(zaxpy) (const int *, const double *, const double *, const int *, double *, const int *);
+int    BLASFUNC(xaxpy) (const int *, const double *, const double *, const int *, double *, const int *);
+int    BLASFUNC(caxpyc)(const int *, const float  *, const float  *, const int *, float  *, const int *);
+int    BLASFUNC(zaxpyc)(const int *, const double *, const double *, const int *, double *, const int *);
+int    BLASFUNC(xaxpyc)(const int *, const double *, const double *, const int *, double *, const int *);
+
+int    BLASFUNC(scopy) (int *, float  *, int *, float  *, int *);
+int    BLASFUNC(dcopy) (int *, double *, int *, double *, int *);
+int    BLASFUNC(qcopy) (int *, double *, int *, double *, int *);
+int    BLASFUNC(ccopy) (int *, float  *, int *, float  *, int *);
+int    BLASFUNC(zcopy) (int *, double *, int *, double *, int *);
+int    BLASFUNC(xcopy) (int *, double *, int *, double *, int *);
+
+int    BLASFUNC(sswap) (int *, float  *, int *, float  *, int *);
+int    BLASFUNC(dswap) (int *, double *, int *, double *, int *);
+int    BLASFUNC(qswap) (int *, double *, int *, double *, int *);
+int    BLASFUNC(cswap) (int *, float  *, int *, float  *, int *);
+int    BLASFUNC(zswap) (int *, double *, int *, double *, int *);
+int    BLASFUNC(xswap) (int *, double *, int *, double *, int *);
+
+float  BLASFUNC(sasum) (int *, float  *, int *);
+float  BLASFUNC(scasum)(int *, float  *, int *);
+double BLASFUNC(dasum) (int *, double *, int *);
+double BLASFUNC(qasum) (int *, double *, int *);
+double BLASFUNC(dzasum)(int *, double *, int *);
+double BLASFUNC(qxasum)(int *, double *, int *);
+
+int    BLASFUNC(isamax)(int *, float  *, int *);
+int    BLASFUNC(idamax)(int *, double *, int *);
+int    BLASFUNC(iqamax)(int *, double *, int *);
+int    BLASFUNC(icamax)(int *, float  *, int *);
+int    BLASFUNC(izamax)(int *, double *, int *);
+int    BLASFUNC(ixamax)(int *, double *, int *);
+
+int    BLASFUNC(ismax) (int *, float  *, int *);
+int    BLASFUNC(idmax) (int *, double *, int *);
+int    BLASFUNC(iqmax) (int *, double *, int *);
+int    BLASFUNC(icmax) (int *, float  *, int *);
+int    BLASFUNC(izmax) (int *, double *, int *);
+int    BLASFUNC(ixmax) (int *, double *, int *);
+
+int    BLASFUNC(isamin)(int *, float  *, int *);
+int    BLASFUNC(idamin)(int *, double *, int *);
+int    BLASFUNC(iqamin)(int *, double *, int *);
+int    BLASFUNC(icamin)(int *, float  *, int *);
+int    BLASFUNC(izamin)(int *, double *, int *);
+int    BLASFUNC(ixamin)(int *, double *, int *);
+
+int    BLASFUNC(ismin)(int *, float  *, int *);
+int    BLASFUNC(idmin)(int *, double *, int *);
+int    BLASFUNC(iqmin)(int *, double *, int *);
+int    BLASFUNC(icmin)(int *, float  *, int *);
+int    BLASFUNC(izmin)(int *, double *, int *);
+int    BLASFUNC(ixmin)(int *, double *, int *);
+
+float  BLASFUNC(samax) (int *, float  *, int *);
+double BLASFUNC(damax) (int *, double *, int *);
+double BLASFUNC(qamax) (int *, double *, int *);
+float  BLASFUNC(scamax)(int *, float  *, int *);
+double BLASFUNC(dzamax)(int *, double *, int *);
+double BLASFUNC(qxamax)(int *, double *, int *);
+
+float  BLASFUNC(samin) (int *, float  *, int *);
+double BLASFUNC(damin) (int *, double *, int *);
+double BLASFUNC(qamin) (int *, double *, int *);
+float  BLASFUNC(scamin)(int *, float  *, int *);
+double BLASFUNC(dzamin)(int *, double *, int *);
+double BLASFUNC(qxamin)(int *, double *, int *);
+
+float  BLASFUNC(smax)  (int *, float  *, int *);
+double BLASFUNC(dmax)  (int *, double *, int *);
+double BLASFUNC(qmax)  (int *, double *, int *);
+float  BLASFUNC(scmax) (int *, float  *, int *);
+double BLASFUNC(dzmax) (int *, double *, int *);
+double BLASFUNC(qxmax) (int *, double *, int *);
+
+float  BLASFUNC(smin)  (int *, float  *, int *);
+double BLASFUNC(dmin)  (int *, double *, int *);
+double BLASFUNC(qmin)  (int *, double *, int *);
+float  BLASFUNC(scmin) (int *, float  *, int *);
+double BLASFUNC(dzmin) (int *, double *, int *);
+double BLASFUNC(qxmin) (int *, double *, int *);
+
+int    BLASFUNC(sscal) (int *,  float  *, float  *, int *);
+int    BLASFUNC(dscal) (int *,  double *, double *, int *);
+int    BLASFUNC(qscal) (int *,  double *, double *, int *);
+int    BLASFUNC(cscal) (int *,  float  *, float  *, int *);
+int    BLASFUNC(zscal) (int *,  double *, double *, int *);
+int    BLASFUNC(xscal) (int *,  double *, double *, int *);
+int    BLASFUNC(csscal)(int *,  float  *, float  *, int *);
+int    BLASFUNC(zdscal)(int *,  double *, double *, int *);
+int    BLASFUNC(xqscal)(int *,  double *, double *, int *);
+
+float  BLASFUNC(snrm2) (int *, float  *, int *);
+float  BLASFUNC(scnrm2)(int *, float  *, int *);
+
+double BLASFUNC(dnrm2) (int *, double *, int *);
+double BLASFUNC(qnrm2) (int *, double *, int *);
+double BLASFUNC(dznrm2)(int *, double *, int *);
+double BLASFUNC(qxnrm2)(int *, double *, int *);
+
+int    BLASFUNC(srot)  (int *, float  *, int *, float  *, int *, float  *, float  *);
+int    BLASFUNC(drot)  (int *, double *, int *, double *, int *, double *, double *);
+int    BLASFUNC(qrot)  (int *, double *, int *, double *, int *, double *, double *);
+int    BLASFUNC(csrot) (int *, float  *, int *, float  *, int *, float  *, float  *);
+int    BLASFUNC(zdrot) (int *, double *, int *, double *, int *, double *, double *);
+int    BLASFUNC(xqrot) (int *, double *, int *, double *, int *, double *, double *);
+
+int    BLASFUNC(srotg) (float  *, float  *, float  *, float  *);
+int    BLASFUNC(drotg) (double *, double *, double *, double *);
+int    BLASFUNC(qrotg) (double *, double *, double *, double *);
+int    BLASFUNC(crotg) (float  *, float  *, float  *, float  *);
+int    BLASFUNC(zrotg) (double *, double *, double *, double *);
+int    BLASFUNC(xrotg) (double *, double *, double *, double *);
+
+int    BLASFUNC(srotmg)(float  *, float  *, float  *, float  *, float  *);
+int    BLASFUNC(drotmg)(double *, double *, double *, double *, double *);
+
+int    BLASFUNC(srotm) (int *, float  *, int *, float  *, int *, float  *);
+int    BLASFUNC(drotm) (int *, double *, int *, double *, int *, double *);
+int    BLASFUNC(qrotm) (int *, double *, int *, double *, int *, double *);
+
+/* Level 2 routines */
+
+int BLASFUNC(sger)(int *,    int *, float *,  float *, int *,
+		   float *,  int *, float *,  int *);
+int BLASFUNC(dger)(int *,    int *, double *, double *, int *,
+		   double *, int *, double *, int *);
+int BLASFUNC(qger)(int *,    int *, double *, double *, int *,
+		   double *, int *, double *, int *);
+int BLASFUNC(cgeru)(int *,    int *, float *,  float *, int *,
+		    float *,  int *, float *,  int *);
+int BLASFUNC(cgerc)(int *,    int *, float *,  float *, int *,
+		    float *,  int *, float *,  int *);
+int BLASFUNC(zgeru)(int *,    int *, double *, double *, int *,
+		    double *, int *, double *, int *);
+int BLASFUNC(zgerc)(int *,    int *, double *, double *, int *,
+		    double *, int *, double *, int *);
+int BLASFUNC(xgeru)(int *,    int *, double *, double *, int *,
+		    double *, int *, double *, int *);
+int BLASFUNC(xgerc)(int *,    int *, double *, double *, int *,
+		    double *, int *, double *, int *);
+
+int BLASFUNC(sgemv)(const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(dgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(cgemv)(const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(strsv) (const char *, const char *, const char *, const int *, const float  *, const int *, float  *, const int *);
+int BLASFUNC(dtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(qtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(ctrsv) (const char *, const char *, const char *, const int *, const float  *, const int *, float  *, const int *);
+int BLASFUNC(ztrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(xtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+
+int BLASFUNC(stpsv) (char *, char *, char *, int *, float  *, float  *, int *);
+int BLASFUNC(dtpsv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(qtpsv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(ctpsv) (char *, char *, char *, int *, float  *, float  *, int *);
+int BLASFUNC(ztpsv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(xtpsv) (char *, char *, char *, int *, double *, double *, int *);
+
+int BLASFUNC(strmv) (const char *, const char *, const char *, const int *, const float  *, const int *, float  *, const int *);
+int BLASFUNC(dtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(qtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(ctrmv) (const char *, const char *, const char *, const int *, const float  *, const int *, float  *, const int *);
+int BLASFUNC(ztrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(xtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+
+int BLASFUNC(stpmv) (char *, char *, char *, int *, float  *, float  *, int *);
+int BLASFUNC(dtpmv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(qtpmv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(ctpmv) (char *, char *, char *, int *, float  *, float  *, int *);
+int BLASFUNC(ztpmv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(xtpmv) (char *, char *, char *, int *, double *, double *, int *);
+
+int BLASFUNC(stbmv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);
+int BLASFUNC(dtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+int BLASFUNC(qtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+int BLASFUNC(ctbmv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);
+int BLASFUNC(ztbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+int BLASFUNC(xtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+
+int BLASFUNC(stbsv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);
+int BLASFUNC(dtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+int BLASFUNC(qtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+int BLASFUNC(ctbsv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);
+int BLASFUNC(ztbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+int BLASFUNC(xtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+
+int BLASFUNC(ssymv) (const char *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(dsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(sspmv) (char *, int *, float  *, float *,
+		     float  *, int *, float *, float *, int *);
+int BLASFUNC(dspmv) (char *, int *, double  *, double *,
+		     double  *, int *, double *, double *, int *);
+int BLASFUNC(qspmv) (char *, int *, double  *, double *,
+		     double  *, int *, double *, double *, int *);
+
+int BLASFUNC(ssyr) (const char *, const int *, const float   *, const float  *, const int *, float  *, const int *);
+int BLASFUNC(dsyr) (const char *, const int *, const double  *, const double *, const int *, double *, const int *);
+int BLASFUNC(qsyr) (const char *, const int *, const double  *, const double *, const int *, double *, const int *);
+
+int BLASFUNC(ssyr2) (const char *, const int *, const float   *, const float  *, const int *, const float  *, const int *, float  *, const int *);
+int BLASFUNC(dsyr2) (const char *, const int *, const double  *, const double *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(qsyr2) (const char *, const int *, const double  *, const double *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(csyr2) (const char *, const int *, const float   *, const float  *, const int *, const float  *, const int *, float  *, const int *);
+int BLASFUNC(zsyr2) (const char *, const int *, const double  *, const double *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(xsyr2) (const char *, const int *, const double  *, const double *, const int *, const double *, const int *, double *, const int *);
+
+int BLASFUNC(sspr) (char *, int *, float   *, float  *, int *,
+		    float  *);
+int BLASFUNC(dspr) (char *, int *, double  *, double *, int *,
+		    double *);
+int BLASFUNC(qspr) (char *, int *, double  *, double *, int *,
+		    double *);
+
+int BLASFUNC(sspr2) (char *, int *, float   *,
+		     float  *, int *, float  *, int *, float  *);
+int BLASFUNC(dspr2) (char *, int *, double  *,
+		     double *, int *, double *, int *, double *);
+int BLASFUNC(qspr2) (char *, int *, double  *,
+		     double *, int *, double *, int *, double *);
+int BLASFUNC(cspr2) (char *, int *, float   *,
+		     float  *, int *, float  *, int *, float  *);
+int BLASFUNC(zspr2) (char *, int *, double  *,
+		     double *, int *, double *, int *, double *);
+int BLASFUNC(xspr2) (char *, int *, double  *,
+		     double *, int *, double *, int *, double *);
+
+int BLASFUNC(cher) (char *, int *, float   *, float  *, int *,
+		    float  *, int *);
+int BLASFUNC(zher) (char *, int *, double  *, double *, int *,
+		    double *, int *);
+int BLASFUNC(xher) (char *, int *, double  *, double *, int *,
+		    double *, int *);
+
+int BLASFUNC(chpr) (char *, int *, float   *, float  *, int *, float  *);
+int BLASFUNC(zhpr) (char *, int *, double  *, double *, int *, double *);
+int BLASFUNC(xhpr) (char *, int *, double  *, double *, int *, double *);
+
+int BLASFUNC(cher2) (char *, int *, float   *,
+		     float  *, int *, float  *, int *, float  *, int *);
+int BLASFUNC(zher2) (char *, int *, double  *,
+		     double *, int *, double *, int *, double *, int *);
+int BLASFUNC(xher2) (char *, int *, double  *,
+		     double *, int *, double *, int *, double *, int *);
+
+int BLASFUNC(chpr2) (char *, int *, float   *,
+		     float  *, int *, float  *, int *, float  *);
+int BLASFUNC(zhpr2) (char *, int *, double  *,
+		     double *, int *, double *, int *, double *);
+int BLASFUNC(xhpr2) (char *, int *, double  *,
+		     double *, int *, double *, int *, double *);
+
+int BLASFUNC(chemv) (const char *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zhemv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xhemv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(chpmv) (char *, int *, float  *, float *,
+		     float  *, int *, float *, float *, int *);
+int BLASFUNC(zhpmv) (char *, int *, double  *, double *,
+		     double  *, int *, double *, double *, int *);
+int BLASFUNC(xhpmv) (char *, int *, double  *, double *,
+		     double  *, int *, double *, double *, int *);
+
+int BLASFUNC(snorm)(char *, int *, int *, float  *, int *);
+int BLASFUNC(dnorm)(char *, int *, int *, double *, int *);
+int BLASFUNC(cnorm)(char *, int *, int *, float  *, int *);
+int BLASFUNC(znorm)(char *, int *, int *, double *, int *);
+
+int BLASFUNC(sgbmv)(char *, int *, int *, int *, int *, float  *, float  *, int *,
+		    float  *, int *, float  *, float  *, int *);
+int BLASFUNC(dgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+int BLASFUNC(qgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+int BLASFUNC(cgbmv)(char *, int *, int *, int *, int *, float  *, float  *, int *,
+		    float  *, int *, float  *, float  *, int *);
+int BLASFUNC(zgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+int BLASFUNC(xgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+
+int BLASFUNC(ssbmv)(char *, int *, int *, float  *, float  *, int *,
+		    float  *, int *, float  *, float  *, int *);
+int BLASFUNC(dsbmv)(char *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+int BLASFUNC(qsbmv)(char *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+int BLASFUNC(csbmv)(char *, int *, int *, float  *, float  *, int *,
+		    float  *, int *, float  *, float  *, int *);
+int BLASFUNC(zsbmv)(char *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+int BLASFUNC(xsbmv)(char *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+
+int BLASFUNC(chbmv)(char *, int *, int *, float  *, float  *, int *,
+		    float  *, int *, float  *, float  *, int *);
+int BLASFUNC(zhbmv)(char *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+int BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+
+/* Level 3 routines */
+
+int BLASFUNC(sgemm)(const char *, const char *, const int *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(dgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(cgemm)(const char *, const char *, const int *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *,
+	   float  *, int *, float  *, int *, float  *, float  *, int *);
+int BLASFUNC(zgemm3m)(char *, char *, int *, int *, int *, double *,
+	   double *, int *, double *, int *, double *, double *, int *);
+int BLASFUNC(xgemm3m)(char *, char *, int *, int *, int *, double *,
+	   double *, int *, double *, int *, double *, double *, int *);
+
+int BLASFUNC(sge2mm)(char *, char *, char *, int *, int *,
+		     float *, float  *, int *, float  *, int *,
+		     float *, float  *, int *);
+int BLASFUNC(dge2mm)(char *, char *, char *, int *, int *,
+		     double *, double  *, int *, double  *, int *,
+		     double *, double  *, int *);
+int BLASFUNC(cge2mm)(char *, char *, char *, int *, int *,
+		     float *, float  *, int *, float  *, int *,
+		     float *, float  *, int *);
+int BLASFUNC(zge2mm)(char *, char *, char *, int *, int *,
+		     double *, double  *, int *, double  *, int *,
+		     double *, double  *, int *);
+
+int BLASFUNC(strsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,  const float *,  const int *, float *,  const int *);
+int BLASFUNC(dtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(qtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(ctrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,  const float *,  const int *, float *,  const int *);
+int BLASFUNC(ztrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(xtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+
+int BLASFUNC(strmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,  const float *,  const int *, float *,  const int *);
+int BLASFUNC(dtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(qtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(ctrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,  const float *,  const int *, float *,  const int *);
+int BLASFUNC(ztrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(xtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+
+int BLASFUNC(ssymm)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(dsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(csymm)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(csymm3m)(char *, char *, int *, int *, float  *, float  *, int *, float  *, int *, float  *, float  *, int *);
+int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
+int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
+
+int BLASFUNC(ssyrk)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(dsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(csyrk)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(ssyr2k)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(dsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+int BLASFUNC(qsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+int BLASFUNC(csyr2k)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+int BLASFUNC(xsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+
+int BLASFUNC(chemm)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(chemm3m)(char *, char *, int *, int *, float  *, float  *, int *,
+	   float  *, int *, float  *, float  *, int *);
+int BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *,
+	   double *, int *, double *, double *, int *);
+int BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *,
+	   double *, int *, double *, double *, int *);
+
+int BLASFUNC(cherk)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(cher2k)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(cher2m)(const char *, const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+int BLASFUNC(xher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/blas/common.h b/blas/common.h
index a938cb1..0364602 100644
--- a/blas/common.h
+++ b/blas/common.h
@@ -27,7 +27,7 @@
 #error the token SCALAR must be defined to compile this file
 #endif
 
-#include "../Eigen/src/misc/blas.h"
+#include "blas.h"
 
 #define NOTR    0
 #define TR      1
diff --git a/doc/CustomizingEigen_Plugins.dox b/doc/CustomizingEigen_Plugins.dox
index d88f240..9ab0200 100644
--- a/doc/CustomizingEigen_Plugins.dox
+++ b/doc/CustomizingEigen_Plugins.dox
@@ -59,7 +59,7 @@
 { return CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const ConstantReturnType, Derived>(Constant(rows(),cols(),scalar), mat.derived()); }
 \endcode
 
-Then one can the following declaration in the config.h or whatever prerequisites header file of his project:
+Then one can add the following declaration in the config.h or whatever prerequisites header file of his project:
 \code
 #define EIGEN_MATRIXBASE_PLUGIN "MatrixBaseAddons.h"
 \endcode
diff --git a/doc/FunctionsTakingEigenTypes.dox b/doc/FunctionsTakingEigenTypes.dox
index 6b4e492..3e74546 100644
--- a/doc/FunctionsTakingEigenTypes.dox
+++ b/doc/FunctionsTakingEigenTypes.dox
@@ -126,7 +126,7 @@
 MatrixXf x,y,z;
 MatrixXf C = cov(x,y+z);
 \endcode
-In this special case, the example is fine and will be working because both parameters are declared as \e const references. The compiler creates a temporary and evaluates the expression x+z into this temporary. Once the function is processed, the temporary is released and the result is assigned to C.
+In this special case, the example is fine and will be working because both parameters are declared as \e const references. The compiler creates a temporary and evaluates the expression y+z into this temporary. Once the function is processed, the temporary is released and the result is assigned to C.
 
 \b Note: Functions taking \e const references to Matrix (or Array) can process expressions at the cost of temporaries.
 
diff --git a/doc/SparseQuickReference.dox b/doc/SparseQuickReference.dox
index b8264a4..68ac2dc 100644
--- a/doc/SparseQuickReference.dox
+++ b/doc/SparseQuickReference.dox
@@ -153,7 +153,7 @@
 \code 
 perm.indices();      // Reference to the vector of indices
 sm1.twistedBy(perm); // Permute rows and columns
-sm2 = sm1 * perm;    // Permute the columns
+sm2 = sm1 * perm;    // Permute the rows
 sm2 = perm * sm1;    // Permute the columns
 \endcode 
 </td>
diff --git a/doc/examples/matrixfree_cg.cpp b/doc/examples/matrixfree_cg.cpp
index 7469938..413c9a3 100644
--- a/doc/examples/matrixfree_cg.cpp
+++ b/doc/examples/matrixfree_cg.cpp
@@ -9,7 +9,7 @@
 
 namespace Eigen {
 namespace internal {
-  // MatrixReplacement looks-like a SparseMatrix, so let's inherits its traits:
+  // MatrixReplacement looks-like a SparseMatrix, so let's inherit its traits:
   template<>
   struct traits<MatrixReplacement> :  public Eigen::internal::traits<Eigen::SparseMatrix<double> >
   {};
diff --git a/Eigen/src/misc/lapack.h b/lapack/lapack.h
similarity index 99%
rename from Eigen/src/misc/lapack.h
rename to lapack/lapack.h
index 249f357..121d233 100644
--- a/Eigen/src/misc/lapack.h
+++ b/lapack/lapack.h
@@ -1,7 +1,7 @@
 #ifndef LAPACK_H
 #define LAPACK_H
 
-#include "blas.h"
+#include "../blas/blas.h"
 
 #ifdef __cplusplus
 extern "C"
diff --git a/lapack/lapack_common.h b/lapack/lapack_common.h
index c872a81..5ca6dd7 100644
--- a/lapack/lapack_common.h
+++ b/lapack/lapack_common.h
@@ -11,7 +11,7 @@
 #define EIGEN_LAPACK_COMMON_H
 
 #include "../blas/common.h"
-#include "../Eigen/src/misc/lapack.h"
+#include "lapack.h"
 
 #define EIGEN_LAPACK_FUNC(FUNC,ARGLIST)               \
   extern "C" { int EIGEN_BLAS_FUNC(FUNC) ARGLIST; }   \
diff --git a/test/dynalloc.cpp b/test/dynalloc.cpp
index cdc10ee..f75ff6c 100644
--- a/test/dynalloc.cpp
+++ b/test/dynalloc.cpp
@@ -26,7 +26,7 @@
   for(int i = 1; i < 1000; i++)
   {
     char *p = (char*)internal::handmade_aligned_malloc(i, alignment);
-    VERIFY(internal::UIntPtr(p)%ALIGNMENT==0);
+    VERIFY(std::uintptr_t(p)%ALIGNMENT==0);
     // if the buffer is wrongly allocated this will give a bad write --> check with valgrind
     for(int j = 0; j < i; j++) p[j]=0;
     internal::handmade_aligned_free(p);
@@ -38,7 +38,7 @@
   for(int i = ALIGNMENT; i < 1000; i++)
   {
     char *p = (char*)internal::aligned_malloc(i);
-    VERIFY(internal::UIntPtr(p)%ALIGNMENT==0);
+    VERIFY(std::uintptr_t(p)%ALIGNMENT==0);
     // if the buffer is wrongly allocated this will give a bad write --> check with valgrind
     for(int j = 0; j < i; j++) p[j]=0;
     internal::aligned_free(p);
@@ -50,7 +50,7 @@
   for(int i = ALIGNMENT; i < 1000; i++)
   {
     float *p = internal::aligned_new<float>(i);
-    VERIFY(internal::UIntPtr(p)%ALIGNMENT==0);
+    VERIFY(std::uintptr_t(p)%ALIGNMENT==0);
     // if the buffer is wrongly allocated this will give a bad write --> check with valgrind
     for(int j = 0; j < i; j++) p[j]=0;
     internal::aligned_delete(p,i);
@@ -62,7 +62,7 @@
   for(int i = ALIGNMENT; i < 400; i++)
   {
     ei_declare_aligned_stack_constructed_variable(float,p,i,0);
-    VERIFY(internal::UIntPtr(p)%ALIGNMENT==0);
+    VERIFY(std::uintptr_t(p)%ALIGNMENT==0);
     // if the buffer is wrongly allocated this will give a bad write --> check with valgrind
     for(int j = 0; j < i; j++) p[j]=0;
   }
@@ -92,7 +92,7 @@
   {
     T* obj = new T;
     VERIFY(T::NeedsToAlign==1);
-    VERIFY(internal::UIntPtr(obj)%ALIGNMENT==0);
+    VERIFY(std::uintptr_t(obj)%ALIGNMENT==0);
     delete obj;
   }
 }
@@ -153,15 +153,15 @@
   }
 
   {
-    MyStruct foo0;  VERIFY(internal::UIntPtr(foo0.avec.data())%ALIGNMENT==0);
-    MyClassA fooA;  VERIFY(internal::UIntPtr(fooA.avec.data())%ALIGNMENT==0);
+    MyStruct foo0;  VERIFY(std::uintptr_t(foo0.avec.data())%ALIGNMENT==0);
+    MyClassA fooA;  VERIFY(std::uintptr_t(fooA.avec.data())%ALIGNMENT==0);
   }
   
   // dynamic allocation, single object
   for (int i=0; i<g_repeat*100; ++i)
   {
-    MyStruct *foo0 = new MyStruct();  VERIFY(internal::UIntPtr(foo0->avec.data())%ALIGNMENT==0);
-    MyClassA *fooA = new MyClassA();  VERIFY(internal::UIntPtr(fooA->avec.data())%ALIGNMENT==0);
+    MyStruct *foo0 = new MyStruct();  VERIFY(std::uintptr_t(foo0->avec.data())%ALIGNMENT==0);
+    MyClassA *fooA = new MyClassA();  VERIFY(std::uintptr_t(fooA->avec.data())%ALIGNMENT==0);
     delete foo0;
     delete fooA;
   }
@@ -170,8 +170,8 @@
   const int N = 10;
   for (int i=0; i<g_repeat*100; ++i)
   {
-    MyStruct *foo0 = new MyStruct[N];  VERIFY(internal::UIntPtr(foo0->avec.data())%ALIGNMENT==0);
-    MyClassA *fooA = new MyClassA[N];  VERIFY(internal::UIntPtr(fooA->avec.data())%ALIGNMENT==0);
+    MyStruct *foo0 = new MyStruct[N];  VERIFY(std::uintptr_t(foo0->avec.data())%ALIGNMENT==0);
+    MyClassA *fooA = new MyClassA[N];  VERIFY(std::uintptr_t(fooA->avec.data())%ALIGNMENT==0);
     delete[] foo0;
     delete[] fooA;
   }
diff --git a/test/evaluators.cpp b/test/evaluators.cpp
index 95bfb45..0f9b6f8 100644
--- a/test/evaluators.cpp
+++ b/test/evaluators.cpp
@@ -512,7 +512,7 @@
     float *destMem = new float[(M*N) + 1];
     // In case of no alignment, avoid division by zero.
     constexpr int alignment = (std::max<int>)(EIGEN_MAX_ALIGN_BYTES, 1);
-    float *dest = (internal::UIntPtr(destMem)%alignment) == 0 ? destMem+1 : destMem;
+    float *dest = (std::uintptr_t(destMem)%alignment) == 0 ? destMem+1 : destMem;
 
     const Matrix<float, Dynamic, Dynamic, RowMajor> a = Matrix<float, Dynamic, Dynamic, RowMajor>::Random(M, K);
     const Matrix<float, Dynamic, Dynamic, RowMajor> b = Matrix<float, Dynamic, Dynamic, RowMajor>::Random(K, N);
diff --git a/test/first_aligned.cpp b/test/first_aligned.cpp
index ed99450..aaa4d01 100644
--- a/test/first_aligned.cpp
+++ b/test/first_aligned.cpp
@@ -41,7 +41,7 @@
   test_first_aligned_helper(array_double+1, 50);
   test_first_aligned_helper(array_double+2, 50);
   
-  double *array_double_plus_4_bytes = (double*)(internal::UIntPtr(array_double)+4);
+  double *array_double_plus_4_bytes = (double*)(std::uintptr_t(array_double)+4);
   test_none_aligned_helper(array_double_plus_4_bytes, 50);
   test_none_aligned_helper(array_double_plus_4_bytes+1, 50);
   
diff --git a/test/mapped_matrix.cpp b/test/mapped_matrix.cpp
index 1dd6959..7270d03 100644
--- a/test/mapped_matrix.cpp
+++ b/test/mapped_matrix.cpp
@@ -22,7 +22,7 @@
   Scalar* array3 = new Scalar[size+1];
   // In case of no alignment, avoid division by zero.
   constexpr int alignment = (std::max<int>)(EIGEN_MAX_ALIGN_BYTES, 1);
-  Scalar* array3unaligned = (internal::UIntPtr(array3)%alignment) == 0 ? array3+1 : array3;
+  Scalar* array3unaligned = (std::uintptr_t(array3)%alignment) == 0 ? array3+1 : array3;
   Scalar  array4[EIGEN_TESTMAP_MAX_SIZE];
 
   Map<VectorType, AlignedMax>(array1, size) = VectorType::Random(size);
@@ -64,7 +64,7 @@
   for(Index i = 0; i < sizep1; i++) array3[i] = Scalar(1);
     // In case of no alignment, avoid division by zero.
   constexpr int alignment = (std::max<int>)(EIGEN_MAX_ALIGN_BYTES, 1);
-  Scalar* array3unaligned = (internal::UIntPtr(array3)%alignment) == 0 ? array3+1 : array3;
+  Scalar* array3unaligned = (std::uintptr_t(array3)%alignment) == 0 ? array3+1 : array3;
   Scalar array4[256];
   if(size<=256)
     for(int i = 0; i < size; i++) array4[i] = Scalar(1);
@@ -129,7 +129,7 @@
   Scalar* array3 = new Scalar[size+1];
     // In case of no alignment, avoid division by zero.
   constexpr int alignment = (std::max<int>)(EIGEN_MAX_ALIGN_BYTES, 1);
-  Scalar* array3unaligned = (internal::UIntPtr(array3)%alignment) == 0 ? array3+1 : array3;
+  Scalar* array3unaligned = (std::uintptr_t(array3)%alignment) == 0 ? array3+1 : array3;
 
   VectorType::MapAligned(array1, size) = VectorType::Random(size);
   VectorType::Map(array2, size) = VectorType::Map(array1, size);
diff --git a/test/mapstride.cpp b/test/mapstride.cpp
index 42ceb0c..107e1e3 100644
--- a/test/mapstride.cpp
+++ b/test/mapstride.cpp
@@ -22,7 +22,7 @@
   Scalar* a_array = internal::aligned_new<Scalar>(arraysize+1);
   Scalar* array = a_array;
   if(Alignment!=Aligned)
-    array = (Scalar*)(internal::IntPtr(a_array) + (internal::packet_traits<Scalar>::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits<Scalar>::Real)));
+    array = (Scalar*)(std::intptr_t(a_array) + (internal::packet_traits<Scalar>::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits<Scalar>::Real)));
 
   {
     Map<VectorType, Alignment, InnerStride<3> > map(array, size);
@@ -61,16 +61,16 @@
   Scalar* a_array1 = internal::aligned_new<Scalar>(arraysize+1);
   Scalar* array1 = a_array1;
   if(Alignment!=Aligned)
-    array1 = (Scalar*)(internal::IntPtr(a_array1) + (internal::packet_traits<Scalar>::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits<Scalar>::Real)));
+    array1 = (Scalar*)(std::intptr_t(a_array1) + (internal::packet_traits<Scalar>::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits<Scalar>::Real)));
 
   Scalar a_array2[256];
   Scalar* array2 = a_array2;
   if(Alignment!=Aligned) {
-    array2 = (Scalar*)(internal::IntPtr(a_array2) + (internal::packet_traits<Scalar>::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits<Scalar>::Real)));
+    array2 = (Scalar*)(std::intptr_t(a_array2) + (internal::packet_traits<Scalar>::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits<Scalar>::Real)));
   } else {
     // In case there is no alignment, default to pointing to the start.
     constexpr int alignment = (std::max<int>)(EIGEN_MAX_ALIGN_BYTES, 1);
-    array2 = (Scalar*)(((internal::UIntPtr(a_array2)+alignment-1)/alignment)*alignment);
+    array2 = (Scalar*)(((std::uintptr_t(a_array2)+alignment-1)/alignment)*alignment);
   }
   Index maxsize2 = a_array2 - array2 + 256;
   
diff --git a/test/rvalue_types.cpp b/test/rvalue_types.cpp
index 9af7c92..01df21b 100644
--- a/test/rvalue_types.cpp
+++ b/test/rvalue_types.cpp
@@ -15,8 +15,6 @@
 
 #include <Eigen/Core>
 
-using internal::UIntPtr;
-
 template <typename MatrixType>
 void rvalue_copyassign(const MatrixType& m)
 {
@@ -25,18 +23,18 @@
   
   // create a temporary which we are about to destroy by moving
   MatrixType tmp = m;
-  UIntPtr src_address = reinterpret_cast<UIntPtr>(tmp.data());
+  std::uintptr_t src_address = reinterpret_cast<std::uintptr_t>(tmp.data());
   
   Eigen::internal::set_is_malloc_allowed(false); // moving from an rvalue reference shall never allocate
   // move the temporary to n
   MatrixType n = std::move(tmp);
-  UIntPtr dst_address = reinterpret_cast<UIntPtr>(n.data());
+  std::uintptr_t dst_address = reinterpret_cast<std::uintptr_t>(n.data());
   if (MatrixType::RowsAtCompileTime==Dynamic|| MatrixType::ColsAtCompileTime==Dynamic)
   {
     // verify that we actually moved the guts
     VERIFY_IS_EQUAL(src_address, dst_address);
     VERIFY_IS_EQUAL(tmp.size(), 0);
-    VERIFY_IS_EQUAL(reinterpret_cast<UIntPtr>(tmp.data()), UIntPtr(0));
+    VERIFY_IS_EQUAL(reinterpret_cast<std::uintptr_t>(tmp.data()), std::uintptr_t(0));
   }
 
   // verify that the content did not change
@@ -55,24 +53,24 @@
 
   Eigen::internal::set_is_malloc_allowed(false); // moving from an rvalue reference shall never allocate
 
-  UIntPtr t0_address = reinterpret_cast<UIntPtr>(t0.indices().data());
+  std::uintptr_t t0_address = reinterpret_cast<std::uintptr_t>(t0.indices().data());
 
   // Move constructors:
   TranspositionsType t1 = std::move(t0);
-  UIntPtr t1_address = reinterpret_cast<UIntPtr>(t1.indices().data());
+  std::uintptr_t t1_address = reinterpret_cast<std::uintptr_t>(t1.indices().data());
   VERIFY_IS_EQUAL(t0_address, t1_address);
   // t0 must be de-allocated:
   VERIFY_IS_EQUAL(t0.size(), 0);
-  VERIFY_IS_EQUAL(reinterpret_cast<UIntPtr>(t0.indices().data()), UIntPtr(0));
+  VERIFY_IS_EQUAL(reinterpret_cast<std::uintptr_t>(t0.indices().data()), std::uintptr_t(0));
 
 
   // Move assignment:
   t0 = std::move(t1);
-  t0_address = reinterpret_cast<UIntPtr>(t0.indices().data());
+  t0_address = reinterpret_cast<std::uintptr_t>(t0.indices().data());
   VERIFY_IS_EQUAL(t0_address, t1_address);
   // t1 must be de-allocated:
   VERIFY_IS_EQUAL(t1.size(), 0);
-  VERIFY_IS_EQUAL(reinterpret_cast<UIntPtr>(t1.indices().data()), UIntPtr(0));
+  VERIFY_IS_EQUAL(reinterpret_cast<std::uintptr_t>(t1.indices().data()), std::uintptr_t(0));
 
   Eigen::internal::set_is_malloc_allowed(true);
 }
diff --git a/test/stdvector.cpp b/test/stdvector.cpp
index 18de240..2ca584b 100644
--- a/test/stdvector.cpp
+++ b/test/stdvector.cpp
@@ -34,7 +34,7 @@
   VERIFY_IS_APPROX(v[21], y);
   v.push_back(x);
   VERIFY_IS_APPROX(v[22], x);
-  VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(MatrixType));
+  VERIFY((std::uintptr_t)&(v[22]) == (std::uintptr_t)&(v[21]) + sizeof(MatrixType));
 
   // do a lot of push_back such that the vector gets internally resized
   // (with memory reallocation)
@@ -69,7 +69,7 @@
   VERIFY_IS_APPROX(v[21], y);
   v.push_back(x);
   VERIFY_IS_APPROX(v[22], x);
-  VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(TransformType));
+  VERIFY((std::uintptr_t)&(v[22]) == (std::uintptr_t)&(v[21]) + sizeof(TransformType));
 
   // do a lot of push_back such that the vector gets internally resized
   // (with memory reallocation)
@@ -104,7 +104,7 @@
   VERIFY_IS_APPROX(v[21], y);
   v.push_back(x);
   VERIFY_IS_APPROX(v[22], x);
-  VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(QuaternionType));
+  VERIFY((std::uintptr_t)&(v[22]) == (std::uintptr_t)&(v[21]) + sizeof(QuaternionType));
 
   // do a lot of push_back such that the vector gets internally resized
   // (with memory reallocation)
diff --git a/test/stdvector_overload.cpp b/test/stdvector_overload.cpp
index da04f8a..2fbbb31 100644
--- a/test/stdvector_overload.cpp
+++ b/test/stdvector_overload.cpp
@@ -48,7 +48,7 @@
   VERIFY_IS_APPROX(v[21], y);
   v.push_back(x);
   VERIFY_IS_APPROX(v[22], x);
-  VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(MatrixType));
+  VERIFY((std::uintptr_t)&(v[22]) == (std::uintptr_t)&(v[21]) + sizeof(MatrixType));
 
   // do a lot of push_back such that the vector gets internally resized
   // (with memory reallocation)
@@ -83,7 +83,7 @@
   VERIFY_IS_APPROX(v[21], y);
   v.push_back(x);
   VERIFY_IS_APPROX(v[22], x);
-  VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(TransformType));
+  VERIFY((std::uintptr_t)&(v[22]) == (std::uintptr_t)&(v[21]) + sizeof(TransformType));
 
   // do a lot of push_back such that the vector gets internally resized
   // (with memory reallocation)
@@ -118,7 +118,7 @@
   VERIFY_IS_APPROX(v[21], y);
   v.push_back(x);
   VERIFY_IS_APPROX(v[22], x);
-  VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(QuaternionType));
+  VERIFY((std::uintptr_t)&(v[22]) == (std::uintptr_t)&(v[21]) + sizeof(QuaternionType));
 
   // do a lot of push_back such that the vector gets internally resized
   // (with memory reallocation)
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 5e6cdb1..9ca9ca1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -611,12 +611,13 @@
       return operator!=(constant(threshold));
     }
 
-    // Checks
+    // Predicates.
     EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_isnan_op<Scalar>, const Derived>
+    EIGEN_STRONG_INLINE const TensorConversionOp<bool, const TensorCwiseUnaryOp<internal::scalar_isnan_op<Scalar, true>, const Derived>>
     (isnan)() const {
-      return unaryExpr(internal::scalar_isnan_op<Scalar>());
+      return unaryExpr(internal::scalar_isnan_op<Scalar, true>()).template cast<bool>();
     }
+
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_isinf_op<Scalar>, const Derived>
     (isinf)() const {
@@ -1219,4 +1220,3 @@
 } // end namespace Eigen
 
 #endif // EIGEN_CXX11_TENSOR_TENSOR_BASE_H
-
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
index 3c9a808..839ff69 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
@@ -269,7 +269,7 @@
   template<typename IndexType, typename Index, Index First, Index... Is>
   EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   array<Index, 1 + sizeof...(Is)> customIndices2Array(IndexType& idx, numeric_list<Index, First, Is...>) {
-    return { idx[First], idx[Is]... };
+    return { static_cast<Index>(idx[First]), static_cast<Index>(idx[Is])... };
   }
   template<typename IndexType, typename Index>
   EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
diff --git a/unsupported/test/cxx11_maxsizevector.cpp b/unsupported/test/cxx11_maxsizevector.cpp
index 46b689a..22b51ba 100644
--- a/unsupported/test/cxx11_maxsizevector.cpp
+++ b/unsupported/test/cxx11_maxsizevector.cpp
@@ -18,13 +18,13 @@
 #endif
     std::cout << '+';
     ++Foo::object_count;
-    eigen_assert((internal::UIntPtr(this) & (127)) == 0);
+    eigen_assert((std::uintptr_t(this) & (127)) == 0);
   }
   Foo(const Foo&)
   {
     std::cout << 'c';
     ++Foo::object_count;
-    eigen_assert((internal::UIntPtr(this) & (127)) == 0);
+    eigen_assert((std::uintptr_t(this) & (127)) == 0);
   }
 
   ~Foo()
diff --git a/unsupported/test/cxx11_tensor_comparisons.cpp b/unsupported/test/cxx11_tensor_comparisons.cpp
index d90ef7a..86c7335 100644
--- a/unsupported/test/cxx11_tensor_comparisons.cpp
+++ b/unsupported/test/cxx11_tensor_comparisons.cpp
@@ -79,8 +79,36 @@
 }
 
 
+
+static void test_isnan()
+{
+  Tensor<Scalar, 3> mat(2,3,7);
+
+  mat.setRandom();
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 7; ++k) {
+        if (internal::random<bool>()) {
+          mat(i,j,k) = std::numeric_limits<Scalar>::quiet_NaN();
+        }
+      }
+    }
+  }
+  Tensor<bool, 3> nan(2,3,7);
+  nan = (mat.isnan)();
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 7; ++k) {
+        VERIFY_IS_EQUAL(nan(i,j,k), (std::isnan)(mat(i,j,k)));
+      }
+    }
+  }
+
+}
+
 EIGEN_DECLARE_TEST(cxx11_tensor_comparisons)
 {
   CALL_SUBTEST(test_orderings());
   CALL_SUBTEST(test_equality());
+  CALL_SUBTEST(test_isnan());
 }