Update Eigen to commit:9ea520fc4510b49408f1445b603b1f5dad267c2c

CHANGELOG
=========
9ea520fc4 - Ensure that mc is not smaller than Traits::nr
dd8c71e62 - Fix typecasting for arm32
b2cb49e28 - Static asserts to check for matching NumDimensions
283dec7f2 - Update file GeneralMatrixVector.h
66b9f4ed5 - Fix (u)int64_t->float conversion on arm
d1b03fb5c - Gemv microoptimization

PiperOrigin-RevId: 587143778
Change-Id: Ia7e9e827a65966b5642968ef5b1097db5b7e61cf
diff --git a/Eigen/src/Core/arch/NEON/TypeCasting.h b/Eigen/src/Core/arch/NEON/TypeCasting.h
index a265e4d..68566b0 100644
--- a/Eigen/src/Core/arch/NEON/TypeCasting.h
+++ b/Eigen/src/Core/arch/NEON/TypeCasting.h
@@ -1109,15 +1109,32 @@
 struct type_casting_traits<numext::int64_t, float> {
   enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
 };
+
 template <>
 EIGEN_STRONG_INLINE Packet4f pcast<Packet2l, Packet4f>(const Packet2l& a, const Packet2l& b) {
-  return vcvtq_f32_s32(vcombine_s32(vmovn_s64(a), vmovn_s64(b)));
-}
-template <>
-EIGEN_STRONG_INLINE Packet2f pcast<Packet2l, Packet2f>(const Packet2l& a) {
-  return vcvt_f32_s32(vmovn_s64(a));
+#if EIGEN_ARCH_ARM64
+  return vcombine_f32(vcvt_f32_f64(vcvtq_f64_s64(a)), vcvt_f32_f64(vcvtq_f64_s64(b)));
+#else
+  EIGEN_ALIGN_MAX int64_t lvals[4];
+  pstore(lvals, a);
+  pstore(lvals + 2, b);
+  EIGEN_ALIGN_MAX float fvals[4] = {static_cast<float>(lvals[0]), static_cast<float>(lvals[1]),
+                                    static_cast<float>(lvals[2]), static_cast<float>(lvals[3])};
+  return pload<Packet4f>(fvals);
+#endif
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet2f pcast<Packet2l, Packet2f>(const Packet2l& a) {
+#if EIGEN_ARCH_ARM64
+  return vcvt_f32_f64(vcvtq_f64_s64(a));
+#else
+  EIGEN_ALIGN_MAX int64_t lvals[2];
+  pstore(lvals, a);
+  EIGEN_ALIGN_MAX float fvals[2] = {static_cast<float>(lvals[0]), static_cast<float>(lvals[1])};
+  return pload<Packet2f>(fvals);
+#endif
+}
 
 template <>
 struct type_casting_traits<numext::int64_t, numext::int32_t> {
@@ -1233,11 +1250,27 @@
 };
 template <>
 EIGEN_STRONG_INLINE Packet4f pcast<Packet2ul, Packet4f>(const Packet2ul& a, const Packet2ul& b) {
-  return vcvtq_f32_u32(vcombine_u32(vmovn_u64(a), vmovn_u64(b)));
+#if EIGEN_ARCH_ARM64
+  return vcombine_f32(vcvt_f32_f64(vcvtq_f64_u64(a)), vcvt_f32_f64(vcvtq_f64_u64(b)));
+#else
+  EIGEN_ALIGN_MAX uint64_t uvals[4];
+  pstore(uvals, a);
+  pstore(uvals + 2, b);
+  EIGEN_ALIGN_MAX float fvals[4] = {static_cast<float>(uvals[0]), static_cast<float>(uvals[1]),
+                                    static_cast<float>(uvals[2]), static_cast<float>(uvals[3])};
+  return pload<Packet4f>(fvals);
+#endif
 }
 template <>
 EIGEN_STRONG_INLINE Packet2f pcast<Packet2ul, Packet2f>(const Packet2ul& a) {
-  return vcvt_f32_u32(vmovn_u64(a));
+#if EIGEN_ARCH_ARM64
+  return vcvt_f32_f64(vcvtq_f64_u64(a));
+#else
+  EIGEN_ALIGN_MAX uint64_t uvals[2];
+  pstore(uvals, a);
+  EIGEN_ALIGN_MAX float fvals[2] = {static_cast<float>(uvals[0]), static_cast<float>(uvals[1])};
+  return pload<Packet2f>(fvals);
+#endif
 }
 
 
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
index 55b637b..2e0dcb9 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@@ -78,11 +78,14 @@
     ResMapper res(res_, resStride, resIncr);
 
     Index kc = blocking.kc();
-    Index mc = (std::min)(size,blocking.mc());
+    // Raise mc to at least nr, then cap it at size (so mc < nr only if size < nr)
+    Index mc = (std::min)(size,(std::max)(static_cast<decltype(blocking.mc())>(Traits::nr),blocking.mc()));
 
-    // !!! mc must be a multiple of nr:
-    if(mc > Traits::nr)
-      mc = (mc/Traits::nr)*Traits::nr;
+    // !!! mc must be a multiple of nr
+    if (mc > Traits::nr) {
+      using UnsignedIndex = typename make_unsigned<Index>::type;
+      mc = (UnsignedIndex(mc)/Traits::nr)*Traits::nr;
+    }
 
     std::size_t sizeA = kc*mc;
     std::size_t sizeB = kc*size;
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index 475ac85..cef0ade 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -362,9 +362,10 @@
          HasQuarter = (int)ResPacketSizeQuarter < (int)ResPacketSizeHalf
   };
 
-  const Index fullColBlockEnd = LhsPacketSize * (cols / LhsPacketSize);
-  const Index halfColBlockEnd = LhsPacketSizeHalf * (cols / LhsPacketSizeHalf);
-  const Index quarterColBlockEnd = LhsPacketSizeQuarter * (cols / LhsPacketSizeQuarter);
+  using UnsignedIndex = typename make_unsigned<Index>::type;
+  const Index fullColBlockEnd = LhsPacketSize * (UnsignedIndex(cols) / LhsPacketSize);
+  const Index halfColBlockEnd = LhsPacketSizeHalf * (UnsignedIndex(cols) / LhsPacketSizeHalf);
+  const Index quarterColBlockEnd = LhsPacketSizeQuarter * (UnsignedIndex(cols) / LhsPacketSizeQuarter);
 
   Index i=0;
   for(; i<n8; i+=8)
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 0614a20..fbbc98a 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -182,6 +182,7 @@
 ei_add_test(type_alias)
 ei_add_test(nullary)
 ei_add_test(mixingtypes)
+ei_add_test(float_conversion)
 ei_add_test(io)
 ei_add_test(packetmath "-DEIGEN_FAST_MATH=1")
 ei_add_test(vectorization_logic)
diff --git a/test/float_conversion.cpp b/test/float_conversion.cpp
new file mode 100644
index 0000000..fd8d45d
--- /dev/null
+++ b/test/float_conversion.cpp
@@ -0,0 +1,51 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include <sstream>
+
+#include "main.h"
+
+template<typename From, typename To>
+void test_conversion() {
+  typedef Array<From, Dynamic, 1> ArrayXFrom;
+  typedef Array<To, Dynamic, 1> ArrayXTo;
+  typedef Array<double, Dynamic, 1> ArrayXDouble;
+
+  Index size = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
+  double from_min = static_cast<double>((std::numeric_limits<From>::min)());
+  double from_range = static_cast<double>((std::numeric_limits<From>::max)()) - from_min;
+
+  // ArrayXFrom::Random() only generates 32-bit values (#2749), so we generate
+  // doubles in [-1, 1] and map them linearly onto From's full [min, max] range.
+  ArrayXDouble doubles = (ArrayXDouble::Random(size)+1.0)*(from_range/2.0) + from_min;
+  ArrayXFrom from = doubles.template cast<From>();
+  ArrayXTo to(size);
+  for (Index i = 0; i < size; ++i) {
+    to(i) = static_cast<To>(from(i));
+  }
+  VERIFY_IS_APPROX(from.template cast<To>(), to);
+}
+
+template<typename To>
+void test_conversion_to() {
+  CALL_SUBTEST((test_conversion<int64_t, To>()));
+  CALL_SUBTEST((test_conversion<uint64_t, To>()));
+  CALL_SUBTEST((test_conversion<int32_t, To>()));
+  CALL_SUBTEST((test_conversion<uint32_t, To>()));
+  CALL_SUBTEST((test_conversion<int16_t, To>()));
+  CALL_SUBTEST((test_conversion<uint16_t, To>()));
+  CALL_SUBTEST((test_conversion<int8_t, To>()));
+  CALL_SUBTEST((test_conversion<uint8_t, To>()));
+}
+
+EIGEN_DECLARE_TEST(float_conversion)
+{
+  for(int i = 0; i < g_repeat; i++) {
+    CALL_SUBTEST(test_conversion_to<float>());
+    CALL_SUBTEST(test_conversion_to<double>());
+  }
+}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
index a24097a..e596147 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
@@ -299,6 +299,7 @@
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Tensor(const TensorBase<OtherDerived, ReadOnlyAccessors>& other)
     {
+      EIGEN_STATIC_ASSERT(OtherDerived::NumDimensions == Base::NumDimensions, Number_of_dimensions_must_match)
       typedef TensorAssignOp<Tensor, const OtherDerived> Assign;
       Assign assign(*this, other.derived());
       resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions());
@@ -309,6 +310,7 @@
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Tensor(const TensorBase<OtherDerived, WriteAccessors>& other)
     {
+      EIGEN_STATIC_ASSERT(OtherDerived::NumDimensions == Base::NumDimensions, Number_of_dimensions_must_match)
       typedef TensorAssignOp<Tensor, const OtherDerived> Assign;
       Assign assign(*this, other.derived());
       resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions());