Update Eigen to commit:e16d70bd4e9cdebd2fbdae63b1a4d86493fbbde6

CHANGELOG
=========
e16d70bd4 - Fix FFT when destination does not have unit stride.
99c18bce6 - Msvc muluh
8e4797178 - Bit shifting functions
9700fc847 - Reorganize CMake and minimize configuration for non-top-level builds.

PiperOrigin-RevId: 631975359
Change-Id: I537052ef6b9ed6e3ecf90e2aebf5c3898ffc919c
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 381d8ff..8a07d50 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -709,33 +709,21 @@
 }
 
 /** \internal \returns \a a arithmetically shifted by N bits to the right */
-template <int N>
-EIGEN_DEVICE_FUNC inline int parithmetic_shift_right(const int& a) {
-  return a >> N;
-}
-template <int N>
-EIGEN_DEVICE_FUNC inline long int parithmetic_shift_right(const long int& a) {
-  return a >> N;
+template <int N, typename T>
+EIGEN_DEVICE_FUNC inline T parithmetic_shift_right(const T& a) {
+  return numext::arithmetic_shift_right(a, N);
 }
 
 /** \internal \returns \a a logically shifted by N bits to the right */
-template <int N>
-EIGEN_DEVICE_FUNC inline int plogical_shift_right(const int& a) {
-  return static_cast<int>(static_cast<unsigned int>(a) >> N);
-}
-template <int N>
-EIGEN_DEVICE_FUNC inline long int plogical_shift_right(const long int& a) {
-  return static_cast<long>(static_cast<unsigned long>(a) >> N);
+template <int N, typename T>
+EIGEN_DEVICE_FUNC inline T plogical_shift_right(const T& a) {
+  return numext::logical_shift_right(a, N);
 }
 
 /** \internal \returns \a a shifted by N bits to the left */
-template <int N>
-EIGEN_DEVICE_FUNC inline int plogical_shift_left(const int& a) {
-  return a << N;
-}
-template <int N>
-EIGEN_DEVICE_FUNC inline long int plogical_shift_left(const long int& a) {
-  return a << N;
+template <int N, typename T>
+EIGEN_DEVICE_FUNC inline T plogical_shift_left(const T& a) {
+  return numext::logical_shift_left(a, N);
 }
 
 /** \internal \returns the significant and exponent of the underlying floating point numbers
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 6bb9a12..d42fc93 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -1746,6 +1746,23 @@
 #undef SYCL_SPECIALIZE_BINARY_FUNC
 #endif
 
+template <typename Scalar, typename Enable = std::enable_if_t<std::is_integral<Scalar>::value>>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar logical_shift_left(const Scalar& a, int n) {
+  return a << n;
+}
+
+template <typename Scalar, typename Enable = std::enable_if_t<std::is_integral<Scalar>::value>>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar logical_shift_right(const Scalar& a, int n) {
+  using UnsignedScalar = typename numext::get_integer_by_size<sizeof(Scalar)>::unsigned_type;
+  return bit_cast<Scalar, UnsignedScalar>(bit_cast<UnsignedScalar, Scalar>(a) >> n);
+}
+
+template <typename Scalar, typename Enable = std::enable_if_t<std::is_integral<Scalar>::value>>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar arithmetic_shift_right(const Scalar& a, int n) {
+  using SignedScalar = typename numext::get_integer_by_size<sizeof(Scalar)>::signed_type;
+  return bit_cast<Scalar, SignedScalar>(bit_cast<SignedScalar, Scalar>(a) >> n);
+}
+
 }  // end namespace numext
 
 namespace internal {
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h
index 2848b78..a6e2de4 100644
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -101,10 +101,10 @@
 template <typename Tgt, typename Src>
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Tgt bit_cast(const Src& src) {
   // The behaviour of memcpy is not specified for non-trivially copyable types
-  EIGEN_STATIC_ASSERT(std::is_trivially_copyable<Src>::value, THIS_TYPE_IS_NOT_SUPPORTED);
+  EIGEN_STATIC_ASSERT(std::is_trivially_copyable<Src>::value, THIS_TYPE_IS_NOT_SUPPORTED)
   EIGEN_STATIC_ASSERT(std::is_trivially_copyable<Tgt>::value && std::is_default_constructible<Tgt>::value,
-                      THIS_TYPE_IS_NOT_SUPPORTED);
-  EIGEN_STATIC_ASSERT(sizeof(Src) == sizeof(Tgt), THIS_TYPE_IS_NOT_SUPPORTED);
+                      THIS_TYPE_IS_NOT_SUPPORTED)
+  EIGEN_STATIC_ASSERT(sizeof(Src) == sizeof(Tgt), THIS_TYPE_IS_NOT_SUPPORTED)
 
   Tgt tgt;
   // Load src into registers first. This allows the memcpy to be elided by CUDA.
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index 2b0c05c..c1bbc7c 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -219,7 +219,9 @@
  */
 template <typename Scalar, int N>
 struct scalar_shift_right_op {
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { return a >> N; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const {
+    return numext::arithmetic_shift_right(a, N);  // N: compile-time shift count of this functor
+  }
   template <typename Packet>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const {
     return internal::parithmetic_shift_right<N>(a);
@@ -237,7 +239,9 @@
  */
 template <typename Scalar, int N>
 struct scalar_shift_left_op {
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { return a << N; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const {
+    return numext::logical_shift_left(a, N);  // N: compile-time shift count of this functor
+  }
   template <typename Packet>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const {
     return internal::plogical_shift_left<N>(a);
diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake
index 2f6f89e..9f75c1b 100644
--- a/cmake/EigenTesting.cmake
+++ b/cmake/EigenTesting.cmake
@@ -91,6 +91,7 @@
   if(EIGEN_TEST_CUSTOM_LINKER_FLAGS)
     target_link_libraries(${targetname} ${EIGEN_TEST_CUSTOM_LINKER_FLAGS})
   endif()
+  target_link_libraries(${targetname} Eigen3::Eigen)
 
   if(${ARGC} GREATER 3)
     set(libs_to_link ${ARGV3})
diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt
index 1babd13..e61c99b 100644
--- a/lapack/CMakeLists.txt
+++ b/lapack/CMakeLists.txt
@@ -110,6 +110,7 @@
   if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO)
     target_link_libraries(${target} ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO})
   endif()
+  target_link_libraries(${target} Eigen3::Eigen)
   add_dependencies(lapack ${target})
   install(TARGETS ${target}
           RUNTIME DESTINATION bin
diff --git a/test/array_cwise.cpp b/test/array_cwise.cpp
index b5ad3c4..3b36328 100644
--- a/test/array_cwise.cpp
+++ b/test/array_cwise.cpp
@@ -1068,24 +1068,45 @@
   }
 }
 
-template <int N>
-struct shift_left {
-  template <typename Scalar>
-  Scalar operator()(const Scalar& v) const {
-    return (v << N);
+template <typename Scalar>
+struct shift_imm_traits {
+  enum { Cost = 1, PacketAccess = internal::packet_traits<Scalar>::HasShift };
+};
+
+template <int N, typename Scalar>
+struct logical_left_shift_op {
+  Scalar operator()(const Scalar& v) const { return numext::logical_shift_left(v, N); }
+  template <typename Packet>
+  Packet packetOp(const Packet& v) const {
+    return internal::plogical_shift_left<N>(v);
+  }
+};
+template <int N, typename Scalar>
+struct logical_right_shift_op {
+  Scalar operator()(const Scalar& v) const { return numext::logical_shift_right(v, N); }
+  template <typename Packet>
+  Packet packetOp(const Packet& v) const {
+    return internal::plogical_shift_right<N>(v);
+  }
+};
+template <int N, typename Scalar>
+struct arithmetic_right_shift_op {
+  Scalar operator()(const Scalar& v) const { return numext::arithmetic_shift_right(v, N); }
+  template <typename Packet>
+  Packet packetOp(const Packet& v) const {
+    return internal::parithmetic_shift_right<N>(v);
   }
 };
 
-template <int N>
-struct arithmetic_shift_right {
-  template <typename Scalar>
-  Scalar operator()(const Scalar& v) const {
-    return (v >> N);
-  }
-};
+template <int N, typename Scalar>
+struct internal::functor_traits<logical_left_shift_op<N, Scalar>> : shift_imm_traits<Scalar> {};
+template <int N, typename Scalar>
+struct internal::functor_traits<logical_right_shift_op<N, Scalar>> : shift_imm_traits<Scalar> {};
+template <int N, typename Scalar>
+struct internal::functor_traits<arithmetic_right_shift_op<N, Scalar>> : shift_imm_traits<Scalar> {};
 
 template <typename ArrayType>
-struct signed_shift_test_impl {
+struct shift_test_impl {
   typedef typename ArrayType::Scalar Scalar;
   static constexpr size_t Size = sizeof(Scalar);
   static constexpr size_t MaxShift = (CHAR_BIT * Size) - 1;
@@ -1099,20 +1120,24 @@
 
     ArrayType m1 = ArrayType::Random(rows, cols), m2(rows, cols), m3(rows, cols);
 
-    m2 = m1.unaryExpr(internal::scalar_shift_right_op<Scalar, N>());
-    m3 = m1.unaryExpr(arithmetic_shift_right<N>());
+    m2 = m1.unaryExpr([](const Scalar& v) { return numext::logical_shift_left(v, N); });
+    m3 = m1.unaryExpr(logical_left_shift_op<N, Scalar>());
     VERIFY_IS_CWISE_EQUAL(m2, m3);
 
-    m2 = m1.unaryExpr(internal::scalar_shift_left_op<Scalar, N>());
-    m3 = m1.unaryExpr(shift_left<N>());
+    m2 = m1.unaryExpr([](const Scalar& v) { return numext::logical_shift_right(v, N); });
+    m3 = m1.unaryExpr(logical_right_shift_op<N, Scalar>());
+    VERIFY_IS_CWISE_EQUAL(m2, m3);
+
+    m2 = m1.unaryExpr([](const Scalar& v) { return numext::arithmetic_shift_right(v, N); });
+    m3 = m1.unaryExpr(arithmetic_right_shift_op<N, Scalar>());
     VERIFY_IS_CWISE_EQUAL(m2, m3);
 
     run<N + 1>(m);
   }
 };
 template <typename ArrayType>
-void signed_shift_test(const ArrayType& m) {
-  signed_shift_test_impl<ArrayType>::run(m);
+void shift_test(const ArrayType& m) {
+  shift_test_impl<ArrayType>::run(m);
 }
 
 template <typename ArrayType>
@@ -1361,10 +1386,10 @@
         ArrayXXi(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
     CALL_SUBTEST_7(array_generic(Array<Index, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE),
                                                                 internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
-    CALL_SUBTEST_8(signed_shift_test(
+    CALL_SUBTEST_8(shift_test(
         ArrayXXi(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
-    CALL_SUBTEST_9(signed_shift_test(Array<Index, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE),
-                                                                    internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
+    CALL_SUBTEST_9(shift_test(Array<Index, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE),
+                                                             internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
     CALL_SUBTEST_10(array_generic(Array<uint32_t, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE),
                                                                     internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
     CALL_SUBTEST_11(array_generic(Array<uint64_t, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE),
diff --git a/test/numext.cpp b/test/numext.cpp
index a2d511b..ebe9fb0 100644
--- a/test/numext.cpp
+++ b/test/numext.cpp
@@ -292,6 +292,27 @@
   check_signbit_impl<T>::run();
 }
 
+template <typename T>
+void check_shift() {
+  using SignedT = typename numext::get_integer_by_size<sizeof(T)>::signed_type;
+  using UnsignedT = typename numext::get_integer_by_size<sizeof(T)>::unsigned_type;
+  constexpr int kNumBits = CHAR_BIT * sizeof(T);
+  for (int i = 0; i < 1000; ++i) {
+    const T a = internal::random<T>();
+    for (int s = 1; s < kNumBits; s++) {
+      T a_bsll = numext::logical_shift_left(a, s);
+      T a_bsll_ref = a << s;
+      VERIFY_IS_EQUAL(a_bsll, a_bsll_ref);
+      T a_bsrl = numext::logical_shift_right(a, s);
+      T a_bsrl_ref = numext::bit_cast<T, UnsignedT>(numext::bit_cast<UnsignedT, T>(a) >> s);
+      VERIFY_IS_EQUAL(a_bsrl, a_bsrl_ref);
+      T a_bsra = numext::arithmetic_shift_right(a, s);
+      T a_bsra_ref = numext::bit_cast<T, SignedT>(numext::bit_cast<SignedT, T>(a) >> s);
+      VERIFY_IS_EQUAL(a_bsra, a_bsra_ref);
+    }
+  }
+}
+
 EIGEN_DECLARE_TEST(numext) {
   for (int k = 0; k < g_repeat; ++k) {
     CALL_SUBTEST(check_negate<signed char>());
@@ -354,5 +375,15 @@
     CALL_SUBTEST(check_signbit<int16_t>());
     CALL_SUBTEST(check_signbit<int32_t>());
     CALL_SUBTEST(check_signbit<int64_t>());
+
+    CALL_SUBTEST(check_shift<int8_t>());
+    CALL_SUBTEST(check_shift<int16_t>());
+    CALL_SUBTEST(check_shift<int32_t>());
+    CALL_SUBTEST(check_shift<int64_t>());
+
+    CALL_SUBTEST(check_shift<uint8_t>());
+    CALL_SUBTEST(check_shift<uint16_t>());
+    CALL_SUBTEST(check_shift<uint32_t>());
+    CALL_SUBTEST(check_shift<uint64_t>());
   }
 }
diff --git a/unsupported/CMakeLists.txt b/unsupported/CMakeLists.txt
index 67d1f62..3904601 100644
--- a/unsupported/CMakeLists.txt
+++ b/unsupported/CMakeLists.txt
@@ -4,7 +4,7 @@
 endif()
 if(EIGEN_BUILD_TESTING)
   if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
-    add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest
+    add_subdirectory(test) # CTest automatic test building relies on the "all" target.
   else()
     add_subdirectory(test EXCLUDE_FROM_ALL)
   endif()
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
index fdb9759..fddc648 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
@@ -101,6 +101,8 @@
   return __umul64hi(a, b);
 #elif defined(SYCL_DEVICE_ONLY)
   return cl::sycl::mul_hi(a, static_cast<uint64_t>(b));
+#elif EIGEN_COMP_MSVC && (EIGEN_ARCH_x86_64 || EIGEN_ARCH_ARM64)
+  return __umulh(a, static_cast<uint64_t>(b));
 #elif EIGEN_HAS_BUILTIN_INT128
   __uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b);
   return static_cast<uint64_t>(v >> 64);
diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT
index 630be1e..557fdf6 100644
--- a/unsupported/Eigen/FFT
+++ b/unsupported/Eigen/FFT
@@ -231,11 +231,12 @@
                         THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)
 
     if (nfft < 1) nfft = src.size();
-
-    if (NumTraits<src_type>::IsComplex == 0 && HasFlag(HalfSpectrum))
-      dst.derived().resize((nfft >> 1) + 1);
-    else
-      dst.derived().resize(nfft);
+    
+    Index dst_size = nfft;
+    if (NumTraits<src_type>::IsComplex == 0 && HasFlag(HalfSpectrum)) {
+      dst_size = (nfft >> 1) + 1;
+    }
+    dst.derived().resize(dst_size);
 
     if (src.innerStride() != 1 || src.size() < nfft) {
       Matrix<src_type, 1, Dynamic> tmp;
@@ -245,9 +246,21 @@
       } else {
         tmp = src;
       }
-      fwd(&dst[0], &tmp[0], nfft);
+      if (dst.innerStride() != 1) {
+        Matrix<dst_type, 1, Dynamic> out(1, dst_size);
+        fwd(&out[0], &tmp[0], nfft);
+        dst.derived() = out;
+      } else {
+        fwd(&dst[0], &tmp[0], nfft);
+      }
     } else {
-      fwd(&dst[0], &src[0], nfft);
+      if (dst.innerStride() != 1) {
+        Matrix<dst_type, 1, Dynamic> out(1, dst_size);
+        fwd(&out[0], &src[0], nfft);
+        dst.derived() = out;
+      } else {
+        fwd(&dst[0], &src[0], nfft);
+      }
     }
   }
 
@@ -326,9 +339,22 @@
       } else {
         tmp = src;
       }
-      inv(&dst[0], &tmp[0], nfft);
+
+      if (dst.innerStride() != 1) {
+        Matrix<dst_type, 1, Dynamic> out(1, nfft);
+        inv(&out[0], &tmp[0], nfft);
+        dst.derived() = out;
+      } else {
+        inv(&dst[0], &tmp[0], nfft);
+      }
     } else {
-      inv(&dst[0], &src[0], nfft);
+      if (dst.innerStride() != 1) {
+        Matrix<dst_type, 1, Dynamic> out(1, nfft);
+        inv(&out[0], &src[0], nfft);
+        dst.derived() = out;
+      } else {
+        inv(&dst[0], &src[0], nfft);
+      }
     }
   }
 
diff --git a/unsupported/test/fft_test_shared.h b/unsupported/test/fft_test_shared.h
index 0e040ad..3adcd90 100644
--- a/unsupported/test/fft_test_shared.h
+++ b/unsupported/test/fft_test_shared.h
@@ -164,9 +164,41 @@
 }
 
 template <typename T>
+void test_complex_strided(int nfft) {
+  typedef typename FFT<T>::Complex Complex;
+  typedef typename Eigen::Vector<Complex, Dynamic> ComplexVector;
+  constexpr int kInputStride = 3;
+  constexpr int kOutputStride = 7;
+  constexpr int kInvOutputStride = 13;
+
+  FFT<T> fft;
+
+  ComplexVector inbuf(nfft * kInputStride);
+  inbuf.setRandom();
+  ComplexVector outbuf(nfft * kOutputStride);
+  outbuf.setRandom();
+  ComplexVector invoutbuf(nfft * kInvOutputStride);
+  invoutbuf.setRandom();
+
+  using StridedComplexVector = Map<ComplexVector, /*MapOptions=*/0, InnerStride<Dynamic>>;
+  StridedComplexVector input(inbuf.data(), nfft, InnerStride<Dynamic>(kInputStride));
+  StridedComplexVector output(outbuf.data(), nfft, InnerStride<Dynamic>(kOutputStride));
+  StridedComplexVector inv_output(invoutbuf.data(), nfft, InnerStride<Dynamic>(kInvOutputStride));
+
+  for (int k = 0; k < nfft; ++k)
+    input[k] = Complex((T)(rand() / (double)RAND_MAX - .5), (T)(rand() / (double)RAND_MAX - .5));
+  fft.fwd(output, input);
+
+  VERIFY(T(fft_rmse(output, input)) < test_precision<T>());  // gross check
+  fft.inv(inv_output, output);
+  VERIFY(T(dif_rmse(inv_output, input)) < test_precision<T>());  // gross check
+}
+
+template <typename T>
 void test_complex(int nfft) {
   test_complex_generic<StdVectorContainer, T>(nfft);
   test_complex_generic<EigenVectorContainer, T>(nfft);
+  test_complex_strided<T>(nfft);
 }
 
 template <typename T, int nrows, int ncols>