Automated g4 rollback of changelist 343392735.
*** Reason for rollback ***
Breaks tests: https://test.corp.google.com/ui#cl=343456731&flags=CAMQAqAEAg==&id=OCL:343456731:BASE:343463333:1605870623135:b93bb9fc&t=//borg/containermgr/core_allocation:core_allocator_regtest_lib_test
*** Original change description ***
BEGIN_PUBLIC
Update Eigen to: https://gitlab.com/libeigen/eigen/-/commit/a8fdcae55d1f002966fc9b963597a404f30baa09
END_PUBLIC
We missed the `TensorRandom` change in the last update.
***
PiperOrigin-RevId: 343470420
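For context, the TensorRandom.h hunk below reverts RandomToTypeUniform<Eigen::half> (and the bfloat16 variant) from the new numext::bit_cast-based construction back to writing the raw bit pattern into the .x / .value member directly. Both variants rely on the same trick: fill the mantissa field (10 bits for half, 7 for bfloat16) with random data, set the exponent field to the bias (15 / 127) so the value lands uniformly in [1, 2), then subtract 1 to map it to [0, 1). A minimal standalone sketch of that idea using IEEE-754 single precision (bias 127, 23 mantissa bits); the helper name and the sample input are illustrative only and are not taken from this CL:

  #include <cstdint>
  #include <cstring>
  #include <cstdio>

  // Build a float in [1, 2) from 23 random mantissa bits, then shift it to [0, 1).
  static float uniform_from_bits(uint32_t rnd) {
    uint32_t bits = (rnd & 0x7fffffu)   // keep 23 random mantissa bits
                  | (127u << 23);       // exponent = bias -> value in [1, 2)
    float result;
    std::memcpy(&result, &bits, sizeof(result));  // type-pun, analogous to bit_cast or the raw .x write
    return result - 1.0f;                          // map [1, 2) down to [0, 1)
  }

  int main() {
    std::printf("%f\n", uniform_from_bits(0x123456u));  // illustrative input, not a real PCG draw
    return 0;
  }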
diff --git a/Eigen/src/Core/arch/Default/BFloat16.h b/Eigen/src/Core/arch/Default/BFloat16.h
index ca2a86d..63ceace 100644
--- a/Eigen/src/Core/arch/Default/BFloat16.h
+++ b/Eigen/src/Core/arch/Default/BFloat16.h
@@ -69,7 +69,7 @@
template<class T>
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const T& val)
: bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<internal::is_integral<T>::value>(static_cast<float>(val))) {}
-
+
explicit EIGEN_DEVICE_FUNC bfloat16(float f)
: bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(f)) {}
@@ -88,7 +88,8 @@
// +0.0 and -0.0 become false, everything else becomes true.
return (value & 0x7fff) != 0;
}
-#endif
+#endif
+
};
} // namespace Eigen
@@ -271,16 +272,10 @@
return output;
}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw
-raw_uint16_to_bfloat16(numext::uint16_t value) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value) {
return __bfloat16_raw(value);
}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR numext::uint16_t
-raw_bfloat16_as_uint16(const __bfloat16_raw& bf) {
- return bf.value;
-}
-
// float_to_bfloat16_rtne template specialization that does not make any
// assumption about the value of its function argument (ff).
template <>
@@ -459,7 +454,7 @@
// float_to_bfloat16_rtne template specialization that assumes that its function
// argument (ff) is either a normal floating point number, or +/-infinity, or
// zero. Used to improve the runtime performance of conversion from an integer
-// type to bfloat16.
+// type to bfloat16.
template <>
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true>(float ff) {
#if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16))
@@ -696,19 +691,7 @@
return (bfloat16_impl::isfinite)(h);
}
-template <>
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16
-bit_cast<Eigen::bfloat16, uint16_t>(const uint16_t& src) {
- return Eigen::bfloat16(Eigen::bfloat16_impl::raw_uint16_to_bfloat16(src));
-}
-
-template <>
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t
-bit_cast<uint16_t, Eigen::bfloat16>(const Eigen::bfloat16& src) {
- return Eigen::bfloat16_impl::raw_bfloat16_as_uint16(src);
-}
-
-} // namespace numext
+} // namespace numext
} // namespace Eigen
#endif // EIGEN_BFLOAT16_H
diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h
index e31f742..0bc1e9d 100644
--- a/Eigen/src/Core/arch/Default/Half.h
+++ b/Eigen/src/Core/arch/Default/Half.h
@@ -159,12 +159,6 @@
explicit EIGEN_DEVICE_FUNC half(std::complex<RealScalar> c)
: half_impl::half_base(half_impl::float_to_half_rtne(static_cast<float>(c.real()))) {}
- EIGEN_DEVICE_FUNC operator float()
- const { // NOLINT: Allow implicit conversion to float, because it is
- // lossless.
- return half_impl::half_to_float(*this);
- }
-
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(bool) const {
// +0.0 and -0.0 become false, everything else becomes true.
#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
@@ -173,6 +167,47 @@
return (x & 0x7fff) != 0;
#endif
}
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(signed char) const {
+ return static_cast<signed char>(half_impl::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned char) const {
+ return static_cast<unsigned char>(half_impl::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(short) const {
+ return static_cast<short>(half_impl::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(numext::uint16_t) const {
+ return static_cast<numext::uint16_t>(half_impl::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(int) const {
+ return static_cast<int>(half_impl::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned int) const {
+ return static_cast<unsigned int>(half_impl::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long) const {
+ return static_cast<long>(half_impl::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long) const {
+ return static_cast<unsigned long>(half_impl::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long long) const {
+ return static_cast<long long>(half_impl::half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const {
+ return static_cast<unsigned long long>(half_to_float(*this));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const {
+ return half_impl::half_to_float(*this);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const {
+ return static_cast<double>(half_impl::half_to_float(*this));
+ }
+
+ template<typename RealScalar>
+ EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(std::complex<RealScalar>) const {
+ return std::complex<RealScalar>(static_cast<RealScalar>(*this), RealScalar(0));
+ }
};
} // end namespace Eigen
@@ -459,20 +494,6 @@
#endif
}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR numext::uint16_t
-raw_half_as_uint16(const __half_raw& h) {
- // HIP/CUDA/Default have a member 'x' of type uint16_t.
- // For ARM64 native half, the member 'x' is of type __fp16, so we need to
- // bit-cast. For SYCL, cl::sycl::half is _Float16, so cast directly.
-#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
- return numext::bit_cast<numext::uint16_t>(h.x);
-#elif defined(SYCL_DEVICE_ONLY)
- return numext::bit_cast<numext::uint16_t>(h);
-#else
- return h.x;
-#endif
-}
-
union float32_bits {
unsigned int u;
float f;
@@ -652,12 +673,6 @@
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
return half(::tanhf(float(a)));
}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half asin(const half& a) {
- return half(::asinf(float(a)));
-}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half acos(const half& a) {
- return half(::acosf(float(a)));
-}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300) || \
defined(EIGEN_HIP_DEVICE_COMPILE)
@@ -666,9 +681,6 @@
return half(::floorf(float(a)));
#endif
}
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half rint(const half& a) {
- return half(::rintf(float(a)));
-}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300) || \
defined(EIGEN_HIP_DEVICE_COMPILE)
@@ -800,11 +812,10 @@
}
#endif
+#if defined(EIGEN_GPU_COMPILE_PHASE)
namespace Eigen {
namespace numext {
-#if defined(EIGEN_GPU_COMPILE_PHASE)
-
template <>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isnan)(const Eigen::half& h) {
return (half_impl::isnan)(h);
@@ -819,22 +830,8 @@
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isfinite)(const Eigen::half& h) {
return (half_impl::isfinite)(h);
}
-
-#endif
-
-template <>
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half
-bit_cast<Eigen::half, uint16_t>(const uint16_t& src) {
- return Eigen::half(Eigen::half_impl::raw_uint16_to_half(src));
-}
-
-template <>
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t
-bit_cast<uint16_t, Eigen::half>(const Eigen::half& src) {
- return Eigen::half_impl::raw_half_as_uint16(src);
-}
-
} // namespace numext
} // namespace Eigen
+#endif
#endif // EIGEN_HALF_H
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 5ea108f..30edd70 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -3849,15 +3849,16 @@
typedef float16x4_t Packet4hf;
typedef float16x8_t Packet8hf;
+// TODO(tellenbach): Enable packets of size 8 as soon as the GEBP can handle them
template <>
struct packet_traits<Eigen::half> : default_packet_traits {
- typedef Packet8hf type;
+ typedef Packet4hf type;
typedef Packet4hf half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
- size = 8,
- HasHalfPacket = 1,
+ size = 4,
+ HasHalfPacket = 0,
HasCmp = 1,
HasCast = 1,
@@ -3903,7 +3904,7 @@
template <>
struct unpacket_traits<Packet8hf> {
typedef Eigen::half type;
- typedef Packet4hf half;
+ typedef Packet8hf half;
enum {
size = 8,
alignment = Aligned16,
@@ -3914,11 +3915,6 @@
};
template <>
-EIGEN_DEVICE_FUNC Packet4hf predux_half_dowto4<Packet8hf>(const Packet8hf& a) {
- return vadd_f16(vget_low_f16(a), vget_high_f16(a));
-}
-
-template <>
EIGEN_STRONG_INLINE Packet8hf pset1<Packet8hf>(const Eigen::half& from) {
return vdupq_n_f16(from.x);
}
@@ -4436,11 +4432,11 @@
for (int i = 0; i < 4; ++i) {
EIGEN_UNROLL_LOOP
for (int j = 0; j < 4; ++j) {
- out[i][j] = in[j][2 * i];
+ out[i][j] = in[j][2*i];
}
EIGEN_UNROLL_LOOP
for (int j = 0; j < 4; ++j) {
- out[i][j + 4] = in[j][2 * i + 1];
+ out[i][j+4] = in[j][2*i+1];
}
}
diff --git a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
index 72ce400..9db119b 100644
--- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
+++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
@@ -10,7 +10,7 @@
#ifndef EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H
#define EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H
-namespace Eigen {
+namespace Eigen {
namespace internal {
@@ -25,16 +25,16 @@
Index rows = lhs.innerSize();
Index cols = rhs.outerSize();
eigen_assert(lhs.outerSize() == rhs.innerSize());
-
+
ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0);
ei_declare_aligned_stack_constructed_variable(ResScalar, values, rows, 0);
ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0);
-
+
std::memset(mask,0,sizeof(bool)*rows);
evaluator<Lhs> lhsEval(lhs);
evaluator<Rhs> rhsEval(rhs);
-
+
// estimate the number of non zero entries
// given a rhs column containing Y non zeros, we assume that the respective Y columns
// of the lhs differs in average of one non zeros, thus the number of non zeros for
@@ -141,7 +141,7 @@
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrixAux;
typedef typename sparse_eval<ColMajorMatrixAux,ResultType::RowsAtCompileTime,ResultType::ColsAtCompileTime,ColMajorMatrixAux::Flags>::type ColMajorMatrix;
-
+
// If the result is tall and thin (in the extreme case a column vector)
// then it is faster to sort the coefficients inplace instead of transposing twice.
// FIXME, the following heuristic is probably not very good.
@@ -155,7 +155,7 @@
else
{
ColMajorMatrixAux resCol(lhs.rows(),rhs.cols());
- // resort to transpose to sort the entries
+ // ressort to transpose to sort the entries
internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrixAux>(lhs, rhs, resCol, false);
RowMajorMatrix resRow(resCol);
res = resRow.markAsRValue();
@@ -347,6 +347,6 @@
} // end namespace internal
-} // end namespace Eigen
+} // end namespace Eigen
#endif // EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H
diff --git a/test/bfloat16_float.cpp b/test/bfloat16_float.cpp
index ccea72d..09df2b2 100644
--- a/test/bfloat16_float.cpp
+++ b/test/bfloat16_float.cpp
@@ -13,10 +13,6 @@
#include <Eigen/src/Core/arch/Default/BFloat16.h>
-#define VERIFY_BFLOAT16_BITS_EQUAL(h, bits) \
- VERIFY_IS_EQUAL((numext::bit_cast<numext::uint16_t>(h)), \
- (static_cast<numext::uint16_t>(bits)))
-
// Make sure it's possible to forward declare Eigen::bfloat16
namespace Eigen {
struct bfloat16;
@@ -62,45 +58,31 @@
{
using Eigen::bfloat16_impl::__bfloat16_raw;
- // Round-trip casts
- VERIFY_IS_EQUAL(numext::bit_cast<bfloat16>(
- numext::bit_cast<numext::uint16_t>(bfloat16(1.0f))),
- bfloat16(1.0f));
- VERIFY_IS_EQUAL(numext::bit_cast<bfloat16>(
- numext::bit_cast<numext::uint16_t>(bfloat16(0.5f))),
- bfloat16(0.5f));
- VERIFY_IS_EQUAL(numext::bit_cast<bfloat16>(
- numext::bit_cast<numext::uint16_t>(bfloat16(-0.33333f))),
- bfloat16(-0.33333f));
- VERIFY_IS_EQUAL(numext::bit_cast<bfloat16>(
- numext::bit_cast<numext::uint16_t>(bfloat16(0.0f))),
- bfloat16(0.0f));
-
// Conversion from float.
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(1.0f), 0x3f80);
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.5f), 0x3f00);
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.33333f), 0x3eab);
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(3.38e38f), 0x7f7e);
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(3.40e38f), 0x7f80); // Becomes infinity.
+ VERIFY_IS_EQUAL(bfloat16(1.0f).value, 0x3f80);
+ VERIFY_IS_EQUAL(bfloat16(0.5f).value, 0x3f00);
+ VERIFY_IS_EQUAL(bfloat16(0.33333f).value, 0x3eab);
+ VERIFY_IS_EQUAL(bfloat16(3.38e38f).value, 0x7f7e);
+ VERIFY_IS_EQUAL(bfloat16(3.40e38f).value, 0x7f80); // Becomes infinity.
// Verify round-to-nearest-even behavior.
float val1 = static_cast<float>(bfloat16(__bfloat16_raw(0x3c00)));
float val2 = static_cast<float>(bfloat16(__bfloat16_raw(0x3c01)));
float val3 = static_cast<float>(bfloat16(__bfloat16_raw(0x3c02)));
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.5f * (val1 + val2)), 0x3c00);
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.5f * (val2 + val3)), 0x3c02);
+ VERIFY_IS_EQUAL(bfloat16(0.5f * (val1 + val2)).value, 0x3c00);
+ VERIFY_IS_EQUAL(bfloat16(0.5f * (val2 + val3)).value, 0x3c02);
// Conversion from int.
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(-1), 0xbf80);
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0), 0x0000);
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(1), 0x3f80);
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(2), 0x4000);
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(3), 0x4040);
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(12), 0x4140);
+ VERIFY_IS_EQUAL(bfloat16(-1).value, 0xbf80);
+ VERIFY_IS_EQUAL(bfloat16(0).value, 0x0000);
+ VERIFY_IS_EQUAL(bfloat16(1).value, 0x3f80);
+ VERIFY_IS_EQUAL(bfloat16(2).value, 0x4000);
+ VERIFY_IS_EQUAL(bfloat16(3).value, 0x4040);
+ VERIFY_IS_EQUAL(bfloat16(12).value, 0x4140);
// Conversion from bool.
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(false), 0x0000);
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(true), 0x3f80);
+ VERIFY_IS_EQUAL(bfloat16(false).value, 0x0000);
+ VERIFY_IS_EQUAL(bfloat16(true).value, 0x3f80);
// Conversion to bool
VERIFY_IS_EQUAL(static_cast<bool>(bfloat16(3)), true);
@@ -120,8 +102,8 @@
VERIFY_IS_EQUAL(bfloat16(0.0f), bfloat16(0.0f));
VERIFY_IS_EQUAL(bfloat16(-0.0f), bfloat16(0.0f));
VERIFY_IS_EQUAL(bfloat16(-0.0f), bfloat16(-0.0f));
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(0.0f), 0x0000);
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(-0.0f), 0x8000);
+ VERIFY_IS_EQUAL(bfloat16(0.0f).value, 0x0000);
+ VERIFY_IS_EQUAL(bfloat16(-0.0f).value, 0x8000);
// Flush denormals to zero
for (float denorm = -std::numeric_limits<float>::denorm_min();
@@ -135,16 +117,16 @@
VERIFY_IS_EQUAL(bfloat16(denorm), false);
if (std::signbit(denorm)) {
- VERIFY_BFLOAT16_BITS_EQUAL(bf_trunc, 0x8000);
+ VERIFY_IS_EQUAL(bf_trunc.value, 0x8000);
} else {
- VERIFY_BFLOAT16_BITS_EQUAL(bf_trunc, 0x0000);
+ VERIFY_IS_EQUAL(bf_trunc.value, 0x0000);
}
bfloat16 bf_round = Eigen::bfloat16_impl::float_to_bfloat16_rtne<false>(denorm);
VERIFY_IS_EQUAL(static_cast<float>(bf_round), 0.0f);
if (std::signbit(denorm)) {
- VERIFY_BFLOAT16_BITS_EQUAL(bf_round, 0x8000);
+ VERIFY_IS_EQUAL(bf_round.value, 0x8000);
} else {
- VERIFY_BFLOAT16_BITS_EQUAL(bf_round, 0x0000);
+ VERIFY_IS_EQUAL(bf_round.value, 0x0000);
}
}
@@ -249,72 +231,33 @@
VERIFY((numext::isinf)(bfloat16(__bfloat16_raw(0x7f80))));
VERIFY((numext::isnan)(bfloat16(__bfloat16_raw(0x7fc0))));
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(BinaryToFloat(0x0, 0xff, 0x40, 0x0)),
- 0x7fc0);
- VERIFY_BFLOAT16_BITS_EQUAL(bfloat16(BinaryToFloat(0x1, 0xff, 0x40, 0x0)),
- 0xffc0);
- VERIFY_BFLOAT16_BITS_EQUAL(Eigen::bfloat16_impl::truncate_to_bfloat16(
- BinaryToFloat(0x0, 0xff, 0x40, 0x0)),
- 0x7fc0);
- VERIFY_BFLOAT16_BITS_EQUAL(Eigen::bfloat16_impl::truncate_to_bfloat16(
- BinaryToFloat(0x1, 0xff, 0x40, 0x0)),
- 0xffc0);
+ VERIFY_IS_EQUAL(bfloat16(BinaryToFloat(0x0, 0xff, 0x40, 0x0)).value, 0x7fc0);
+ VERIFY_IS_EQUAL(bfloat16(BinaryToFloat(0x1, 0xff, 0x40, 0x0)).value, 0xffc0);
+ VERIFY_IS_EQUAL(Eigen::bfloat16_impl::truncate_to_bfloat16(
+ BinaryToFloat(0x0, 0xff, 0x40, 0x0))
+ .value,
+ 0x7fc0);
+ VERIFY_IS_EQUAL(Eigen::bfloat16_impl::truncate_to_bfloat16(
+ BinaryToFloat(0x1, 0xff, 0x40, 0x0))
+ .value,
+ 0xffc0);
}
void test_numtraits()
{
- std::cout << "epsilon = " << NumTraits<bfloat16>::epsilon() << " (0x"
- << std::hex
- << numext::bit_cast<numext::uint16_t>(
- NumTraits<bfloat16>::epsilon())
- << ")" << std::endl;
- std::cout << "highest = " << NumTraits<bfloat16>::highest() << " (0x"
- << std::hex
- << numext::bit_cast<numext::uint16_t>(
- NumTraits<bfloat16>::highest())
- << ")" << std::endl;
- std::cout << "lowest = " << NumTraits<bfloat16>::lowest() << " (0x"
- << std::hex
- << numext::bit_cast<numext::uint16_t>(NumTraits<bfloat16>::lowest())
- << ")" << std::endl;
- std::cout << "min = " << (std::numeric_limits<bfloat16>::min)()
- << " (0x" << std::hex
- << numext::bit_cast<numext::uint16_t>(
- (std::numeric_limits<bfloat16>::min)())
- << ")" << std::endl;
- std::cout << "denorm min = "
- << (std::numeric_limits<bfloat16>::denorm_min)() << " (0x"
- << std::hex
- << numext::bit_cast<numext::uint16_t>(
- (std::numeric_limits<bfloat16>::denorm_min)())
- << ")" << std::endl;
- std::cout << "infinity = " << NumTraits<bfloat16>::infinity() << " (0x"
- << std::hex
- << numext::bit_cast<numext::uint16_t>(
- NumTraits<bfloat16>::infinity())
- << ")" << std::endl;
- std::cout << "quiet nan = " << NumTraits<bfloat16>::quiet_NaN() << " (0x"
- << std::hex
- << numext::bit_cast<numext::uint16_t>(
- NumTraits<bfloat16>::quiet_NaN())
- << ")" << std::endl;
- std::cout << "signaling nan = "
- << std::numeric_limits<bfloat16>::signaling_NaN() << " (0x"
- << std::hex
- << numext::bit_cast<numext::uint16_t>(
- std::numeric_limits<bfloat16>::signaling_NaN())
- << ")" << std::endl;
+ std::cout << "epsilon = " << NumTraits<bfloat16>::epsilon() << " (0x" << std::hex << NumTraits<bfloat16>::epsilon().value << ")" << std::endl;
+ std::cout << "highest = " << NumTraits<bfloat16>::highest() << " (0x" << std::hex << NumTraits<bfloat16>::highest().value << ")" << std::endl;
+ std::cout << "lowest = " << NumTraits<bfloat16>::lowest() << " (0x" << std::hex << NumTraits<bfloat16>::lowest().value << ")" << std::endl;
+ std::cout << "min = " << (std::numeric_limits<bfloat16>::min)() << " (0x" << std::hex << (std::numeric_limits<bfloat16>::min)().value << ")" << std::endl;
+ std::cout << "denorm min = " << (std::numeric_limits<bfloat16>::denorm_min)() << " (0x" << std::hex << (std::numeric_limits<bfloat16>::denorm_min)().value << ")" << std::endl;
+ std::cout << "infinity = " << NumTraits<bfloat16>::infinity() << " (0x" << std::hex << NumTraits<bfloat16>::infinity().value << ")" << std::endl;
+ std::cout << "quiet nan = " << NumTraits<bfloat16>::quiet_NaN() << " (0x" << std::hex << NumTraits<bfloat16>::quiet_NaN().value << ")" << std::endl;
+ std::cout << "signaling nan = " << std::numeric_limits<bfloat16>::signaling_NaN() << " (0x" << std::hex << std::numeric_limits<bfloat16>::signaling_NaN().value << ")" << std::endl;
VERIFY(NumTraits<bfloat16>::IsSigned);
- VERIFY_IS_EQUAL(numext::bit_cast<numext::uint16_t>(
- std::numeric_limits<bfloat16>::infinity()),
- numext::bit_cast<numext::uint16_t>(
- bfloat16(std::numeric_limits<float>::infinity())));
- VERIFY_IS_EQUAL(numext::bit_cast<numext::uint16_t>(
- std::numeric_limits<bfloat16>::quiet_NaN()),
- numext::bit_cast<numext::uint16_t>(
- bfloat16(std::numeric_limits<float>::quiet_NaN())));
+ VERIFY_IS_EQUAL( std::numeric_limits<bfloat16>::infinity().value, bfloat16(std::numeric_limits<float>::infinity()).value );
+ VERIFY_IS_EQUAL( std::numeric_limits<bfloat16>::quiet_NaN().value, bfloat16(std::numeric_limits<float>::quiet_NaN()).value );
VERIFY( (std::numeric_limits<bfloat16>::min)() > bfloat16(0.f) );
VERIFY( (std::numeric_limits<bfloat16>::denorm_min)() > bfloat16(0.f) );
VERIFY_IS_EQUAL( (std::numeric_limits<bfloat16>::denorm_min)()/bfloat16(2), bfloat16(0.f) );
diff --git a/test/half_float.cpp b/test/half_float.cpp
index 09ce856..b301b37 100644
--- a/test/half_float.cpp
+++ b/test/half_float.cpp
@@ -11,10 +11,6 @@
#include <Eigen/src/Core/arch/Default/Half.h>
-#define VERIFY_HALF_BITS_EQUAL(h, bits) \
- VERIFY_IS_EQUAL((numext::bit_cast<numext::uint16_t>(h)), \
- (static_cast<numext::uint16_t>(bits)))
-
// Make sure it's possible to forward declare Eigen::half
namespace Eigen {
struct half;
@@ -26,51 +22,75 @@
{
using Eigen::half_impl::__half_raw;
- // Round-trip bit-cast with uint16.
- VERIFY_IS_EQUAL(
- numext::bit_cast<half>(numext::bit_cast<numext::uint16_t>(half(1.0f))),
- half(1.0f));
- VERIFY_IS_EQUAL(
- numext::bit_cast<half>(numext::bit_cast<numext::uint16_t>(half(0.5f))),
- half(0.5f));
- VERIFY_IS_EQUAL(numext::bit_cast<half>(
- numext::bit_cast<numext::uint16_t>(half(-0.33333f))),
- half(-0.33333f));
- VERIFY_IS_EQUAL(
- numext::bit_cast<half>(numext::bit_cast<numext::uint16_t>(half(0.0f))),
- half(0.0f));
-
+ // We don't use a uint16_t raw member x if the platform has native Arm __fp16
+ // support
+#if !defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
// Conversion from float.
- VERIFY_HALF_BITS_EQUAL(half(1.0f), 0x3c00);
- VERIFY_HALF_BITS_EQUAL(half(0.5f), 0x3800);
- VERIFY_HALF_BITS_EQUAL(half(0.33333f), 0x3555);
- VERIFY_HALF_BITS_EQUAL(half(0.0f), 0x0000);
- VERIFY_HALF_BITS_EQUAL(half(-0.0f), 0x8000);
- VERIFY_HALF_BITS_EQUAL(half(65504.0f), 0x7bff);
- VERIFY_HALF_BITS_EQUAL(half(65536.0f), 0x7c00); // Becomes infinity.
+ VERIFY_IS_EQUAL(half(1.0f).x, 0x3c00);
+ VERIFY_IS_EQUAL(half(0.5f).x, 0x3800);
+ VERIFY_IS_EQUAL(half(0.33333f).x, 0x3555);
+ VERIFY_IS_EQUAL(half(0.0f).x, 0x0000);
+ VERIFY_IS_EQUAL(half(-0.0f).x, 0x8000);
+ VERIFY_IS_EQUAL(half(65504.0f).x, 0x7bff);
+ VERIFY_IS_EQUAL(half(65536.0f).x, 0x7c00); // Becomes infinity.
// Denormals.
- VERIFY_HALF_BITS_EQUAL(half(-5.96046e-08f), 0x8001);
- VERIFY_HALF_BITS_EQUAL(half(5.96046e-08f), 0x0001);
- VERIFY_HALF_BITS_EQUAL(half(1.19209e-07f), 0x0002);
+ VERIFY_IS_EQUAL(half(-5.96046e-08f).x, 0x8001);
+ VERIFY_IS_EQUAL(half(5.96046e-08f).x, 0x0001);
+ VERIFY_IS_EQUAL(half(1.19209e-07f).x, 0x0002);
// Verify round-to-nearest-even behavior.
float val1 = float(half(__half_raw(0x3c00)));
float val2 = float(half(__half_raw(0x3c01)));
float val3 = float(half(__half_raw(0x3c02)));
- VERIFY_HALF_BITS_EQUAL(half(0.5f * (val1 + val2)), 0x3c00);
- VERIFY_HALF_BITS_EQUAL(half(0.5f * (val2 + val3)), 0x3c02);
+ VERIFY_IS_EQUAL(half(0.5f * (val1 + val2)).x, 0x3c00);
+ VERIFY_IS_EQUAL(half(0.5f * (val2 + val3)).x, 0x3c02);
// Conversion from int.
- VERIFY_HALF_BITS_EQUAL(half(-1), 0xbc00);
- VERIFY_HALF_BITS_EQUAL(half(0), 0x0000);
- VERIFY_HALF_BITS_EQUAL(half(1), 0x3c00);
- VERIFY_HALF_BITS_EQUAL(half(2), 0x4000);
- VERIFY_HALF_BITS_EQUAL(half(3), 0x4200);
+ VERIFY_IS_EQUAL(half(-1).x, 0xbc00);
+ VERIFY_IS_EQUAL(half(0).x, 0x0000);
+ VERIFY_IS_EQUAL(half(1).x, 0x3c00);
+ VERIFY_IS_EQUAL(half(2).x, 0x4000);
+ VERIFY_IS_EQUAL(half(3).x, 0x4200);
// Conversion from bool.
- VERIFY_HALF_BITS_EQUAL(half(false), 0x0000);
- VERIFY_HALF_BITS_EQUAL(half(true), 0x3c00);
+ VERIFY_IS_EQUAL(half(false).x, 0x0000);
+ VERIFY_IS_EQUAL(half(true).x, 0x3c00);
+#endif
+
+#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
+ // Conversion from float.
+ VERIFY_IS_EQUAL(half(1.0f).x, __fp16(1.0f));
+ VERIFY_IS_EQUAL(half(0.5f).x, __fp16(0.5f));
+ VERIFY_IS_EQUAL(half(0.33333f).x, __fp16(0.33333f));
+ VERIFY_IS_EQUAL(half(0.0f).x, __fp16(0.0f));
+ VERIFY_IS_EQUAL(half(-0.0f).x, __fp16(-0.0f));
+ VERIFY_IS_EQUAL(half(65504.0f).x, __fp16(65504.0f));
+ VERIFY_IS_EQUAL(half(65536.0f).x, __fp16(65536.0f)); // Becomes infinity.
+
+ // Denormals.
+ VERIFY_IS_EQUAL(half(-5.96046e-08f).x, __fp16(-5.96046e-08f));
+ VERIFY_IS_EQUAL(half(5.96046e-08f).x, __fp16(5.96046e-08f));
+ VERIFY_IS_EQUAL(half(1.19209e-07f).x, __fp16(1.19209e-07f));
+
+ // Verify round-to-nearest-even behavior.
+ float val1 = float(half(__half_raw(0x3c00)));
+ float val2 = float(half(__half_raw(0x3c01)));
+ float val3 = float(half(__half_raw(0x3c02)));
+ VERIFY_IS_EQUAL(half(0.5f * (val1 + val2)).x, __fp16(0.5f * (val1 + val2)));
+ VERIFY_IS_EQUAL(half(0.5f * (val2 + val3)).x, __fp16(0.5f * (val2 + val3)));
+
+ // Conversion from int.
+ VERIFY_IS_EQUAL(half(-1).x, __fp16(-1));
+ VERIFY_IS_EQUAL(half(0).x, __fp16(0));
+ VERIFY_IS_EQUAL(half(1).x, __fp16(1));
+ VERIFY_IS_EQUAL(half(2).x, __fp16(2));
+ VERIFY_IS_EQUAL(half(3).x, __fp16(3));
+
+ // Conversion from bool.
+ VERIFY_IS_EQUAL(half(false).x, __fp16(false));
+ VERIFY_IS_EQUAL(half(true).x, __fp16(true));
+#endif
// Conversion to float.
VERIFY_IS_EQUAL(float(half(__half_raw(0x0000))), 0.0f);
@@ -123,57 +143,24 @@
void test_numtraits()
{
- std::cout << "epsilon = " << NumTraits<half>::epsilon() << " (0x"
- << std::hex
- << numext::bit_cast<numext::uint16_t>(NumTraits<half>::epsilon())
- << ")" << std::endl;
- std::cout << "highest = " << NumTraits<half>::highest() << " (0x"
- << std::hex
- << numext::bit_cast<numext::uint16_t>(NumTraits<half>::highest())
- << ")" << std::endl;
- std::cout << "lowest = " << NumTraits<half>::lowest() << " (0x"
- << std::hex
- << numext::bit_cast<numext::uint16_t>(NumTraits<half>::lowest())
- << ")" << std::endl;
- std::cout << "min = " << (std::numeric_limits<half>::min)()
- << " (0x" << std::hex
- << numext::bit_cast<numext::uint16_t>(
- half((std::numeric_limits<half>::min)()))
- << ")" << std::endl;
- std::cout << "denorm min = " << (std::numeric_limits<half>::denorm_min)()
- << " (0x" << std::hex
- << numext::bit_cast<numext::uint16_t>(
- half((std::numeric_limits<half>::denorm_min)()))
- << ")" << std::endl;
- std::cout << "infinity = " << NumTraits<half>::infinity() << " (0x"
- << std::hex
- << numext::bit_cast<numext::uint16_t>(NumTraits<half>::infinity())
- << ")" << std::endl;
- std::cout << "quiet nan = " << NumTraits<half>::quiet_NaN() << " (0x"
- << std::hex
- << numext::bit_cast<numext::uint16_t>(NumTraits<half>::quiet_NaN())
- << ")" << std::endl;
- std::cout << "signaling nan = " << std::numeric_limits<half>::signaling_NaN()
- << " (0x" << std::hex
- << numext::bit_cast<numext::uint16_t>(
- std::numeric_limits<half>::signaling_NaN())
- << ")" << std::endl;
+ std::cout << "epsilon = " << NumTraits<half>::epsilon() << " (0x" << std::hex << NumTraits<half>::epsilon().x << ")" << std::endl;
+ std::cout << "highest = " << NumTraits<half>::highest() << " (0x" << std::hex << NumTraits<half>::highest().x << ")" << std::endl;
+ std::cout << "lowest = " << NumTraits<half>::lowest() << " (0x" << std::hex << NumTraits<half>::lowest().x << ")" << std::endl;
+ std::cout << "min = " << (std::numeric_limits<half>::min)() << " (0x" << std::hex << half((std::numeric_limits<half>::min)()).x << ")" << std::endl;
+ std::cout << "denorm min = " << (std::numeric_limits<half>::denorm_min)() << " (0x" << std::hex << half((std::numeric_limits<half>::denorm_min)()).x << ")" << std::endl;
+ std::cout << "infinity = " << NumTraits<half>::infinity() << " (0x" << std::hex << NumTraits<half>::infinity().x << ")" << std::endl;
+ std::cout << "quiet nan = " << NumTraits<half>::quiet_NaN() << " (0x" << std::hex << NumTraits<half>::quiet_NaN().x << ")" << std::endl;
+ std::cout << "signaling nan = " << std::numeric_limits<half>::signaling_NaN() << " (0x" << std::hex << std::numeric_limits<half>::signaling_NaN().x << ")" << std::endl;
VERIFY(NumTraits<half>::IsSigned);
- VERIFY_IS_EQUAL(
- numext::bit_cast<numext::uint16_t>(std::numeric_limits<half>::infinity()),
- numext::bit_cast<numext::uint16_t>(
- half(std::numeric_limits<float>::infinity())));
- VERIFY_IS_EQUAL(numext::bit_cast<numext::uint16_t>(
- std::numeric_limits<half>::quiet_NaN()),
- numext::bit_cast<numext::uint16_t>(
- half(std::numeric_limits<float>::quiet_NaN())));
- VERIFY_IS_EQUAL(numext::bit_cast<numext::uint16_t>(
- std::numeric_limits<half>::signaling_NaN()),
- numext::bit_cast<numext::uint16_t>(
- half(std::numeric_limits<float>::signaling_NaN())));
+ VERIFY_IS_EQUAL( std::numeric_limits<half>::infinity().x, half(std::numeric_limits<float>::infinity()).x );
+// If we have a native fp16 types this becomes a nan == nan comparision so we have to disable it
+#if !defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
+ VERIFY_IS_EQUAL( std::numeric_limits<half>::quiet_NaN().x, half(std::numeric_limits<float>::quiet_NaN()).x );
+ VERIFY_IS_EQUAL( std::numeric_limits<half>::signaling_NaN().x, half(std::numeric_limits<float>::signaling_NaN()).x );
+#endif
VERIFY( (std::numeric_limits<half>::min)() > half(0.f) );
VERIFY( (std::numeric_limits<half>::denorm_min)() > half(0.f) );
VERIFY( (std::numeric_limits<half>::min)()/half(2) > half(0.f) );
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index feef148..81425b8 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -1073,7 +1073,7 @@
CALL_SUBTEST_10(test::runner<uint64_t>::run());
CALL_SUBTEST_11(test::runner<std::complex<float> >::run());
CALL_SUBTEST_12(test::runner<std::complex<double> >::run());
- CALL_SUBTEST_13(test::runner<half>::run());
+ CALL_SUBTEST_13((packetmath<half, internal::packet_traits<half>::type>()));
CALL_SUBTEST_14((packetmath<bool, internal::packet_traits<bool>::type>()));
CALL_SUBTEST_15(test::runner<bfloat16>::run());
g_first_pass = false;
diff --git a/test/sparse_product.cpp b/test/sparse_product.cpp
index 11310d6..c8caebe 100644
--- a/test/sparse_product.cpp
+++ b/test/sparse_product.cpp
@@ -100,7 +100,6 @@
VERIFY_IS_APPROX(m4=(m2t.transpose()*m3t.transpose()).pruned(0), refMat4=refMat2t.transpose()*refMat3t.transpose());
VERIFY_IS_APPROX(m4=(m2*m3t.transpose()).pruned(0), refMat4=refMat2*refMat3t.transpose());
-#ifndef EIGEN_SPARSE_PRODUCT_IGNORE_TEMPORARY_COUNT
// make sure the right product implementation is called:
if((!SparseMatrixType::IsRowMajor) && m2.rows()<=m3.cols())
{
@@ -108,7 +107,6 @@
VERIFY_EVALUATION_COUNT(m4 = (m2*m3).pruned(0), 1);
VERIFY_EVALUATION_COUNT(m4 = (m2*m3).eval().pruned(0), 4);
}
-#endif
// and that pruning is effective:
{
@@ -153,7 +151,7 @@
VERIFY_IS_APPROX(dm4.noalias()-=m2*refMat3, refMat4-=refMat2*refMat3);
VERIFY_IS_APPROX(dm4=m2*(refMat3+refMat3), refMat4=refMat2*(refMat3+refMat3));
VERIFY_IS_APPROX(dm4=m2t.transpose()*(refMat3+refMat5)*0.5, refMat4=refMat2t.transpose()*(refMat3+refMat5)*0.5);
-
+
// sparse * dense vector
VERIFY_IS_APPROX(dm4.col(0)=m2*refMat3.col(0), refMat4.col(0)=refMat2*refMat3.col(0));
VERIFY_IS_APPROX(dm4.col(0)=m2*refMat3t.transpose().col(0), refMat4.col(0)=refMat2*refMat3t.transpose().col(0));
@@ -184,7 +182,7 @@
VERIFY_IS_APPROX( m4=m2.middleCols(c,1)*dm5.col(c1).transpose(), refMat4=refMat2.col(c)*dm5.col(c1).transpose());
VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count());
VERIFY_IS_APPROX(dm4=m2.col(c)*dm5.col(c1).transpose(), refMat4=refMat2.col(c)*dm5.col(c1).transpose());
-
+
VERIFY_IS_APPROX(m4=dm5.col(c1)*m2.col(c).transpose(), refMat4=dm5.col(c1)*refMat2.col(c).transpose());
VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count());
VERIFY_IS_APPROX(m4=dm5.col(c1)*m2.middleCols(c,1).transpose(), refMat4=dm5.col(c1)*refMat2.col(c).transpose());
@@ -213,23 +211,23 @@
}
VERIFY_IS_APPROX(m6=m6*m6, refMat6=refMat6*refMat6);
-
+
// sparse matrix * sparse vector
ColSpVector cv0(cols), cv1;
DenseVector dcv0(cols), dcv1;
initSparse(2*density,dcv0, cv0);
-
+
RowSpVector rv0(depth), rv1;
RowDenseVector drv0(depth), drv1(rv1);
initSparse(2*density,drv0, rv0);
- VERIFY_IS_APPROX(cv1 = m3 * cv0, dcv1 = refMat3 * dcv0);
+ VERIFY_IS_APPROX(cv1=m3*cv0, dcv1=refMat3*dcv0);
VERIFY_IS_APPROX(rv1=rv0*m3, drv1=drv0*refMat3);
VERIFY_IS_APPROX(cv1=m3t.adjoint()*cv0, dcv1=refMat3t.adjoint()*dcv0);
VERIFY_IS_APPROX(cv1=rv0*m3, dcv1=drv0*refMat3);
VERIFY_IS_APPROX(rv1=m3*cv0, drv1=refMat3*dcv0);
}
-
+
// test matrix - diagonal product
{
DenseMatrix refM2 = DenseMatrix::Zero(rows, cols);
@@ -245,7 +243,7 @@
VERIFY_IS_APPROX(m3=m2.transpose()*d2, refM3=refM2.transpose()*d2);
VERIFY_IS_APPROX(m3=d2*m2, refM3=d2*refM2);
VERIFY_IS_APPROX(m3=d1*m2.transpose(), refM3=d1*refM2.transpose());
-
+
// also check with a SparseWrapper:
DenseVector v1 = DenseVector::Random(cols);
DenseVector v2 = DenseVector::Random(rows);
@@ -254,12 +252,12 @@
VERIFY_IS_APPROX(m3=m2.transpose()*v2.asDiagonal(), refM3=refM2.transpose()*v2.asDiagonal());
VERIFY_IS_APPROX(m3=v2.asDiagonal()*m2, refM3=v2.asDiagonal()*refM2);
VERIFY_IS_APPROX(m3=v1.asDiagonal()*m2.transpose(), refM3=v1.asDiagonal()*refM2.transpose());
-
+
VERIFY_IS_APPROX(m3=v2.asDiagonal()*m2*v1.asDiagonal(), refM3=v2.asDiagonal()*refM2*v1.asDiagonal());
VERIFY_IS_APPROX(v2=m2*v1.asDiagonal()*v1, refM2*v1.asDiagonal()*v1);
VERIFY_IS_APPROX(v3=v2.asDiagonal()*m2*v1, v2.asDiagonal()*refM2*v1);
-
+
// evaluate to a dense matrix to check the .row() and .col() iterator functions
VERIFY_IS_APPROX(d3=m2*d1, refM3=refM2*d1);
VERIFY_IS_APPROX(d3=m2.transpose()*d2, refM3=refM2.transpose()*d2);
@@ -312,20 +310,20 @@
VERIFY_IS_APPROX(x.noalias()+=mUp.template selfadjointView<Upper>()*b, refX+=refS*b);
VERIFY_IS_APPROX(x.noalias()-=mLo.template selfadjointView<Lower>()*b, refX-=refS*b);
VERIFY_IS_APPROX(x.noalias()+=mS.template selfadjointView<Upper|Lower>()*b, refX+=refS*b);
-
+
// sparse selfadjointView with sparse matrices
SparseMatrixType mSres(rows,rows);
VERIFY_IS_APPROX(mSres = mLo.template selfadjointView<Lower>()*mS,
refX = refLo.template selfadjointView<Lower>()*refS);
VERIFY_IS_APPROX(mSres = mS * mLo.template selfadjointView<Lower>(),
refX = refS * refLo.template selfadjointView<Lower>());
-
+
// sparse triangularView with dense matrices
VERIFY_IS_APPROX(x=mA.template triangularView<Upper>()*b, refX=refA.template triangularView<Upper>()*b);
VERIFY_IS_APPROX(x=mA.template triangularView<Lower>()*b, refX=refA.template triangularView<Lower>()*b);
VERIFY_IS_APPROX(x=b*mA.template triangularView<Upper>(), refX=b*refA.template triangularView<Upper>());
VERIFY_IS_APPROX(x=b*mA.template triangularView<Lower>(), refX=b*refA.template triangularView<Lower>());
-
+
// sparse triangularView with sparse matrices
VERIFY_IS_APPROX(mSres = mA.template triangularView<Lower>()*mS, refX = refA.template triangularView<Lower>()*refS);
VERIFY_IS_APPROX(mSres = mS * mA.template triangularView<Lower>(), refX = refS * refA.template triangularView<Lower>());
@@ -370,9 +368,9 @@
Vector d(1);
d[0] = 2;
-
+
double res = 2;
-
+
VERIFY_IS_APPROX( ( cmA*d.asDiagonal() ).eval().coeff(0,0), res );
VERIFY_IS_APPROX( ( d.asDiagonal()*rmA ).eval().coeff(0,0), res );
VERIFY_IS_APPROX( ( rmA*d.asDiagonal() ).eval().coeff(0,0), res );
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
index c413d65..ea286fe 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
@@ -91,22 +91,24 @@
template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Eigen::half RandomToTypeUniform<Eigen::half>(uint64_t* state, uint64_t stream) {
- // Generate 10 random bits for the mantissa, merge with exponent.
+ Eigen::half result;
+ // Generate 10 random bits for the mantissa
unsigned rnd = PCG_XSH_RS_generator(state, stream);
- const uint16_t half_bits =
- static_cast<uint16_t>(rnd & 0x3ffu) | (static_cast<uint16_t>(15) << 10);
- Eigen::half result = Eigen::numext::bit_cast<Eigen::half>(half_bits);
+ result.x = static_cast<uint16_t>(rnd & 0x3ffu);
+ // Set the exponent
+ result.x |= (static_cast<uint16_t>(15) << 10);
// Return the final result
return result - Eigen::half(1.0f);
}
template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Eigen::bfloat16 RandomToTypeUniform<Eigen::bfloat16>(uint64_t* state, uint64_t stream) {
- // Generate 7 random bits for the mantissa, merge with exponent.
+ Eigen::bfloat16 result;
+ // Generate 7 random bits for the mantissa
unsigned rnd = PCG_XSH_RS_generator(state, stream);
- const uint16_t half_bits =
- static_cast<uint16_t>(rnd & 0x7fu) | (static_cast<uint16_t>(127) << 7);
- Eigen::bfloat16 result = Eigen::numext::bit_cast<Eigen::bfloat16>(half_bits);
+ result.value = static_cast<uint16_t>(rnd & 0x7fu);
+ // Set the exponent
+ result.value |= (static_cast<uint16_t>(127) << 7);
// Return the final result
return result - Eigen::bfloat16(1.0f);
}
@@ -167,24 +169,19 @@
uint64_t seed = 0) {
m_state = PCG_XSH_RS_state(seed);
#ifdef EIGEN_USE_SYCL
- // In SYCL it is not possible to build PCG_XSH_RS_state in one step.
+ // In SYCL it is not possible to build PCG_XSH_RS_state in one step.
// Therefor, we need two step to initializate the m_state.
// IN SYCL, the constructor of the functor is s called on the CPU
- // and we get the clock seed here from the CPU. However, This seed is
- // the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc
- // is not a global function.
- // and only available on the Operator() function (which is called on the
- // GPU). Thus for CUDA (((CLOCK + global_thread_id)*
- // 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread
- // but for SYCL ((CLOCK * 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL)
- // is passed to each thread and each thread adds the (global_thread_id*
- // 6364136223846793005ULL) for itself only once, in order to complete the
- // construction similar to CUDA Therefore, the thread Id injection is not
- // available at this stage.
- // However when the operator() is called the thread ID will be avilable. So
- // inside the opeator,
- // we add the thrreadID, BlockId,... (which is equivalent of i)
- // to the seed and construct the unique m_state per thead similar to cuda.
+ // and we get the clock seed here from the CPU. However, This seed is
+ //the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc is not a global function.
+ // and only available on the Operator() function (which is called on the GPU).
+ // Thus for CUDA (((CLOCK + global_thread_id)* 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread
+ // but for SYCL ((CLOCK * 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread and each thread adds
+ // the (global_thread_id* 6364136223846793005ULL) for itself only once, in order to complete the construction
+ // similar to CUDA Therefore, the thread Id injection is not available at this stage.
+ //However when the operator() is called the thread ID will be avilable. So inside the opeator,
+ // we add the thrreadID, BlockId,... (which is equivalent of i)
+ //to the seed and construct the unique m_state per thead similar to cuda.
m_exec_once =false;
#endif
}
@@ -285,18 +282,16 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) {
m_state = PCG_XSH_RS_state(seed);
#ifdef EIGEN_USE_SYCL
- // In SYCL it is not possible to build PCG_XSH_RS_state in one step.
+ // In SYCL it is not possible to build PCG_XSH_RS_state in one step.
// Therefor, we need two steps to initializate the m_state.
// IN SYCL, the constructor of the functor is s called on the CPU
- // and we get the clock seed here from the CPU. However, This seed is
- // the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc
- // is not a global function.
- // and only available on the Operator() function (which is called on the
- // GPU). Therefore, the thread Id injection is not available at this stage.
- // However when the operator()
- // is called the thread ID will be avilable. So inside the opeator,
- // we add the thrreadID, BlockId,... (which is equivalent of i)
- // to the seed and construct the unique m_state per thead similar to cuda.
+ // and we get the clock seed here from the CPU. However, This seed is
+ //the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc is not a global function.
+ // and only available on the Operator() function (which is called on the GPU).
+ // Therefore, the thread Id injection is not available at this stage. However when the operator()
+ //is called the thread ID will be avilable. So inside the opeator,
+ // we add the thrreadID, BlockId,... (which is equivalent of i)
+ //to the seed and construct the unique m_state per thead similar to cuda.
m_exec_once =false;
#endif
}
diff --git a/unsupported/test/cxx11_tensor_random.cpp b/unsupported/test/cxx11_tensor_random.cpp
index 95dd614..4740d58 100644
--- a/unsupported/test/cxx11_tensor_random.cpp
+++ b/unsupported/test/cxx11_tensor_random.cpp
@@ -11,9 +11,9 @@
#include <Eigen/CXX11/Tensor>
-template <typename Scalar>
-static void test_default() {
- Tensor<Scalar, 1> vec(6);
+static void test_default()
+{
+ Tensor<float, 1> vec(6);
vec.setRandom();
// Fixme: we should check that the generated numbers follow a uniform
@@ -23,10 +23,10 @@
}
}
-template <typename Scalar>
-static void test_normal() {
- Tensor<Scalar, 1> vec(6);
- vec.template setRandom<Eigen::internal::NormalRandomGenerator<Scalar>>();
+static void test_normal()
+{
+ Tensor<float, 1> vec(6);
+ vec.setRandom<Eigen::internal::NormalRandomGenerator<float>>();
// Fixme: we should check that the generated numbers follow a gaussian
// distribution instead.
@@ -35,6 +35,7 @@
}
}
+
struct MyGenerator {
MyGenerator() { }
MyGenerator(const MyGenerator&) { }
@@ -71,13 +72,7 @@
EIGEN_DECLARE_TEST(cxx11_tensor_random)
{
- CALL_SUBTEST((test_default<float>()));
- CALL_SUBTEST((test_normal<float>()));
- CALL_SUBTEST((test_default<double>()));
- CALL_SUBTEST((test_normal<double>()));
- CALL_SUBTEST((test_default<Eigen::half>()));
- CALL_SUBTEST((test_normal<Eigen::half>()));
- CALL_SUBTEST((test_default<Eigen::bfloat16>()));
- CALL_SUBTEST((test_normal<Eigen::bfloat16>()));
+ CALL_SUBTEST(test_default());
+ CALL_SUBTEST(test_normal());
CALL_SUBTEST(test_custom());
}
diff --git a/unsupported/test/sparse_extra.cpp b/unsupported/test/sparse_extra.cpp
index cbb799a..b5d656f 100644
--- a/unsupported/test/sparse_extra.cpp
+++ b/unsupported/test/sparse_extra.cpp
@@ -22,9 +22,6 @@
#endif
#define EIGEN_NO_DEPRECATED_WARNING
-// Disable counting of temporaries, since sparse_product(DynamicSparseMatrix)
-// has an extra copy-assignment.
-#define EIGEN_SPARSE_PRODUCT_IGNORE_TEMPORARY_COUNT
#include "sparse_product.cpp"
#if 0 // sparse_basic(DynamicSparseMatrix) does not compile at all -> disabled