Update Eigen to commit:59b3ef5409452095e27e8d39a2ca81139bb8c971
CHANGELOG
=========
59b3ef540 - Partially Vectorize Cast
PiperOrigin-RevId: 539191252
Change-Id: I46c7dde9ca32e92c5bca4d7193321600b1e9fa6d
diff --git a/Eigen/Core b/Eigen/Core
index 1e7e38c..1a9b470 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -186,6 +186,7 @@
// Generic half float support
#include "src/Core/arch/Default/Half.h"
#include "src/Core/arch/Default/BFloat16.h"
+#include "src/Core/arch/Default/TypeCasting.h"
#include "src/Core/arch/Default/GenericPacketMathFunctionsFwd.h"
#if defined EIGEN_VECTORIZE_AVX512
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 32371c5..e233efb 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -621,207 +621,6 @@
Data m_d;
};
-// ----------------------- Casting ---------------------
-
-template <typename SrcType, typename DstType, typename ArgType>
-struct unary_evaluator<CwiseUnaryOp<core_cast_op<SrcType, DstType>, ArgType>, IndexBased> {
- using CastOp = core_cast_op<SrcType, DstType>;
- using XprType = CwiseUnaryOp<CastOp, ArgType>;
-
- // Use the largest packet type by default
- using SrcPacketType = typename packet_traits<SrcType>::type;
- static constexpr int SrcPacketSize = unpacket_traits<SrcPacketType>::size;
- static constexpr int SrcPacketBytes = SrcPacketSize * sizeof(SrcType);
-
- enum {
- CoeffReadCost = int(evaluator<ArgType>::CoeffReadCost) + int(functor_traits<CastOp>::Cost),
- PacketAccess = functor_traits<CastOp>::PacketAccess,
- ActualPacketAccessBit = PacketAccess ? PacketAccessBit : 0,
- Flags = evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | ActualPacketAccessBit),
- IsRowMajor = (evaluator<ArgType>::Flags & RowMajorBit),
- Alignment = evaluator<ArgType>::Alignment
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& xpr)
- : m_argImpl(xpr.nestedExpression()), m_rows(xpr.rows()), m_cols(xpr.cols()) {
- EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<CastOp>::Cost);
- EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
- }
-
- template <typename DstPacketType>
- using AltSrcScalarOp = std::enable_if_t<(unpacket_traits<DstPacketType>::size < SrcPacketSize && !find_packet_by_size<SrcType, unpacket_traits<DstPacketType>::size>::value), bool>;
- template <typename DstPacketType>
- using SrcPacketArgs1 = std::enable_if_t<(find_packet_by_size<SrcType, unpacket_traits<DstPacketType>::size>::value), bool>;
- template <typename DstPacketType>
- using SrcPacketArgs2 = std::enable_if_t<(unpacket_traits<DstPacketType>::size) == (2 * SrcPacketSize), bool>;
- template <typename DstPacketType>
- using SrcPacketArgs4 = std::enable_if_t<(unpacket_traits<DstPacketType>::size) == (4 * SrcPacketSize), bool>;
- template <typename DstPacketType>
- using SrcPacketArgs8 = std::enable_if_t<(unpacket_traits<DstPacketType>::size) == (8 * SrcPacketSize), bool>;
-
- template <bool UseRowMajor = IsRowMajor, std::enable_if_t<UseRowMajor, bool> = true>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool check_array_bounds(Index, Index col, Index packetSize) const {
- return col + packetSize <= cols();
- }
- template <bool UseRowMajor = IsRowMajor, std::enable_if_t<!UseRowMajor, bool> = true>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool check_array_bounds(Index row, Index, Index packetSize) const {
- return row + packetSize <= rows();
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool check_array_bounds(Index index, Index packetSize) const {
- return index + packetSize <= size();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE SrcType srcCoeff(Index row, Index col, Index offset) const {
- Index actualRow = IsRowMajor ? row : row + offset;
- Index actualCol = IsRowMajor ? col + offset : col;
- return m_argImpl.coeff(actualRow, actualCol);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE SrcType srcCoeff(Index index, Index offset) const {
- Index actualIndex = index + offset;
- return m_argImpl.coeff(actualIndex);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstType coeff(Index row, Index col) const {
- return cast<SrcType, DstType>(srcCoeff(row, col, 0));
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstType coeff(Index index) const { return cast<SrcType, DstType>(srcCoeff(index, 0)); }
-
- template <int LoadMode, typename PacketType = SrcPacketType>
- EIGEN_STRONG_INLINE PacketType srcPacket(Index row, Index col, Index offset) const {
- constexpr int PacketSize = unpacket_traits<PacketType>::size;
- Index actualRow = IsRowMajor ? row : row + (offset * PacketSize);
- Index actualCol = IsRowMajor ? col + (offset * PacketSize) : col;
- eigen_assert(check_array_bounds(actualRow, actualCol, PacketSize) && "Array index out of bounds");
- return m_argImpl.template packet<LoadMode, PacketType>(actualRow, actualCol);
- }
- template <int LoadMode, typename PacketType = SrcPacketType>
- EIGEN_STRONG_INLINE PacketType srcPacket(Index index, Index offset) const {
- constexpr int PacketSize = unpacket_traits<PacketType>::size;
- Index actualIndex = index + (offset * PacketSize);
- eigen_assert(check_array_bounds(actualIndex, PacketSize) && "Array index out of bounds");
- return m_argImpl.template packet<LoadMode, PacketType>(actualIndex);
- }
-
- // There is no source packet type with equal or fewer elements than DstPacketType.
- // This is problematic as the evaluation loop may attempt to access data outside the bounds of the array.
- // For example, consider the cast utilizing pcast<Packet4f,Packet2d> with an array of size 4: {0.0f,1.0f,2.0f,3.0f}.
- // The first iteration of the evaulation loop will load 16 bytes: {0.0f,1.0f,2.0f,3.0f} and cast to {0.0,1.0}, which is acceptable.
- // The second iteration will load 16 bytes: {2.0f,3.0f,?,?}, which is outside the bounds of the array.
-
- // Instead, perform runtime check to determine if the load would access data outside the bounds of the array.
- // If not, perform full load. Otherwise, revert to a scalar loop to perform a partial load.
- // In either case, perform a vectorized cast of the source packet.
- template <int LoadMode, typename DstPacketType, AltSrcScalarOp<DstPacketType> = true>
- EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const {
- constexpr int DstPacketSize = unpacket_traits<DstPacketType>::size;
- constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType);
- constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode);
- SrcPacketType src;
- if (EIGEN_PREDICT_TRUE(check_array_bounds(row, col, SrcPacketSize))) {
- src = srcPacket<SrcLoadMode>(row, col, 0);
- } else {
- Array<SrcType, SrcPacketSize, 1> srcArray;
- for (size_t k = 0; k < DstPacketSize; k++) srcArray[k] = srcCoeff(row, col, k);
- for (size_t k = DstPacketSize; k < SrcPacketSize; k++) srcArray[k] = SrcType(0);
- src = pload<SrcPacketType>(srcArray.data());
- }
- return pcast<SrcPacketType, DstPacketType>(src);
- }
- // Use the source packet type with the same size as DstPacketType, if it exists
- template <int LoadMode, typename DstPacketType, SrcPacketArgs1<DstPacketType> = true>
- EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const {
- constexpr int DstPacketSize = unpacket_traits<DstPacketType>::size;
- using SizedSrcPacketType = typename find_packet_by_size<SrcType, DstPacketSize>::type;
- constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType);
- constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode);
- return pcast<SizedSrcPacketType, DstPacketType>(
- srcPacket<SrcLoadMode, SizedSrcPacketType>(row, col, 0));
- }
- // unpacket_traits<DstPacketType>::size == 2 * SrcPacketSize
- template <int LoadMode, typename DstPacketType, SrcPacketArgs2<DstPacketType> = true>
- EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const {
- constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
- return pcast<SrcPacketType, DstPacketType>(
- srcPacket<SrcLoadMode>(row, col, 0), srcPacket<SrcLoadMode>(row, col, 1));
- }
- // unpacket_traits<DstPacketType>::size == 4 * SrcPacketSize
- template <int LoadMode, typename DstPacketType, SrcPacketArgs4<DstPacketType> = true>
- EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const {
- constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
- return pcast<SrcPacketType, DstPacketType>(
- srcPacket<SrcLoadMode>(row, col, 0), srcPacket<SrcLoadMode>(row, col, 1),
- srcPacket<SrcLoadMode>(row, col, 2), srcPacket<SrcLoadMode>(row, col, 3));
- }
- // unpacket_traits<DstPacketType>::size == 8 * SrcPacketSize
- template <int LoadMode, typename DstPacketType, SrcPacketArgs8<DstPacketType> = true>
- EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const {
- constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
- return pcast<SrcPacketType, DstPacketType>(
- srcPacket<SrcLoadMode>(row, col, 0), srcPacket<SrcLoadMode>(row, col, 1),
- srcPacket<SrcLoadMode>(row, col, 2), srcPacket<SrcLoadMode>(row, col, 3),
- srcPacket<SrcLoadMode>(row, col, 4), srcPacket<SrcLoadMode>(row, col, 5),
- srcPacket<SrcLoadMode>(row, col, 6), srcPacket<SrcLoadMode>(row, col, 7));
- }
-
- // Analagous routines for linear access.
- template <int LoadMode, typename DstPacketType, AltSrcScalarOp<DstPacketType> = true>
- EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
- constexpr int DstPacketSize = unpacket_traits<DstPacketType>::size;
- constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType);
- constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode);
- SrcPacketType src;
- if (EIGEN_PREDICT_TRUE(check_array_bounds(index, SrcPacketSize))) {
- src = srcPacket<SrcLoadMode>(index, 0);
- } else {
- Array<SrcType, SrcPacketSize, 1> srcArray;
- for (size_t k = 0; k < DstPacketSize; k++) srcArray[k] = srcCoeff(index, k);
- for (size_t k = DstPacketSize; k < SrcPacketSize; k++) srcArray[k] = SrcType(0);
- src = pload<SrcPacketType>(srcArray.data());
- }
- return pcast<SrcPacketType, DstPacketType>(src);
- }
- template <int LoadMode, typename DstPacketType, SrcPacketArgs1<DstPacketType> = true>
- EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
- constexpr int DstPacketSize = unpacket_traits<DstPacketType>::size;
- using SizedSrcPacketType = typename find_packet_by_size<SrcType, DstPacketSize>::type;
- constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType);
- constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode);
- return pcast<SizedSrcPacketType, DstPacketType>(
- srcPacket<SrcLoadMode, SizedSrcPacketType>(index, 0));
- }
- template <int LoadMode, typename DstPacketType, SrcPacketArgs2<DstPacketType> = true>
- EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
- constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
- return pcast<SrcPacketType, DstPacketType>(
- srcPacket<SrcLoadMode>(index, 0), srcPacket<SrcLoadMode>(index, 1));
- }
- template <int LoadMode, typename DstPacketType, SrcPacketArgs4<DstPacketType> = true>
- EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
- constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
- return pcast<SrcPacketType, DstPacketType>(
- srcPacket<SrcLoadMode>(index, 0), srcPacket<SrcLoadMode>(index, 1),
- srcPacket<SrcLoadMode>(index, 2), srcPacket<SrcLoadMode>(index, 3));
- }
- template <int LoadMode, typename DstPacketType, SrcPacketArgs8<DstPacketType> = true>
- EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
- constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
- return pcast<SrcPacketType, DstPacketType>(
- srcPacket<SrcLoadMode>(index, 0), srcPacket<SrcLoadMode>(index, 1),
- srcPacket<SrcLoadMode>(index, 2), srcPacket<SrcLoadMode>(index, 3),
- srcPacket<SrcLoadMode>(index, 4), srcPacket<SrcLoadMode>(index, 5),
- srcPacket<SrcLoadMode>(index, 6), srcPacket<SrcLoadMode>(index, 7));
- }
-
- constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_rows; }
- constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_cols; }
- constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_rows * m_cols; }
-
- protected:
- const evaluator<ArgType> m_argImpl;
- const variable_if_dynamic<Index, XprType::RowsAtCompileTime> m_rows;
- const variable_if_dynamic<Index, XprType::ColsAtCompileTime> m_cols;
-};
-
// -------------------- CwiseTernaryOp --------------------
// this is a ternary expression
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index bfc7ae6..8cb80bb 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -146,67 +146,14 @@
template<typename T> struct unpacket_traits<const T> : unpacket_traits<T> { };
-/** \internal A convenience utility for determining if the type is a scalar.
- * This is used to enable some generic packet implementations.
- */
-template <typename Packet>
-struct is_scalar {
- using Scalar = typename unpacket_traits<Packet>::type;
- enum { value = internal::is_same<Packet, Scalar>::value };
-};
-
-// automatically and succinctly define combinations of pcast<SrcPacket,TgtPacket> when
-// 1) the packets are the same type, or
-// 2) the packets differ only in sign.
-// In both of these cases, preinterpret (bit_cast) is equivalent to pcast (static_cast)
-template <typename SrcPacket, typename TgtPacket,
- bool Scalar = is_scalar<SrcPacket>::value && is_scalar<TgtPacket>::value>
-struct is_degenerate_helper : is_same<SrcPacket, TgtPacket> {};
-template <>
-struct is_degenerate_helper<int8_t, uint8_t, true> : std::true_type {};
-template <>
-struct is_degenerate_helper<int16_t, uint16_t, true> : std::true_type {};
-template <>
-struct is_degenerate_helper<int32_t, uint32_t, true> : std::true_type {};
-template <>
-struct is_degenerate_helper<int64_t, uint64_t, true> : std::true_type {};
-
-template <typename SrcPacket, typename TgtPacket>
-struct is_degenerate_helper<SrcPacket, TgtPacket, false> {
- using SrcScalar = typename unpacket_traits<SrcPacket>::type;
- static constexpr int SrcSize = unpacket_traits<SrcPacket>::size;
- using TgtScalar = typename unpacket_traits<TgtPacket>::type;
- static constexpr int TgtSize = unpacket_traits<TgtPacket>::size;
- static constexpr bool value = is_degenerate_helper<SrcScalar, TgtScalar, true>::value && (SrcSize == TgtSize);
-};
-
-// is_degenerate<T1,T2>::value == is_degenerate<T2,T1>::value
-template <typename SrcPacket, typename TgtPacket>
-struct is_degenerate {
- static constexpr bool value =
- is_degenerate_helper<SrcPacket, TgtPacket>::value || is_degenerate_helper<TgtPacket, SrcPacket>::value;
-};
-
-template <typename Packet>
-struct is_half {
- using Scalar = typename unpacket_traits<Packet>::type;
- static constexpr int Size = unpacket_traits<Packet>::size;
- using DefaultPacket = typename packet_traits<Scalar>::type;
- static constexpr int DefaultSize = unpacket_traits<DefaultPacket>::size;
- static constexpr bool value = Size < DefaultSize;
-};
-
-template <typename Src, typename Tgt>
-struct type_casting_traits {
+template <typename Src, typename Tgt> struct type_casting_traits {
enum {
- VectorizedCast =
- is_degenerate<Src, Tgt>::value && packet_traits<Src>::Vectorizable && packet_traits<Tgt>::Vectorizable,
+ VectorizedCast = 0, // generic default: a cast is not flagged as vectorized unless this trait is specialized
SrcCoeffRatio = 1,
TgtCoeffRatio = 1
};
};
-
/** \internal Wrapper to ensure that multiple packet types can map to the same
same underlying vector type. */
template<typename T, int unique_id = 0>
@@ -224,84 +171,45 @@
T m_val;
};
-template <typename Target, typename Packet, bool IsSame = is_same<Target, Packet>::value>
-struct preinterpret_generic;
-template <typename Target, typename Packet>
-struct preinterpret_generic<Target, Packet, false> {
- // the packets are not the same, attempt scalar bit_cast
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Target run(const Packet& a) {
- return numext::bit_cast<Target, Packet>(a);
- }
+/** \internal A convenience utility for determining if the type is a scalar.
+ * This is used to enable some generic packet implementations.
+ */
+template<typename Packet>
+struct is_scalar {
+ using Scalar = typename unpacket_traits<Packet>::type; // element type that unpacket_traits reports for Packet
+ enum {
+ value = internal::is_same<Packet, Scalar>::value // set exactly when Packet is the same type as its own Scalar
+ };
};
-template <typename Packet>
-struct preinterpret_generic<Packet, Packet, true> {
- // the packets are the same type: do nothing
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
-};
-
-/** \internal \returns reinterpret_cast<Target>(a) */
-template <typename Target, typename Packet>
-EIGEN_DEVICE_FUNC inline Target preinterpret(const Packet& a) {
- return preinterpret_generic<Target, Packet>::run(a);
-}
-
-template <typename SrcPacket, typename TgtPacket, bool Degenerate = is_degenerate<SrcPacket, TgtPacket>::value, bool TgtIsHalf = is_half<TgtPacket>::value>
-struct pcast_generic;
-
-template <typename SrcPacket, typename TgtPacket>
-struct pcast_generic<SrcPacket, TgtPacket, false, false> {
- // the packets are not degenerate: attempt scalar static_cast
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
- return cast_impl<SrcPacket, TgtPacket>::run(a);
- }
-};
-
-template <typename Packet>
-struct pcast_generic<Packet, Packet, true, false> {
- // the packets are the same: do nothing
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
-};
-
-template <typename SrcPacket, typename TgtPacket, bool TgtIsHalf>
-struct pcast_generic<SrcPacket, TgtPacket, true, TgtIsHalf> {
- // the packets are degenerate: preinterpret is equivalent to pcast
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) { return preinterpret<TgtPacket>(a); }
-};
-
-
-
/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
template <typename SrcPacket, typename TgtPacket>
-EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a) {
- return pcast_generic<SrcPacket, TgtPacket>::run(a);
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a) {
+ return static_cast<TgtPacket>(a); // generic fallback: plain static_cast of the single input
}
template <typename SrcPacket, typename TgtPacket>
-EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b) {
- return pcast_generic<SrcPacket, TgtPacket>::run(a, b);
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
+ return static_cast<TgtPacket>(a); // generic two-input fallback: only a is converted, the second input is ignored
}
template <typename SrcPacket, typename TgtPacket>
-EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c,
- const SrcPacket& d) {
- return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d);
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
+ return static_cast<TgtPacket>(a); // generic four-input fallback: only a is converted, the other three inputs are ignored
}
template <typename SrcPacket, typename TgtPacket>
-EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c, const SrcPacket& d,
- const SrcPacket& e, const SrcPacket& f, const SrcPacket& g,
- const SrcPacket& h) {
- return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d, e, f, g, h);
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/,
+ const SrcPacket& /*e*/, const SrcPacket& /*f*/, const SrcPacket& /*g*/, const SrcPacket& /*h*/) {
+ return static_cast<TgtPacket>(a); // generic eight-input fallback: only a is converted, the other seven inputs are ignored
}
-template <typename SrcPacket, typename TgtPacket>
-struct pcast_generic<SrcPacket, TgtPacket, false, true> {
- // TgtPacket is a half packet of some other type
- // perform cast and truncate result
- using DefaultTgtPacket = typename is_half<TgtPacket>::DefaultPacket;
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
- return preinterpret<TgtPacket>(pcast<SrcPacket, DefaultTgtPacket>(a));
- }
-};
+/** \internal \returns reinterpret_cast<Target>(a). Only declared here (the generic body on the last line is commented out); definitions are supplied by explicit specializations, e.g. in the arch-specific TypeCasting.h headers. */
+template <typename Target, typename Packet>
+EIGEN_DEVICE_FUNC inline Target
+preinterpret(const Packet& a); /* { return reinterpret_cast<const Target&>(a); } */
/** \internal \returns a + b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index d7851e3..40ee3f5 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -430,13 +430,6 @@
}
};
-template <typename OldType>
-struct cast_impl<OldType, bool> {
- EIGEN_DEVICE_FUNC
- static inline bool run(const OldType& x) { return x != OldType(0); }
-};
-
-
// Casting from S -> Complex<T> leads to an implicit conversion from S to T,
// generating warnings on clang. Here we explicitly cast the real component.
template<typename OldType, typename NewType>
diff --git a/Eigen/src/Core/arch/AVX/TypeCasting.h b/Eigen/src/Core/arch/AVX/TypeCasting.h
index 461f3a6..386543e 100644
--- a/Eigen/src/Core/arch/AVX/TypeCasting.h
+++ b/Eigen/src/Core/arch/AVX/TypeCasting.h
@@ -80,14 +80,6 @@
return _mm256_set_m128i(_mm256_cvttpd_epi32(b), _mm256_cvttpd_epi32(a));
}
-template <> EIGEN_STRONG_INLINE Packet4f pcast<Packet4d, Packet4f>(const Packet4d& a) {
- return _mm256_cvtpd_ps(a);
-}
-
-template <> EIGEN_STRONG_INLINE Packet4i pcast<Packet4d, Packet4i>(const Packet4d& a) {
- return _mm256_cvttpd_epi32(a);
-}
-
template <>
EIGEN_STRONG_INLINE Packet16b pcast<Packet8f, Packet16b>(const Packet8f& a,
const Packet8f& b) {
@@ -126,44 +118,6 @@
return _mm256_castsi256_ps(a);
}
-template<> EIGEN_STRONG_INLINE Packet8ui preinterpret<Packet8ui, Packet8i>(const Packet8i& a) {
- return Packet8ui(a);
-}
-
-template<> EIGEN_STRONG_INLINE Packet8i preinterpret<Packet8i, Packet8ui>(const Packet8ui& a) {
- return Packet8i(a);
-}
-
-// truncation operations
-
-template<> EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet8f>(const Packet8f& a) {
- return _mm256_castps256_ps128(a);
-}
-
-template<> EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet4d>(const Packet4d& a) {
- return _mm256_castpd256_pd128(a);
-}
-
-template<> EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet8i>(const Packet8i& a) {
- return _mm256_castsi256_si128(a);
-}
-
-template<> EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet8ui>(const Packet8ui& a) {
- return _mm256_castsi256_si128(a);
-}
-
-
-#ifdef EIGEN_VECTORIZE_AVX2
-template<> EIGEN_STRONG_INLINE Packet4ul preinterpret<Packet4ul, Packet4l>(const Packet4l& a) {
- return Packet4ul(a);
-}
-
-template<> EIGEN_STRONG_INLINE Packet4l preinterpret<Packet4l, Packet4ul>(const Packet4ul& a) {
- return Packet4l(a);
-}
-
-#endif
-
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) {
return half2float(a);
}
diff --git a/Eigen/src/Core/arch/AVX512/TypeCasting.h b/Eigen/src/Core/arch/AVX512/TypeCasting.h
index 2f38d7f..02e6335 100644
--- a/Eigen/src/Core/arch/AVX512/TypeCasting.h
+++ b/Eigen/src/Core/arch/AVX512/TypeCasting.h
@@ -59,13 +59,6 @@
return cat256i(_mm512_cvttpd_epi32(a), _mm512_cvttpd_epi32(b));
}
-template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8d, Packet8i>(const Packet8d& a) {
- return _mm512_cvtpd_epi32(a);
-}
-template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8d, Packet8f>(const Packet8d& a) {
- return _mm512_cvtpd_ps(a);
-}
-
template<> EIGEN_STRONG_INLINE Packet16i preinterpret<Packet16i, Packet16f>(const Packet16f& a) {
return _mm512_castps_si512(a);
}
@@ -114,19 +107,12 @@
return _mm512_castpd128_pd512(a);
}
-template<> EIGEN_STRONG_INLINE Packet8i preinterpret<Packet8i, Packet16i>(const Packet16i& a) {
- return _mm512_castsi512_si256(a);
-}
-template<> EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet16i>(const Packet16i& a) {
- return _mm512_castsi512_si128(a);
+template<> EIGEN_STRONG_INLINE Packet16f preinterpret<Packet16f, Packet16f>(const Packet16f& a) {
+ return a; // source and target are both Packet16f: the packet is returned unchanged
}
-template<> EIGEN_STRONG_INLINE Packet8h preinterpret<Packet8h, Packet16h>(const Packet16h& a) {
- return _mm256_castsi256_si128(a);
-}
-
-template<> EIGEN_STRONG_INLINE Packet8bf preinterpret<Packet8bf, Packet16bf>(const Packet16bf& a) {
- return _mm256_castsi256_si128(a);
+template<> EIGEN_STRONG_INLINE Packet8d preinterpret<Packet8d, Packet8d>(const Packet8d& a) {
+ return a; // source and target are both Packet8d: the packet is returned unchanged
}
#ifndef EIGEN_VECTORIZE_AVX512FP16
@@ -205,13 +191,6 @@
};
};
-template<> EIGEN_STRONG_INLINE Packet16h preinterpret<Packet16h, Packet32h>(const Packet32h& a) {
- return _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(a), 0));
-}
-template<> EIGEN_STRONG_INLINE Packet8h preinterpret<Packet8h, Packet32h>(const Packet32h& a) {
- return _mm256_castsi256_si128(preinterpret<Packet16h>(a));
-}
-
template <>
EIGEN_STRONG_INLINE Packet16f pcast<Packet32h, Packet16f>(const Packet32h& a) {
// Discard second-half of input.
diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h
index 17ce135..c8ca33a 100644
--- a/Eigen/src/Core/arch/Default/Half.h
+++ b/Eigen/src/Core/arch/Default/Half.h
@@ -1014,49 +1014,4 @@
} // end namespace std
#endif
-namespace Eigen {
-namespace internal {
-
-template <>
-struct cast_impl<float, half> {
- EIGEN_DEVICE_FUNC
- static inline half run(const float& a) {
-#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
- (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
- return __float2half(a);
-#else
- return half(a);
-#endif
- }
-};
-
-template <>
-struct cast_impl<int, half> {
- EIGEN_DEVICE_FUNC
- static inline half run(const int& a) {
-#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
- (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
- return __float2half(static_cast<float>(a));
-#else
- return half(static_cast<float>(a));
-#endif
- }
-};
-
-template <>
-struct cast_impl<half, float> {
- EIGEN_DEVICE_FUNC
- static inline float run(const half& a) {
-#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
- (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
- return __half2float(a);
-#else
- return static_cast<float>(a);
-#endif
- }
-};
-
-} // namespace internal
-} // namespace Eigen
-
#endif // EIGEN_HALF_H
diff --git a/Eigen/src/Core/arch/Default/TypeCasting.h b/Eigen/src/Core/arch/Default/TypeCasting.h
new file mode 100644
index 0000000..dc779a7
--- /dev/null
+++ b/Eigen/src/Core/arch/Default/TypeCasting.h
@@ -0,0 +1,116 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+// Copyright (C) 2019 Rasmus Munk Larsen <rmlarsen@google.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GENERIC_TYPE_CASTING_H
+#define EIGEN_GENERIC_TYPE_CASTING_H
+
+#include "../../InternalHeaderCheck.h"
+
+namespace Eigen {
+
+namespace internal {
+
+template<>
+struct scalar_cast_op<float, Eigen::half> { // scalar functor converting float to Eigen::half
+ typedef Eigen::half result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const {
+ #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
+ (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
+ return __float2half(a); // CUDA FP16 with EIGEN_CUDA_ARCH >= 300, or HIP FP16 device compile: convert with __float2half
+ #else // every other build configuration
+ return Eigen::half(a); // construct the half directly from the float
+ #endif
+ }
+};
+
+template<>
+struct functor_traits<scalar_cast_op<float, Eigen::half> > // traits for the float to half cast functor
+{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; // cost is set to one float add; packet access is turned off
+
+
+template<>
+struct scalar_cast_op<int, Eigen::half> { // scalar functor converting int to Eigen::half
+ typedef Eigen::half result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const {
+ #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
+ (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
+ return __float2half(static_cast<float>(a)); // CUDA/HIP FP16 device path: widen the int to float, then convert with __float2half
+ #else // every other build configuration
+ return Eigen::half(static_cast<float>(a)); // widen the int to float, then construct the half from it
+ #endif
+ }
+};
+
+template<>
+struct functor_traits<scalar_cast_op<int, Eigen::half> > // traits for the int to half cast functor
+{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; // cost is set to one float add; packet access is turned off
+
+
+template<>
+struct scalar_cast_op<Eigen::half, float> { // scalar functor converting Eigen::half to float
+ typedef float result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const {
+ #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
+ (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
+ return __half2float(a); // CUDA FP16 with EIGEN_CUDA_ARCH >= 300, or HIP FP16 device compile: convert with __half2float
+ #else // every other build configuration
+ return static_cast<float>(a); // rely on the conversion from Eigen::half to float
+ #endif
+ }
+};
+
+template<>
+struct functor_traits<scalar_cast_op<Eigen::half, float> > // traits for the half to float cast functor
+{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; // cost is set to one float add; packet access is turned off
+
+
+template<>
+struct scalar_cast_op<float, Eigen::bfloat16> { // scalar functor converting float to Eigen::bfloat16
+ typedef Eigen::bfloat16 result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::bfloat16 operator() (const float& a) const {
+ return Eigen::bfloat16(a); // construct the bfloat16 directly from the float
+ }
+};
+
+template<>
+struct functor_traits<scalar_cast_op<float, Eigen::bfloat16> > // traits for the float to bfloat16 cast functor
+{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; // cost is set to one float add; packet access is turned off
+
+
+template<>
+struct scalar_cast_op<int, Eigen::bfloat16> { // scalar functor converting int to Eigen::bfloat16
+ typedef Eigen::bfloat16 result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::bfloat16 operator() (const int& a) const {
+ return Eigen::bfloat16(static_cast<float>(a)); // widen the int to float, then construct the bfloat16 from it
+ }
+};
+
+template<>
+struct functor_traits<scalar_cast_op<int, Eigen::bfloat16> > // traits for the int to bfloat16 cast functor
+{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; // cost is set to one float add; packet access is turned off
+
+
+template<>
+struct scalar_cast_op<Eigen::bfloat16, float> { // scalar functor converting Eigen::bfloat16 to float
+ typedef float result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::bfloat16& a) const {
+ return static_cast<float>(a); // rely on the conversion from Eigen::bfloat16 to float
+ }
+};
+
+template<>
+struct functor_traits<scalar_cast_op<Eigen::bfloat16, float> > // traits for the bfloat16 to float cast functor
+{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; // cost is set to one float add; packet access is turned off
+
+
+} // end namespace internal
+} // end namespace Eigen
+
+#endif // EIGEN_GENERIC_TYPE_CASTING_H
diff --git a/Eigen/src/Core/arch/NEON/TypeCasting.h b/Eigen/src/Core/arch/NEON/TypeCasting.h
index 834fcf5..add31b9 100644
--- a/Eigen/src/Core/arch/NEON/TypeCasting.h
+++ b/Eigen/src/Core/arch/NEON/TypeCasting.h
@@ -17,61 +17,6 @@
namespace internal {
-
-//==============================================================================
-// preinterpret (truncation operations)
-//==============================================================================
-
-template <>
-EIGEN_STRONG_INLINE Packet8c preinterpret<Packet8c, Packet16c>(const Packet16c& a) {
- return Packet8c(vget_low_s8(a));
-}
-template <>
-EIGEN_STRONG_INLINE Packet4c preinterpret<Packet4c, Packet8c>(const Packet8c& a) {
- return Packet4c(vget_lane_s32(vreinterpret_s32_s8(a), 0));
-}
-template <>
-EIGEN_STRONG_INLINE Packet4c preinterpret<Packet4c, Packet16c>(const Packet16c& a) {
- return preinterpret<Packet4c>(preinterpret<Packet8c>(a));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8uc preinterpret<Packet8uc, Packet16uc>(const Packet16uc& a) {
- return Packet8uc(vget_low_u8(a));
-}
-template <>
-EIGEN_STRONG_INLINE Packet4uc preinterpret<Packet4uc, Packet8uc>(const Packet8uc& a) {
- return Packet4uc(vget_lane_u32(vreinterpret_u32_u8(a), 0));
-}
-template <>
-EIGEN_STRONG_INLINE Packet4uc preinterpret<Packet4uc, Packet16uc>(const Packet16uc& a) {
- return preinterpret<Packet4uc>(preinterpret<Packet8uc>(a));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet4s preinterpret<Packet4s, Packet8s>(const Packet8s& a) {
- return Packet4s(vget_low_s16(a));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet4us preinterpret<Packet4us, Packet8us>(const Packet8us& a) {
- return Packet4us(vget_low_u16(a));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i, Packet4i>(const Packet4i& a) {
- return Packet2i(vget_low_s32(a));
-}
-template <>
-EIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui, Packet4ui>(const Packet4ui& a) {
- return Packet2ui(vget_low_u32(a));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet2f preinterpret<Packet2f, Packet4f>(const Packet4f& a) {
- return Packet2f(vget_low_f32(a));
-}
-
//==============================================================================
// preinterpret
//==============================================================================
@@ -92,7 +37,6 @@
return Packet4f(vreinterpretq_f32_u32(a));
}
-
template <>
EIGEN_STRONG_INLINE Packet4c preinterpret<Packet4c, Packet4uc>(const Packet4uc& a) {
return static_cast<Packet4c>(a);
@@ -106,7 +50,6 @@
return Packet16c(vreinterpretq_s8_u8(a));
}
-
template <>
EIGEN_STRONG_INLINE Packet4uc preinterpret<Packet4uc, Packet4c>(const Packet4c& a) {
return static_cast<Packet4uc>(a);
@@ -128,6 +71,7 @@
EIGEN_STRONG_INLINE Packet8s preinterpret<Packet8s, Packet8us>(const Packet8us& a) {
return Packet8s(vreinterpretq_s16_u16(a));
}
+
template <>
EIGEN_STRONG_INLINE Packet4us preinterpret<Packet4us, Packet4s>(const Packet4s& a) {
return Packet4us(vreinterpret_u16_s16(a));
@@ -183,7 +127,18 @@
//==============================================================================
// pcast, SrcType = float
//==============================================================================
-
+template <>
+struct type_casting_traits<float, float> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+template <>
+EIGEN_STRONG_INLINE Packet4f pcast<Packet4f, Packet4f>(const Packet4f& a) {
+ return a;
+}
+template <>
+EIGEN_STRONG_INLINE Packet2f pcast<Packet2f, Packet2f>(const Packet2f& a) {
+ return a;
+}
template <>
struct type_casting_traits<float, numext::int64_t> {
@@ -201,18 +156,10 @@
return vcvtq_s64_f64(vcvt_f64_f32(vget_low_f32(a)));
}
template <>
-EIGEN_STRONG_INLINE Packet2l pcast<Packet2f, Packet2l>(const Packet2f& a) {
- return vcvtq_s64_f64(vcvt_f64_f32(a));
-}
-template <>
EIGEN_STRONG_INLINE Packet2ul pcast<Packet4f, Packet2ul>(const Packet4f& a) {
// Discard second half of input.
return vcvtq_u64_f64(vcvt_f64_f32(vget_low_f32(a)));
}
-template <>
-EIGEN_STRONG_INLINE Packet2ul pcast<Packet2f, Packet2ul>(const Packet2f& a) {
- return vcvtq_u64_f64(vcvt_f64_f32(a));
-}
#else
template <>
EIGEN_STRONG_INLINE Packet2l pcast<Packet4f, Packet2l>(const Packet4f& a) {
@@ -220,19 +167,10 @@
return vmovl_s32(vget_low_s32(vcvtq_s32_f32(a)));
}
template <>
-EIGEN_STRONG_INLINE Packet2l pcast<Packet2f, Packet2l>(const Packet2f& a) {
- return vmovl_s32(vcvt_s32_f32(a));
-}
-template <>
EIGEN_STRONG_INLINE Packet2ul pcast<Packet4f, Packet2ul>(const Packet4f& a) {
// Discard second half of input.
return vmovl_u32(vget_low_u32(vcvtq_u32_f32(a)));
}
-template <>
-EIGEN_STRONG_INLINE Packet2ul pcast<Packet2f, Packet2ul>(const Packet2f& a) {
- // Discard second half of input.
- return vmovl_u32(vcvt_u32_f32(a));
-}
#endif // EIGEN_ARCH_ARM64
template <>
@@ -270,10 +208,6 @@
return vcombine_s16(vmovn_s32(vcvtq_s32_f32(a)), vmovn_s32(vcvtq_s32_f32(b)));
}
template <>
-EIGEN_STRONG_INLINE Packet4s pcast<Packet4f, Packet4s>(const Packet4f& a) {
- return vmovn_s32(vcvtq_s32_f32(a));
-}
-template <>
EIGEN_STRONG_INLINE Packet4s pcast<Packet2f, Packet4s>(const Packet2f& a, const Packet2f& b) {
return vmovn_s32(vcombine_s32(vcvt_s32_f32(a), vcvt_s32_f32(b)));
}
@@ -287,10 +221,6 @@
return vcombine_u16(vmovn_u32(vcvtq_u32_f32(a)), vmovn_u32(vcvtq_u32_f32(b)));
}
template <>
-EIGEN_STRONG_INLINE Packet4us pcast<Packet4f, Packet4us>(const Packet4f& a) {
- return vmovn_u32(vcvtq_u32_f32(a));
-}
-template <>
EIGEN_STRONG_INLINE Packet4us pcast<Packet2f, Packet4us>(const Packet2f& a, const Packet2f& b) {
return vmovn_u32(vcombine_u32(vcvt_u32_f32(a), vcvt_u32_f32(b)));
}
@@ -307,25 +237,12 @@
return vcombine_s8(vmovn_s16(ab_s16), vmovn_s16(cd_s16));
}
template <>
-EIGEN_STRONG_INLINE Packet8c pcast<Packet4f, Packet8c>(const Packet4f& a, const Packet4f& b) {
- const int16x8_t ab_s16 = pcast<Packet4f, Packet8s>(a, b);
- return vmovn_s16(ab_s16);
-}
-template <>
EIGEN_STRONG_INLINE Packet8c pcast<Packet2f, Packet8c>(const Packet2f& a, const Packet2f& b, const Packet2f& c,
const Packet2f& d) {
const int16x4_t ab_s16 = pcast<Packet2f, Packet4s>(a, b);
const int16x4_t cd_s16 = pcast<Packet2f, Packet4s>(c, d);
return vmovn_s16(vcombine_s16(ab_s16, cd_s16));
}
-template <>
-EIGEN_STRONG_INLINE Packet4c pcast<Packet4f, Packet4c>(const Packet4f& a) {
- const int32x4_t a_s32x4 = vcvtq_s32_f32(a);
- const int16x4_t a_s16x4 = vmovn_s32(a_s32x4);
- const int16x8_t aa_s16x8 = vcombine_s16(a_s16x4, a_s16x4);
- const int8x8_t aa_s8x8 = vmovn_s16(aa_s16x8);
- return vget_lane_s32(vreinterpret_s32_s8(aa_s8x8), 0);
-}
template <>
struct type_casting_traits<float, numext::uint8_t> {
@@ -334,20 +251,16 @@
template <>
EIGEN_STRONG_INLINE Packet16uc pcast<Packet4f, Packet16uc>(const Packet4f& a, const Packet4f& b, const Packet4f& c,
const Packet4f& d) {
- return preinterpret<Packet16uc>(pcast<Packet4f, Packet16c>(a, b, c, d));
-}
-template <>
-EIGEN_STRONG_INLINE Packet8uc pcast<Packet4f, Packet8uc>(const Packet4f& a, const Packet4f& b) {
- return preinterpret<Packet8uc>(pcast<Packet4f, Packet8c>(a, b));
+ const uint16x8_t ab_u16 = pcast<Packet4f, Packet8us>(a, b);
+ const uint16x8_t cd_u16 = pcast<Packet4f, Packet8us>(c, d);
+ return vcombine_u8(vmovn_u16(ab_u16), vmovn_u16(cd_u16));
}
template <>
EIGEN_STRONG_INLINE Packet8uc pcast<Packet2f, Packet8uc>(const Packet2f& a, const Packet2f& b, const Packet2f& c,
const Packet2f& d) {
- return preinterpret<Packet8uc>(pcast<Packet2f, Packet8c>(a, b, c, d));
-}
-template <>
-EIGEN_STRONG_INLINE Packet4uc pcast<Packet4f, Packet4uc>(const Packet4f& a) {
- return static_cast<Packet4uc>(pcast<Packet4f, Packet4c>(a));
+ const uint16x4_t ab_u16 = pcast<Packet2f, Packet4us>(a, b);
+ const uint16x4_t cd_u16 = pcast<Packet2f, Packet4us>(c, d);
+ return vmovn_u16(vcombine_u16(ab_u16, cd_u16));
}
//==============================================================================
@@ -363,10 +276,6 @@
return vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(a)))));
}
template <>
-EIGEN_STRONG_INLINE Packet4f pcast<Packet4c, Packet4f>(const Packet4c& a) {
- return vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vreinterpret_s8_s32(vdup_n_s32(a))))));
-}
-template <>
EIGEN_STRONG_INLINE Packet2f pcast<Packet8c, Packet2f>(const Packet8c& a) {
// Discard all but first 2 bytes.
return vcvt_f32_s32(vget_low_s32(vmovl_s16(vget_low_s16(vmovl_s8(a)))));
@@ -401,20 +310,11 @@
return vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(a))));
}
template <>
-EIGEN_STRONG_INLINE Packet4i pcast<Packet8c, Packet4i>(const Packet8c& a) {
- return vmovl_s16(vget_low_s16(vmovl_s8(a)));
-}
-template <>
-EIGEN_STRONG_INLINE Packet4i pcast<Packet4c, Packet4i>(const Packet4c& a) {
- return pcast<Packet8c, Packet4i>(vreinterpret_s8_s32(vdup_n_s32(a)));
-}
-template <>
EIGEN_STRONG_INLINE Packet2i pcast<Packet8c, Packet2i>(const Packet8c& a) {
// Discard all but first 2 bytes.
return vget_low_s32(vmovl_s16(vget_low_s16(vmovl_s8(a))));
}
-
template <>
struct type_casting_traits<numext::int8_t, numext::uint32_t> {
enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 4 };
@@ -427,10 +327,6 @@
EIGEN_STRONG_INLINE Packet2ui pcast<Packet8c, Packet2ui>(const Packet8c& a) {
return preinterpret<Packet2ui>(pcast<Packet8c, Packet2i>(a));
}
-template <>
-EIGEN_STRONG_INLINE Packet4ui pcast<Packet4c, Packet4ui>(const Packet4c& a) {
- return preinterpret<Packet4ui>(pcast<Packet4c, Packet4i>(a));
-}
template <>
struct type_casting_traits<numext::int8_t, numext::int16_t> {
@@ -442,18 +338,10 @@
return vmovl_s8(vget_low_s8(a));
}
template <>
-EIGEN_STRONG_INLINE Packet8s pcast<Packet8c, Packet8s>(const Packet8c& a) {
- return vmovl_s8(a);
-}
-template <>
EIGEN_STRONG_INLINE Packet4s pcast<Packet8c, Packet4s>(const Packet8c& a) {
// Discard second half of input.
return vget_low_s16(vmovl_s8(a));
}
-template <>
-EIGEN_STRONG_INLINE Packet4s pcast<Packet4c, Packet4s>(const Packet4c& a) {
- return pcast<Packet8c, Packet4s>(vreinterpret_s8_s32(vdup_n_s32(a)));
-}
template <>
struct type_casting_traits<numext::int8_t, numext::uint16_t> {
@@ -464,18 +352,43 @@
return preinterpret<Packet8us>(pcast<Packet16c, Packet8s>(a));
}
template <>
-EIGEN_STRONG_INLINE Packet8us pcast<Packet8c, Packet8us>(const Packet8c& a) {
- return preinterpret<Packet8us>(pcast<Packet8c, Packet8s>(a));
-}
-template <>
EIGEN_STRONG_INLINE Packet4us pcast<Packet8c, Packet4us>(const Packet8c& a) {
return preinterpret<Packet4us>(pcast<Packet8c, Packet4s>(a));
}
+
template <>
-EIGEN_STRONG_INLINE Packet4us pcast<Packet4c, Packet4us>(const Packet4c& a) {
- return preinterpret<Packet4us>(pcast<Packet4c, Packet4s>(a));
+struct type_casting_traits<numext::int8_t, numext::int8_t> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+template <>
+EIGEN_STRONG_INLINE Packet16c pcast<Packet16c, Packet16c>(const Packet16c& a) {
+ return a;
+}
+template <>
+EIGEN_STRONG_INLINE Packet8c pcast<Packet8c, Packet8c>(const Packet8c& a) {
+ return a;
+}
+template <>
+EIGEN_STRONG_INLINE Packet4c pcast<Packet4c, Packet4c>(const Packet4c& a) {
+ return a;
}
+template <>
+struct type_casting_traits<numext::int8_t, numext::uint8_t> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+template <>
+EIGEN_STRONG_INLINE Packet16uc pcast<Packet16c, Packet16uc>(const Packet16c& a) {
+ return preinterpret<Packet16uc>(a);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8uc pcast<Packet8c, Packet8uc>(const Packet8c& a) {
+ return preinterpret<Packet8uc>(a);
+}
+template <>
+EIGEN_STRONG_INLINE Packet4uc pcast<Packet4c, Packet4uc>(const Packet4c& a) {
+ return static_cast<Packet4uc>(a);
+}
//==============================================================================
// pcast, SrcType = uint8_t
@@ -490,10 +403,6 @@
return vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(a)))));
}
template <>
-EIGEN_STRONG_INLINE Packet4f pcast<Packet4uc, Packet4f>(const Packet4uc& a) {
- return vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(a))))));
-}
-template <>
EIGEN_STRONG_INLINE Packet2f pcast<Packet8uc, Packet2f>(const Packet8uc& a) {
// Discard all but first 2 bytes.
return vcvt_f32_u32(vget_low_u32(vmovl_u16(vget_low_u16(vmovl_u8(a)))));
@@ -528,18 +437,10 @@
return vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(a))));
}
template <>
-EIGEN_STRONG_INLINE Packet4ui pcast<Packet8uc, Packet4ui>(const Packet8uc& a) {
- return vmovl_u16(vget_low_u16(vmovl_u8(a)));
-}
-template <>
EIGEN_STRONG_INLINE Packet2ui pcast<Packet8uc, Packet2ui>(const Packet8uc& a) {
// Discard all but first 2 bytes.
return vget_low_u32(vmovl_u16(vget_low_u16(vmovl_u8(a))));
}
-template <>
-EIGEN_STRONG_INLINE Packet4ui pcast<Packet4uc, Packet4ui>(const Packet4uc& a) {
- return pcast<Packet8uc, Packet4ui>(vreinterpret_u8_u32(vdup_n_u32(a)));
-}
template <>
struct type_casting_traits<numext::uint8_t, numext::int32_t> {
@@ -553,10 +454,6 @@
EIGEN_STRONG_INLINE Packet2i pcast<Packet8uc, Packet2i>(const Packet8uc& a) {
return preinterpret<Packet2i>(pcast<Packet8uc, Packet2ui>(a));
}
-template <>
-EIGEN_STRONG_INLINE Packet4i pcast<Packet4uc, Packet4i>(const Packet4uc& a) {
- return preinterpret<Packet4i>(pcast<Packet4uc, Packet4ui>(a));
-}
template <>
struct type_casting_traits<numext::uint8_t, numext::uint16_t> {
@@ -568,14 +465,10 @@
return vmovl_u8(vget_low_u8(a));
}
template <>
-EIGEN_STRONG_INLINE Packet8us pcast<Packet8uc, Packet8us>(const Packet8uc& a) {
- return vmovl_u8(a);
+EIGEN_STRONG_INLINE Packet4us pcast<Packet8uc, Packet4us>(const Packet8uc& a) {
+ // Discard second half of input.
+ return vget_low_u16(vmovl_u8(a));
}
-template <>
-EIGEN_STRONG_INLINE Packet4us pcast<Packet4uc, Packet4us>(const Packet4uc& a) {
- return vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(a))));
-}
-
template <>
struct type_casting_traits<numext::uint8_t, numext::int16_t> {
@@ -586,14 +479,43 @@
return preinterpret<Packet8s>(pcast<Packet16uc, Packet8us>(a));
}
template <>
-EIGEN_STRONG_INLINE Packet8s pcast<Packet8uc, Packet8s>(const Packet8uc& a) {
- return preinterpret<Packet8s>(pcast<Packet8uc, Packet8us>(a));
-}
-template <>
-EIGEN_STRONG_INLINE Packet4s pcast<Packet4uc, Packet4s>(const Packet4uc& a) {
- return preinterpret<Packet4s>(pcast<Packet4uc, Packet4us>(a));
+EIGEN_STRONG_INLINE Packet4s pcast<Packet8uc, Packet4s>(const Packet8uc& a) {
+ return preinterpret<Packet4s>(pcast<Packet8uc, Packet4us>(a));
}
+template <>
+struct type_casting_traits<numext::uint8_t, numext::uint8_t> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+template <>
+EIGEN_STRONG_INLINE Packet16uc pcast<Packet16uc, Packet16uc>(const Packet16uc& a) {
+ return a;
+}
+template <>
+EIGEN_STRONG_INLINE Packet8uc pcast<Packet8uc, Packet8uc>(const Packet8uc& a) {
+ return a;
+}
+template <>
+EIGEN_STRONG_INLINE Packet4uc pcast<Packet4uc, Packet4uc>(const Packet4uc& a) {
+ return a;
+}
+
+template <>
+struct type_casting_traits<numext::uint8_t, numext::int8_t> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+template <>
+EIGEN_STRONG_INLINE Packet16c pcast<Packet16uc, Packet16c>(const Packet16uc& a) {
+ return preinterpret<Packet16c>(a);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8c pcast<Packet8uc, Packet8c>(const Packet8uc& a) {
+ return preinterpret<Packet8c>(a);
+}
+template <>
+EIGEN_STRONG_INLINE Packet4c pcast<Packet4uc, Packet4c>(const Packet4uc& a) {
+ return static_cast<Packet4c>(a);
+}
//==============================================================================
// pcast, SrcType = int16_t
@@ -608,10 +530,6 @@
return vcvtq_f32_s32(vmovl_s16(vget_low_s16(a)));
}
template <>
-EIGEN_STRONG_INLINE Packet4f pcast<Packet4s, Packet4f>(const Packet4s& a) {
- return vcvtq_f32_s32(vmovl_s16(a));
-}
-template <>
EIGEN_STRONG_INLINE Packet2f pcast<Packet4s, Packet2f>(const Packet4s& a) {
// Discard second half of input.
return vcvt_f32_s32(vget_low_s32(vmovl_s16(a)));
@@ -646,10 +564,6 @@
return vmovl_s16(vget_low_s16(a));
}
template <>
-EIGEN_STRONG_INLINE Packet4i pcast<Packet4s, Packet4i>(const Packet4s& a) {
- return vmovl_s16(a);
-}
-template <>
EIGEN_STRONG_INLINE Packet2i pcast<Packet4s, Packet2i>(const Packet4s& a) {
// Discard second half of input.
return vget_low_s32(vmovl_s16(a));
@@ -664,14 +578,35 @@
return preinterpret<Packet4ui>(pcast<Packet8s, Packet4i>(a));
}
template <>
-EIGEN_STRONG_INLINE Packet4ui pcast<Packet4s, Packet4ui>(const Packet4s& a) {
- return preinterpret<Packet4ui>(pcast<Packet4s, Packet4i>(a));
-}
-template <>
EIGEN_STRONG_INLINE Packet2ui pcast<Packet4s, Packet2ui>(const Packet4s& a) {
return preinterpret<Packet2ui>(pcast<Packet4s, Packet2i>(a));
}
+template <>
+struct type_casting_traits<numext::int16_t, numext::int16_t> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+template <>
+EIGEN_STRONG_INLINE Packet8s pcast<Packet8s, Packet8s>(const Packet8s& a) {
+ return a;
+}
+template <>
+EIGEN_STRONG_INLINE Packet4s pcast<Packet4s, Packet4s>(const Packet4s& a) {
+ return a;
+}
+
+template <>
+struct type_casting_traits<numext::int16_t, numext::uint16_t> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+template <>
+EIGEN_STRONG_INLINE Packet8us pcast<Packet8s, Packet8us>(const Packet8s& a) {
+ return preinterpret<Packet8us>(a);
+}
+template <>
+EIGEN_STRONG_INLINE Packet4us pcast<Packet4s, Packet4us>(const Packet4s& a) {
+ return preinterpret<Packet4us>(a);
+}
template <>
struct type_casting_traits<numext::int16_t, numext::int8_t> {
@@ -682,18 +617,9 @@
return vcombine_s8(vmovn_s16(a), vmovn_s16(b));
}
template <>
-EIGEN_STRONG_INLINE Packet8c pcast<Packet8s, Packet8c>(const Packet8s& a) {
- return vmovn_s16(a);
-}
-template <>
EIGEN_STRONG_INLINE Packet8c pcast<Packet4s, Packet8c>(const Packet4s& a, const Packet4s& b) {
return vmovn_s16(vcombine_s16(a, b));
}
-template <>
-EIGEN_STRONG_INLINE Packet4c pcast<Packet4s, Packet4c>(const Packet4s& a) {
- const int8x8_t aa_s8x8 = pcast<Packet4s, Packet8c>(a, a);
- return vget_lane_s32(vreinterpret_s32_s8(aa_s8x8), 0);
-}
template <>
struct type_casting_traits<numext::int16_t, numext::uint8_t> {
@@ -701,19 +627,11 @@
};
template <>
EIGEN_STRONG_INLINE Packet16uc pcast<Packet8s, Packet16uc>(const Packet8s& a, const Packet8s& b) {
- return preinterpret<Packet16uc>(pcast<Packet8s, Packet16c>(a, b));
-}
-template <>
-EIGEN_STRONG_INLINE Packet8uc pcast<Packet8s, Packet8uc>(const Packet8s& a) {
- return preinterpret<Packet8uc>(pcast<Packet8s, Packet8c>(a));
+ return vcombine_u8(vmovn_u16(vreinterpretq_u16_s16(a)), vmovn_u16(vreinterpretq_u16_s16(b)));
}
template <>
EIGEN_STRONG_INLINE Packet8uc pcast<Packet4s, Packet8uc>(const Packet4s& a, const Packet4s& b) {
- return preinterpret<Packet8uc>(pcast<Packet4s, Packet8c>(a, b));
-}
-template <>
-EIGEN_STRONG_INLINE Packet4uc pcast<Packet4s, Packet4uc>(const Packet4s& a) {
- return static_cast<Packet4uc>(pcast<Packet4s, Packet4c>(a));
+ return vmovn_u16(vcombine_u16(vreinterpret_u16_s16(a), vreinterpret_u16_s16(b)));
}
//==============================================================================
@@ -729,10 +647,6 @@
return vcvtq_f32_u32(vmovl_u16(vget_low_u16(a)));
}
template <>
-EIGEN_STRONG_INLINE Packet4f pcast<Packet4us, Packet4f>(const Packet4us& a) {
- return vcvtq_f32_u32(vmovl_u16(a));
-}
-template <>
EIGEN_STRONG_INLINE Packet2f pcast<Packet4us, Packet2f>(const Packet4us& a) {
// Discard second half of input.
return vcvt_f32_u32(vget_low_u32(vmovl_u16(a)));
@@ -767,10 +681,6 @@
return vmovl_u16(vget_low_u16(a));
}
template <>
-EIGEN_STRONG_INLINE Packet4ui pcast<Packet4us, Packet4ui>(const Packet4us& a) {
- return vmovl_u16(a);
-}
-template <>
EIGEN_STRONG_INLINE Packet2ui pcast<Packet4us, Packet2ui>(const Packet4us& a) {
// Discard second half of input.
return vget_low_u32(vmovl_u16(a));
@@ -785,14 +695,35 @@
return preinterpret<Packet4i>(pcast<Packet8us, Packet4ui>(a));
}
template <>
-EIGEN_STRONG_INLINE Packet4i pcast<Packet4us, Packet4i>(const Packet4us& a) {
- return preinterpret<Packet4i>(pcast<Packet4us, Packet4ui>(a));
-}
-template <>
EIGEN_STRONG_INLINE Packet2i pcast<Packet4us, Packet2i>(const Packet4us& a) {
return preinterpret<Packet2i>(pcast<Packet4us, Packet2ui>(a));
}
+template <>
+struct type_casting_traits<numext::uint16_t, numext::uint16_t> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+template <>
+EIGEN_STRONG_INLINE Packet8us pcast<Packet8us, Packet8us>(const Packet8us& a) {
+ return a;
+}
+template <>
+EIGEN_STRONG_INLINE Packet4us pcast<Packet4us, Packet4us>(const Packet4us& a) {
+ return a;
+}
+
+template <>
+struct type_casting_traits<numext::uint16_t, numext::int16_t> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+template <>
+EIGEN_STRONG_INLINE Packet8s pcast<Packet8us, Packet8s>(const Packet8us& a) {
+ return preinterpret<Packet8s>(a);
+}
+template <>
+EIGEN_STRONG_INLINE Packet4s pcast<Packet4us, Packet4s>(const Packet4us& a) {
+ return preinterpret<Packet4s>(a);
+}
template <>
struct type_casting_traits<numext::uint16_t, numext::uint8_t> {
@@ -803,18 +734,9 @@
return vcombine_u8(vmovn_u16(a), vmovn_u16(b));
}
template <>
-EIGEN_STRONG_INLINE Packet8uc pcast<Packet8us, Packet8uc>(const Packet8us& a) {
- return vmovn_u16(a);
-}
-template <>
EIGEN_STRONG_INLINE Packet8uc pcast<Packet4us, Packet8uc>(const Packet4us& a, const Packet4us& b) {
return vmovn_u16(vcombine_u16(a, b));
}
-template <>
-EIGEN_STRONG_INLINE Packet4uc pcast<Packet4us, Packet4uc>(const Packet4us& a) {
- uint8x8_t aa_u8x8 = pcast<Packet4us, Packet8uc>(a, a);
- return vget_lane_u32(vreinterpret_u32_u8(aa_u8x8), 0);
-}
template <>
struct type_casting_traits<numext::uint16_t, numext::int8_t> {
@@ -825,17 +747,9 @@
return preinterpret<Packet16c>(pcast<Packet8us, Packet16uc>(a, b));
}
template <>
-EIGEN_STRONG_INLINE Packet8c pcast<Packet8us, Packet8c>(const Packet8us& a) {
- return preinterpret<Packet8c>(pcast<Packet8us, Packet8uc>(a));
-}
-template <>
EIGEN_STRONG_INLINE Packet8c pcast<Packet4us, Packet8c>(const Packet4us& a, const Packet4us& b) {
return preinterpret<Packet8c>(pcast<Packet4us, Packet8uc>(a, b));
}
-template <>
-EIGEN_STRONG_INLINE Packet4c pcast<Packet4us, Packet4c>(const Packet4us& a) {
- return static_cast<Packet4c>(pcast<Packet4us, Packet4uc>(a));
-}
//==============================================================================
// pcast, SrcType = int32_t
@@ -862,10 +776,6 @@
// Discard second half of input.
return vmovl_s32(vget_low_s32(a));
}
-template <>
-EIGEN_STRONG_INLINE Packet2l pcast<Packet2i, Packet2l>(const Packet2i& a) {
- return vmovl_s32(a);
-}
template <>
struct type_casting_traits<numext::int32_t, numext::uint64_t> {
@@ -875,11 +785,32 @@
EIGEN_STRONG_INLINE Packet2ul pcast<Packet4i, Packet2ul>(const Packet4i& a) {
return preinterpret<Packet2ul>(pcast<Packet4i, Packet2l>(a));
}
+
template <>
-EIGEN_STRONG_INLINE Packet2ul pcast<Packet2i, Packet2ul>(const Packet2i& a) {
- return preinterpret<Packet2ul>(pcast<Packet2i, Packet2l>(a));
+struct type_casting_traits<numext::int32_t, numext::int32_t> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+template <>
+EIGEN_STRONG_INLINE Packet4i pcast<Packet4i, Packet4i>(const Packet4i& a) {
+ return a;
+}
+template <>
+EIGEN_STRONG_INLINE Packet2i pcast<Packet2i, Packet2i>(const Packet2i& a) {
+ return a;
}
+template <>
+struct type_casting_traits<numext::int32_t, numext::uint32_t> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+template <>
+EIGEN_STRONG_INLINE Packet4ui pcast<Packet4i, Packet4ui>(const Packet4i& a) {
+ return preinterpret<Packet4ui>(a);
+}
+template <>
+EIGEN_STRONG_INLINE Packet2ui pcast<Packet2i, Packet2ui>(const Packet2i& a) {
+ return preinterpret<Packet2ui>(a);
+}
template <>
struct type_casting_traits<numext::int32_t, numext::int16_t> {
@@ -890,10 +821,6 @@
return vcombine_s16(vmovn_s32(a), vmovn_s32(b));
}
template <>
-EIGEN_STRONG_INLINE Packet4s pcast<Packet4i, Packet4s>(const Packet4i& a) {
- return vmovn_s32(a);
-}
-template <>
EIGEN_STRONG_INLINE Packet4s pcast<Packet2i, Packet4s>(const Packet2i& a, const Packet2i& b) {
return vmovn_s32(vcombine_s32(a, b));
}
@@ -907,10 +834,6 @@
return vcombine_u16(vmovn_u32(vreinterpretq_u32_s32(a)), vmovn_u32(vreinterpretq_u32_s32(b)));
}
template <>
-EIGEN_STRONG_INLINE Packet4us pcast<Packet4i, Packet4us>(const Packet4i& a) {
- return vmovn_u32(vreinterpretq_u32_s32(a));
-}
-template <>
EIGEN_STRONG_INLINE Packet4us pcast<Packet2i, Packet4us>(const Packet2i& a, const Packet2i& b) {
return vmovn_u32(vreinterpretq_u32_s32(vcombine_s32(a, b)));
}
@@ -927,24 +850,12 @@
return vcombine_s8(vmovn_s16(ab_s16), vmovn_s16(cd_s16));
}
template <>
-EIGEN_STRONG_INLINE Packet8c pcast<Packet4i, Packet8c>(const Packet4i& a, const Packet4i& b) {
- const int16x8_t ab_s16 = pcast<Packet4i, Packet8s>(a, b);
- return vmovn_s16(ab_s16);
-}
-template <>
EIGEN_STRONG_INLINE Packet8c pcast<Packet2i, Packet8c>(const Packet2i& a, const Packet2i& b, const Packet2i& c,
const Packet2i& d) {
const int16x4_t ab_s16 = vmovn_s32(vcombine_s32(a, b));
const int16x4_t cd_s16 = vmovn_s32(vcombine_s32(c, d));
return vmovn_s16(vcombine_s16(ab_s16, cd_s16));
}
-template <>
-EIGEN_STRONG_INLINE Packet4c pcast<Packet4i, Packet4c>(const Packet4i& a) {
- const int16x4_t a_s16x4 = vmovn_s32(a);
- const int16x8_t aa_s16x8 = vcombine_s16(a_s16x4, a_s16x4);
- const int8x8_t aa_s8x8 = vmovn_s16(aa_s16x8);
- return vget_lane_s32(vreinterpret_s32_s8(aa_s8x8), 0);
-}
template <>
struct type_casting_traits<numext::int32_t, numext::uint8_t> {
@@ -953,20 +864,16 @@
template <>
EIGEN_STRONG_INLINE Packet16uc pcast<Packet4i, Packet16uc>(const Packet4i& a, const Packet4i& b, const Packet4i& c,
const Packet4i& d) {
- return preinterpret<Packet16uc>(pcast<Packet4i, Packet16c>(a, b, c, d));
-}
-template <>
-EIGEN_STRONG_INLINE Packet8uc pcast<Packet4i, Packet8uc>(const Packet4i& a, const Packet4i& b) {
- return preinterpret<Packet8uc>(pcast<Packet4i, Packet8c>(a, b));
+ const uint16x8_t ab_u16 = pcast<Packet4i, Packet8us>(a, b);
+ const uint16x8_t cd_u16 = pcast<Packet4i, Packet8us>(c, d);
+ return vcombine_u8(vmovn_u16(ab_u16), vmovn_u16(cd_u16));
}
template <>
EIGEN_STRONG_INLINE Packet8uc pcast<Packet2i, Packet8uc>(const Packet2i& a, const Packet2i& b, const Packet2i& c,
const Packet2i& d) {
- return preinterpret<Packet8uc>(pcast<Packet2i, Packet8c>(a, b, c, d));
-}
-template <>
-EIGEN_STRONG_INLINE Packet4uc pcast<Packet4i, Packet4uc>(const Packet4i& a) {
- return static_cast<Packet4uc>(pcast<Packet4i, Packet4c>(a));
+ const uint16x4_t ab_u16 = pcast<Packet2i, Packet4us>(a, b);
+ const uint16x4_t cd_u16 = pcast<Packet2i, Packet4us>(c, d);
+ return vmovn_u16(vcombine_u16(ab_u16, cd_u16));
}
//==============================================================================
@@ -994,10 +901,6 @@
// Discard second half of input.
return vmovl_u32(vget_low_u32(a));
}
-template <>
-EIGEN_STRONG_INLINE Packet2ul pcast<Packet2ui, Packet2ul>(const Packet2ui& a) {
- return vmovl_u32(a);
-}
template <>
struct type_casting_traits<numext::uint32_t, numext::int64_t> {
@@ -1007,11 +910,32 @@
EIGEN_STRONG_INLINE Packet2l pcast<Packet4ui, Packet2l>(const Packet4ui& a) {
return preinterpret<Packet2l>(pcast<Packet4ui, Packet2ul>(a));
}
+
template <>
-EIGEN_STRONG_INLINE Packet2l pcast<Packet2ui, Packet2l>(const Packet2ui& a) {
- return preinterpret<Packet2l>(pcast<Packet2ui, Packet2ul>(a));
+struct type_casting_traits<numext::uint32_t, numext::uint32_t> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+template <>
+EIGEN_STRONG_INLINE Packet4ui pcast<Packet4ui, Packet4ui>(const Packet4ui& a) {
+ return a;
+}
+template <>
+EIGEN_STRONG_INLINE Packet2ui pcast<Packet2ui, Packet2ui>(const Packet2ui& a) {
+ return a;
}
+template <>
+struct type_casting_traits<numext::uint32_t, numext::int32_t> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+template <>
+EIGEN_STRONG_INLINE Packet4i pcast<Packet4ui, Packet4i>(const Packet4ui& a) {
+ return preinterpret<Packet4i>(a);
+}
+template <>
+EIGEN_STRONG_INLINE Packet2i pcast<Packet2ui, Packet2i>(const Packet2ui& a) {
+ return preinterpret<Packet2i>(a);
+}
template <>
struct type_casting_traits<numext::uint32_t, numext::uint16_t> {
@@ -1025,10 +949,6 @@
EIGEN_STRONG_INLINE Packet4us pcast<Packet2ui, Packet4us>(const Packet2ui& a, const Packet2ui& b) {
return vmovn_u32(vcombine_u32(a, b));
}
-template <>
-EIGEN_STRONG_INLINE Packet4us pcast<Packet4ui, Packet4us>(const Packet4ui& a) {
- return vmovn_u32(a);
-}
template <>
struct type_casting_traits<numext::uint32_t, numext::int16_t> {
@@ -1042,10 +962,6 @@
EIGEN_STRONG_INLINE Packet4s pcast<Packet2ui, Packet4s>(const Packet2ui& a, const Packet2ui& b) {
return preinterpret<Packet4s>(pcast<Packet2ui, Packet4us>(a, b));
}
-template <>
-EIGEN_STRONG_INLINE Packet4s pcast<Packet4ui, Packet4s>(const Packet4ui& a) {
- return preinterpret<Packet4s>(pcast<Packet4ui, Packet4us>(a));
-}
template <>
struct type_casting_traits<numext::uint32_t, numext::uint8_t> {
@@ -1059,24 +975,12 @@
return vcombine_u8(vmovn_u16(ab_u16), vmovn_u16(cd_u16));
}
template <>
-EIGEN_STRONG_INLINE Packet8uc pcast<Packet4ui, Packet8uc>(const Packet4ui& a, const Packet4ui& b) {
- const uint16x8_t ab_u16 = vcombine_u16(vmovn_u32(a), vmovn_u32(b));
- return vmovn_u16(ab_u16);
-}
-template <>
EIGEN_STRONG_INLINE Packet8uc pcast<Packet2ui, Packet8uc>(const Packet2ui& a, const Packet2ui& b, const Packet2ui& c,
const Packet2ui& d) {
const uint16x4_t ab_u16 = vmovn_u32(vcombine_u32(a, b));
const uint16x4_t cd_u16 = vmovn_u32(vcombine_u32(c, d));
return vmovn_u16(vcombine_u16(ab_u16, cd_u16));
}
-template <>
-EIGEN_STRONG_INLINE Packet4uc pcast<Packet4ui, Packet4uc>(const Packet4ui& a) {
- const uint16x4_t a_u16x4 = vmovn_u32(a);
- const uint16x8_t aa_u16x8 = vcombine_u16(a_u16x4, a_u16x4);
- const uint8x8_t aa_u8x8 = vmovn_u16(aa_u16x8);
- return vget_lane_u32(vreinterpret_u32_u8(aa_u8x8), 0);
-}
template <>
struct type_casting_traits<numext::uint32_t, numext::int8_t> {
@@ -1088,18 +992,10 @@
return preinterpret<Packet16c>(pcast<Packet4ui, Packet16uc>(a, b, c, d));
}
template <>
-EIGEN_STRONG_INLINE Packet8c pcast<Packet4ui, Packet8c>(const Packet4ui& a, const Packet4ui& b) {
- return preinterpret<Packet8c>(pcast<Packet4ui, Packet8uc>(a, b));
-}
-template <>
EIGEN_STRONG_INLINE Packet8c pcast<Packet2ui, Packet8c>(const Packet2ui& a, const Packet2ui& b, const Packet2ui& c,
const Packet2ui& d) {
return preinterpret<Packet8c>(pcast<Packet2ui, Packet8uc>(a, b, c, d));
}
-template <>
-EIGEN_STRONG_INLINE Packet4c pcast<Packet4ui, Packet4c>(const Packet4ui& a) {
- return static_cast<Packet4c>(pcast<Packet4ui, Packet4uc>(a));
-}
//==============================================================================
// pcast, SrcType = int64_t
@@ -1112,11 +1008,24 @@
EIGEN_STRONG_INLINE Packet4f pcast<Packet2l, Packet4f>(const Packet2l& a, const Packet2l& b) {
return vcvtq_f32_s32(vcombine_s32(vmovn_s64(a), vmovn_s64(b)));
}
+
template <>
-EIGEN_STRONG_INLINE Packet2f pcast<Packet2l, Packet2f>(const Packet2l& a) {
- return vcvt_f32_s32(vmovn_s64(a));
+struct type_casting_traits<numext::int64_t, numext::int64_t> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+template <>
+EIGEN_STRONG_INLINE Packet2l pcast<Packet2l, Packet2l>(const Packet2l& a) {
+ return a;
}
+template <>
+struct type_casting_traits<numext::int64_t, numext::uint64_t> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+template <>
+EIGEN_STRONG_INLINE Packet2ul pcast<Packet2l, Packet2ul>(const Packet2l& a) {
+ return preinterpret<Packet2ul>(a);
+}
template <>
struct type_casting_traits<numext::int64_t, numext::int32_t> {
@@ -1126,10 +1035,6 @@
EIGEN_STRONG_INLINE Packet4i pcast<Packet2l, Packet4i>(const Packet2l& a, const Packet2l& b) {
return vcombine_s32(vmovn_s64(a), vmovn_s64(b));
}
-template <>
-EIGEN_STRONG_INLINE Packet2i pcast<Packet2l, Packet2i>(const Packet2l& a) {
- return vmovn_s64(a);
-}
template <>
struct type_casting_traits<numext::int64_t, numext::uint32_t> {
@@ -1139,10 +1044,6 @@
EIGEN_STRONG_INLINE Packet4ui pcast<Packet2l, Packet4ui>(const Packet2l& a, const Packet2l& b) {
return vcombine_u32(vmovn_u64(vreinterpretq_u64_s64(a)), vmovn_u64(vreinterpretq_u64_s64(b)));
}
-template <>
-EIGEN_STRONG_INLINE Packet2ui pcast<Packet2l, Packet2ui>(const Packet2l& a) {
- return vmovn_u64(vreinterpretq_u64_s64(a));
-}
template <>
struct type_casting_traits<numext::int64_t, numext::int16_t> {
@@ -1155,11 +1056,6 @@
const int32x4_t cd_s32 = pcast<Packet2l, Packet4i>(c, d);
return vcombine_s16(vmovn_s32(ab_s32), vmovn_s32(cd_s32));
}
-template <>
-EIGEN_STRONG_INLINE Packet4s pcast<Packet2l, Packet4s>(const Packet2l& a, const Packet2l& b) {
- const int32x4_t ab_s32 = pcast<Packet2l, Packet4i>(a, b);
- return vmovn_s32(ab_s32);
-}
template <>
struct type_casting_traits<numext::int64_t, numext::uint16_t> {
@@ -1168,11 +1064,9 @@
template <>
EIGEN_STRONG_INLINE Packet8us pcast<Packet2l, Packet8us>(const Packet2l& a, const Packet2l& b, const Packet2l& c,
const Packet2l& d) {
- return preinterpret<Packet8us>(pcast<Packet2l, Packet8s>(a, b, c, d));
-}
-template <>
-EIGEN_STRONG_INLINE Packet4us pcast<Packet2l, Packet4us>(const Packet2l& a, const Packet2l& b) {
- return preinterpret<Packet4us>(pcast<Packet2l, Packet4s>(a, b));
+ const uint32x4_t ab_u32 = pcast<Packet2l, Packet4ui>(a, b);
+ const uint32x4_t cd_u32 = pcast<Packet2l, Packet4ui>(c, d);
+ return vcombine_u16(vmovn_u32(ab_u32), vmovn_u32(cd_u32));
}
template <>
@@ -1187,19 +1081,6 @@
const int16x8_t efgh_s16 = pcast<Packet2l, Packet8s>(e, f, g, h);
return vcombine_s8(vmovn_s16(abcd_s16), vmovn_s16(efgh_s16));
}
-template <>
-EIGEN_STRONG_INLINE Packet8c pcast<Packet2l, Packet8c>(const Packet2l& a, const Packet2l& b, const Packet2l& c,
- const Packet2l& d) {
- const int16x8_t abcd_s16 = pcast<Packet2l, Packet8s>(a, b, c, d);
- return vmovn_s16(abcd_s16);
-}
-template <>
-EIGEN_STRONG_INLINE Packet4c pcast<Packet2l, Packet4c>(const Packet2l& a, const Packet2l& b) {
- const int16x4_t ab_s16 = pcast<Packet2l, Packet4s>(a, b);
- const int16x8_t abab_s16 = vcombine_s16(ab_s16, ab_s16);
- const int8x8_t abab_s8 = vmovn_s16(abab_s16);
- return vget_lane_s32(vreinterpret_s32_s8(abab_s8), 0);
-}
template <>
struct type_casting_traits<numext::int64_t, numext::uint8_t> {
@@ -1213,15 +1094,6 @@
const uint16x8_t efgh_u16 = pcast<Packet2l, Packet8us>(e, f, g, h);
return vcombine_u8(vmovn_u16(abcd_u16), vmovn_u16(efgh_u16));
}
-template <>
-EIGEN_STRONG_INLINE Packet8uc pcast<Packet2l, Packet8uc>(const Packet2l& a, const Packet2l& b, const Packet2l& c,
- const Packet2l& d) {
- return preinterpret<Packet8uc>(pcast<Packet2l, Packet8c>(a, b, c, d));
-}
-template <>
-EIGEN_STRONG_INLINE Packet4uc pcast<Packet2l, Packet4uc>(const Packet2l& a, const Packet2l& b) {
- return static_cast<Packet4uc>(pcast<Packet2l, Packet4c>(a, b));
-}
//==============================================================================
// pcast, SrcType = uint64_t
@@ -1234,11 +1106,24 @@
EIGEN_STRONG_INLINE Packet4f pcast<Packet2ul, Packet4f>(const Packet2ul& a, const Packet2ul& b) {
return vcvtq_f32_u32(vcombine_u32(vmovn_u64(a), vmovn_u64(b)));
}
+
template <>
-EIGEN_STRONG_INLINE Packet2f pcast<Packet2ul, Packet2f>(const Packet2ul& a) {
- return vcvt_f32_u32(vmovn_u64(a));
+struct type_casting_traits<numext::uint64_t, numext::uint64_t> {  // uint64_t -> uint64_t (same-type cast)
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };  // both ratios are 1: the pcast below takes one source packet and returns one result packet
+};
+template <>
+EIGEN_STRONG_INLINE Packet2ul pcast<Packet2ul, Packet2ul>(const Packet2ul& a) {
+ return a;  // source and target packet types coincide, so the input is returned unchanged
}
+template <>
+struct type_casting_traits<numext::uint64_t, numext::int64_t> {  // uint64_t -> int64_t
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };  // both ratios are 1: the pcast below takes one source packet and returns one result packet
+};
+template <>
+EIGEN_STRONG_INLINE Packet2l pcast<Packet2ul, Packet2l>(const Packet2ul& a) {
+ return preinterpret<Packet2l>(a);  // handled entirely by preinterpret; NOTE(review): presumably a bit-for-bit reinterpretation with no value conversion - confirm
+}
template <>
struct type_casting_traits<numext::uint64_t, numext::uint32_t> {
@@ -1248,10 +1133,6 @@
EIGEN_STRONG_INLINE Packet4ui pcast<Packet2ul, Packet4ui>(const Packet2ul& a, const Packet2ul& b) {
return vcombine_u32(vmovn_u64(a), vmovn_u64(b));
}
-template <>
-EIGEN_STRONG_INLINE Packet2ui pcast<Packet2ul, Packet2ui>(const Packet2ul& a) {
- return vmovn_u64(a);
-}
template <>
struct type_casting_traits<numext::uint64_t, numext::int32_t> {
@@ -1261,10 +1142,6 @@
EIGEN_STRONG_INLINE Packet4i pcast<Packet2ul, Packet4i>(const Packet2ul& a, const Packet2ul& b) {
return preinterpret<Packet4i>(pcast<Packet2ul, Packet4ui>(a, b));
}
-template <>
-EIGEN_STRONG_INLINE Packet2i pcast<Packet2ul, Packet2i>(const Packet2ul& a) {
- return preinterpret<Packet2i>(pcast<Packet2ul, Packet2ui>(a));
-}
template <>
struct type_casting_traits<numext::uint64_t, numext::uint16_t> {
@@ -1277,10 +1154,6 @@
const uint16x4_t cd_u16 = vmovn_u32(vcombine_u32(vmovn_u64(c), vmovn_u64(d)));
return vcombine_u16(ab_u16, cd_u16);
}
-template <>
-EIGEN_STRONG_INLINE Packet4us pcast<Packet2ul, Packet4us>(const Packet2ul& a, const Packet2ul& b) {
- return vmovn_u32(vcombine_u32(vmovn_u64(a), vmovn_u64(b)));
-}
template <>
struct type_casting_traits<numext::uint64_t, numext::int16_t> {
@@ -1291,10 +1164,6 @@
const Packet2ul& d) {
return preinterpret<Packet8s>(pcast<Packet2ul, Packet8us>(a, b, c, d));
}
-template <>
-EIGEN_STRONG_INLINE Packet4s pcast<Packet2ul, Packet4s>(const Packet2ul& a, const Packet2ul& b) {
- return preinterpret<Packet4s>(pcast<Packet2ul, Packet4us>(a, b));
-}
template <>
struct type_casting_traits<numext::uint64_t, numext::uint8_t> {
@@ -1308,19 +1177,6 @@
const uint16x8_t efgh_u16 = pcast<Packet2ul, Packet8us>(e, f, g, h);
return vcombine_u8(vmovn_u16(abcd_u16), vmovn_u16(efgh_u16));
}
-template <>
-EIGEN_STRONG_INLINE Packet8uc pcast<Packet2ul, Packet8uc>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,
- const Packet2ul& d) {
- const uint16x8_t abcd_u16 = pcast<Packet2ul, Packet8us>(a, b, c, d);
- return vmovn_u16(abcd_u16);
-}
-template <>
-EIGEN_STRONG_INLINE Packet4uc pcast<Packet2ul, Packet4uc>(const Packet2ul& a, const Packet2ul& b) {
- const uint16x4_t ab_u16 = pcast<Packet2ul, Packet4us>(a, b);
- const uint16x8_t abab_u16 = vcombine_u16(ab_u16, ab_u16);
- const uint8x8_t abab_u8 = vmovn_u16(abab_u16);
- return vget_lane_u32(vreinterpret_u32_u8(abab_u8), 0);
-}
template <>
struct type_casting_traits<numext::uint64_t, numext::int8_t> {
@@ -1332,15 +1188,6 @@
const Packet2ul& g, const Packet2ul& h) {
return preinterpret<Packet16c>(pcast<Packet2ul, Packet16uc>(a, b, c, d, e, f, g, h));
}
-template <>
-EIGEN_STRONG_INLINE Packet8c pcast<Packet2ul, Packet8c>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,
- const Packet2ul& d) {
- return preinterpret<Packet8c>(pcast<Packet2ul, Packet8uc>(a, b, c, d));
-}
-template <>
-EIGEN_STRONG_INLINE Packet4c pcast<Packet2ul, Packet4c>(const Packet2ul& a, const Packet2ul& b) {
- return static_cast<Packet4c>(pcast<Packet2ul, Packet4uc>(a, b));
-}
#if EIGEN_ARCH_ARM64
@@ -1373,6 +1220,14 @@
return Packet4i(vreinterpretq_s32_f64(a));
}
+template <>
+struct type_casting_traits<double, double> {  // double -> double (same-type cast)
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };  // both ratios are 1: the pcast below takes one source packet and returns one result packet
+};
+template <>
+EIGEN_STRONG_INLINE Packet2d pcast<Packet2d, Packet2d>(const Packet2d& a) {
+ return a;  // source and target packet types coincide, so the input is returned unchanged
+}
template <>
struct type_casting_traits<double, float> {
@@ -1382,10 +1237,6 @@
EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
return vcombine_f32(vcvt_f32_f64(a), vcvt_f32_f64(b));
}
-template <>
-EIGEN_STRONG_INLINE Packet2f pcast<Packet2d, Packet2f>(const Packet2d& a) {
- return vcvt_f32_f64(a);
-}
template <>
struct type_casting_traits<double, numext::int64_t> {
@@ -1413,10 +1264,6 @@
EIGEN_STRONG_INLINE Packet4i pcast<Packet2d, Packet4i>(const Packet2d& a, const Packet2d& b) {
return vcombine_s32(vmovn_s64(vcvtq_s64_f64(a)), vmovn_s64(vcvtq_s64_f64(b)));
}
-template <>
-EIGEN_STRONG_INLINE Packet2i pcast<Packet2d, Packet2i>(const Packet2d& a) {
- return vmovn_s64(vcvtq_s64_f64(a));
-}
template <>
struct type_casting_traits<double, numext::uint32_t> {
@@ -1426,10 +1273,6 @@
EIGEN_STRONG_INLINE Packet4ui pcast<Packet2d, Packet4ui>(const Packet2d& a, const Packet2d& b) {
return vcombine_u32(vmovn_u64(vcvtq_u64_f64(a)), vmovn_u64(vcvtq_u64_f64(b)));
}
-template <>
-EIGEN_STRONG_INLINE Packet2ui pcast<Packet2d, Packet2ui>(const Packet2d& a) {
- return vmovn_u64(vcvtq_u64_f64(a));
-}
template <>
struct type_casting_traits<double, numext::int16_t> {
@@ -1442,11 +1285,6 @@
const int32x4_t cd_s32 = pcast<Packet2d, Packet4i>(c, d);
return vcombine_s16(vmovn_s32(ab_s32), vmovn_s32(cd_s32));
}
-template <>
-EIGEN_STRONG_INLINE Packet4s pcast<Packet2d, Packet4s>(const Packet2d& a, const Packet2d& b) {
- const int32x4_t ab_s32 = pcast<Packet2d, Packet4i>(a, b);
- return vmovn_s32(ab_s32);
-}
template <>
struct type_casting_traits<double, numext::uint16_t> {
@@ -1455,11 +1293,9 @@
template <>
EIGEN_STRONG_INLINE Packet8us pcast<Packet2d, Packet8us>(const Packet2d& a, const Packet2d& b, const Packet2d& c,
const Packet2d& d) {
- return preinterpret<Packet8us>(pcast<Packet2d, Packet8s>(a, b, c, d));
-}
-template <>
-EIGEN_STRONG_INLINE Packet4us pcast<Packet2d, Packet4us>(const Packet2d& a, const Packet2d& b) {
- return preinterpret<Packet4us>(pcast<Packet2d, Packet4s>(a, b));
+ const uint32x4_t ab_u32 = pcast<Packet2d, Packet4ui>(a, b);  // a,b -> four unsigned 32-bit lanes via the Packet4ui cast
+ const uint32x4_t cd_u32 = pcast<Packet2d, Packet4ui>(c, d);  // c,d -> four unsigned 32-bit lanes, same way
+ return vcombine_u16(vmovn_u32(ab_u32), vmovn_u32(cd_u32));  // vmovn_u32 narrows each half to 16-bit lanes; vcombine_u16 joins them into the 8-lane result
}
template <>
@@ -1474,17 +1310,6 @@
const int16x8_t efgh_s16 = pcast<Packet2d, Packet8s>(e, f, g, h);
return vcombine_s8(vmovn_s16(abcd_s16), vmovn_s16(efgh_s16));
}
-template <>
-EIGEN_STRONG_INLINE Packet8c pcast<Packet2d, Packet8c>(const Packet2d& a, const Packet2d& b, const Packet2d& c,
- const Packet2d& d) {
- const int16x8_t abcd_s16 = pcast<Packet2d, Packet8s>(a, b, c, d);
- return vmovn_s16(abcd_s16);
-}
-template <>
-EIGEN_STRONG_INLINE Packet4c pcast<Packet2d, Packet4c>(const Packet2d& a, const Packet2d& b) {
- const int32x4_t ab_s32 = pcast<Packet2d, Packet4i>(a, b);
- return pcast<Packet4i, Packet4c>(ab_s32);
-}
template <>
struct type_casting_traits<double, numext::uint8_t> {
@@ -1498,15 +1323,6 @@
const uint16x8_t efgh_u16 = pcast<Packet2d, Packet8us>(e, f, g, h);
return vcombine_u8(vmovn_u16(abcd_u16), vmovn_u16(efgh_u16));
}
-template <>
-EIGEN_STRONG_INLINE Packet8uc pcast<Packet2d, Packet8uc>(const Packet2d& a, const Packet2d& b, const Packet2d& c,
- const Packet2d& d) {
- return preinterpret<Packet8uc>(pcast<Packet2d, Packet8c>(a, b, c, d));
-}
-template <>
-EIGEN_STRONG_INLINE Packet4uc pcast<Packet2d, Packet4uc>(const Packet2d& a, const Packet2d& b) {
- return static_cast<Packet4uc>(pcast<Packet2d, Packet4c>(a, b));
-}
template <>
struct type_casting_traits<float, double> {
@@ -1517,10 +1333,6 @@
// Discard second-half of input.
return vcvt_f64_f32(vget_low_f32(a));
}
-template <>
-EIGEN_STRONG_INLINE Packet2d pcast<Packet2f, Packet2d>(const Packet2f& a) {
- return vcvt_f64_f32(a);
-}
template <>
struct type_casting_traits<numext::int8_t, double> {
@@ -1576,10 +1388,6 @@
// Discard second half of input.
return vcvtq_f64_s64(vmovl_s32(vget_low_s32(a)));
}
-template <>
-EIGEN_STRONG_INLINE Packet2d pcast<Packet2i, Packet2d>(const Packet2i& a) {
- return vcvtq_f64_s64(vmovl_s32(a));
-}
template <>
struct type_casting_traits<numext::uint32_t, double> {
@@ -1590,10 +1398,6 @@
// Discard second half of input.
return vcvtq_f64_u64(vmovl_u32(vget_low_u32(a)));
}
-template <>
-EIGEN_STRONG_INLINE Packet2d pcast<Packet2ui, Packet2d>(const Packet2ui& a) {
- return vcvtq_f64_u64(vmovl_u32(a));
-}
template <>
struct type_casting_traits<numext::int64_t, double> {
diff --git a/Eigen/src/Core/arch/SSE/TypeCasting.h b/Eigen/src/Core/arch/SSE/TypeCasting.h
index 0b5aa1c..df5c72c 100644
--- a/Eigen/src/Core/arch/SSE/TypeCasting.h
+++ b/Eigen/src/Core/arch/SSE/TypeCasting.h
@@ -135,13 +135,6 @@
return _mm_castpd_si128(a);
}
-template<> EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4i>(const Packet4i& a) {
- return Packet4ui(a);
-}
-
-template<> EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4ui>(const Packet4ui& a) {
- return Packet4i(a);
-}
// Disable the following code since it's broken on too many platforms / compilers.
//#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)
#if 0
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index 4760d9b..8354c0a 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -179,29 +179,17 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast<Scalar, NewType>(a); }
};
+template <typename Scalar>
+struct scalar_cast_op<Scalar, bool> {  // partial specialization selected when the cast target type is bool
+ typedef bool result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a) const { return a != Scalar(0); }  // result is true exactly when a compares unequal to Scalar(0)
+};
+
template<typename Scalar, typename NewType>
struct functor_traits<scalar_cast_op<Scalar,NewType> >
{ enum { Cost = is_same<Scalar, NewType>::value ? 0 : NumTraits<NewType>::AddCost, PacketAccess = false }; };
/** \internal
- * `core_cast_op` serves to distinguish the vectorized implementation from that of the legacy `scalar_cast_op` for backwards
- * compatibility. The manner in which packet ops are handled is defined by the specialized unary_evaluator:
- * `unary_evaluator<CwiseUnaryOp<core_cast_op<SrcType, DstType>, ArgType>, IndexBased>` in CoreEvaluators.h
- * Otherwise, the non-vectorized behavior is identical to that of `scalar_cast_op`
- */
-template <typename SrcType, typename DstType>
-struct core_cast_op : scalar_cast_op<SrcType, DstType> {};
-
-template <typename SrcType, typename DstType>
-struct functor_traits<core_cast_op<SrcType, DstType>> {
- using CastingTraits = type_casting_traits<SrcType, DstType>;
- enum {
- Cost = is_same<SrcType, DstType>::value ? 0 : NumTraits<DstType>::AddCost,
- PacketAccess = CastingTraits::VectorizedCast && (CastingTraits::SrcCoeffRatio <= 8)
- };
-};
-
-/** \internal
* \brief Template functor to arithmetically shift a scalar right by a number of bits
*
* \sa class CwiseUnaryOp, MatrixBase::shift_right()
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 49fa37c..b5f91bf 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -190,30 +190,6 @@
typedef typename find_best_packet_helper<Size,typename packet_traits<T>::type>::type type;
};
-template <int Size, typename PacketType,
- bool Stop = (Size == unpacket_traits<PacketType>::size) ||
- is_same<PacketType, typename unpacket_traits<PacketType>::half>::value>
-struct find_packet_by_size_helper;
-template <int Size, typename PacketType>
-struct find_packet_by_size_helper<Size, PacketType, true> {
- using type = PacketType;
-};
-template <int Size, typename PacketType>
-struct find_packet_by_size_helper<Size, PacketType, false> {
- using type = typename find_packet_by_size_helper<Size, typename unpacket_traits<PacketType>::half>::type;
-};
-
-template <typename T, int Size>
-struct find_packet_by_size {
- using type = typename find_packet_by_size_helper<Size, typename packet_traits<T>::type>::type;
- static constexpr bool value = (Size == unpacket_traits<type>::size);
-};
-template <typename T>
-struct find_packet_by_size<T, 1> {
- using type = typename unpacket_traits<T>::type;
- static constexpr bool value = (unpacket_traits<type>::size == 1);
-};
-
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
constexpr inline int compute_default_alignment_helper(int ArrayBytes, int AlignmentBytes) {
if((ArrayBytes % AlignmentBytes) == 0) {
diff --git a/Eigen/src/plugins/CommonCwiseUnaryOps.h b/Eigen/src/plugins/CommonCwiseUnaryOps.h
index 1c6b284..390759c 100644
--- a/Eigen/src/plugins/CommonCwiseUnaryOps.h
+++ b/Eigen/src/plugins/CommonCwiseUnaryOps.h
@@ -45,7 +45,7 @@
operator-() const { return NegativeReturnType(derived()); }
-template<class NewType> struct CastXpr { typedef typename internal::cast_return_type<Derived,const CwiseUnaryOp<internal::core_cast_op<Scalar, NewType>, const Derived> >::type Type; };
+template<class NewType> struct CastXpr { typedef typename internal::cast_return_type<Derived,const CwiseUnaryOp<internal::scalar_cast_op<Scalar, NewType>, const Derived> >::type Type; };  // Type: a CwiseUnaryOp applying scalar_cast_op<Scalar, NewType> to Derived, passed through cast_return_type
/// \returns an expression of \c *this with the \a Scalar type casted to
/// \a NewScalar.
diff --git a/test/array_cwise.cpp b/test/array_cwise.cpp
index 1648c7c..989359f 100644
--- a/test/array_cwise.cpp
+++ b/test/array_cwise.cpp
@@ -9,7 +9,6 @@
#include <vector>
#include "main.h"
-#include "random_without_cast_overflow.h"
// suppress annoying unsigned integer warnings
template <typename Scalar, bool IsSigned = NumTraits<Scalar>::IsSigned>
@@ -1214,109 +1213,6 @@
typed_logicals_test_impl<ArrayType>::run(m);
}
-template <typename SrcType, typename DstType, int RowsAtCompileTime, int ColsAtCompileTime>
-struct cast_test_impl {
- using SrcArray = Array<SrcType, RowsAtCompileTime, ColsAtCompileTime>;
- using DstArray = Array<DstType, RowsAtCompileTime, ColsAtCompileTime>;
- struct RandomOp {
- inline SrcType operator()(const SrcType&) const {
- return internal::random_without_cast_overflow<SrcType, DstType>::value();
- }
- };
-
- static constexpr int SrcPacketSize = internal::packet_traits<SrcType>::size;
- static constexpr int DstPacketSize = internal::packet_traits<DstType>::size;
- static constexpr int MaxPacketSize = internal::plain_enum_max(SrcPacketSize, DstPacketSize);
-
- // print non-mangled typenames
- template <typename T>
- static std::string printTypeInfo(const T&) {
- if (internal::is_same<bool, T>::value)
- return "bool";
- else if (internal::is_same<int8_t, T>::value)
- return "int8_t";
- else if (internal::is_same<int16_t, T>::value)
- return "int16_t";
- else if (internal::is_same<int32_t, T>::value)
- return "int32_t";
- else if (internal::is_same<int64_t, T>::value)
- return "int64_t";
- else if (internal::is_same<uint8_t, T>::value)
- return "uint8_t";
- else if (internal::is_same<uint16_t, T>::value)
- return "uint16_t";
- else if (internal::is_same<uint32_t, T>::value)
- return "uint32_t";
- else if (internal::is_same<uint64_t, T>::value)
- return "uint64_t";
- else if (internal::is_same<float, T>::value)
- return "float";
- else if (internal::is_same<double, T>::value)
- return "double";
- //else if (internal::is_same<long double, T>::value)
- // return "long double";
- else if (internal::is_same<half, T>::value)
- return "half";
- else if (internal::is_same<bfloat16, T>::value)
- return "bfloat16";
- else
- return typeid(T).name();
- }
-
- static void run() {
- const Index testRows = RowsAtCompileTime == Dynamic ? ((10 * MaxPacketSize) + 1) : RowsAtCompileTime;
- const Index testCols = ColsAtCompileTime == Dynamic ? ((10 * MaxPacketSize) + 1) : ColsAtCompileTime;
- const Index testSize = testRows * testCols;
- const Index minTestSize = 100;
- const Index repeats = numext::div_ceil(minTestSize, testSize);
- SrcArray src(testRows, testCols);
- DstArray dst(testRows, testCols);
- for (Index repeat = 0; repeat < repeats; repeat++) {
- src = src.unaryExpr(RandomOp());
- dst = src.template cast<DstType>();
- for (Index i = 0; i < testRows; i++)
- for (Index j = 0; j < testCols; j++) {
- DstType ref = internal::cast_impl<SrcType, DstType>::run(src(i, j));
- bool all_nan = ((numext::isnan)(src(i, j)) && (numext::isnan)(ref) && (numext::isnan)(dst(i, j)));
- bool is_equal = ref == dst(i, j);
- bool pass = all_nan || is_equal;
- if (!pass) {
- std::cout << printTypeInfo(SrcType()) << ": [" << +src(i, j) << "] to " << printTypeInfo(DstType()) << ": ["
- << +dst(i, j) << "] != [" << +ref << "]\n";
- }
- VERIFY(pass);
- }
- }
- }
-};
-
-template <int RowsAtCompileTime, int ColsAtCompileTime, typename... ScalarTypes>
-struct cast_tests_impl {
- using ScalarTuple = std::tuple<ScalarTypes...>;
- static constexpr size_t ScalarTupleSize = std::tuple_size<ScalarTuple>::value;
-
- template <size_t i = 0, size_t j = i + 1, bool Done = (i >= ScalarTupleSize - 1) || (j >= ScalarTupleSize)>
- static std::enable_if_t<Done> run() {}
-
- template <size_t i = 0, size_t j = i + 1, bool Done = (i >= ScalarTupleSize - 1) || (j >= ScalarTupleSize)>
- static std::enable_if_t<!Done> run() {
- using Type1 = typename std::tuple_element<i, ScalarTuple>::type;
- using Type2 = typename std::tuple_element<j, ScalarTuple>::type;
- cast_test_impl<Type1, Type2, RowsAtCompileTime, ColsAtCompileTime>::run();
- cast_test_impl<Type2, Type1, RowsAtCompileTime, ColsAtCompileTime>::run();
- static constexpr size_t next_i = (j == ScalarTupleSize - 1) ? (i + 1) : (i + 0);
- static constexpr size_t next_j = (j == ScalarTupleSize - 1) ? (i + 2) : (j + 1);
- run<next_i, next_j>();
- }
-};
-
-// for now, remove all references to 'long double' until test passes on all platforms
-template <int RowsAtCompileTime, int ColsAtCompileTime>
-void cast_test() {
- cast_tests_impl<RowsAtCompileTime, ColsAtCompileTime, bool, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
- uint32_t, uint64_t, float, double, /*long double, */half, bfloat16>::run();
-}
-
EIGEN_DECLARE_TEST(array_cwise)
{
for(int i = 0; i < g_repeat; i++) {
@@ -1373,20 +1269,6 @@
CALL_SUBTEST_3( typed_logicals_test(ArrayX<std::complex<double>>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
}
- for (int i = 0; i < g_repeat; i++) {
- CALL_SUBTEST_1((cast_test<1, 1>()));
- CALL_SUBTEST_2((cast_test<3, 1>()));
- CALL_SUBTEST_2((cast_test<3, 3>()));
- CALL_SUBTEST_3((cast_test<5, 1>()));
- CALL_SUBTEST_3((cast_test<5, 5>()));
- CALL_SUBTEST_4((cast_test<9, 1>()));
- CALL_SUBTEST_4((cast_test<9, 9>()));
- CALL_SUBTEST_5((cast_test<17, 1>()));
- CALL_SUBTEST_5((cast_test<17, 17>()));
- CALL_SUBTEST_6((cast_test<Dynamic, 1>()));
- CALL_SUBTEST_6((cast_test<Dynamic, Dynamic>()));
- }
-
VERIFY((internal::is_same< internal::global_math_functions_filtering_base<int>::type, int >::value));
VERIFY((internal::is_same< internal::global_math_functions_filtering_base<float>::type, float >::value));
VERIFY((internal::is_same< internal::global_math_functions_filtering_base<Array2i>::type, ArrayBase<Array2i> >::value));