Update Eigen to: https://gitlab.com/libeigen/eigen/-/commit/954879183b1e008d7f0fefb97e48a925c4e3fb16
BEGIN_PUBLIC
Update Eigen to: https://gitlab.com/libeigen/eigen/-/commit/954879183b1e008d7f0fefb97e48a925c4e3fb16
END_PUBLIC
PiperOrigin-RevId: 379770630
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index ab2ebf3..f8c87d0 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -591,7 +591,7 @@
enum { innerSize = DstXprType::InnerSizeAtCompileTime,
packetSize =unpacket_traits<PacketType>::size,
- vectorizableSize = (innerSize/packetSize)*packetSize,
+ vectorizableSize = (int(innerSize) / int(packetSize)) * int(packetSize),
size = DstXprType::SizeAtCompileTime };
for(Index outer = 0; outer < kernel.outerSize(); ++outer)
diff --git a/Eigen/src/Core/BandMatrix.h b/Eigen/src/Core/BandMatrix.h
index 480e044..878c024 100644
--- a/Eigen/src/Core/BandMatrix.h
+++ b/Eigen/src/Core/BandMatrix.h
@@ -67,7 +67,7 @@
* \warning the internal storage must be column major. */
inline Block<CoefficientsType,Dynamic,1> col(Index i)
{
- EIGEN_STATIC_ASSERT((Options&RowMajor)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
+ EIGEN_STATIC_ASSERT((int(Options) & int(RowMajor)) == 0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
Index start = 0;
Index len = coeffs().rows();
if (i<=supers())
@@ -90,7 +90,7 @@
template<int Index> struct DiagonalIntReturnType {
enum {
- ReturnOpposite = (Options&SelfAdjoint) && (((Index)>0 && Supers==0) || ((Index)<0 && Subs==0)),
+ ReturnOpposite = (int(Options) & int(SelfAdjoint)) && (((Index) > 0 && Supers == 0) || ((Index) < 0 && Subs == 0)),
Conjugate = ReturnOpposite && NumTraits<Scalar>::IsComplex,
ActualIndex = ReturnOpposite ? -Index : Index,
DiagonalSize = (RowsAtCompileTime==Dynamic || ColsAtCompileTime==Dynamic)
@@ -192,7 +192,7 @@
Options = _Options,
DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic
};
- typedef Matrix<Scalar,DataRowsAtCompileTime,ColsAtCompileTime,Options&RowMajor?RowMajor:ColMajor> CoefficientsType;
+ typedef Matrix<Scalar, DataRowsAtCompileTime, ColsAtCompileTime, int(Options) & int(RowMajor) ? RowMajor : ColMajor> CoefficientsType;
};
template<typename _Scalar, int Rows, int Cols, int Supers, int Subs, int Options>
diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h
index e32c4ac..852de8b 100644
--- a/Eigen/src/Core/BooleanRedux.h
+++ b/Eigen/src/Core/BooleanRedux.h
@@ -81,7 +81,7 @@
typedef internal::evaluator<Derived> Evaluator;
enum {
unroll = SizeAtCompileTime != Dynamic
- && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
+ && SizeAtCompileTime * (int(Evaluator::CoeffReadCost) + int(NumTraits<Scalar>::AddCost)) <= EIGEN_UNROLLING_LIMIT
};
Evaluator evaluator(derived());
if(unroll)
@@ -105,7 +105,7 @@
typedef internal::evaluator<Derived> Evaluator;
enum {
unroll = SizeAtCompileTime != Dynamic
- && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
+ && SizeAtCompileTime * (int(Evaluator::CoeffReadCost) + int(NumTraits<Scalar>::AddCost)) <= EIGEN_UNROLLING_LIMIT
};
Evaluator evaluator(derived());
if(unroll)
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 90c552f..0ff8c8d 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -561,7 +561,7 @@
typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
enum {
- CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
+ CoeffReadCost = int(evaluator<ArgType>::CoeffReadCost) + int(functor_traits<UnaryOp>::Cost),
Flags = evaluator<ArgType>::Flags
& (HereditaryBits | LinearAccessBit | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
@@ -606,13 +606,13 @@
protected:
// this helper permits to completely eliminate the functor if it is empty
- class Data : private UnaryOp
+ struct Data
{
- public:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Data(const XprType& xpr) : UnaryOp(xpr.functor()), argImpl(xpr.nestedExpression()) {}
+ Data(const XprType& xpr) : op(xpr.functor()), argImpl(xpr.nestedExpression()) {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const UnaryOp& func() const { return static_cast<const UnaryOp&>(*this); }
+ const UnaryOp& func() const { return op; }
+ UnaryOp op;
evaluator<ArgType> argImpl;
};
@@ -639,7 +639,7 @@
typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
enum {
- CoeffReadCost = evaluator<Arg1>::CoeffReadCost + evaluator<Arg2>::CoeffReadCost + evaluator<Arg3>::CoeffReadCost + functor_traits<TernaryOp>::Cost,
+ CoeffReadCost = int(evaluator<Arg1>::CoeffReadCost) + int(evaluator<Arg2>::CoeffReadCost) + int(evaluator<Arg3>::CoeffReadCost) + int(functor_traits<TernaryOp>::Cost),
Arg1Flags = evaluator<Arg1>::Flags,
Arg2Flags = evaluator<Arg2>::Flags,
@@ -700,12 +700,13 @@
protected:
// this helper permits to completely eliminate the functor if it is empty
- struct Data : private TernaryOp
+ struct Data
{
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Data(const XprType& xpr) : TernaryOp(xpr.functor()), arg1Impl(xpr.arg1()), arg2Impl(xpr.arg2()), arg3Impl(xpr.arg3()) {}
+ Data(const XprType& xpr) : op(xpr.functor()), arg1Impl(xpr.arg1()), arg2Impl(xpr.arg2()), arg3Impl(xpr.arg3()) {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TernaryOp& func() const { return static_cast<const TernaryOp&>(*this); }
+ const TernaryOp& func() const { return op; }
+ TernaryOp op;
evaluator<Arg1> arg1Impl;
evaluator<Arg2> arg2Impl;
evaluator<Arg3> arg3Impl;
@@ -735,7 +736,7 @@
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
enum {
- CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+ CoeffReadCost = int(evaluator<Lhs>::CoeffReadCost) + int(evaluator<Rhs>::CoeffReadCost) + int(functor_traits<BinaryOp>::Cost),
LhsFlags = evaluator<Lhs>::Flags,
RhsFlags = evaluator<Rhs>::Flags,
@@ -793,12 +794,13 @@
protected:
// this helper permits to completely eliminate the functor if it is empty
- struct Data : private BinaryOp
+ struct Data
{
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Data(const XprType& xpr) : BinaryOp(xpr.functor()), lhsImpl(xpr.lhs()), rhsImpl(xpr.rhs()) {}
+ Data(const XprType& xpr) : op(xpr.functor()), lhsImpl(xpr.lhs()), rhsImpl(xpr.rhs()) {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const BinaryOp& func() const { return static_cast<const BinaryOp&>(*this); }
+ const BinaryOp& func() const { return op; }
+ BinaryOp op;
evaluator<Lhs> lhsImpl;
evaluator<Rhs> rhsImpl;
};
@@ -815,7 +817,7 @@
typedef CwiseUnaryView<UnaryOp, ArgType> XprType;
enum {
- CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
+ CoeffReadCost = int(evaluator<ArgType>::CoeffReadCost) + int(functor_traits<UnaryOp>::Cost),
Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)),
@@ -858,12 +860,13 @@
protected:
// this helper permits to completely eliminate the functor if it is empty
- struct Data : private UnaryOp
+ struct Data
{
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Data(const XprType& xpr) : UnaryOp(xpr.functor()), argImpl(xpr.nestedExpression()) {}
+ Data(const XprType& xpr) : op(xpr.functor()), argImpl(xpr.nestedExpression()) {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const UnaryOp& func() const { return static_cast<const UnaryOp&>(*this); }
+ const UnaryOp& func() const { return op; }
+ UnaryOp op;
evaluator<ArgType> argImpl;
};
diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h
index 59974a5..2202b1c 100644
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -102,7 +102,7 @@
#if EIGEN_COMP_MSVC && EIGEN_HAS_CXX11
//Required for Visual Studio or the Copy constructor will probably not get inlined!
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ EIGEN_STRONG_INLINE
CwiseBinaryOp(const CwiseBinaryOp<BinaryOp,LhsType,RhsType>&) = default;
#endif
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 7f82090..d7ac4d6 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -2,6 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -688,6 +689,30 @@
};
/****************************************************************************
+* Implementation of log *
+****************************************************************************/
+
+// Complex log defined in MathFunctionsImpl.h.
+template<typename T> EIGEN_DEVICE_FUNC std::complex<T> complex_log(const std::complex<T>& z);
+
+template<typename Scalar>
+struct log_impl {
+ EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x)
+ {
+ EIGEN_USING_STD(log);
+ return static_cast<Scalar>(log(x));
+ }
+};
+
+template<typename Scalar>
+struct log_impl<std::complex<Scalar> > {
+ EIGEN_DEVICE_FUNC static inline std::complex<Scalar> run(const std::complex<Scalar>& z)
+ {
+ return complex_log(z);
+ }
+};
+
+/****************************************************************************
* Implementation of log1p *
****************************************************************************/
@@ -700,7 +725,7 @@
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_USING_STD(log);
Scalar x1p = RealScalar(1) + x;
- Scalar log_1p = log(x1p);
+ Scalar log_1p = log_impl<Scalar>::run(x1p);
const bool is_small = numext::equal_strict(x1p, Scalar(1));
const bool is_inf = numext::equal_strict(x1p, log_1p);
return (is_small || is_inf) ? x : x * (log_1p / (x1p - RealScalar(1)));
@@ -1460,8 +1485,7 @@
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T log(const T &x) {
- EIGEN_USING_STD(log);
- return static_cast<T>(log(x));
+ return internal::log_impl<T>::run(x);
}
#if defined(SYCL_DEVICE_ONLY)
diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h
index 0d3f317..4eaaaa7 100644
--- a/Eigen/src/Core/MathFunctionsImpl.h
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -184,6 +184,15 @@
: std::complex<T>(numext::abs(y) / (2 * w * abs_z), y < zero ? woz : -woz );
}
+template<typename T>
+EIGEN_DEVICE_FUNC std::complex<T> complex_log(const std::complex<T>& z) {
+ // Computes complex log.
+ T a = numext::abs(z);
+ EIGEN_USING_STD(atan2);
+ T b = atan2(z.imag(), z.real());
+ return std::complex<T>(numext::log(a), b);
+}
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h
index fdd4d4f..72eac5a 100644
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -289,9 +289,9 @@
IsInteger = NumTraits<Scalar>::IsInteger,
IsSigned = NumTraits<Scalar>::IsSigned,
RequireInitialization = 1,
- ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::ReadCost,
- AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
- MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
+ ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * int(NumTraits<Scalar>::ReadCost),
+ AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * int(NumTraits<Scalar>::AddCost),
+ MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * int(NumTraits<Scalar>::MulCost)
};
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
diff --git a/Eigen/src/Core/PartialReduxEvaluator.h b/Eigen/src/Core/PartialReduxEvaluator.h
index 0be6942..29abf35 100644
--- a/Eigen/src/Core/PartialReduxEvaluator.h
+++ b/Eigen/src/Core/PartialReduxEvaluator.h
@@ -145,7 +145,7 @@
enum {
CoeffReadCost = TraversalSize==Dynamic ? HugeCost
: TraversalSize==0 ? 1
- : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
+ : int(TraversalSize) * int(evaluator<ArgType>::CoeffReadCost) + int(CostOpType::value),
_ArgFlags = evaluator<ArgType>::Flags,
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 202ed71..e2ddbd1 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -1019,7 +1019,7 @@
else
{
// The storage order does not allow us to use reallocation.
- typename Derived::PlainObject tmp(rows,cols);
+ Derived tmp(rows,cols);
const Index common_rows = numext::mini(rows, _this.rows());
const Index common_cols = numext::mini(cols, _this.cols());
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
@@ -1054,7 +1054,7 @@
else
{
// The storage order does not allow us to use reallocation.
- typename Derived::PlainObject tmp(other);
+ Derived tmp(other);
const Index common_rows = numext::mini(tmp.rows(), _this.rows());
const Index common_cols = numext::mini(tmp.cols(), _this.cols());
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index b766e1a..8cf294b 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -831,7 +831,7 @@
typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
public:
enum {
- CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
+ CoeffReadCost = int(NumTraits<Scalar>::MulCost) + int(evaluator<MatrixType>::CoeffReadCost) + int(evaluator<DiagonalType>::CoeffReadCost),
MatrixFlags = evaluator<MatrixType>::Flags,
DiagFlags = evaluator<DiagonalType>::Flags,
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index 30598f4..b6790d1 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -58,7 +58,7 @@
public:
enum {
Cost = Evaluator::SizeAtCompileTime == Dynamic ? HugeCost
- : Evaluator::SizeAtCompileTime * Evaluator::CoeffReadCost + (Evaluator::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
+ : int(Evaluator::SizeAtCompileTime) * int(Evaluator::CoeffReadCost) + (Evaluator::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
};
@@ -331,7 +331,7 @@
enum {
PacketSize = redux_traits<Func, Evaluator>::PacketSize,
Size = Evaluator::SizeAtCompileTime,
- VectorizedSize = (Size / PacketSize) * PacketSize
+ VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize)
};
template<typename XprType>
diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h
index b7ed6f1..8ce3b37 100644
--- a/Eigen/src/Core/SelfAdjointView.h
+++ b/Eigen/src/Core/SelfAdjointView.h
@@ -66,7 +66,7 @@
enum {
Mode = internal::traits<SelfAdjointView>::Mode,
Flags = internal::traits<SelfAdjointView>::Flags,
- TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0)
+ TransposeMode = ((int(Mode) & int(Upper)) ? Lower : 0) | ((int(Mode) & int(Lower)) ? Upper : 0)
};
typedef typename MatrixType::PlainObject PlainObject;
diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h
index 3879444..dfbf995 100644
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -168,7 +168,7 @@
{
OtherDerived& other = _other.const_cast_derived();
eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) );
- eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
+ eigen_assert((!(int(Mode) & int(ZeroDiag))) && bool(int(Mode) & (int(Upper) | int(Lower))));
// If solving for a 0x0 matrix, nothing to do, simply return.
if (derived().cols() == 0)
return;
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index 779152f..fdb8bc1 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -53,7 +53,7 @@
typedef Derived const& Nested;
EIGEN_DEVICE_FUNC
- inline TriangularBase() { eigen_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); }
+ inline TriangularBase() { eigen_assert(!((int(Mode) & int(UnitDiag)) && (int(Mode) & int(ZeroDiag)))); }
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
inline Index rows() const EIGEN_NOEXCEPT { return derived().rows(); }
@@ -819,7 +819,7 @@
enum {
unroll = DstXprType::SizeAtCompileTime != Dynamic
&& SrcEvaluatorType::CoeffReadCost < HugeCost
- && DstXprType::SizeAtCompileTime * (DstEvaluatorType::CoeffReadCost+SrcEvaluatorType::CoeffReadCost) / 2 <= EIGEN_UNROLLING_LIMIT
+ && DstXprType::SizeAtCompileTime * (int(DstEvaluatorType::CoeffReadCost) + int(SrcEvaluatorType::CoeffReadCost)) / 2 <= EIGEN_UNROLLING_LIMIT
};
triangular_assignment_loop<Kernel, Mode, unroll ? int(DstXprType::SizeAtCompileTime) : Dynamic, SetOpposite>::run(kernel);
@@ -853,7 +853,7 @@
{
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
{
- call_triangular_assignment_loop<SrcXprType::Mode, (SrcXprType::Mode&SelfAdjoint)==0>(dst, src, func);
+ call_triangular_assignment_loop<SrcXprType::Mode, (int(SrcXprType::Mode) & int(SelfAdjoint)) == 0>(dst, src, func);
}
};
@@ -951,7 +951,7 @@
EIGEN_DEVICE_FUNC void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
{
other.derived().resize(this->rows(), this->cols());
- internal::call_triangular_assignment_loop<Derived::Mode,(Derived::Mode&SelfAdjoint)==0 /* SetOpposite */>(other.derived(), derived().nestedExpression());
+ internal::call_triangular_assignment_loop<Derived::Mode, (int(Derived::Mode) & int(SelfAdjoint)) == 0 /* SetOpposite */>(other.derived(), derived().nestedExpression());
}
namespace internal {
diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h
index 07a2e42..00bcca8 100644
--- a/Eigen/src/Core/Visitor.h
+++ b/Eigen/src/Core/Visitor.h
@@ -124,7 +124,7 @@
enum {
unroll = SizeAtCompileTime != Dynamic
- && SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost <= EIGEN_UNROLLING_LIMIT
+ && SizeAtCompileTime * int(ThisEvaluator::CoeffReadCost) + (SizeAtCompileTime-1) * int(internal::functor_traits<Visitor>::Cost) <= EIGEN_UNROLLING_LIMIT
};
return internal::visitor_impl<Visitor, ThisEvaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(thisEval, visitor);
}
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 7c70c07..d4aee3e 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -2260,7 +2260,8 @@
static Packet2ul p2ul_PREV0DOT5 = { 0x3FDFFFFFFFFFFFFFull, 0x3FDFFFFFFFFFFFFFull };
static Packet2d p2d_ONE = { 1.0, 1.0 };
static Packet2d p2d_ZERO = reinterpret_cast<Packet2d>(p4f_ZERO);
-static Packet2d p2d_MZERO = { -0.0, -0.0 };
+static Packet2d p2d_MZERO = { numext::bit_cast<double>(0x8000000000000000ull),
+ numext::bit_cast<double>(0x8000000000000000ull) };
#ifdef _BIG_ENDIAN
static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ZERO), reinterpret_cast<Packet4f>(p2d_ONE), 8));
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 87e8c27..8f1c1a8 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -839,7 +839,8 @@
// Step 4. Compute solution for inputs with negative real part:
// [|eta0|, sign(y0)*rho0, |eta1|, sign(y1)*rho1]
- const RealPacket cst_imag_sign_mask = pset1<Packet>(Scalar(RealScalar(0.0), RealScalar(-0.0))).v;
+ const RealScalar neg_zero = RealScalar(numext::bit_cast<float>(0x80000000u));
+ const RealPacket cst_imag_sign_mask = pset1<Packet>(Scalar(RealScalar(0.0), neg_zero)).v;
RealPacket imag_signs = pand(a.v, cst_imag_sign_mask);
Packet negative_real_result;
// Notice that rho is positive, so taking it's absolute value is a noop.
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 2b48570..e1efe9b 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -866,12 +866,12 @@
template<> EIGEN_STRONG_INLINE Packet2f pxor<Packet2f>(const Packet2f& a, const Packet2f& b);
template<> EIGEN_STRONG_INLINE Packet2f paddsub<Packet2f>(const Packet2f& a, const Packet2f & b) {
- Packet2f mask = {-0.0f, 0.0f};
+ Packet2f mask = {numext::bit_cast<float>(0x80000000u), 0.0f};
return padd(a, pxor(mask, b));
}
template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b);
template<> EIGEN_STRONG_INLINE Packet4f paddsub<Packet4f>(const Packet4f& a, const Packet4f& b) {
- Packet4f mask = {-0.0f, 0.0f, -0.0f, 0.0f};
+ Packet4f mask = {numext::bit_cast<float>(0x80000000u), 0.0f, numext::bit_cast<float>(0x80000000u), 0.0f};
return padd(a, pxor(mask, b));
}
@@ -2774,22 +2774,167 @@
return vget_lane_u32(vpmax_u32(tmp, tmp), 0);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2f, 2>& kernel)
-{
- const float32x2x2_t z = vzip_f32(kernel.packet[0], kernel.packet[1]);
- kernel.packet[0] = z.val[0];
- kernel.packet[1] = z.val[1];
-}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4f, 4>& kernel)
-{
- const float32x4x2_t tmp1 = vzipq_f32(kernel.packet[0], kernel.packet[1]);
- const float32x4x2_t tmp2 = vzipq_f32(kernel.packet[2], kernel.packet[3]);
+// Helpers for ptranspose.
+namespace detail {
+
+template<typename Packet>
+void zip_in_place(Packet& p1, Packet& p2);
- kernel.packet[0] = vcombine_f32(vget_low_f32(tmp1.val[0]), vget_low_f32(tmp2.val[0]));
- kernel.packet[1] = vcombine_f32(vget_high_f32(tmp1.val[0]), vget_high_f32(tmp2.val[0]));
- kernel.packet[2] = vcombine_f32(vget_low_f32(tmp1.val[1]), vget_low_f32(tmp2.val[1]));
- kernel.packet[3] = vcombine_f32(vget_high_f32(tmp1.val[1]), vget_high_f32(tmp2.val[1]));
+template<>
+EIGEN_ALWAYS_INLINE void zip_in_place<Packet2f>(Packet2f& p1, Packet2f& p2) {
+ const float32x2x2_t tmp = vzip_f32(p1, p2);
+ p1 = tmp.val[0];
+ p2 = tmp.val[1];
}
+
+template<>
+EIGEN_ALWAYS_INLINE void zip_in_place<Packet4f>(Packet4f& p1, Packet4f& p2) {
+ const float32x4x2_t tmp = vzipq_f32(p1, p2);
+ p1 = tmp.val[0];
+ p2 = tmp.val[1];
+}
+
+template<>
+EIGEN_ALWAYS_INLINE void zip_in_place<Packet8c>(Packet8c& p1, Packet8c& p2) {
+ const int8x8x2_t tmp = vzip_s8(p1, p2);
+ p1 = tmp.val[0];
+ p2 = tmp.val[1];
+}
+
+template<>
+EIGEN_ALWAYS_INLINE void zip_in_place<Packet16c>(Packet16c& p1, Packet16c& p2) {
+ const int8x16x2_t tmp = vzipq_s8(p1, p2);
+ p1 = tmp.val[0];
+ p2 = tmp.val[1];
+}
+
+template<>
+EIGEN_ALWAYS_INLINE void zip_in_place<Packet8uc>(Packet8uc& p1, Packet8uc& p2) {
+ const uint8x8x2_t tmp = vzip_u8(p1, p2);
+ p1 = tmp.val[0];
+ p2 = tmp.val[1];
+}
+
+template<>
+EIGEN_ALWAYS_INLINE void zip_in_place<Packet16uc>(Packet16uc& p1, Packet16uc& p2) {
+ const uint8x16x2_t tmp = vzipq_u8(p1, p2);
+ p1 = tmp.val[0];
+ p2 = tmp.val[1];
+}
+
+template<>
+EIGEN_ALWAYS_INLINE void zip_in_place<Packet2i>(Packet2i& p1, Packet2i& p2) {
+ const int32x2x2_t tmp = vzip_s32(p1, p2);
+ p1 = tmp.val[0];
+ p2 = tmp.val[1];
+}
+
+template<>
+EIGEN_ALWAYS_INLINE void zip_in_place<Packet4i>(Packet4i& p1, Packet4i& p2) {
+ const int32x4x2_t tmp = vzipq_s32(p1, p2);
+ p1 = tmp.val[0];
+ p2 = tmp.val[1];
+}
+
+template<>
+EIGEN_ALWAYS_INLINE void zip_in_place<Packet2ui>(Packet2ui& p1, Packet2ui& p2) {
+ const uint32x2x2_t tmp = vzip_u32(p1, p2);
+ p1 = tmp.val[0];
+ p2 = tmp.val[1];
+}
+
+template<>
+EIGEN_ALWAYS_INLINE void zip_in_place<Packet4ui>(Packet4ui& p1, Packet4ui& p2) {
+ const uint32x4x2_t tmp = vzipq_u32(p1, p2);
+ p1 = tmp.val[0];
+ p2 = tmp.val[1];
+}
+
+template<>
+EIGEN_ALWAYS_INLINE void zip_in_place<Packet4s>(Packet4s& p1, Packet4s& p2) {
+ const int16x4x2_t tmp = vzip_s16(p1, p2);
+ p1 = tmp.val[0];
+ p2 = tmp.val[1];
+}
+
+template<>
+EIGEN_ALWAYS_INLINE void zip_in_place<Packet8s>(Packet8s& p1, Packet8s& p2) {
+ const int16x8x2_t tmp = vzipq_s16(p1, p2);
+ p1 = tmp.val[0];
+ p2 = tmp.val[1];
+}
+
+template<>
+EIGEN_ALWAYS_INLINE void zip_in_place<Packet4us>(Packet4us& p1, Packet4us& p2) {
+ const uint16x4x2_t tmp = vzip_u16(p1, p2);
+ p1 = tmp.val[0];
+ p2 = tmp.val[1];
+}
+
+template<>
+EIGEN_ALWAYS_INLINE void zip_in_place<Packet8us>(Packet8us& p1, Packet8us& p2) {
+ const uint16x8x2_t tmp = vzipq_u16(p1, p2);
+ p1 = tmp.val[0];
+ p2 = tmp.val[1];
+}
+
+template<typename Packet>
+EIGEN_ALWAYS_INLINE void ptranspose_impl(PacketBlock<Packet, 2>& kernel) {
+ zip_in_place(kernel.packet[0], kernel.packet[1]);
+}
+
+template<typename Packet>
+EIGEN_ALWAYS_INLINE void ptranspose_impl(PacketBlock<Packet, 4>& kernel) {
+ zip_in_place(kernel.packet[0], kernel.packet[2]);
+ zip_in_place(kernel.packet[1], kernel.packet[3]);
+ zip_in_place(kernel.packet[0], kernel.packet[1]);
+ zip_in_place(kernel.packet[2], kernel.packet[3]);
+}
+
+template<typename Packet>
+EIGEN_ALWAYS_INLINE void ptranspose_impl(PacketBlock<Packet, 8>& kernel) {
+ zip_in_place(kernel.packet[0], kernel.packet[4]);
+ zip_in_place(kernel.packet[1], kernel.packet[5]);
+ zip_in_place(kernel.packet[2], kernel.packet[6]);
+ zip_in_place(kernel.packet[3], kernel.packet[7]);
+
+ zip_in_place(kernel.packet[0], kernel.packet[2]);
+ zip_in_place(kernel.packet[1], kernel.packet[3]);
+ zip_in_place(kernel.packet[4], kernel.packet[6]);
+ zip_in_place(kernel.packet[5], kernel.packet[7]);
+
+ zip_in_place(kernel.packet[0], kernel.packet[1]);
+ zip_in_place(kernel.packet[2], kernel.packet[3]);
+ zip_in_place(kernel.packet[4], kernel.packet[5]);
+ zip_in_place(kernel.packet[6], kernel.packet[7]);
+}
+
+template<typename Packet>
+EIGEN_ALWAYS_INLINE void ptranspose_impl(PacketBlock<Packet, 16>& kernel) {
+ EIGEN_UNROLL_LOOP
+ for (int i=0; i<4; ++i) {
+ const int m = (1 << i);
+ EIGEN_UNROLL_LOOP
+ for (int j=0; j<m; ++j) {
+ const int n = (1 << (3-i));
+ EIGEN_UNROLL_LOOP
+ for (int k=0; k<n; ++k) {
+ const int idx = 2*j*n+k;
+ zip_in_place(kernel.packet[idx], kernel.packet[idx + n]);
+ }
+ }
+ }
+}
+
+} // namespace detail
+
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2f, 2>& kernel) {
+ detail::ptranspose_impl(kernel);
+}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4f, 4>& kernel) {
+ detail::ptranspose_impl(kernel);
+}
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4c, 4>& kernel)
{
const int8x8_t a = vreinterpret_s8_s32(vset_lane_s32(kernel.packet[2], vdup_n_s32(kernel.packet[0]), 1));
@@ -2803,83 +2948,22 @@
kernel.packet[2] = vget_lane_s32(vreinterpret_s32_s16(zip16.val[1]), 0);
kernel.packet[3] = vget_lane_s32(vreinterpret_s32_s16(zip16.val[1]), 1);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8c, 8>& kernel)
-{
- int8x8x2_t zip8[4];
- uint16x4x2_t zip16[4];
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 4; i++)
- zip8[i] = vzip_s8(kernel.packet[i*2], kernel.packet[i*2+1]);
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 2; i++)
- {
- EIGEN_UNROLL_LOOP
- for (int j = 0; j != 2; j++)
- zip16[i*2+j] = vzip_u16(vreinterpret_u16_s8(zip8[i*2].val[j]), vreinterpret_u16_s8(zip8[i*2+1].val[j]));
- }
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 2; i++)
- {
- EIGEN_UNROLL_LOOP
- for (int j = 0; j != 2; j++)
- {
- const uint32x2x2_t z = vzip_u32(vreinterpret_u32_u16(zip16[i].val[j]), vreinterpret_u32_u16(zip16[i+2].val[j]));
- EIGEN_UNROLL_LOOP
- for (int k = 0; k != 2; k++)
- kernel.packet[i*4+j*2+k] = vreinterpret_s8_u32(z.val[k]);
- }
- }
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8c, 8>& kernel) {
+ detail::ptranspose_impl(kernel);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16c, 16>& kernel)
-{
- int8x16x2_t zip8[8];
- uint16x8x2_t zip16[8];
- uint32x4x2_t zip32[8];
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 8; i++)
- zip8[i] = vzipq_s8(kernel.packet[i*2], kernel.packet[i*2+1]);
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 4; i++)
- {
- EIGEN_UNROLL_LOOP
- for (int j = 0; j != 2; j++)
- {
- zip16[i*2+j] = vzipq_u16(vreinterpretq_u16_s8(zip8[i*2].val[j]),
- vreinterpretq_u16_s8(zip8[i*2+1].val[j]));
- }
- }
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 2; i++)
- {
- EIGEN_UNROLL_LOOP
- for (int j = 0; j != 2; j++)
- {
- EIGEN_UNROLL_LOOP
- for (int k = 0; k != 2; k++)
- zip32[i*4+j*2+k] = vzipq_u32(vreinterpretq_u32_u16(zip16[i*4+j].val[k]),
- vreinterpretq_u32_u16(zip16[i*4+j+2].val[k]));
- }
- }
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 4; i++)
- {
- EIGEN_UNROLL_LOOP
- for (int j = 0; j != 2; j++)
- {
- kernel.packet[i*4+j*2] = vreinterpretq_s8_u32(vcombine_u32(vget_low_u32(zip32[i].val[j]),
- vget_low_u32(zip32[i+4].val[j])));
- kernel.packet[i*4+j*2+1] = vreinterpretq_s8_u32(vcombine_u32(vget_high_u32(zip32[i].val[j]),
- vget_high_u32(zip32[i+4].val[j])));
- }
- }
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8c, 4>& kernel) {
+ detail::ptranspose_impl(kernel);
}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16c, 16>& kernel) {
+ detail::ptranspose_impl(kernel);
+}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16c, 8>& kernel) {
+ detail::ptranspose_impl(kernel);
+}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16c, 4>& kernel) {
+ detail::ptranspose_impl(kernel);
+}
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4uc, 4>& kernel)
{
const uint8x8_t a = vreinterpret_u8_u32(vset_lane_u32(kernel.packet[2], vdup_n_u32(kernel.packet[0]), 1));
@@ -2893,233 +2977,62 @@
kernel.packet[2] = vget_lane_u32(vreinterpret_u32_u16(zip16.val[1]), 0);
kernel.packet[3] = vget_lane_u32(vreinterpret_u32_u16(zip16.val[1]), 1);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8uc, 8>& kernel)
-{
- uint8x8x2_t zip8[4];
- uint16x4x2_t zip16[4];
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 4; i++)
- zip8[i] = vzip_u8(kernel.packet[i*2], kernel.packet[i*2+1]);
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 2; i++)
- {
- EIGEN_UNROLL_LOOP
- for (int j = 0; j != 2; j++)
- zip16[i*2+j] = vzip_u16(vreinterpret_u16_u8(zip8[i*2].val[j]), vreinterpret_u16_u8(zip8[i*2+1].val[j]));
- }
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 2; i++)
- {
- EIGEN_UNROLL_LOOP
- for (int j = 0; j != 2; j++)
- {
- const uint32x2x2_t z = vzip_u32(vreinterpret_u32_u16(zip16[i].val[j]), vreinterpret_u32_u16(zip16[i+2].val[j]));
- EIGEN_UNROLL_LOOP
- for (int k = 0; k != 2; k++)
- kernel.packet[i*4+j*2+k] = vreinterpret_u8_u32(z.val[k]);
- }
- }
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8uc, 8>& kernel) {
+ detail::ptranspose_impl(kernel);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16uc, 16>& kernel)
-{
- uint8x16x2_t zip8[8];
- uint16x8x2_t zip16[8];
- uint32x4x2_t zip32[8];
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 8; i++)
- zip8[i] = vzipq_u8(kernel.packet[i*2], kernel.packet[i*2+1]);
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 4; i++)
- {
- EIGEN_UNROLL_LOOP
- for (int j = 0; j != 2; j++)
- zip16[i*2+j] = vzipq_u16(vreinterpretq_u16_u8(zip8[i*2].val[j]),
- vreinterpretq_u16_u8(zip8[i*2+1].val[j]));
- }
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 2; i++)
- {
- EIGEN_UNROLL_LOOP
- for (int j = 0; j != 2; j++)
- {
- EIGEN_UNROLL_LOOP
- for (int k = 0; k != 2; k++)
- zip32[i*4+j*2+k] = vzipq_u32(vreinterpretq_u32_u16(zip16[i*4+j].val[k]),
- vreinterpretq_u32_u16(zip16[i*4+j+2].val[k]));
- }
- }
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 4; i++)
- {
- EIGEN_UNROLL_LOOP
- for (int j = 0; j != 2; j++)
- {
- kernel.packet[i*4+j*2] = vreinterpretq_u8_u32(vcombine_u32(vget_low_u32(zip32[i].val[j]),
- vget_low_u32(zip32[i+4].val[j])));
- kernel.packet[i*4+j*2+1] = vreinterpretq_u8_u32(vcombine_u32(vget_high_u32(zip32[i].val[j]),
- vget_high_u32(zip32[i+4].val[j])));
- }
- }
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8uc, 4>& kernel) {
+ detail::ptranspose_impl(kernel);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4s, 4>& kernel)
-{
- const int16x4x2_t zip16_1 = vzip_s16(kernel.packet[0], kernel.packet[1]);
- const int16x4x2_t zip16_2 = vzip_s16(kernel.packet[2], kernel.packet[3]);
-
- const uint32x2x2_t zip32_1 = vzip_u32(vreinterpret_u32_s16(zip16_1.val[0]), vreinterpret_u32_s16(zip16_2.val[0]));
- const uint32x2x2_t zip32_2 = vzip_u32(vreinterpret_u32_s16(zip16_1.val[1]), vreinterpret_u32_s16(zip16_2.val[1]));
-
- kernel.packet[0] = vreinterpret_s16_u32(zip32_1.val[0]);
- kernel.packet[1] = vreinterpret_s16_u32(zip32_1.val[1]);
- kernel.packet[2] = vreinterpret_s16_u32(zip32_2.val[0]);
- kernel.packet[3] = vreinterpret_s16_u32(zip32_2.val[1]);
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16uc, 16>& kernel) {
+ detail::ptranspose_impl(kernel);
+}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16uc, 8>& kernel) {
+ detail::ptranspose_impl(kernel);
+}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16uc, 4>& kernel) {
+ detail::ptranspose_impl(kernel);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8s, 4>& kernel)
-{
- const int16x8x2_t zip16_1 = vzipq_s16(kernel.packet[0], kernel.packet[1]);
- const int16x8x2_t zip16_2 = vzipq_s16(kernel.packet[2], kernel.packet[3]);
-
- const uint32x4x2_t zip32_1 = vzipq_u32(vreinterpretq_u32_s16(zip16_1.val[0]), vreinterpretq_u32_s16(zip16_2.val[0]));
- const uint32x4x2_t zip32_2 = vzipq_u32(vreinterpretq_u32_s16(zip16_1.val[1]), vreinterpretq_u32_s16(zip16_2.val[1]));
-
- kernel.packet[0] = vreinterpretq_s16_u32(zip32_1.val[0]);
- kernel.packet[1] = vreinterpretq_s16_u32(zip32_1.val[1]);
- kernel.packet[2] = vreinterpretq_s16_u32(zip32_2.val[0]);
- kernel.packet[3] = vreinterpretq_s16_u32(zip32_2.val[1]);
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4s, 4>& kernel) {
+ detail::ptranspose_impl(kernel);
+}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8s, 8>& kernel) {
+ detail::ptranspose_impl(kernel);
+}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8s, 4>& kernel) {
+ detail::ptranspose_impl(kernel);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16c, 4>& kernel)
-{
- const int8x16x2_t zip8_1 = vzipq_s8(kernel.packet[0], kernel.packet[1]);
- const int8x16x2_t zip8_2 = vzipq_s8(kernel.packet[2], kernel.packet[3]);
-
- const int16x8x2_t zip16_1 = vzipq_s16(vreinterpretq_s16_s8(zip8_1.val[0]), vreinterpretq_s16_s8(zip8_2.val[0]));
- const int16x8x2_t zip16_2 = vzipq_s16(vreinterpretq_s16_s8(zip8_1.val[1]), vreinterpretq_s16_s8(zip8_2.val[1]));
-
- kernel.packet[0] = vreinterpretq_s8_s16(zip16_1.val[0]);
- kernel.packet[1] = vreinterpretq_s8_s16(zip16_1.val[1]);
- kernel.packet[2] = vreinterpretq_s8_s16(zip16_2.val[0]);
- kernel.packet[3] = vreinterpretq_s8_s16(zip16_2.val[1]);
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4us, 4>& kernel) {
+ detail::ptranspose_impl(kernel);
+}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8us, 8>& kernel) {
+ detail::ptranspose_impl(kernel);
+}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8us, 4>& kernel) {
+ detail::ptranspose_impl(kernel);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16uc, 4>& kernel)
-{
- const uint8x16x2_t zip8_1 = vzipq_u8(kernel.packet[0], kernel.packet[1]);
- const uint8x16x2_t zip8_2 = vzipq_u8(kernel.packet[2], kernel.packet[3]);
-
- const uint16x8x2_t zip16_1 = vzipq_u16(vreinterpretq_u16_u8(zip8_1.val[0]), vreinterpretq_u16_u8(zip8_2.val[0]));
- const uint16x8x2_t zip16_2 = vzipq_u16(vreinterpretq_u16_u8(zip8_1.val[1]), vreinterpretq_u16_u8(zip8_2.val[1]));
-
- kernel.packet[0] = vreinterpretq_u8_u16(zip16_1.val[0]);
- kernel.packet[1] = vreinterpretq_u8_u16(zip16_1.val[1]);
- kernel.packet[2] = vreinterpretq_u8_u16(zip16_2.val[0]);
- kernel.packet[3] = vreinterpretq_u8_u16(zip16_2.val[1]);
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2i, 2>& kernel) {
+ detail::ptranspose_impl(kernel);
+}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4i, 4>& kernel) {
+ detail::ptranspose_impl(kernel);
+}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2ui, 2>& kernel) {
+ detail::zip_in_place(kernel.packet[0], kernel.packet[1]);
+}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4ui, 4>& kernel) {
+ detail::ptranspose_impl(kernel);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8s, 8>& kernel)
-{
- const int16x8x2_t zip16_1 = vzipq_s16(kernel.packet[0], kernel.packet[1]);
- const int16x8x2_t zip16_2 = vzipq_s16(kernel.packet[2], kernel.packet[3]);
- const int16x8x2_t zip16_3 = vzipq_s16(kernel.packet[4], kernel.packet[5]);
- const int16x8x2_t zip16_4 = vzipq_s16(kernel.packet[6], kernel.packet[7]);
-
- const uint32x4x2_t zip32_1 = vzipq_u32(vreinterpretq_u32_s16(zip16_1.val[0]), vreinterpretq_u32_s16(zip16_2.val[0]));
- const uint32x4x2_t zip32_2 = vzipq_u32(vreinterpretq_u32_s16(zip16_1.val[1]), vreinterpretq_u32_s16(zip16_2.val[1]));
- const uint32x4x2_t zip32_3 = vzipq_u32(vreinterpretq_u32_s16(zip16_3.val[0]), vreinterpretq_u32_s16(zip16_4.val[0]));
- const uint32x4x2_t zip32_4 = vzipq_u32(vreinterpretq_u32_s16(zip16_3.val[1]), vreinterpretq_u32_s16(zip16_4.val[1]));
-
- kernel.packet[0] = vreinterpretq_s16_u32(vcombine_u32(vget_low_u32(zip32_1.val[0]), vget_low_u32(zip32_3.val[0])));
- kernel.packet[1] = vreinterpretq_s16_u32(vcombine_u32(vget_high_u32(zip32_1.val[0]), vget_high_u32(zip32_3.val[0])));
- kernel.packet[2] = vreinterpretq_s16_u32(vcombine_u32(vget_low_u32(zip32_1.val[1]), vget_low_u32(zip32_3.val[1])));
- kernel.packet[3] = vreinterpretq_s16_u32(vcombine_u32(vget_high_u32(zip32_1.val[1]), vget_high_u32(zip32_3.val[1])));
- kernel.packet[4] = vreinterpretq_s16_u32(vcombine_u32(vget_low_u32(zip32_2.val[0]), vget_low_u32(zip32_4.val[0])));
- kernel.packet[5] = vreinterpretq_s16_u32(vcombine_u32(vget_high_u32(zip32_2.val[0]), vget_high_u32(zip32_4.val[0])));
- kernel.packet[6] = vreinterpretq_s16_u32(vcombine_u32(vget_low_u32(zip32_2.val[1]), vget_low_u32(zip32_4.val[1])));
- kernel.packet[7] = vreinterpretq_s16_u32(vcombine_u32(vget_high_u32(zip32_2.val[1]), vget_high_u32(zip32_4.val[1])));
-}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4us, 4>& kernel)
-{
- const uint16x4x2_t zip16_1 = vzip_u16(kernel.packet[0], kernel.packet[1]);
- const uint16x4x2_t zip16_2 = vzip_u16(kernel.packet[2], kernel.packet[3]);
-
- const uint32x2x2_t zip32_1 = vzip_u32(vreinterpret_u32_u16(zip16_1.val[0]), vreinterpret_u32_u16(zip16_2.val[0]));
- const uint32x2x2_t zip32_2 = vzip_u32(vreinterpret_u32_u16(zip16_1.val[1]), vreinterpret_u32_u16(zip16_2.val[1]));
-
- kernel.packet[0] = vreinterpret_u16_u32(zip32_1.val[0]);
- kernel.packet[1] = vreinterpret_u16_u32(zip32_1.val[1]);
- kernel.packet[2] = vreinterpret_u16_u32(zip32_2.val[0]);
- kernel.packet[3] = vreinterpret_u16_u32(zip32_2.val[1]);
-}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8us, 8>& kernel)
-{
- const uint16x8x2_t zip16_1 = vzipq_u16(kernel.packet[0], kernel.packet[1]);
- const uint16x8x2_t zip16_2 = vzipq_u16(kernel.packet[2], kernel.packet[3]);
- const uint16x8x2_t zip16_3 = vzipq_u16(kernel.packet[4], kernel.packet[5]);
- const uint16x8x2_t zip16_4 = vzipq_u16(kernel.packet[6], kernel.packet[7]);
-
- const uint32x4x2_t zip32_1 = vzipq_u32(vreinterpretq_u32_u16(zip16_1.val[0]), vreinterpretq_u32_u16(zip16_2.val[0]));
- const uint32x4x2_t zip32_2 = vzipq_u32(vreinterpretq_u32_u16(zip16_1.val[1]), vreinterpretq_u32_u16(zip16_2.val[1]));
- const uint32x4x2_t zip32_3 = vzipq_u32(vreinterpretq_u32_u16(zip16_3.val[0]), vreinterpretq_u32_u16(zip16_4.val[0]));
- const uint32x4x2_t zip32_4 = vzipq_u32(vreinterpretq_u32_u16(zip16_3.val[1]), vreinterpretq_u32_u16(zip16_4.val[1]));
-
- kernel.packet[0] = vreinterpretq_u16_u32(vcombine_u32(vget_low_u32(zip32_1.val[0]), vget_low_u32(zip32_3.val[0])));
- kernel.packet[1] = vreinterpretq_u16_u32(vcombine_u32(vget_high_u32(zip32_1.val[0]), vget_high_u32(zip32_3.val[0])));
- kernel.packet[2] = vreinterpretq_u16_u32(vcombine_u32(vget_low_u32(zip32_1.val[1]), vget_low_u32(zip32_3.val[1])));
- kernel.packet[3] = vreinterpretq_u16_u32(vcombine_u32(vget_high_u32(zip32_1.val[1]), vget_high_u32(zip32_3.val[1])));
- kernel.packet[4] = vreinterpretq_u16_u32(vcombine_u32(vget_low_u32(zip32_2.val[0]), vget_low_u32(zip32_4.val[0])));
- kernel.packet[5] = vreinterpretq_u16_u32(vcombine_u32(vget_high_u32(zip32_2.val[0]), vget_high_u32(zip32_4.val[0])));
- kernel.packet[6] = vreinterpretq_u16_u32(vcombine_u32(vget_low_u32(zip32_2.val[1]), vget_low_u32(zip32_4.val[1])));
- kernel.packet[7] = vreinterpretq_u16_u32(vcombine_u32(vget_high_u32(zip32_2.val[1]), vget_high_u32(zip32_4.val[1])));
-}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2i, 2>& kernel)
-{
- const int32x2x2_t z = vzip_s32(kernel.packet[0], kernel.packet[1]);
- kernel.packet[0] = z.val[0];
- kernel.packet[1] = z.val[1];
-}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4i, 4>& kernel)
-{
- const int32x4x2_t tmp1 = vzipq_s32(kernel.packet[0], kernel.packet[1]);
- const int32x4x2_t tmp2 = vzipq_s32(kernel.packet[2], kernel.packet[3]);
-
- kernel.packet[0] = vcombine_s32(vget_low_s32(tmp1.val[0]), vget_low_s32(tmp2.val[0]));
- kernel.packet[1] = vcombine_s32(vget_high_s32(tmp1.val[0]), vget_high_s32(tmp2.val[0]));
- kernel.packet[2] = vcombine_s32(vget_low_s32(tmp1.val[1]), vget_low_s32(tmp2.val[1]));
- kernel.packet[3] = vcombine_s32(vget_high_s32(tmp1.val[1]), vget_high_s32(tmp2.val[1]));
-}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2ui, 2>& kernel)
-{
- const uint32x2x2_t z = vzip_u32(kernel.packet[0], kernel.packet[1]);
- kernel.packet[0] = z.val[0];
- kernel.packet[1] = z.val[1];
-}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4ui, 4>& kernel)
-{
- const uint32x4x2_t tmp1 = vzipq_u32(kernel.packet[0], kernel.packet[1]);
- const uint32x4x2_t tmp2 = vzipq_u32(kernel.packet[2], kernel.packet[3]);
-
- kernel.packet[0] = vcombine_u32(vget_low_u32(tmp1.val[0]), vget_low_u32(tmp2.val[0]));
- kernel.packet[1] = vcombine_u32(vget_high_u32(tmp1.val[0]), vget_high_u32(tmp2.val[0]));
- kernel.packet[2] = vcombine_u32(vget_low_u32(tmp1.val[1]), vget_low_u32(tmp2.val[1]));
- kernel.packet[3] = vcombine_u32(vget_high_u32(tmp1.val[1]), vget_high_u32(tmp2.val[1]));
-}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
ptranspose(PacketBlock<Packet2l, 2>& kernel)
{
#if EIGEN_ARCH_ARM64
const int64x2_t tmp1 = vzip1q_s64(kernel.packet[0], kernel.packet[1]);
- const int64x2_t tmp2 = vzip2q_s64(kernel.packet[0], kernel.packet[1]);
-
+ kernel.packet[1] = vzip2q_s64(kernel.packet[0], kernel.packet[1]);
kernel.packet[0] = tmp1;
- kernel.packet[1] = tmp2;
#else
const int64x1_t tmp[2][2] = {
{ vget_low_s64(kernel.packet[0]), vget_high_s64(kernel.packet[0]) },
@@ -3135,10 +3048,8 @@
{
#if EIGEN_ARCH_ARM64
const uint64x2_t tmp1 = vzip1q_u64(kernel.packet[0], kernel.packet[1]);
- const uint64x2_t tmp2 = vzip2q_u64(kernel.packet[0], kernel.packet[1]);
-
+ kernel.packet[1] = vzip2q_u64(kernel.packet[0], kernel.packet[1]);
kernel.packet[0] = tmp1;
- kernel.packet[1] = tmp2;
#else
const uint64x1_t tmp[2][2] = {
{ vget_low_u64(kernel.packet[0]), vget_high_u64(kernel.packet[0]) },
@@ -3468,6 +3379,15 @@
};
};
+namespace detail {
+template<>
+EIGEN_ALWAYS_INLINE void zip_in_place<Packet4bf>(Packet4bf& p1, Packet4bf& p2) {
+ const uint16x4x2_t tmp = vzip_u16(p1, p2);
+ p1 = tmp.val[0];
+ p2 = tmp.val[1];
+}
+} // namespace detail
+
EIGEN_STRONG_INLINE Packet4bf F32ToBf16(const Packet4f& p)
{
// See the scalar implemention in BFloat16.h for a comprehensible explanation
@@ -3674,16 +3594,7 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4bf, 4>& kernel)
{
- PacketBlock<Packet4us, 4> k;
- k.packet[0] = kernel.packet[0];
- k.packet[1] = kernel.packet[1];
- k.packet[2] = kernel.packet[2];
- k.packet[3] = kernel.packet[3];
- ptranspose(k);
- kernel.packet[0] = k.packet[0];
- kernel.packet[1] = k.packet[1];
- kernel.packet[2] = k.packet[2];
- kernel.packet[3] = k.packet[3];
+ detail::ptranspose_impl(kernel);
}
template<> EIGEN_STRONG_INLINE Packet4bf pabsdiff<Packet4bf>(const Packet4bf& a, const Packet4bf& b)
@@ -3840,7 +3751,7 @@
template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& , const Packet2d& );
template<> EIGEN_STRONG_INLINE Packet2d paddsub<Packet2d>(const Packet2d& a, const Packet2d& b){
- const Packet2d mask = {-0.0,0.0};
+ const Packet2d mask = {numext::bit_cast<double>(0x8000000000000000ull),0.0};
return padd(a, pxor(mask, b));
}
diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h
index b10c1f6..2246439 100755
--- a/Eigen/src/Core/arch/ZVector/PacketMath.h
+++ b/Eigen/src/Core/arch/ZVector/PacketMath.h
@@ -94,8 +94,9 @@
static _EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0);
static _EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1);
-static Packet2d p2d_ONE = { 1.0, 1.0 };
-static Packet2d p2d_ZERO_ = { -0.0, -0.0 };
+static Packet2d p2d_ONE = { 1.0, 1.0 };
+static Packet2d p2d_ZERO_ = { numext::bit_cast<double>(0x8000000000000000ull),  // -0.0 spelled as its bit pattern
+                              numext::bit_cast<double>(0x8000000000000000ull) };
#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index a182b4b..63f09ab 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -50,7 +50,7 @@
template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_sum_op<LhsScalar,RhsScalar> > {
enum {
- Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2, // rough estimate!
+ Cost = (int(NumTraits<LhsScalar>::AddCost) + int(NumTraits<RhsScalar>::AddCost)) / 2, // rough estimate!
PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasAdd && packet_traits<RhsScalar>::HasAdd
// TODO vectorize mixed sum
};
@@ -88,7 +88,7 @@
template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
enum {
- Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
+ Cost = (int(NumTraits<LhsScalar>::MulCost) + int(NumTraits<RhsScalar>::MulCost))/2, // rough estimate!
PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
// TODO vectorize mixed product
};
@@ -364,7 +364,7 @@
template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_difference_op<LhsScalar,RhsScalar> > {
enum {
- Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
+ Cost = (int(NumTraits<LhsScalar>::AddCost) + int(NumTraits<RhsScalar>::AddCost)) / 2,
PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasSub && packet_traits<RhsScalar>::HasSub
};
};
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index c98fa57..16136d1 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -109,7 +109,7 @@
template<typename Scalar> struct scalar_conjugate_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op)
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); }
+ EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::conj(a); }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
};
@@ -138,7 +138,7 @@
template<typename Scalar> struct scalar_arg_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_arg_op)
typedef typename NumTraits<Scalar>::Real result_type;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using numext::arg; return arg(a); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::arg(a); }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::parg(a); }
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 79367f1..8362ecc 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -1673,8 +1673,8 @@
EIGEN_GEBGP_ONESTEP(6);
EIGEN_GEBGP_ONESTEP(7);
- blB += pk*RhsProgress;
- blA += pk*3*Traits::LhsProgress;
+ blB += int(pk) * int(RhsProgress);
+ blA += int(pk) * 3 * int(Traits::LhsProgress);
EIGEN_ASM_COMMENT("end gebp micro kernel 3pX1");
}
@@ -1885,8 +1885,8 @@
EIGEN_GEBGP_ONESTEP(6);
EIGEN_GEBGP_ONESTEP(7);
- blB += pk*RhsProgress;
- blA += pk*2*Traits::LhsProgress;
+ blB += int(pk) * int(RhsProgress);
+ blA += int(pk) * 2 * int(Traits::LhsProgress);
EIGEN_ASM_COMMENT("end gebp micro kernel 2pX1");
}
diff --git a/Eigen/src/Core/products/SelfadjointRank2Update.h b/Eigen/src/Core/products/SelfadjointRank2Update.h
index 09209f7..f752a0b 100644
--- a/Eigen/src/Core/products/SelfadjointRank2Update.h
+++ b/Eigen/src/Core/products/SelfadjointRank2Update.h
@@ -80,8 +80,8 @@
if (IsRowMajor)
actualAlpha = numext::conj(actualAlpha);
- typedef typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::type>::type UType;
- typedef typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::type>::type VType;
+ typedef typename internal::remove_all<typename internal::conj_expr_if<int(IsRowMajor) ^ int(UBlasTraits::NeedToConjugate), _ActualUType>::type>::type UType;
+ typedef typename internal::remove_all<typename internal::conj_expr_if<int(IsRowMajor) ^ int(VBlasTraits::NeedToConjugate), _ActualVType>::type>::type VType;
internal::selfadjoint_rank2_update_selector<Scalar, Index, UType, VType,
(IsRowMajor ? int(UpLo==Upper ? Lower : Upper) : UpLo)>
::run(_expression().const_cast_derived().data(),_expression().outerStride(),UType(actualU),VType(actualV),actualAlpha);
diff --git a/Eigen/src/Core/util/IntegralConstant.h b/Eigen/src/Core/util/IntegralConstant.h
index ef3fdfb..d457e02 100644
--- a/Eigen/src/Core/util/IntegralConstant.h
+++ b/Eigen/src/Core/util/IntegralConstant.h
@@ -184,7 +184,7 @@
#ifndef EIGEN_PARSED_BY_DOXYGEN
-#if EIGEN_HAS_CXX14
+#if EIGEN_HAS_CXX14_VARIABLE_TEMPLATES
template<int N>
static const internal::FixedInt<N> fix{};
#else
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index efd7199..fda253d 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -166,8 +166,8 @@
/// \internal EIGEN_COMP_IBM set to xlc version if the compiler is IBM XL C++
// XLC version
-// 3.1 0x0301
-// 4.5 0x0405
+// 3.1 0x0301
+// 4.5 0x0405
// 5.0 0x0500
// 12.1 0x0C01
#if defined(__IBMCPP__) || defined(__xlc__) || defined(__ibmxl__)
@@ -641,6 +641,14 @@
#define EIGEN_COMP_CXXVER 03
#endif
+#ifndef EIGEN_HAS_CXX14_VARIABLE_TEMPLATES
+ #if defined(__cpp_variable_templates) && __cpp_variable_templates >= 201304 && EIGEN_MAX_CPP_VER>=14
+ #define EIGEN_HAS_CXX14_VARIABLE_TEMPLATES 1
+ #else
+ #define EIGEN_HAS_CXX14_VARIABLE_TEMPLATES 0
+ #endif
+#endif
+
// The macros EIGEN_HAS_CXX?? defines a rough estimate of available c++ features
// but in practice we should not rely on them but rather on the availabilty of
@@ -837,7 +845,7 @@
#endif
#endif
-// NOTE: the required Apple's clang version is very conservative
+// NOTE: the required Apple's clang version is very conservative
// and it could be that XCode 9 works just fine.
// NOTE: the MSVC version is based on https://en.cppreference.com/w/cpp/compiler_support
// and not tested.
@@ -966,7 +974,7 @@
#endif
#define EIGEN_DEVICE_FUNC __attribute__((flatten)) __attribute__((always_inline))
// All functions callable from CUDA/HIP code must be qualified with __device__
-#elif defined(EIGEN_GPUCC)
+#elif defined(EIGEN_GPUCC)
#define EIGEN_DEVICE_FUNC __host__ __device__
#else
#define EIGEN_DEVICE_FUNC
@@ -993,7 +1001,7 @@
#else
#define eigen_plain_assert(x)
#endif
-#else
+#else
#if EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO
namespace Eigen {
namespace internal {
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index f66325f..b6aaed1 100755
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -189,6 +189,8 @@
template<> struct make_unsigned<unsigned int> { typedef unsigned int type; };
template<> struct make_unsigned<signed long> { typedef unsigned long type; };
template<> struct make_unsigned<unsigned long> { typedef unsigned long type; };
+template<> struct make_unsigned<signed long long> { typedef unsigned long long type; };
+template<> struct make_unsigned<unsigned long long> { typedef unsigned long long type; };
#if EIGEN_COMP_MSVC
template<> struct make_unsigned<signed __int64> { typedef unsigned __int64 type; };
template<> struct make_unsigned<unsigned __int64> { typedef unsigned __int64 type; };
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 2c63a95..f232317 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -611,9 +611,9 @@
struct plain_row_type
{
typedef Matrix<Scalar, 1, ExpressionType::ColsAtCompileTime,
- ExpressionType::PlainObject::Options | RowMajor, 1, ExpressionType::MaxColsAtCompileTime> MatrixRowType;
+ int(ExpressionType::PlainObject::Options) | int(RowMajor), 1, ExpressionType::MaxColsAtCompileTime> MatrixRowType;
typedef Array<Scalar, 1, ExpressionType::ColsAtCompileTime,
- ExpressionType::PlainObject::Options | RowMajor, 1, ExpressionType::MaxColsAtCompileTime> ArrayRowType;
+ int(ExpressionType::PlainObject::Options) | int(RowMajor), 1, ExpressionType::MaxColsAtCompileTime> ArrayRowType;
typedef typename conditional<
is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
diff --git a/Eigen/src/Eigenvalues/HessenbergDecomposition.h b/Eigen/src/Eigenvalues/HessenbergDecomposition.h
index d947dac..1f21139 100644
--- a/Eigen/src/Eigenvalues/HessenbergDecomposition.h
+++ b/Eigen/src/Eigenvalues/HessenbergDecomposition.h
@@ -267,7 +267,7 @@
private:
- typedef Matrix<Scalar, 1, Size, Options | RowMajor, 1, MaxSize> VectorType;
+ typedef Matrix<Scalar, 1, Size, int(Options) | int(RowMajor), 1, MaxSize> VectorType;
typedef typename NumTraits<Scalar>::Real RealScalar;
static void _compute(MatrixType& matA, CoeffVectorType& hCoeffs, VectorType& temp);
diff --git a/Eigen/src/Geometry/arch/Geometry_SIMD.h b/Eigen/src/Geometry/arch/Geometry_SIMD.h
index 9c15bfb..89ac920 100644
--- a/Eigen/src/Geometry/arch/Geometry_SIMD.h
+++ b/Eigen/src/Geometry/arch/Geometry_SIMD.h
@@ -146,8 +146,9 @@
{
evaluator<typename Derived::Coefficients> qe(q.coeffs());
Quaternion<double> res;
- double arr1[2] = {-0.0, -0.0};
- double arr2[2] = {-0.0, 0.0};
+ const double neg_zero = numext::bit_cast<double>(0x8000000000000000ull);
+ double arr1[2] = {neg_zero, neg_zero};
+ double arr2[2] = {neg_zero, 0.0};
const Packet2d mask0 = pset<Packet2d>(arr1);
const Packet2d mask2 = pset<Packet2d>(arr2);
pstoret<double,Packet2d,ResAlignment>(&res.x(), pxor(mask0, qe.template packet<traits<Derived>::Alignment,Packet2d>(0)));
diff --git a/Eigen/src/LU/arch/InverseSize4.h b/Eigen/src/LU/arch/InverseSize4.h
index ee5548a..106224b 100644
--- a/Eigen/src/LU/arch/InverseSize4.h
+++ b/Eigen/src/LU/arch/InverseSize4.h
@@ -143,7 +143,7 @@
iC = psub(iC, pmul(vec4f_swizzle2(A, A, 1, 0, 3, 2), vec4f_swizzle2(DC, DC, 2, 1, 2, 1)));
iC = psub(pmul(B, vec4f_duplane(dC, 0)), iC);
- const float sign_mask[4] = {0.0f, -0.0f, -0.0f, 0.0f};
+ const float sign_mask[4] = {0.0f, numext::bit_cast<float>(0x80000000u), numext::bit_cast<float>(0x80000000u), 0.0f};
const Packet4f p4f_sign_PNNP = pset<Packet4f>(sign_mask);
rd = pxor(rd, p4f_sign_PNNP);
iA = pmul(iA, rd);
@@ -326,8 +326,8 @@
iC1 = psub(pmul(B1, dC), iC1);
iC2 = psub(pmul(B2, dC), iC2);
- const double sign_mask1[2] = {0.0, -0.0};
- const double sign_mask2[2] = {-0.0, 0.0};
+ const double sign_mask1[2] = {0.0, numext::bit_cast<double>(0x8000000000000000ull)};
+ const double sign_mask2[2] = {numext::bit_cast<double>(0x8000000000000000ull), 0.0};
const Packet2d sign_PN = pset<Packet2d>(sign_mask1);
const Packet2d sign_NP = pset<Packet2d>(sign_mask2);
d1 = pxor(rd, sign_PN);
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h
index a22a2e5..8551a06 100644
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -112,8 +112,8 @@
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
- TrOptions = RowsAtCompileTime==1 ? (MatrixType::Options & ~(RowMajor))
- : ColsAtCompileTime==1 ? (MatrixType::Options | RowMajor)
+ TrOptions = RowsAtCompileTime==1 ? (int(MatrixType::Options) & ~(int(RowMajor)))
+ : ColsAtCompileTime==1 ? (int(MatrixType::Options) | int(RowMajor))
: MatrixType::Options
};
typedef Matrix<Scalar, ColsAtCompileTime, RowsAtCompileTime, TrOptions, MaxColsAtCompileTime, MaxRowsAtCompileTime>
@@ -202,8 +202,8 @@
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
- TrOptions = RowsAtCompileTime==1 ? (MatrixType::Options & ~(RowMajor))
- : ColsAtCompileTime==1 ? (MatrixType::Options | RowMajor)
+ TrOptions = RowsAtCompileTime==1 ? (int(MatrixType::Options) & ~(int(RowMajor)))
+ : ColsAtCompileTime==1 ? (int(MatrixType::Options) | int(RowMajor))
: MatrixType::Options
};
diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky.h b/Eigen/src/SparseCholesky/SimplicialCholesky.h
index 94c9f0f..9f93e32 100644
--- a/Eigen/src/SparseCholesky/SimplicialCholesky.h
+++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h
@@ -218,7 +218,7 @@
CholMatrixType tmp(size,size);
ConstCholMatrixPtr pmat;
- if(m_P.size()==0 && (UpLo&Upper)==Upper)
+ if(m_P.size() == 0 && (int(UpLo) & int(Upper)) == Upper)
{
// If there is no ordering, try to directly use the input matrix without any copy
internal::simplicial_cholesky_grab_input<CholMatrixType,MatrixType>::run(a, pmat, tmp);
diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
index 6130bab..9b0d3f9 100644
--- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
+++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
@@ -126,7 +126,7 @@
enum {
- CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+ CoeffReadCost = int(evaluator<Lhs>::CoeffReadCost) + int(evaluator<Rhs>::CoeffReadCost) + int(functor_traits<BinaryOp>::Cost),
Flags = XprType::Flags
};
@@ -211,7 +211,7 @@
enum {
- CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+ CoeffReadCost = int(evaluator<Lhs>::CoeffReadCost) + int(evaluator<Rhs>::CoeffReadCost) + int(functor_traits<BinaryOp>::Cost),
Flags = XprType::Flags
};
@@ -298,7 +298,7 @@
enum {
- CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+ CoeffReadCost = int(evaluator<Lhs>::CoeffReadCost) + int(evaluator<Rhs>::CoeffReadCost) + int(functor_traits<BinaryOp>::Cost),
Flags = XprType::Flags
};
@@ -457,7 +457,7 @@
enum {
- CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+ CoeffReadCost = int(evaluator<LhsArg>::CoeffReadCost) + int(evaluator<RhsArg>::CoeffReadCost) + int(functor_traits<BinaryOp>::Cost),
Flags = XprType::Flags
};
@@ -530,7 +530,7 @@
enum {
- CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+ CoeffReadCost = int(evaluator<LhsArg>::CoeffReadCost) + int(evaluator<RhsArg>::CoeffReadCost) + int(functor_traits<BinaryOp>::Cost),
Flags = XprType::Flags
};
@@ -604,7 +604,7 @@
enum {
- CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+ CoeffReadCost = int(evaluator<LhsArg>::CoeffReadCost) + int(evaluator<RhsArg>::CoeffReadCost) + int(functor_traits<BinaryOp>::Cost),
Flags = XprType::Flags
};
diff --git a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h
index df6c28d..32dac0f 100644
--- a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h
+++ b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h
@@ -24,7 +24,7 @@
class InnerIterator;
enum {
- CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
+ CoeffReadCost = int(evaluator<ArgType>::CoeffReadCost) + int(functor_traits<UnaryOp>::Cost),
Flags = XprType::Flags
};
@@ -79,7 +79,7 @@
class InnerIterator;
enum {
- CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<ViewOp>::Cost,
+ CoeffReadCost = int(evaluator<ArgType>::CoeffReadCost) + int(functor_traits<ViewOp>::Cost),
Flags = XprType::Flags
};
diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake
index 0808446..eb8457d 100644
--- a/cmake/EigenTesting.cmake
+++ b/cmake/EigenTesting.cmake
@@ -478,6 +478,7 @@
execute_process(COMMAND ${CMAKE_CXX_COMPILER} ${EIGEN_CXX_FLAG_VERSION}
OUTPUT_VARIABLE eigen_cxx_compiler_version_string OUTPUT_STRIP_TRAILING_WHITESPACE)
+ string(REGEX REPLACE "^[ \n\r]+" "" eigen_cxx_compiler_version_string ${eigen_cxx_compiler_version_string})
string(REGEX REPLACE "[\n\r].*" "" eigen_cxx_compiler_version_string ${eigen_cxx_compiler_version_string})
ei_get_compilerver_from_cxx_version_string("${eigen_cxx_compiler_version_string}" CNAME CVER)
@@ -487,9 +488,10 @@
endmacro()
# Extract compiler name and version from a raw version string
-# WARNING: if you edit thid macro, then please test it by uncommenting
+# WARNING: if you edit this macro, then please test it by uncommenting
# the testing macro call in ei_init_testing() of the EigenTesting.cmake file.
-# See also the ei_test_get_compilerver_from_cxx_version_string macro at the end of the file
+# See also the ei_test_get_compilerver_from_cxx_version_string macro at the end
+# of the file
macro(ei_get_compilerver_from_cxx_version_string VERSTRING CNAME CVER)
# extract possible compiler names
string(REGEX MATCH "g\\+\\+" ei_has_gpp ${VERSTRING})
@@ -497,6 +499,7 @@
string(REGEX MATCH "gcc|GCC" ei_has_gcc ${VERSTRING})
string(REGEX MATCH "icpc|ICC" ei_has_icpc ${VERSTRING})
string(REGEX MATCH "clang|CLANG" ei_has_clang ${VERSTRING})
+ string(REGEX MATCH "mingw32" ei_has_mingw ${VERSTRING})
# combine them
if((ei_has_llvm) AND (ei_has_gpp OR ei_has_gcc))
@@ -505,6 +508,8 @@
set(${CNAME} "llvm-clang++")
elseif(ei_has_clang)
set(${CNAME} "clang++")
+ elseif ((ei_has_mingw) AND (ei_has_gpp OR ei_has_gcc))
+ set(${CNAME} "mingw32-g++")
elseif(ei_has_icpc)
set(${CNAME} "icpc")
elseif(ei_has_gpp OR ei_has_gcc)
@@ -525,11 +530,17 @@
if(NOT eicver)
# try to extract 2:
string(REGEX MATCH "[^0-9][0-9]+\\.[0-9]+" eicver ${VERSTRING})
- else()
- set(eicver " _")
+ if (NOT eicver AND ei_has_mingw)
+ # try to extract 1 number plus suffix:
+ string(REGEX MATCH "[^0-9][0-9]+-win32" eicver ${VERSTRING})
+ endif()
endif()
endif()
endif()
+
+ if (NOT eicver)
+ set(eicver " _")
+ endif()
string(REGEX REPLACE ".(.*)" "\\1" ${CVER} ${eicver})
@@ -654,6 +665,7 @@
ei_test1_get_compilerver_from_cxx_version_string("i686-apple-darwin11-llvm-g++-4.2 (GCC) 4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2335.15.00)" "llvm-g++" "4.2.1")
ei_test1_get_compilerver_from_cxx_version_string("g++-mp-4.4 (GCC) 4.4.6" "g++" "4.4.6")
ei_test1_get_compilerver_from_cxx_version_string("g++-mp-4.4 (GCC) 2011" "g++" "4.4")
+ ei_test1_get_compilerver_from_cxx_version_string("x86_64-w64-mingw32-g++ (GCC) 10-win32 20210110" "mingw32-g++" "10-win32")
endmacro()
# Split all tests listed in EIGEN_TESTS_LIST into num_splits many targets
@@ -767,4 +779,4 @@
set_property(TEST ${test} PROPERTY LABELS "${test_labels};smoketest")
endif()
endforeach()
-endmacro(ei_add_smoke_tests)
\ No newline at end of file
+endmacro(ei_add_smoke_tests)
diff --git a/test/conservative_resize.cpp b/test/conservative_resize.cpp
index 5dc5000..d709e33 100644
--- a/test/conservative_resize.cpp
+++ b/test/conservative_resize.cpp
@@ -148,6 +148,7 @@
CALL_SUBTEST_4((run_matrix_tests<std::complex<float>, Eigen::ColMajor>()));
CALL_SUBTEST_5((run_matrix_tests<std::complex<double>, Eigen::RowMajor>()));
CALL_SUBTEST_5((run_matrix_tests<std::complex<double>, Eigen::ColMajor>()));
+ CALL_SUBTEST_1((run_matrix_tests<int, Eigen::RowMajor | Eigen::DontAlign>()));
CALL_SUBTEST_1((run_vector_tests<int>()));
CALL_SUBTEST_2((run_vector_tests<float>()));
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index 0bb511d..121ec72 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -518,9 +518,7 @@
for (int i = 0; i < PacketSize; ++i) ref[0] += data1[i];
VERIFY(test::isApproxAbs(ref[0], internal::predux(internal::pload<Packet>(data1)), refvalue) && "internal::predux");
- if (PacketSize == 8 && internal::unpacket_traits<typename internal::unpacket_traits<Packet>::half>::size ==
- 4) // so far, predux_half_downto4 is only required in such a case
- {
+ if (!internal::is_same<Packet, typename internal::unpacket_traits<Packet>::half>::value) {
int HalfPacketSize = PacketSize > 4 ? PacketSize / 2 : PacketSize;
for (int i = 0; i < HalfPacketSize; ++i) ref[i] = Scalar(0);
for (int i = 0; i < PacketSize; ++i) ref[i % HalfPacketSize] += data1[i];
@@ -548,6 +546,27 @@
}
}
+ // GeneralBlockPanelKernel also checks PacketBlock<Packet,(PacketSize%4)==0?4:PacketSize>;
+ if (PacketSize > 4 && PacketSize % 4 == 0) {
+ internal::PacketBlock<Packet, PacketSize%4==0?4:PacketSize> kernel2;
+ for (int i = 0; i < 4; ++i) {
+ kernel2.packet[i] = internal::pload<Packet>(data1 + i * PacketSize);
+ }
+ ptranspose(kernel2);
+ int data_counter = 0;
+ for (int i = 0; i < PacketSize; ++i) {
+ for (int j = 0; j < 4; ++j) {
+ data2[data_counter++] = data1[j*PacketSize + i];
+ }
+ }
+ for (int i = 0; i < 4; ++i) {
+ internal::pstore(data3, kernel2.packet[i]);
+ for (int j = 0; j < PacketSize; ++j) {
+ VERIFY(test::isApproxAbs(data3[j], data2[i*PacketSize + j], refvalue) && "ptranspose");
+ }
+ }
+ }
+
if (PacketTraits::HasBlend) {
Packet thenPacket = internal::pload<Packet>(data1);
Packet elsePacket = internal::pload<Packet>(data2);
diff --git a/test/prec_inverse_4x4.cpp b/test/prec_inverse_4x4.cpp
index 0724664..86f0571 100644
--- a/test/prec_inverse_4x4.cpp
+++ b/test/prec_inverse_4x4.cpp
@@ -30,18 +30,17 @@
{
using std::abs;
typedef typename MatrixType::Scalar Scalar;
- typedef typename MatrixType::RealScalar RealScalar;
double error_sum = 0., error_max = 0.;
for(int i = 0; i < repeat; ++i)
{
MatrixType m;
- RealScalar absdet;
+ bool is_invertible;
do {
m = MatrixType::Random();
- absdet = abs(m.determinant());
- } while(absdet < NumTraits<Scalar>::epsilon());
+ is_invertible = Eigen::FullPivLU<MatrixType>(m).isInvertible();
+ } while(!is_invertible);
MatrixType inv = m.inverse();
- double error = double( (m*inv-MatrixType::Identity()).norm() * absdet / NumTraits<Scalar>::epsilon() );
+ double error = double( (m*inv-MatrixType::Identity()).norm());
error_sum += error;
error_max = (std::max)(error_max, error);
}
diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp
index 7a85388..97c0bda 100644
--- a/test/vectorization_logic.cpp
+++ b/test/vectorization_logic.cpp
@@ -159,11 +159,11 @@
EIGEN_UNALIGNED_VECTORIZE ? InnerUnrolling : NoUnrolling));
VERIFY(test_assign(Matrix1(),Matrix1()+Matrix1(),
- (Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal,
+ (int(Matrix1::InnerSizeAtCompileTime) % int(PacketSize))==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal,
CompleteUnrolling));
VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(),
- EIGEN_UNALIGNED_VECTORIZE ? ((Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal)
+ EIGEN_UNALIGNED_VECTORIZE ? ((int(Matrix1::InnerSizeAtCompileTime) % int(PacketSize))==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal)
: LinearTraversal, CompleteUnrolling));
VERIFY(test_assign(Matrix44c().col(1),Matrix44c().col(2)+Matrix44c().col(3),
@@ -324,7 +324,7 @@
EIGEN_UNALIGNED_VECTORIZE ? InnerUnrolling : NoUnrolling));
VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(),
- EIGEN_UNALIGNED_VECTORIZE ? ((Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal,CompleteUnrolling));
+ EIGEN_UNALIGNED_VECTORIZE ? ((int(Matrix1::InnerSizeAtCompileTime) % int(PacketSize))==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal,CompleteUnrolling));
if(PacketSize>1)
{
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h
index 91a6f8d..8b8fb92 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h
@@ -99,18 +99,18 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
return m_impl.dimensions();
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
m_impl.evalSubExprsIfNeeded(NULL);
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
@@ -240,7 +240,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_orig_impl(op.expression(), device),
m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device),
m_return_dim(op.return_dim())
@@ -263,11 +263,11 @@
return m_impl.dimensions();
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
m_impl.evalSubExprsIfNeeded(NULL);
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
index 72f072c..e5811d6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
@@ -104,14 +104,14 @@
static const int NumDims = XprType::NumDims;
enum {
- IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned &
- TensorEvaluator<RightArgType, Device>::IsAligned,
- PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &
- TensorEvaluator<RightArgType, Device>::PacketAccess,
- BlockAccess = TensorEvaluator<LeftArgType, Device>::BlockAccess &
- TensorEvaluator<RightArgType, Device>::BlockAccess,
- PreferBlockAccess = TensorEvaluator<LeftArgType, Device>::PreferBlockAccess |
- TensorEvaluator<RightArgType, Device>::PreferBlockAccess,
+ IsAligned = int(TensorEvaluator<LeftArgType, Device>::IsAligned) &
+ int(TensorEvaluator<RightArgType, Device>::IsAligned),
+ PacketAccess = int(TensorEvaluator<LeftArgType, Device>::PacketAccess) &
+ int(TensorEvaluator<RightArgType, Device>::PacketAccess),
+ BlockAccess = int(TensorEvaluator<LeftArgType, Device>::BlockAccess) &
+ int(TensorEvaluator<RightArgType, Device>::BlockAccess),
+ PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) |
+ int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess),
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
RawAccess = TensorEvaluator<LeftArgType, Device>::RawAccess
};
@@ -124,7 +124,7 @@
RightTensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) :
+ TensorEvaluator(const XprType& op, const Device& device) :
m_leftImpl(op.lhsExpression(), device),
m_rightImpl(op.rhsExpression(), device)
{
@@ -142,7 +142,7 @@
return m_rightImpl.dimensions();
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
m_leftImpl.evalSubExprsIfNeeded(NULL);
// If the lhs provides raw access to its storage area (i.e. if m_leftImpl.data() returns a non
@@ -154,7 +154,7 @@
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType, EvalSubExprsCallback done) {
m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
m_rightImpl.evalSubExprsIfNeededAsync(
@@ -163,7 +163,7 @@
}
#endif // EIGEN_USE_THREADS
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_leftImpl.cleanup();
m_rightImpl.cleanup();
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index fc75c8d..a354132 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -138,8 +138,7 @@
TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
- const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: isCopy(false), nByOne(false), oneByN(false),
m_device(device), m_broadcast(op.broadcast()), m_impl(op.expression(), device)
{
@@ -211,20 +210,20 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_impl.evalSubExprsIfNeeded(NULL);
return true;
}
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType, EvalSubExprsCallback done) {
m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
}
#endif // EIGEN_USE_THREADS
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
index 7c6bbd1..3764573 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
@@ -164,7 +164,7 @@
TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_dim(op.dim()), m_device(device)
{
EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
@@ -200,12 +200,12 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_impl.evalSubExprsIfNeeded(NULL);
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
@@ -433,7 +433,7 @@
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: Base(op, device)
{ }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h
index 0dfe216..5235a8e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h
@@ -119,7 +119,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_leftImpl(op.lhsExpression(), device), m_rightImpl(op.rhsExpression(), device), m_axis(op.axis())
{
EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) || NumDims == 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
@@ -172,14 +172,14 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
// TODO(phli): Add short-circuit memcpy evaluation if underlying data are linear?
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType)
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType)
{
m_leftImpl.evalSubExprsIfNeeded(NULL);
m_rightImpl.evalSubExprsIfNeeded(NULL);
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
+ EIGEN_STRONG_INLINE void cleanup()
{
m_leftImpl.cleanup();
m_rightImpl.cleanup();
@@ -318,7 +318,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(XprType& op, const Device& device)
: Base(op, device)
{
EIGEN_STATIC_ASSERT((static_cast<int>(Layout) == static_cast<int>(ColMajor)), YOU_MADE_A_PROGRAMMING_MISTAKE);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index 424cace..8b35f79 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -417,7 +417,7 @@
typedef DSizes<Index, NumDims> Dimensions;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ EIGEN_STRONG_INLINE
TensorContractionEvaluatorBase(const XprType& op, const Device& device)
: m_leftImpl(choose(Cond<static_cast<int>(Layout) == static_cast<int>(ColMajor)>(),
op.lhsExpression(), op.rhsExpression()), device),
@@ -602,7 +602,7 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
m_leftImpl.evalSubExprsIfNeeded(NULL);
m_rightImpl.evalSubExprsIfNeeded(NULL);
if (data) {
@@ -617,7 +617,7 @@
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType dest, EvalSubExprsCallback done) {
m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done, dest](bool) {
m_rightImpl.evalSubExprsIfNeededAsync(nullptr, [this, done, dest](bool) {
@@ -633,6 +633,7 @@
}
#endif // EIGEN_USE_THREADS
+#ifndef TENSOR_CONTRACTION_DISPATCH
#define TENSOR_CONTRACTION_DISPATCH(METHOD, ALIGNMENT, ARGS) \
if (this->m_lhs_inner_dim_contiguous) { \
if (this->m_rhs_inner_dim_contiguous) { \
@@ -663,7 +664,9 @@
} \
} \
}
+#endif
+#ifndef TENSOR_CONTRACTION_ASYNC_DISPATCH
#define TENSOR_CONTRACTION_ASYNC_DISPATCH(METHOD, DONE, ALIGNMENT, ARGS, FN) \
if (this->m_lhs_inner_dim_contiguous) { \
if (this->m_rhs_inner_dim_contiguous) { \
@@ -694,6 +697,7 @@
} \
} \
}
+#endif
EIGEN_DEVICE_FUNC void evalTo(Scalar* buffer) const {
static_cast<const Derived*>(this)->template evalProduct<Unaligned>(buffer);
@@ -908,7 +912,7 @@
kernel.deallocate(this->m_device, packed_mem);
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_leftImpl.cleanup();
m_rightImpl.cleanup();
@@ -1005,7 +1009,7 @@
// Could we use NumDimensions here?
typedef DSizes<Index, NumDims> Dimensions;
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) :
+ TensorEvaluator(const XprType& op, const Device& device) :
Base(op, device) { }
template <int Alignment>
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
index bb990b3..c818038 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
@@ -1270,7 +1270,7 @@
typedef typename LeftEvaluator::Dimensions LeftDimensions;
typedef typename RightEvaluator::Dimensions RightDimensions;
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) :
+ TensorEvaluator(const XprType& op, const Device& device) :
Base(op, device)
{
EIGEN_STATIC_ASSERT( (internal::is_same<OutputKernelType, const NoOpOutputKernel>::value),
@@ -1278,7 +1278,7 @@
}
// We need to redefine this method to make nvcc happy
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
this->m_leftImpl.evalSubExprsIfNeeded(NULL);
this->m_rightImpl.evalSubExprsIfNeeded(NULL);
if (data) {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h
index a6ca177..473c228 100755
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h
@@ -1340,10 +1340,10 @@
(RDims == 2 && ContractDims == 1) || (rhs_inner_dim_contiguous && !rhs_inner_dim_reordered);
};
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType &op, const Device &device) : Base(op, device) {}
+ TensorEvaluator(const XprType &op, const Device &device) : Base(op, device) {}
// We need to redefine this method to make nvcc happy
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(typename Base::EvaluatorPointerType data) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(typename Base::EvaluatorPointerType data) {
this->m_leftImpl.evalSubExprsIfNeeded(NULL);
this->m_rightImpl.evalSubExprsIfNeeded(NULL);
if (!data) {
@@ -1630,7 +1630,7 @@
}
#endif
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
this->m_leftImpl.cleanup();
this->m_rightImpl.cleanup();
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
index 4449390..09d2da9 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
@@ -195,14 +195,14 @@
};
template <bool SameType, typename Eval, typename EvalPointerType> struct ConversionSubExprEval {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType) {
+ static EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType) {
impl.evalSubExprsIfNeeded(NULL);
return true;
}
};
template <typename Eval, typename EvalPointerType> struct ConversionSubExprEval<true, Eval, EvalPointerType> {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType data) {
+ static EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType data) {
return impl.evalSubExprsIfNeeded(data);
}
};
@@ -211,8 +211,7 @@
template <bool SameType, typename Eval, typename EvalPointerType,
typename EvalSubExprsCallback>
struct ConversionSubExprEvalAsync {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(
- Eval& impl, EvalPointerType, EvalSubExprsCallback done) {
+ static EIGEN_STRONG_INLINE void run(Eval& impl, EvalPointerType, EvalSubExprsCallback done) {
impl.evalSubExprsIfNeededAsync(nullptr, std::move(done));
}
};
@@ -221,8 +220,7 @@
typename EvalSubExprsCallback>
struct ConversionSubExprEvalAsync<true, Eval, EvalPointerType,
EvalSubExprsCallback> {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(
- Eval& impl, EvalPointerType data, EvalSubExprsCallback done) {
+ static EIGEN_STRONG_INLINE void run(Eval& impl, EvalPointerType data, EvalSubExprsCallback done) {
impl.evalSubExprsIfNeededAsync(data, std::move(done));
}
};
@@ -363,21 +361,21 @@
TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device)
{
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data)
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data)
{
return ConversionSubExprEval<IsSameType, TensorEvaluator<ArgType, Device>, EvaluatorPointerType>::run(m_impl, data);
}
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType data, EvalSubExprsCallback done) {
ConversionSubExprEvalAsync<IsSameType, TensorEvaluator<ArgType, Device>,
EvaluatorPointerType,
@@ -385,7 +383,7 @@
}
#endif
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
+ EIGEN_STRONG_INLINE void cleanup()
{
m_impl.cleanup();
}
@@ -404,8 +402,8 @@
const bool Vectorizable =
IsSameType
? TensorEvaluator<ArgType, Device>::PacketAccess
- : TensorEvaluator<ArgType, Device>::PacketAccess &
- internal::type_casting_traits<SrcType, TargetType>::VectorizedCast;
+ : int(TensorEvaluator<ArgType, Device>::PacketAccess) &
+ int(internal::type_casting_traits<SrcType, TargetType>::VectorizedCast);
return internal::PacketConv<PacketSourceType, PacketReturnType, LoadMode,
Vectorizable, IsSameType>::run(m_impl, index);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
index df289e2..b20f80b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
@@ -307,8 +307,8 @@
typedef typename Storage::Type EvaluatorPointerType;
enum {
- IsAligned = TensorEvaluator<InputArgType, Device>::IsAligned & TensorEvaluator<KernelArgType, Device>::IsAligned,
- PacketAccess = TensorEvaluator<InputArgType, Device>::PacketAccess & TensorEvaluator<KernelArgType, Device>::PacketAccess,
+ IsAligned = int(TensorEvaluator<InputArgType, Device>::IsAligned) & int(TensorEvaluator<KernelArgType, Device>::IsAligned),
+ PacketAccess = int(TensorEvaluator<InputArgType, Device>::PacketAccess) & int(TensorEvaluator<KernelArgType, Device>::PacketAccess),
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<InputArgType, Device>::Layout,
@@ -320,7 +320,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_kernel(NULL), m_local_kernel(false), m_device(device)
{
EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
@@ -384,12 +384,12 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
m_inputImpl.evalSubExprsIfNeeded(NULL);
preloadKernel();
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_inputImpl.cleanup();
if (m_local_kernel) {
m_device.deallocate((void*)m_kernel);
@@ -797,7 +797,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const GpuDevice& device)
+ TensorEvaluator(const XprType& op, const GpuDevice& device)
: m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device)
{
EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, GpuDevice>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, GpuDevice>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h
index 92003c7..033318f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h
@@ -305,7 +305,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType &op, const Eigen::SyclDevice &device)
+ TensorEvaluator(const XprType &op, const Eigen::SyclDevice &device)
: m_inputImpl(op.inputExpression(), device),
m_kernelArg(op.kernelExpression()),
m_kernelImpl(op.kernelExpression(), device),
@@ -334,7 +334,7 @@
EIGEN_DEVICE_FUNC const Dimensions &dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
preloadKernel();
m_inputImpl.evalSubExprsIfNeeded(NULL);
if (data) {
@@ -348,7 +348,7 @@
}
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_inputImpl.cleanup();
if (m_buf) {
m_device.deallocate_temp(m_buf);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
index 476b228..95a8a84 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
@@ -106,7 +106,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const ArgType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const ArgType& op, const Device& device)
: m_op(op), m_device(device), m_result(NULL)
{
m_dimensions = op.func().dimensions(op.expression());
@@ -114,7 +114,7 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
if (data) {
evalTo(data);
return false;
@@ -126,7 +126,7 @@
}
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
if (m_result) {
m_device.deallocate_temp(m_result);
m_result = NULL;
@@ -157,7 +157,7 @@
#endif
protected:
- EIGEN_DEVICE_FUNC void evalTo(EvaluatorPointerType data) {
+ void evalTo(EvaluatorPointerType data) {
TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(m_device.get(data), m_dimensions);
m_op.func().eval(m_op.expression(), result, m_device);
}
@@ -279,7 +279,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_op(op), m_device(device), m_result(NULL)
{
m_dimensions = op.func().dimensions(op.lhsExpression(), op.rhsExpression());
@@ -287,7 +287,7 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
if (data) {
evalTo(data);
return false;
@@ -299,7 +299,7 @@
}
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
if (m_result != NULL) {
m_device.deallocate_temp(m_result);
m_result = NULL;
@@ -330,7 +330,7 @@
#endif
protected:
- EIGEN_DEVICE_FUNC void evalTo(EvaluatorPointerType data) {
+ void evalTo(EvaluatorPointerType data) {
TensorMap<Tensor<CoeffReturnType, NumDims, Layout> > result(m_device.get(data), m_dimensions);
m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device);
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
index 4689b02..a48d035 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
@@ -131,17 +131,17 @@
TensorBlockAssignment;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_buffer(device.get(op.buffer())), m_expression(op.expression()){}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() {
+ EIGEN_STRONG_INLINE ~TensorEvaluator() {
}
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType scalar) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType scalar) {
EIGEN_UNUSED_VARIABLE(scalar);
eigen_assert(scalar == NULL);
return m_impl.evalSubExprsIfNeeded(m_buffer);
@@ -149,7 +149,7 @@
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType scalar, EvalSubExprsCallback done) {
EIGEN_UNUSED_VARIABLE(scalar);
eigen_assert(scalar == NULL);
@@ -191,7 +191,7 @@
block.cleanup();
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index d4532b7..3aff7fa 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -63,7 +63,7 @@
TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
: m_data(device.get((const_cast<TensorPointerType>(m.data())))),
m_dims(m.dimensions()),
m_device(device)
@@ -72,7 +72,7 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) {
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && dest) {
m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
return false;
@@ -82,14 +82,14 @@
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType dest, EvalSubExprsCallback done) {
// TODO(ezhulenev): ThreadPoolDevice memcpy is blockign operation.
done(evalSubExprsIfNeeded(dest));
}
#endif // EIGEN_USE_THREADS
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {}
+ EIGEN_STRONG_INLINE void cleanup() {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
eigen_assert(m_data != NULL);
@@ -262,13 +262,13 @@
TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
: m_data(device.get(m.data())), m_dims(m.dimensions()), m_device(device)
{ }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) {
m_device.memcpy((void*)(m_device.get(data)),m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
return false;
@@ -278,14 +278,14 @@
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType dest, EvalSubExprsCallback done) {
// TODO(ezhulenev): ThreadPoolDevice memcpy is a blockign operation.
done(evalSubExprsIfNeeded(dest));
}
#endif // EIGEN_USE_THREADS
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
+ EIGEN_STRONG_INLINE void cleanup() { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
eigen_assert(m_data != NULL);
@@ -357,7 +357,6 @@
{
typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;
- EIGEN_DEVICE_FUNC
TensorEvaluator(const XprType& op, const Device& device)
: m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
{ }
@@ -391,17 +390,17 @@
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; }
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; }
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType, EvalSubExprsCallback done) {
done(true);
}
#endif // EIGEN_USE_THREADS
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
+ EIGEN_STRONG_INLINE void cleanup() { }
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
{
@@ -446,8 +445,8 @@
enum {
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess &
- internal::functor_traits<UnaryOp>::PacketAccess,
+ PacketAccess = int(TensorEvaluator<ArgType, Device>::PacketAccess) &
+ int(internal::functor_traits<UnaryOp>::PacketAccess),
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
@@ -455,7 +454,7 @@
RawAccess = false
};
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
+ TensorEvaluator(const XprType& op, const Device& device)
: m_device(device),
m_functor(op.functor()),
m_argImpl(op.nestedExpression(), device)
@@ -485,20 +484,20 @@
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_argImpl.evalSubExprsIfNeeded(NULL);
return true;
}
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType, EvalSubExprsCallback done) {
m_argImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
}
#endif // EIGEN_USE_THREADS
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_argImpl.cleanup();
}
@@ -557,21 +556,21 @@
typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;
enum {
- IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned &
- TensorEvaluator<RightArgType, Device>::IsAligned,
- PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &
- TensorEvaluator<RightArgType, Device>::PacketAccess &
- internal::functor_traits<BinaryOp>::PacketAccess,
- BlockAccess = TensorEvaluator<LeftArgType, Device>::BlockAccess &
- TensorEvaluator<RightArgType, Device>::BlockAccess,
- PreferBlockAccess = TensorEvaluator<LeftArgType, Device>::PreferBlockAccess |
- TensorEvaluator<RightArgType, Device>::PreferBlockAccess,
+ IsAligned = int(TensorEvaluator<LeftArgType, Device>::IsAligned) &
+ int(TensorEvaluator<RightArgType, Device>::IsAligned),
+ PacketAccess = int(TensorEvaluator<LeftArgType, Device>::PacketAccess) &
+ int(TensorEvaluator<RightArgType, Device>::PacketAccess) &
+ int(internal::functor_traits<BinaryOp>::PacketAccess),
+ BlockAccess = int(TensorEvaluator<LeftArgType, Device>::BlockAccess) &
+ int(TensorEvaluator<RightArgType, Device>::BlockAccess),
+ PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) |
+ int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess),
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
+ TensorEvaluator(const XprType& op, const Device& device)
: m_device(device),
m_functor(op.functor()),
m_leftImpl(op.lhsExpression(), device),
@@ -613,7 +612,7 @@
return m_leftImpl.dimensions();
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_leftImpl.evalSubExprsIfNeeded(NULL);
m_rightImpl.evalSubExprsIfNeeded(NULL);
return true;
@@ -621,7 +620,7 @@
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType, EvalSubExprsCallback done) {
// TODO(ezhulenev): Evaluate two expression in parallel?
m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
@@ -631,7 +630,7 @@
}
#endif // EIGEN_USE_THREADS
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_leftImpl.cleanup();
m_rightImpl.cleanup();
}
@@ -709,7 +708,7 @@
RawAccess = false
};
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
+ TensorEvaluator(const XprType& op, const Device& device)
: m_functor(op.functor()),
m_arg1Impl(op.arg1Expression(), device),
m_arg2Impl(op.arg2Expression(), device),
@@ -752,13 +751,13 @@
return m_arg1Impl.dimensions();
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_arg1Impl.evalSubExprsIfNeeded(NULL);
m_arg2Impl.evalSubExprsIfNeeded(NULL);
m_arg3Impl.evalSubExprsIfNeeded(NULL);
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_arg1Impl.cleanup();
m_arg2Impl.cleanup();
m_arg3Impl.cleanup();
@@ -829,7 +828,7 @@
RawAccess = false
};
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
+ TensorEvaluator(const XprType& op, const Device& device)
: m_condImpl(op.ifExpression(), device),
m_thenImpl(op.thenExpression(), device),
m_elseImpl(op.elseExpression(), device)
@@ -886,7 +885,7 @@
return m_condImpl.dimensions();
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_condImpl.evalSubExprsIfNeeded(NULL);
m_thenImpl.evalSubExprsIfNeeded(NULL);
m_elseImpl.evalSubExprsIfNeeded(NULL);
@@ -895,7 +894,7 @@
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType, EvalSubExprsCallback done) {
m_condImpl.evalSubExprsIfNeeded(nullptr, [this, done](bool) {
m_thenImpl.evalSubExprsIfNeeded(nullptr, [this, done](bool) {
@@ -905,7 +904,7 @@
}
#endif // EIGEN_USE_THREADS
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_condImpl.cleanup();
m_thenImpl.cleanup();
m_elseImpl.cleanup();
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
index c62bc5f..4a1a068 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
@@ -144,7 +144,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) {
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) {
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
for (int i = 0; i < NumDims; ++i) {
eigen_assert(input_dims[i] > 0);
@@ -169,7 +169,7 @@
return m_dimensions;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
m_impl.evalSubExprsIfNeeded(NULL);
if (data) {
evalToBuf(data);
@@ -181,7 +181,7 @@
}
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
if (m_data) {
m_device.deallocate(m_data);
m_data = NULL;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
index 14020aa..e800ded 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
@@ -135,16 +135,13 @@
TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
+ TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_op(op.expression()),
m_device(device), m_buffer(NULL)
{ }
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
- #if !defined(EIGEN_HIPCC)
- EIGEN_DEVICE_FUNC
- #endif
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
const Index numValues = internal::array_prod(m_impl.dimensions());
m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType)));
@@ -165,7 +162,7 @@
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void evalSubExprsIfNeededAsync(
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType, EvalSubExprsCallback done) {
const Index numValues = internal::array_prod(m_impl.dimensions());
m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(
@@ -185,7 +182,7 @@
}
#endif
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_device.deallocate_temp(m_buffer);
m_buffer = NULL;
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
index b1ff1d8..174bf06 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
@@ -111,7 +111,7 @@
TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_device(device), m_generator(op.generator())
{
TensorEvaluator<ArgType, Device> argImpl(op.expression(), device);
@@ -136,10 +136,10 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h
index db394bc..1d142f2 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h
@@ -10,6 +10,8 @@
#if defined(EIGEN_CXX11_TENSOR_GPU_HIP_CUDA_DEFINES_H)
+#ifndef EIGEN_PERMANENTLY_ENABLE_GPU_HIP_CUDA_DEFINES
+
#undef gpuStream_t
#undef gpuDeviceProp_t
#undef gpuError_t
@@ -35,6 +37,8 @@
#undef gpuDeviceSynchronize
#undef gpuMemcpy
+#endif // EIGEN_PERMANENTLY_ENABLE_GPU_HIP_CUDA_DEFINES
+
#undef EIGEN_CXX11_TENSOR_GPU_HIP_CUDA_DEFINES_H
#endif // EIGEN_CXX11_TENSOR_GPU_HIP_CUDA_DEFINES_H
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
index 49d1004..dd51850 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
@@ -242,7 +242,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator( const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator( const XprType& op, const Device& device)
: m_device(device), m_impl(op.expression(), device)
{
EIGEN_STATIC_ASSERT((NumDims >= 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
@@ -389,20 +389,20 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
m_impl.evalSubExprsIfNeeded(NULL);
return true;
}
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType, EvalSubExprsCallback done) {
m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
}
#endif // EIGEN_USE_THREADS
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
@@ -514,16 +514,16 @@
}
#endif
- Index rowPaddingTop() const { return m_rowPaddingTop; }
- Index colPaddingLeft() const { return m_colPaddingLeft; }
- Index outputRows() const { return m_outputRows; }
- Index outputCols() const { return m_outputCols; }
- Index userRowStride() const { return m_row_strides; }
- Index userColStride() const { return m_col_strides; }
- Index userInRowStride() const { return m_in_row_strides; }
- Index userInColStride() const { return m_in_col_strides; }
- Index rowInflateStride() const { return m_row_inflate_strides; }
- Index colInflateStride() const { return m_col_inflate_strides; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowPaddingTop() const { return m_rowPaddingTop; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colPaddingLeft() const { return m_colPaddingLeft; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index outputRows() const { return m_outputRows; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index outputCols() const { return m_outputCols; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userRowStride() const { return m_row_strides; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userColStride() const { return m_col_strides; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userInRowStride() const { return m_in_row_strides; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userInColStride() const { return m_in_col_strides; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowInflateStride() const { return m_row_inflate_strides; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colInflateStride() const { return m_col_inflate_strides; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
costPerCoeff(bool vectorized) const {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h
index 7dadec7..c5cb61a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h
@@ -103,7 +103,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_strides(op.strides())
{
m_dimensions = m_impl.dimensions();
@@ -137,11 +137,11 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
m_impl.evalSubExprsIfNeeded(NULL);
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h
index f159db1..80106c1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h
@@ -113,7 +113,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device)
{
for(int i = 0; i < NumDims; ++i) {
@@ -136,10 +136,10 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
return m_impl.evalSubExprsIfNeeded(data);
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
@@ -191,7 +191,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: Base(op, device)
{ }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index ef79c85..ea97cf1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -142,7 +142,7 @@
TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_dimensions(op.dimensions())
{
// The total size of the reshaped tensor must be equal to the total size
@@ -154,16 +154,16 @@
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType data, EvalSubExprsCallback done) {
m_impl.evalSubExprsIfNeededAsync(data, std::move(done));
}
#endif
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
return m_impl.evalSubExprsIfNeeded(data);
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
@@ -255,7 +255,7 @@
RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
};
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: Base(op, device)
{ }
@@ -443,7 +443,7 @@
TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
{
for (Index i = 0; i < internal::array_size<Dimensions>::value; ++i) {
@@ -498,7 +498,7 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
m_impl.evalSubExprsIfNeeded(NULL);
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization
&& data && m_impl.data()) {
@@ -534,13 +534,13 @@
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType /*data*/, EvalSubExprsCallback done) {
m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
}
#endif // EIGEN_USE_THREADS
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
@@ -738,7 +738,7 @@
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: Base(op, device)
{ }
@@ -906,7 +906,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device),
m_device(device),
m_strides(op.strides())
@@ -992,12 +992,12 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_impl.evalSubExprsIfNeeded(NULL);
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
@@ -1088,7 +1088,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: Base(op, device)
{ }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
index 561666c..ee44382 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
@@ -116,7 +116,7 @@
TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()), m_device(device)
{
// The padding op doesn't change the rank of the tensor. Directly padding a scalar would lead
@@ -151,20 +151,20 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_impl.evalSubExprsIfNeeded(NULL);
return true;
}
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType, EvalSubExprsCallback done) {
m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
}
#endif // EIGEN_USE_THREADS
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h
index 64a436e..413d25d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h
@@ -107,7 +107,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device)
{
Index num_patches = 1;
@@ -152,12 +152,12 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
m_impl.evalSubExprsIfNeeded(NULL);
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 0a65591..583f462 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -549,7 +549,7 @@
static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims<Dims, NumInputDims, Layout>::value;
static const bool RunningFullReduction = (NumOutputDims==0);
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReductionEvaluatorBase(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorReductionEvaluatorBase(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device)
{
EIGEN_STATIC_ASSERT((NumInputDims >= NumReducedDims), YOU_MADE_A_PROGRAMMING_MISTAKE);
@@ -631,13 +631,6 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_STRONG_INLINE
-#if !defined(EIGEN_HIPCC)
- // Marking this as EIGEN_DEVICE_FUNC for HIPCC requires also doing the same
- // for all the functions being called within here, which then leads to
- // proliferation of EIGEN_DEVICE_FUNC markings, one of which will eventually
- // result in an NVCC error
- EIGEN_DEVICE_FUNC
-#endif
bool evalSubExprsIfNeededCommon(EvaluatorPointerType data) {
// Use the FullReducer if possible.
if ((RunningFullReduction && RunningOnSycl) ||(RunningFullReduction &&
@@ -746,9 +739,6 @@
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
EIGEN_STRONG_INLINE
-#if !defined(EIGEN_HIPCC)
- EIGEN_DEVICE_FUNC
-#endif
void
evalSubExprsIfNeededAsync(EvaluatorPointerType data,
EvalSubExprsCallback done) {
@@ -759,19 +749,12 @@
#endif
EIGEN_STRONG_INLINE
-#if !defined(EIGEN_HIPCC)
- // Marking this as EIGEN_DEVICE_FUNC for HIPCC requires also doing the same
- // for all the functions being called within here, which then leads to
- // proliferation of EIGEN_DEVICE_FUNC markings, one of which will eventually
- // result in an NVCC error
- EIGEN_DEVICE_FUNC
-#endif
bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
m_impl.evalSubExprsIfNeeded(NULL);
return evalSubExprsIfNeededCommon(data);
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
if (m_result) {
m_device.deallocate_temp(m_result);
@@ -987,7 +970,7 @@
struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>
: public TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> {
typedef TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Base;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const typename Base::XprType& op, const Device& device) : Base(op, device){}
+ EIGEN_STRONG_INLINE TensorEvaluator(const typename Base::XprType& op, const Device& device) : Base(op, device){}
};
@@ -996,7 +979,7 @@
: public TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Eigen::SyclDevice> {
typedef TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Eigen::SyclDevice> Base;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const typename Base::XprType& op, const Eigen::SyclDevice& device) : Base(op, device){}
+ EIGEN_STRONG_INLINE TensorEvaluator(const typename Base::XprType& op, const Eigen::SyclDevice& device) : Base(op, device){}
// The coeff function in the base the recursive method which is not an standard layout and cannot be used in the SYCL kernel
//Therefore the coeff function should be overridden by for SYCL kernel
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Base::CoeffReturnType coeff(typename Base::Index index) const {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h
index 030d198..a27d364 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h
@@ -388,17 +388,17 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const TensorRef<Derived>& m, const Device&)
+ EIGEN_STRONG_INLINE TensorEvaluator(const TensorRef<Derived>& m, const Device&)
: m_ref(m)
{ }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_ref.dimensions(); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
+ EIGEN_STRONG_INLINE void cleanup() { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
return m_ref.coeff(index);
@@ -439,7 +439,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(TensorRef<Derived>& m, const Device& d) : Base(m, d)
+ EIGEN_STRONG_INLINE TensorEvaluator(TensorRef<Derived>& m, const Device& d) : Base(m, d)
{ }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
index 3b1fca5..586ce68 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
@@ -121,8 +121,7 @@
TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
- const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device),
m_reverse(op.reverse()),
m_device(device)
@@ -150,20 +149,20 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_impl.evalSubExprsIfNeeded(NULL);
return true;
}
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType, EvalSubExprsCallback done) {
m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
}
#endif // EIGEN_USE_THREADS
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
@@ -426,8 +425,7 @@
CoordAccess = false, // to be implemented
RawAccess = false
};
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
- const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: Base(op, device) {}
typedef typename XprType::Scalar Scalar;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
index a06c4a9..beae854 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
@@ -402,8 +402,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
- const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device),
m_device(device),
m_exclusive(op.exclusive()),
@@ -498,7 +497,7 @@
return TensorOpCost(sizeof(CoeffReturnType), 0, 0);
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
if (m_output) {
m_device.deallocate_temp(m_output);
m_output = NULL;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
index e6fed3d..0999815 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
@@ -118,8 +118,7 @@
TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
- const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_device(device),
m_impl(op.expression(), device)
{
@@ -163,20 +162,20 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
m_impl.evalSubExprsIfNeeded(NULL);
return true;
}
#ifdef EIGEN_USE_THREADS
template <typename EvalSubExprsCallback>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
EvaluatorPointerType, EvalSubExprsCallback done) {
m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
}
#endif // EIGEN_USE_THREADS
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
@@ -384,7 +383,7 @@
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: Base(op, device)
{ }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
index 64bf3f1..2f62a66 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
@@ -109,7 +109,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device)
{
m_dimensions = m_impl.dimensions();
@@ -142,11 +142,11 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType/*data*/) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType/*data*/) {
m_impl.evalSubExprsIfNeeded(NULL);
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
@@ -277,7 +277,7 @@
RawAccess = false
};
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: Base(op, device) { }
typedef typename XprType::Index Index;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h
index 24d22c1..926ecdd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h
@@ -108,7 +108,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_traceDim(1), m_device(device)
{
@@ -211,12 +211,12 @@
return m_dimensions;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
m_impl.evalSubExprsIfNeeded(NULL);
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h
index 81bed57..0beb9ff 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h
@@ -194,7 +194,7 @@
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) :
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) :
m_impl(op.expression(), device)
{
EIGEN_STATIC_ASSERT((NumDims >= 5), YOU_MADE_A_PROGRAMMING_MISTAKE);
@@ -352,12 +352,12 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
m_impl.evalSubExprsIfNeeded(NULL);
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
@@ -518,21 +518,21 @@
const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
- Index planePaddingTop() const { return m_planePaddingTop; }
- Index rowPaddingTop() const { return m_rowPaddingTop; }
- Index colPaddingLeft() const { return m_colPaddingLeft; }
- Index outputPlanes() const { return m_outputPlanes; }
- Index outputRows() const { return m_outputRows; }
- Index outputCols() const { return m_outputCols; }
- Index userPlaneStride() const { return m_plane_strides; }
- Index userRowStride() const { return m_row_strides; }
- Index userColStride() const { return m_col_strides; }
- Index userInPlaneStride() const { return m_in_plane_strides; }
- Index userInRowStride() const { return m_in_row_strides; }
- Index userInColStride() const { return m_in_col_strides; }
- Index planeInflateStride() const { return m_plane_inflate_strides; }
- Index rowInflateStride() const { return m_row_inflate_strides; }
- Index colInflateStride() const { return m_col_inflate_strides; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index planePaddingTop() const { return m_planePaddingTop; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowPaddingTop() const { return m_rowPaddingTop; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colPaddingLeft() const { return m_colPaddingLeft; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index outputPlanes() const { return m_outputPlanes; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index outputRows() const { return m_outputRows; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index outputCols() const { return m_outputCols; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userPlaneStride() const { return m_plane_strides; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userRowStride() const { return m_row_strides; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userColStride() const { return m_col_strides; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userInPlaneStride() const { return m_in_plane_strides; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userInRowStride() const { return m_in_row_strides; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userInColStride() const { return m_in_col_strides; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index planeInflateStride() const { return m_plane_inflate_strides; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowInflateStride() const { return m_row_inflate_strides; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colInflateStride() const { return m_col_inflate_strides; }
#ifdef EIGEN_USE_SYCL
// binding placeholder accessors to a command group handler for SYCL
diff --git a/unsupported/test/cxx11_tensor_of_float16_gpu.cu b/unsupported/test/cxx11_tensor_of_float16_gpu.cu
index 062f76e..30bcc1d 100644
--- a/unsupported/test/cxx11_tensor_of_float16_gpu.cu
+++ b/unsupported/test/cxx11_tensor_of_float16_gpu.cu
@@ -329,26 +329,22 @@
int num_elem = size1*size2;
int result_size = (redux == 1 ? size1 : size2);
- float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
- float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(result_size * sizeof(Eigen::half));
Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(result_size * sizeof(Eigen::half));
- Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1(
- d_float1, size1, size2);
- Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2(
- d_float2, size1, size2);
+ Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float(
+ d_float, size1, size2);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res_half(
d_res_half, result_size);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, result_size);
- gpu_float1.device(gpu_device) = gpu_float1.random() * 2.0f;
- gpu_float2.device(gpu_device) = gpu_float2.random() * 2.0f;
+ gpu_float.device(gpu_device) = gpu_float.random() * 2.0f;
Eigen::array<int, 1> redux_dim = {redux};
- gpu_res_float.device(gpu_device) = gpu_float1.sum(redux_dim).cast<Eigen::half>();
- gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum(redux_dim);
+ gpu_res_float.device(gpu_device) = gpu_float.sum(redux_dim).cast<Eigen::half>();
+ gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().sum(redux_dim);
Tensor<Eigen::half, 1> half_prec(result_size);
Tensor<Eigen::half, 1> full_prec(result_size);
@@ -361,8 +357,7 @@
VERIFY_IS_APPROX(full_prec(i), half_prec(i));
}
- gpu_device.deallocate(d_float1);
- gpu_device.deallocate(d_float2);
+ gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_half);
gpu_device.deallocate(d_res_float);
}
@@ -386,25 +381,21 @@
int size = 13;
int num_elem = size*size;
- float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
- float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(1 * sizeof(Eigen::half));
Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(1 * sizeof(Eigen::half));
- Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1(
- d_float1, size, size);
- Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2(
- d_float2, size, size);
+ Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float(
+ d_float, size, size);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 0>, Eigen::Aligned> gpu_res_half(
d_res_half);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 0>, Eigen::Aligned> gpu_res_float(
d_res_float);
- gpu_float1.device(gpu_device) = gpu_float1.random();
- gpu_float2.device(gpu_device) = gpu_float2.random();
+ gpu_float.device(gpu_device) = gpu_float.random();
- gpu_res_float.device(gpu_device) = gpu_float1.sum().cast<Eigen::half>();
- gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum();
+ gpu_res_float.device(gpu_device) = gpu_float.sum().cast<Eigen::half>();
+ gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().sum();
Tensor<Eigen::half, 0> half_prec;
Tensor<Eigen::half, 0> full_prec;
@@ -414,16 +405,15 @@
VERIFY_IS_APPROX(full_prec(), half_prec());
- gpu_res_float.device(gpu_device) = gpu_float1.maximum().cast<Eigen::half>();
- gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().maximum();
+ gpu_res_float.device(gpu_device) = gpu_float.maximum().cast<Eigen::half>();
+ gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().maximum();
gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, sizeof(Eigen::half));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::half));
gpu_device.synchronize();
VERIFY_IS_APPROX(full_prec(), half_prec());
- gpu_device.deallocate(d_float1);
- gpu_device.deallocate(d_float2);
+ gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_half);
gpu_device.deallocate(d_res_float);
}