Update Eigen to commit: dd56367554cdf0662cc1d4a8e462e8c3e8656d08
CHANGELOG
=========
dd5636755 - Fix docs job for nightlies
d79bac0d3 - Fix boolean scatter and random generation for tensors.
9935396b1 - Specify constructor template arguments for ConstexprTest struct
72adf891d - Slightly simplify ForkJoin code, and make sure the test is actually run.
6aebfa9ac - Build docs on push, and don't expire
bddaa99e1 - Fix bitwise operation error when compiling as C++26
e42dceb3a - Fix implicit copy-constructor warning in TensorRef.
5fc6fc988 - Initialize matrix in bicgstab test
0ae7b5901 - Make assignment constexpr
4dda5b927 - fix Warray-bounds in inner product
PiperOrigin-RevId: 731494675
Change-Id: Id59938322667f1bd2b3348ce69f1bc143608e9e4
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 5a2a3ac..0ea1bc3 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -191,7 +191,7 @@
static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime;
static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime;
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
kernel.assignCoeffByOuterInner(Outer, Inner);
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index_ + 1, Stop>::run(kernel);
}
@@ -204,7 +204,7 @@
template <typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling {
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer) {
kernel.assignCoeffByOuterInner(outer, Index_);
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_ + 1, Stop>::run(kernel, outer);
}
@@ -212,7 +212,7 @@
template <typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop> {
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) {}
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
};
/***********************
@@ -221,7 +221,7 @@
template <typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling {
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
kernel.assignCoeff(Index_);
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index_ + 1, Stop>::run(kernel);
}
@@ -229,7 +229,7 @@
template <typename Kernel, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop> {
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) {}
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
};
/**************************
@@ -270,7 +270,7 @@
template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment> {
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) {}
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
};
/***************************************************************************
@@ -281,7 +281,21 @@
template <typename Kernel, int Traversal = Kernel::AssignmentTraits::Traversal,
int Unrolling = Kernel::AssignmentTraits::Unrolling>
-struct dense_assignment_loop;
+struct dense_assignment_loop_impl;
+
+template <typename Kernel, int Traversal = Kernel::AssignmentTraits::Traversal,
+ int Unrolling = Kernel::AssignmentTraits::Unrolling>
+struct dense_assignment_loop {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
+#ifdef __cpp_lib_is_constant_evaluated
+ if (internal::is_constant_evaluated())
+ dense_assignment_loop_impl<Kernel, Traversal == AllAtOnceTraversal ? AllAtOnceTraversal : DefaultTraversal,
+ NoUnrolling>::run(kernel);
+ else
+#endif
+ dense_assignment_loop_impl<Kernel, Traversal, Unrolling>::run(kernel);
+ }
+};
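+// Illustrative sketch (hypothetical usage, not part of this patch): evaluating
+//   constexpr Matrix2d m = Matrix2d({{1, 2}, {3, 4}}) + Matrix2d({{1, 0}, {0, 1}});
+// at compile time routes the assignment through the scalar DefaultTraversal /
+// NoUnrolling loop, since packet intrinsics cannot run in a constant-evaluated
+// context; the same expression at run time keeps its usual vectorized or
+// unrolled traversal.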
/************************
***** Special Cases *****
@@ -289,7 +303,7 @@
// Zero-sized assignment is a no-op.
template <typename Kernel, int Unrolling>
-struct dense_assignment_loop<Kernel, AllAtOnceTraversal, Unrolling> {
+struct dense_assignment_loop_impl<Kernel, AllAtOnceTraversal, Unrolling> {
static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE EIGEN_CONSTEXPR run(Kernel& /*kernel*/) {
@@ -302,8 +316,8 @@
************************/
template <typename Kernel>
-struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling> {
- EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel& kernel) {
+struct dense_assignment_loop_impl<Kernel, DefaultTraversal, NoUnrolling> {
+ EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE constexpr run(Kernel& kernel) {
for (Index outer = 0; outer < kernel.outerSize(); ++outer) {
for (Index inner = 0; inner < kernel.innerSize(); ++inner) {
kernel.assignCoeffByOuterInner(outer, inner);
@@ -313,19 +327,19 @@
};
template <typename Kernel>
-struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling> {
+struct dense_assignment_loop_impl<Kernel, DefaultTraversal, CompleteUnrolling> {
static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, SizeAtCompileTime>::run(kernel);
}
};
template <typename Kernel>
-struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling> {
+struct dense_assignment_loop_impl<Kernel, DefaultTraversal, InnerUnrolling> {
static constexpr int InnerSizeAtCompileTime = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
const Index outerSize = kernel.outerSize();
for (Index outer = 0; outer < outerSize; ++outer)
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, InnerSizeAtCompileTime>::run(kernel, outer);
@@ -382,7 +396,7 @@
};
template <typename Kernel>
-struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling> {
+struct dense_assignment_loop_impl<Kernel, LinearVectorizedTraversal, NoUnrolling> {
using Scalar = typename Kernel::Scalar;
using PacketType = typename Kernel::PacketType;
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
@@ -407,7 +421,7 @@
};
template <typename Kernel>
-struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling> {
+struct dense_assignment_loop_impl<Kernel, LinearVectorizedTraversal, CompleteUnrolling> {
using PacketType = typename Kernel::PacketType;
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
static constexpr int Size = Kernel::AssignmentTraits::SizeAtCompileTime;
@@ -424,7 +438,7 @@
**************************/
template <typename Kernel>
-struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling> {
+struct dense_assignment_loop_impl<Kernel, InnerVectorizedTraversal, NoUnrolling> {
using PacketType = typename Kernel::PacketType;
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
@@ -440,7 +454,7 @@
};
template <typename Kernel>
-struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling> {
+struct dense_assignment_loop_impl<Kernel, InnerVectorizedTraversal, CompleteUnrolling> {
static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
@@ -449,7 +463,7 @@
};
template <typename Kernel>
-struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling> {
+struct dense_assignment_loop_impl<Kernel, InnerVectorizedTraversal, InnerUnrolling> {
static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
@@ -467,7 +481,7 @@
***********************/
template <typename Kernel>
-struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling> {
+struct dense_assignment_loop_impl<Kernel, LinearTraversal, NoUnrolling> {
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
const Index size = kernel.size();
for (Index i = 0; i < size; ++i) kernel.assignCoeff(i);
@@ -475,7 +489,7 @@
};
template <typename Kernel>
-struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling> {
+struct dense_assignment_loop_impl<Kernel, LinearTraversal, CompleteUnrolling> {
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, Kernel::AssignmentTraits::SizeAtCompileTime>::run(
kernel);
@@ -487,7 +501,7 @@
***************************/
template <typename Kernel>
-struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling> {
+struct dense_assignment_loop_impl<Kernel, SliceVectorizedTraversal, NoUnrolling> {
using Scalar = typename Kernel::Scalar;
using PacketType = typename Kernel::PacketType;
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
@@ -528,7 +542,7 @@
#if EIGEN_UNALIGNED_VECTORIZE
template <typename Kernel>
-struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling> {
+struct dense_assignment_loop_impl<Kernel, SliceVectorizedTraversal, InnerUnrolling> {
using PacketType = typename Kernel::PacketType;
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
@@ -566,9 +580,10 @@
typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
typedef typename AssignmentTraits::PacketType PacketType;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE generic_dense_assignment_kernel(DstEvaluatorType& dst,
- const SrcEvaluatorType& src,
- const Functor& func, DstXprType& dstExpr)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr generic_dense_assignment_kernel(DstEvaluatorType& dst,
+ const SrcEvaluatorType& src,
+ const Functor& func,
+ DstXprType& dstExpr)
: m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) {
#ifdef EIGEN_DEBUG_ASSIGN
AssignmentTraits::debug();
@@ -586,7 +601,7 @@
EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const EIGEN_NOEXCEPT { return m_src; }
/// Assign src(row,col) to dst(row,col) through the assignment functor.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col) {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeff(Index row, Index col) {
m_functor.assignCoeff(m_dst.coeffRef(row, col), m_src.coeff(row, col));
}
@@ -596,7 +611,7 @@
}
/// \sa assignCoeff(Index,Index)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner) {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeffByOuterInner(Index outer, Index inner) {
Index row = rowIndexByOuterInner(outer, inner);
Index col = colIndexByOuterInner(outer, inner);
assignCoeff(row, col);
@@ -620,7 +635,7 @@
assignPacket<StoreMode, LoadMode, Packet>(row, col);
}
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index rowIndexByOuterInner(Index outer, Index inner) {
typedef typename DstEvaluatorType::ExpressionTraits Traits;
return int(Traits::RowsAtCompileTime) == 1 ? 0
: int(Traits::ColsAtCompileTime) == 1 ? inner
@@ -628,7 +643,7 @@
: inner;
}
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index colIndexByOuterInner(Index outer, Index inner) {
typedef typename DstEvaluatorType::ExpressionTraits Traits;
return int(Traits::ColsAtCompileTime) == 1 ? 0
: int(Traits::RowsAtCompileTime) == 1 ? inner
@@ -672,16 +687,16 @@
***************************************************************************/
template <typename DstXprType, typename SrcXprType, typename Functor>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize_if_allowed(DstXprType& dst, const SrcXprType& src,
- const Functor& /*func*/) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprType& dst, const SrcXprType& src,
+ const Functor& /*func*/) {
EIGEN_ONLY_USED_FOR_DEBUG(dst);
EIGEN_ONLY_USED_FOR_DEBUG(src);
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
}
template <typename DstXprType, typename SrcXprType, typename T1, typename T2>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize_if_allowed(DstXprType& dst, const SrcXprType& src,
- const internal::assign_op<T1, T2>& /*func*/) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprType& dst, const SrcXprType& src,
+ const internal::assign_op<T1, T2>& /*func*/) {
Index dstRows = src.rows();
Index dstCols = src.cols();
if (((dst.rows() != dstRows) || (dst.cols() != dstCols))) dst.resize(dstRows, dstCols);
@@ -750,7 +765,7 @@
// not have to bother about these annoying details.
template <typename Dst, typename Src>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(Dst& dst, const Src& src) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(Dst& dst, const Src& src) {
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
}
template <typename Dst, typename Src>
@@ -767,7 +782,7 @@
}
template <typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(
Dst& dst, const Src& src, const Func& func, std::enable_if_t<!evaluator_assume_aliasing<Src>::value, void*> = 0) {
call_assignment_no_alias(dst, src, func);
}
@@ -851,9 +866,12 @@
// both partial specialization+SFINAE without ambiguous specialization
template <typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak> {
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, const Functor& func) {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(DstXprType& dst, const SrcXprType& src,
+ const Functor& func) {
#ifndef EIGEN_NO_DEBUG
- internal::check_for_aliasing(dst, src);
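+    // Aliasing detection compares addresses of the underlying storage, which is
+    // not permitted during constant evaluation, so the check only runs at run
+    // time (assumed rationale for the guard).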
+ if (!internal::is_constant_evaluated()) {
+ internal::check_for_aliasing(dst, src);
+ }
#endif
call_dense_assignment_loop(dst, src, func);
diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h
index 6d16700..894bfc1 100644
--- a/Eigen/src/Core/EigenBase.h
+++ b/Eigen/src/Core/EigenBase.h
@@ -50,7 +50,7 @@
/** \returns a const reference to the derived object */
EIGEN_DEVICE_FUNC constexpr const Derived& derived() const { return *static_cast<const Derived*>(this); }
- EIGEN_DEVICE_FUNC inline Derived& const_cast_derived() const {
+ EIGEN_DEVICE_FUNC inline constexpr Derived& const_cast_derived() const {
return *static_cast<Derived*>(const_cast<EigenBase*>(this));
}
EIGEN_DEVICE_FUNC inline const Derived& const_derived() const { return *static_cast<const Derived*>(this); }
diff --git a/Eigen/src/Core/Fill.h b/Eigen/src/Core/Fill.h
index dd57ca1..3b0af91 100644
--- a/Eigen/src/Core/Fill.h
+++ b/Eigen/src/Core/Fill.h
@@ -60,12 +60,12 @@
using Func = scalar_constant_op<Scalar>;
using PlainObject = typename Xpr::PlainObject;
using Constant = typename PlainObject::ConstantReturnType;
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const Scalar& val) {
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst, const Scalar& val) {
const Constant src(dst.rows(), dst.cols(), val);
run(dst, src);
}
template <typename SrcXpr>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const SrcXpr& src) {
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst, const SrcXpr& src) {
call_dense_assignment_loop(dst, src, assign_op<Scalar, Scalar>());
}
};
@@ -100,12 +100,12 @@
using Scalar = typename Xpr::Scalar;
using PlainObject = typename Xpr::PlainObject;
using Zero = typename PlainObject::ZeroReturnType;
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst) {
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst) {
const Zero src(dst.rows(), dst.cols());
run(dst, src);
}
template <typename SrcXpr>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const SrcXpr& src) {
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst, const SrcXpr& src) {
call_dense_assignment_loop(dst, src, assign_op<Scalar, Scalar>());
}
};
diff --git a/Eigen/src/Core/InnerProduct.h b/Eigen/src/Core/InnerProduct.h
index 1e16942..9849d9b 100644
--- a/Eigen/src/Core/InnerProduct.h
+++ b/Eigen/src/Core/InnerProduct.h
@@ -57,16 +57,20 @@
template <typename Func, typename Lhs, typename Rhs>
struct inner_product_evaluator {
- static constexpr int LhsFlags = evaluator<Lhs>::Flags, RhsFlags = evaluator<Rhs>::Flags,
- SizeAtCompileTime = min_size_prefer_fixed(Lhs::SizeAtCompileTime, Rhs::SizeAtCompileTime),
- LhsAlignment = evaluator<Lhs>::Alignment, RhsAlignment = evaluator<Rhs>::Alignment;
+ static constexpr int LhsFlags = evaluator<Lhs>::Flags;
+ static constexpr int RhsFlags = evaluator<Rhs>::Flags;
+ static constexpr int SizeAtCompileTime = size_prefer_fixed(Lhs::SizeAtCompileTime, Rhs::SizeAtCompileTime);
+ static constexpr int MaxSizeAtCompileTime =
+ min_size_prefer_fixed(Lhs::MaxSizeAtCompileTime, Rhs::MaxSizeAtCompileTime);
+ static constexpr int LhsAlignment = evaluator<Lhs>::Alignment;
+ static constexpr int RhsAlignment = evaluator<Rhs>::Alignment;
using Scalar = typename Func::result_type;
using Packet = typename find_inner_product_packet<Scalar, SizeAtCompileTime>::type;
static constexpr bool Vectorize =
bool(LhsFlags & RhsFlags & PacketAccessBit) && Func::PacketAccess &&
- ((SizeAtCompileTime == Dynamic) || (unpacket_traits<Packet>::size <= SizeAtCompileTime));
+ ((MaxSizeAtCompileTime == Dynamic) || (unpacket_traits<Packet>::size <= MaxSizeAtCompileTime));
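+  // Note: gating on MaxSizeAtCompileTime instead of SizeAtCompileTime keeps
+  // vectorization disabled when the fixed maximum size is smaller than the
+  // packet width, which previously allowed packet loads past the statically
+  // allocated storage (the -Warray-bounds fix listed in the changelog).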
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit inner_product_evaluator(const Lhs& lhs, const Rhs& rhs,
Func func = Func())
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 77f0cfa..5e91fba 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -1694,10 +1694,24 @@
}
template <>
EIGEN_STRONG_INLINE void pscatter<bool, Packet16b>(bool* to, const Packet16b& from, Index stride) {
- to[4 * stride * 0] = _mm_cvtsi128_si32(from);
- to[4 * stride * 1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
- to[4 * stride * 2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
- to[4 * stride * 3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
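+  // Store all 16 boolean lanes to an aligned buffer, then scatter them one at
+  // a time; the removed code wrote only the first four lanes and spaced them
+  // 4 * stride apart.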
+ EIGEN_ALIGN16 bool tmp[16];
+ pstore(tmp, from);
+ to[stride * 0] = tmp[0];
+ to[stride * 1] = tmp[1];
+ to[stride * 2] = tmp[2];
+ to[stride * 3] = tmp[3];
+ to[stride * 4] = tmp[4];
+ to[stride * 5] = tmp[5];
+ to[stride * 6] = tmp[6];
+ to[stride * 7] = tmp[7];
+ to[stride * 8] = tmp[8];
+ to[stride * 9] = tmp[9];
+ to[stride * 10] = tmp[10];
+ to[stride * 11] = tmp[11];
+ to[stride * 12] = tmp[12];
+ to[stride * 13] = tmp[13];
+ to[stride * 14] = tmp[14];
+ to[stride * 15] = tmp[15];
}
// some compilers might be tempted to perform multiple moves instead of using a vector path.
diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h
index 09d1da8..3687bb2 100644
--- a/Eigen/src/Core/functors/AssignmentFunctors.h
+++ b/Eigen/src/Core/functors/AssignmentFunctors.h
@@ -23,7 +23,7 @@
*/
template <typename DstScalar, typename SrcScalar>
struct assign_op {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; }
template <int Alignment, typename Packet>
EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const {
diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h
index 9c8f095..fc708ee 100644
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -95,7 +95,8 @@
namespace internal {
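+// Casting Flags to int before combining them sidesteps bitwise operations
+// between unrelated enumeration types, which are deprecated since C++20 and
+// ill-formed in C++26 (presumably the changelog's C++26 fix, bddaa99e1).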
template <int Arch, typename VectorLhs, typename VectorRhs, typename Scalar = typename VectorLhs::Scalar,
- bool Vectorizable = bool((evaluator<VectorLhs>::Flags & evaluator<VectorRhs>::Flags) & PacketAccessBit)>
+ bool Vectorizable =
+ bool((int(evaluator<VectorLhs>::Flags) & int(evaluator<VectorRhs>::Flags)) & PacketAccessBit)>
struct cross3_impl {
EIGEN_DEVICE_FUNC static inline typename internal::plain_matrix_type<VectorLhs>::type run(const VectorLhs& lhs,
const VectorRhs& rhs) {
diff --git a/Eigen/src/ThreadPool/ForkJoin.h b/Eigen/src/ThreadPool/ForkJoin.h
index d6ea4dd..f67abd3 100644
--- a/Eigen/src/ThreadPool/ForkJoin.h
+++ b/Eigen/src/ThreadPool/ForkJoin.h
@@ -31,7 +31,7 @@
// where `s_{j+1} - s_{j}` and `end - s_n` are roughly within a factor of two of `granularity`. For a unary
// task function `g(k)`, the same operation is applied with
//
-// f(i,j) = [&](){ for(int k = i; k < j; ++k) g(k); };
+// f(i,j) = [&](){ for(Index k = i; k < j; ++k) g(k); };
//
// Note that the parameter `granularity` should be tuned by the user based on the trade-off of running the
// given task function sequentially vs. scheduling individual tasks in parallel. An example of a partially
@@ -45,51 +45,50 @@
// ForkJoinScheduler::ParallelFor(0, num_tasks, granularity, std::move(parallel_task), &thread_pool);
// ```
//
-// Example usage #2 (asynchronous):
+// Example usage #2 (executing multiple tasks asynchronously, each one parallelized with ParallelForAsync):
// ```
// ThreadPool thread_pool(num_threads);
-// Barrier barrier(num_tasks * num_async_calls);
-// auto done = [&](){barrier.Notify();};
-// for (int k=0; k<num_async_calls; ++k) {
-// thread_pool.Schedule([&](){
-// ForkJoinScheduler::ParallelForAsync(0, num_tasks, granularity, parallel_task, done, &thread_pool);
-// });
+// Barrier barrier(num_async_calls);
+// auto done = [&](){ barrier.Notify(); };
+// for (Index k=0; k<num_async_calls; ++k) {
+// ForkJoinScheduler::ParallelForAsync(task_start[k], task_end[k], granularity[k], parallel_task[k], done,
+// &thread_pool);
// }
// barrier.Wait();
// ```
class ForkJoinScheduler {
public:
- // Runs `do_func` asynchronously for the range [start, end) with a specified granularity. `do_func` should
- // either be of type `std::function<void(int)>` or `std::function<void(int, int)`.
- // If `end > start`, the `done` callback will be called `end - start` times when all tasks have been
- // executed. Otherwise, `done` is called only once.
- template <typename DoFnType>
- static void ParallelForAsync(int start, int end, int granularity, DoFnType do_func, std::function<void()> done,
- Eigen::ThreadPool* thread_pool) {
+ // Runs `do_func` asynchronously for the range [start, end) with a specified
+ // granularity. `do_func` should be of type `std::function<void(Index,
+  // Index)>`. `done()` is called exactly once after all tasks have been executed.
+ template <typename DoFnType, typename DoneFnType>
+ static void ParallelForAsync(Index start, Index end, Index granularity, DoFnType&& do_func, DoneFnType&& done,
+ ThreadPool* thread_pool) {
if (start >= end) {
done();
return;
}
- ForkJoinScheduler::RunParallelForAsync(start, end, granularity, do_func, done, thread_pool);
+ thread_pool->Schedule([start, end, granularity, thread_pool, do_func = std::forward<DoFnType>(do_func),
+ done = std::forward<DoneFnType>(done)]() {
+ RunParallelFor(start, end, granularity, do_func, thread_pool);
+ done();
+ });
}
// Synchronous variant of ParallelForAsync.
template <typename DoFnType>
- static void ParallelFor(int start, int end, int granularity, DoFnType do_func, Eigen::ThreadPool* thread_pool) {
+ static void ParallelFor(Index start, Index end, Index granularity, DoFnType&& do_func, ThreadPool* thread_pool) {
if (start >= end) return;
- auto dummy_done = []() {};
Barrier barrier(1);
- thread_pool->Schedule([start, end, granularity, thread_pool, &do_func, &dummy_done, &barrier]() {
- ForkJoinScheduler::ParallelForAsync(start, end, granularity, do_func, dummy_done, thread_pool);
- barrier.Notify();
- });
+ auto done = [&barrier]() { barrier.Notify(); };
+ ParallelForAsync(start, end, granularity, do_func, done, thread_pool);
barrier.Wait();
}
private:
// Schedules `right_thunk`, runs `left_thunk`, and runs other tasks until `right_thunk` has finished.
template <typename LeftType, typename RightType>
- static void ForkJoin(LeftType&& left_thunk, RightType&& right_thunk, Eigen::ThreadPool* thread_pool) {
+ static void ForkJoin(LeftType&& left_thunk, RightType&& right_thunk, ThreadPool* thread_pool) {
std::atomic<bool> right_done(false);
auto execute_right = [&right_thunk, &right_done]() {
std::forward<RightType>(right_thunk)();
@@ -97,47 +96,38 @@
};
thread_pool->Schedule(execute_right);
std::forward<LeftType>(left_thunk)();
- Eigen::ThreadPool::Task task;
+ ThreadPool::Task task;
while (!right_done.load(std::memory_order_acquire)) {
thread_pool->MaybeGetTask(&task);
if (task.f) task.f();
}
}
- // Runs `do_func` in parallel for the range [start, end). The main recursive asynchronous runner that
- // calls `ForkJoin`.
- static void RunParallelForAsync(int start, int end, int granularity, std::function<void(int)>& do_func,
- std::function<void()>& done, Eigen::ThreadPool* thread_pool) {
- std::function<void(int, int)> wrapped_do_func = [&do_func](int start, int end) {
- for (int i = start; i < end; ++i) do_func(i);
- };
- ForkJoinScheduler::RunParallelForAsync(start, end, granularity, wrapped_do_func, done, thread_pool);
+ static Index ComputeMidpoint(Index start, Index end, Index granularity) {
+    // Typical workloads choose initial values of `{start, end, granularity}` such that `end - start` and
+ // `granularity` are powers of two. Since modern processors usually implement (2^x)-way
+ // set-associative caches, we minimize the number of cache misses by choosing midpoints that are not
+ // powers of two (to avoid having two addresses in the main memory pointing to the same point in the
+ // cache). More specifically, we choose the midpoint at (roughly) the 9/16 mark.
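+    // For example (illustrative numbers only): start = 0, end = 1024,
+    // granularity = 64 gives offset = round_down(9 * 1025 / 16, 64)
+    // = round_down(576, 64) = 576, so the split lands at 576 rather than at the
+    // power-of-two midpoint 512.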
+ const Index size = end - start;
+ const Index offset = numext::round_down(9 * (size + 1) / 16, granularity);
+ return start + offset;
}
- // Variant of `RunAsyncParallelFor` that uses a do function that operates on an index range.
- // Specifically, `do_func` takes two arguments: the start and end of the range.
- static void RunParallelForAsync(int start, int end, int granularity, std::function<void(int, int)>& do_func,
- std::function<void()>& done, Eigen::ThreadPool* thread_pool) {
- if ((end - start) <= granularity) {
+ template <typename DoFnType>
+ static void RunParallelFor(Index start, Index end, Index granularity, DoFnType&& do_func, ThreadPool* thread_pool) {
+ Index mid = ComputeMidpoint(start, end, granularity);
+ if ((end - start) < granularity || mid == start || mid == end) {
do_func(start, end);
- for (int j = 0; j < end - start; ++j) done();
- } else {
- // Typical workloads choose initial values of `{start, end, granularity}` such that `start - end` and
- // `granularity` are powers of two. Since modern processors usually implement (2^x)-way
- // set-associative caches, we minimize the number of cache misses by choosing midpoints that are not
- // powers of two (to avoid having two addresses in the main memory pointing to the same point in the
- // cache). More specifically, we choose the midpoint at (roughly) the 9/16 mark.
- const int size = end - start;
- const int mid = start + 9 * (size + 1) / 16;
- ForkJoinScheduler::ForkJoin(
- [start, mid, granularity, &do_func, &done, thread_pool]() {
- RunParallelForAsync(start, mid, granularity, do_func, done, thread_pool);
- },
- [mid, end, granularity, &do_func, &done, thread_pool]() {
- RunParallelForAsync(mid, end, granularity, do_func, done, thread_pool);
- },
- thread_pool);
+ return;
}
+ ForkJoin([start, mid, granularity, &do_func, thread_pool]() {
+ RunParallelFor(start, mid, granularity, do_func, thread_pool);
+ },
+ [mid, end, granularity, &do_func, thread_pool]() {
+ RunParallelFor(mid, end, granularity, do_func, thread_pool);
+ },
+ thread_pool);
}
};
diff --git a/Eigen/src/ThreadPool/NonBlockingThreadPool.h b/Eigen/src/ThreadPool/NonBlockingThreadPool.h
index 11dfae3..4ec1354 100644
--- a/Eigen/src/ThreadPool/NonBlockingThreadPool.h
+++ b/Eigen/src/ThreadPool/NonBlockingThreadPool.h
@@ -156,7 +156,10 @@
// Tries to assign work to the current task.
void MaybeGetTask(Task* t) {
PerThread* pt = GetPerThread();
- Queue& q = thread_data_[pt->thread_id].queue;
+ const int thread_id = pt->thread_id;
+ // If we are not a worker thread of this pool, we can't get any work.
+ if (thread_id < 0) return;
+ Queue& q = thread_data_[thread_id].queue;
*t = q.PopFront();
if (t->f) return;
if (num_threads_ == 1) {
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index fdfde45..3a67ab1 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -320,6 +320,7 @@
ei_add_test(threads_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(threads_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(threads_non_blocking_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
+ei_add_test(threads_fork_join "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
add_executable(bug1213 bug1213.cpp bug1213_main.cpp)
check_cxx_compiler_flag("-ffast-math" COMPILER_SUPPORT_FASTMATH)
diff --git a/test/bicgstab.cpp b/test/bicgstab.cpp
index 7ff2f3d..6d837e5 100644
--- a/test/bicgstab.cpp
+++ b/test/bicgstab.cpp
@@ -52,7 +52,7 @@
// https://gitlab.com/libeigen/eigen/-/issues/2899
void test_2899() {
- Eigen::MatrixXd A(4, 4);
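+  // Zero-initialize the system: the test sets only some coefficients below,
+  // and the untouched entries were previously read uninitialized.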
+ Eigen::MatrixXd A = Eigen::MatrixXd::Zero(4, 4);
A(0, 0) = 1;
A(1, 0) = -1.0 / 6;
A(1, 1) = 2.0 / 3;
@@ -64,7 +64,7 @@
A(3, 1) = -1.0 / 3;
A(3, 2) = -1.0 / 3;
A(3, 3) = 2.0 / 3;
- Eigen::VectorXd b(4);
+ Eigen::VectorXd b = Eigen::VectorXd::Zero(4);
b(0) = 0;
b(1) = 1;
b(2) = 1;
diff --git a/test/constexpr.cpp b/test/constexpr.cpp
index 34c728f..ecfda0a 100644
--- a/test/constexpr.cpp
+++ b/test/constexpr.cpp
@@ -10,6 +10,13 @@
#define EIGEN_TESTING_CONSTEXPR
#include "main.h"
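+// Wrapper whose constructor assigns to (rather than copy-constructs) the
+// matrix member, so building a constexpr ConstexprTest exercises Eigen's
+// assignment path in a constant-evaluated context.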
+template <typename Scalar, int Rows>
+struct ConstexprTest {
+ constexpr ConstexprTest(const Matrix<Scalar, Rows, Rows>& B) { A = B; }
+
+ Matrix<Scalar, Rows, Rows> A;
+};
+
EIGEN_DECLARE_TEST(constexpr) {
// Clang accepts (some of) this code when using C++14/C++17, but GCC does not like
// the fact that `T array[Size]` inside Eigen::internal::plain_array is not initialized
@@ -33,6 +40,18 @@
VERIFY_IS_EQUAL(vec.size(), 3);
static_assert(vec.coeff(0, 1) == 2);
+  // Check assignment. A wrapper struct is used to avoid copy elision, so that the assignment operator actually runs.
+ constexpr ConstexprTest<double, 2> obj1(Matrix2d({{1, 2}, {3, 4}}));
+ VERIFY_IS_EQUAL(obj1.A.size(), 4);
+ static_assert(obj1.A(0, 0) == 1);
+ static_assert(obj1.A(0) == 1);
+ static_assert(obj1.A.coeff(0, 1) == 2);
+ constexpr ConstexprTest<double, 3> obj2(Matrix3d({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}));
+ VERIFY_IS_EQUAL(obj2.A.size(), 9);
+ static_assert(obj2.A(0, 0) == 1);
+ static_assert(obj2.A(0) == 1);
+ static_assert(obj2.A.coeff(0, 1) == 2);
+
// Also check dynamic size arrays/matrices with fixed-size storage (currently
// only works if all elements are initialized, since otherwise the compiler
// complains about uninitialized trailing elements).
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index cdbaad6..9c5d6cf 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -1635,7 +1635,7 @@
EIGEN_ALIGN_MAX Scalar data1[PacketSize];
RealScalar refvalue = RealScalar(0);
for (int i = 0; i < PacketSize; ++i) {
- data1[i] = internal::random<Scalar>() / RealScalar(PacketSize);
+ data1[i] = internal::random<Scalar>();
}
int stride = internal::random<int>(1, 20);
@@ -1655,7 +1655,7 @@
}
for (int i = 0; i < PacketSize * 7; ++i) {
- buffer[i] = internal::random<Scalar>() / RealScalar(PacketSize);
+ buffer[i] = internal::random<Scalar>();
}
packet = internal::pgather<Scalar, Packet>(buffer, 7);
internal::pstore(data1, packet);
@@ -1745,6 +1745,7 @@
CALL_SUBTEST_12(test::runner<std::complex<double>>::run());
CALL_SUBTEST_13(test::runner<half>::run());
CALL_SUBTEST_14((packetmath<bool, internal::packet_traits<bool>::type>()));
+ CALL_SUBTEST_14((packetmath_scatter_gather<bool, internal::packet_traits<bool>::type>()));
CALL_SUBTEST_15(test::runner<bfloat16>::run());
g_first_pass = false;
}
diff --git a/test/threads_fork_join.cpp b/test/threads_fork_join.cpp
index 941c317..b852b05 100644
--- a/test/threads_fork_join.cpp
+++ b/test/threads_fork_join.cpp
@@ -12,39 +12,26 @@
#include "Eigen/ThreadPool"
struct TestData {
- ThreadPool tp;
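+  // ThreadPool is neither copyable nor movable, so it is held behind a
+  // unique_ptr to let make_test_data return TestData by value.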
+ std::unique_ptr<ThreadPool> tp;
std::vector<double> data;
};
TestData make_test_data(int num_threads, int num_shards) {
- return {ThreadPool(num_threads), std::vector<double>(num_shards, 1.0)};
+ return {std::make_unique<ThreadPool>(num_threads), std::vector<double>(num_shards, 1.0)};
}
-static void test_unary_parallel_for(int granularity) {
+static void test_parallel_for(int granularity) {
// Test correctness.
const int kNumTasks = 100000;
TestData test_data = make_test_data(/*num_threads=*/4, kNumTasks);
- std::atomic<double> sum = 0.0;
- std::function<void(int)> unary_do_fn = [&](int i) {
- for (double new_sum = sum; !sum.compare_exchange_weak(new_sum, new_sum + test_data.data[i]);) {
- };
- };
- ForkJoinScheduler::ParallelFor(0, kNumTasks, granularity, std::move(unary_do_fn), &test_data.tp);
- VERIFY_IS_EQUAL(sum, kNumTasks);
-}
-
-static void test_binary_parallel_for(int granularity) {
- // Test correctness.
- const int kNumTasks = 100000;
- TestData test_data = make_test_data(/*num_threads=*/4, kNumTasks);
- std::atomic<double> sum = 0.0;
- std::function<void(int, int)> binary_do_fn = [&](int i, int j) {
+ std::atomic<uint64_t> sum(0);
+ std::function<void(Index, Index)> binary_do_fn = [&](Index i, Index j) {
for (int k = i; k < j; ++k)
- for (double new_sum = sum; !sum.compare_exchange_weak(new_sum, new_sum + test_data.data[k]);) {
+ for (uint64_t new_sum = sum; !sum.compare_exchange_weak(new_sum, new_sum + test_data.data[k]);) {
};
};
- ForkJoinScheduler::ParallelFor(0, kNumTasks, granularity, std::move(binary_do_fn), &test_data.tp);
- VERIFY_IS_EQUAL(sum, kNumTasks);
+ ForkJoinScheduler::ParallelFor(0, kNumTasks, granularity, std::move(binary_do_fn), test_data.tp.get());
+ VERIFY_IS_EQUAL(sum.load(), kNumTasks);
}
static void test_async_parallel_for() {
@@ -54,26 +41,26 @@
const int kNumTasks = 100;
const int kNumAsyncCalls = kNumThreads * 4;
TestData test_data = make_test_data(kNumThreads, kNumTasks);
- std::atomic<double> sum = 0.0;
- std::function<void(int)> unary_do_fn = [&](int i) {
- for (double new_sum = sum; !sum.compare_exchange_weak(new_sum, new_sum + test_data.data[i]);) {
- };
+ std::atomic<uint64_t> sum(0);
+ std::function<void(Index, Index)> binary_do_fn = [&](Index i, Index j) {
+ for (Index k = i; k < j; ++k) {
+      for (uint64_t new_sum = sum; !sum.compare_exchange_weak(new_sum, new_sum + test_data.data[k]);) {
+ }
+ }
};
- Barrier barrier(kNumTasks * kNumAsyncCalls);
+ Barrier barrier(kNumAsyncCalls);
std::function<void()> done = [&]() { barrier.Notify(); };
for (int k = 0; k < kNumAsyncCalls; ++k) {
- test_data.tp.Schedule([&]() {
- ForkJoinScheduler::ParallelForAsync(0, kNumTasks, /*granularity=*/1, unary_do_fn, done, &test_data.tp);
+ test_data.tp->Schedule([&]() {
+ ForkJoinScheduler::ParallelForAsync(0, kNumTasks, /*granularity=*/1, binary_do_fn, done, test_data.tp.get());
});
}
barrier.Wait();
- VERIFY_IS_EQUAL(sum, kNumTasks * kNumAsyncCalls);
+ VERIFY_IS_EQUAL(sum.load(), kNumTasks * kNumAsyncCalls);
}
EIGEN_DECLARE_TEST(fork_join) {
- CALL_SUBTEST(test_unary_parallel_for(1));
- CALL_SUBTEST(test_unary_parallel_for(2));
- CALL_SUBTEST(test_binary_parallel_for(1));
- CALL_SUBTEST(test_binary_parallel_for(2));
+ CALL_SUBTEST(test_parallel_for(1));
+ CALL_SUBTEST(test_parallel_for(2));
CALL_SUBTEST(test_async_parallel_for());
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
index c9c613a..e9de988 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
@@ -50,6 +50,12 @@
}
template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool RandomToTypeUniform<bool>(uint64_t* state, uint64_t stream) {
+ unsigned rnd = PCG_XSH_RS_generator(state, stream);
+ return (rnd & 0x1) != 0;
+}
+
+template <>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half RandomToTypeUniform<Eigen::half>(uint64_t* state, uint64_t stream) {
// Generate 10 random bits for the mantissa, merge with exponent.
unsigned rnd = PCG_XSH_RS_generator(state, stream);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h
index e12923d..98223fe 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h
@@ -247,6 +247,8 @@
EIGEN_STRONG_INLINE TensorRef() : Base() {}
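+  // With only a user-declared copy-assignment operator, the implicitly
+  // generated copy constructor is deprecated; declaring both explicitly
+  // avoids warnings such as -Wdeprecated-copy.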
+ EIGEN_STRONG_INLINE TensorRef(const TensorRef& other) : Base(other) {}
+
template <typename Expression>
EIGEN_STRONG_INLINE TensorRef(const Expression& expr) : Base(expr) {
EIGEN_STATIC_ASSERT(internal::is_lvalue<Expression>::value,
@@ -254,6 +256,8 @@
"TensorRef<const Expression>?)");
}
+ TensorRef& operator=(const TensorRef& other) { return Base::operator=(other).derived(); }
+
template <typename Expression>
EIGEN_STRONG_INLINE TensorRef& operator=(const Expression& expr) {
EIGEN_STATIC_ASSERT(internal::is_lvalue<Expression>::value,
@@ -262,8 +266,6 @@
return Base::operator=(expr).derived();
}
- TensorRef& operator=(const TensorRef& other) { return Base::operator=(other).derived(); }
-
template <typename... IndexTypes>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) {
const std::size_t num_indices = (sizeof...(otherIndices) + 1);
@@ -306,17 +308,17 @@
public:
EIGEN_STRONG_INLINE TensorRef() : Base() {}
+ EIGEN_STRONG_INLINE TensorRef(const TensorRef& other) : Base(other) {}
+
template <typename Expression>
EIGEN_STRONG_INLINE TensorRef(const Expression& expr) : Base(expr) {}
+ TensorRef& operator=(const TensorRef& other) { return Base::operator=(other).derived(); }
+
template <typename Expression>
EIGEN_STRONG_INLINE TensorRef& operator=(const Expression& expr) {
return Base::operator=(expr).derived();
}
-
- TensorRef(const TensorRef& other) : Base(other) {}
-
- TensorRef& operator=(const TensorRef& other) { return Base::operator=(other).derived(); }
};
// evaluator for rvalues