Update Eigen to commit: 38b9cc263bbaeb03ce408a4e26084543a6c0dedb
CHANGELOG
=========
38b9cc263 - Fix warnings about repeated definitions of macros.
f02f89bf2 - Don't redefine EIGEN_DEFAULT_IO_FORMAT in main.h.
9148c47d6 - Vectorize isfinite and isinf.
5a9f66fb3 - Fix Thread tests
c4d84dfdd - Fix compilation failures on constexpr matrices with GCC 14
99adca8b3 - Incorporate Threadpool in Eigen Core
d165c7377 - Format EIGEN_STATIC_ASSERT() as a statement macro
f78dfe36b - use built-in alloca with align if available
b9b1c8661 - Suppress C++23 deprecation warnings for std::has_denorm and std::has_denorm_loss
3d2e738f2 - fix performance-no-int-to-ptr
de8013fa6 - Fix ubsan failure in array_for_matrix
5e4f3475b - Remove call to deprecated method initParallel() in SparseDenseProduct.h
59cf0df1d - SparseMatrix::insert: add checks for valid indices
c0fe6ce22 - Fixed a clerical error in the documentation of class Matrix.
afb17288c - Fix gcc6 compile error.
4d1d14e06 - Change predux on PowerPC for Packet4i to NOT saturate the sum of the elements (like other architectures).
ff174f792 - fix issue: cmake package does not set include path correctly
PiperOrigin-RevId: 638732445
Change-Id: I550ab37d222c05c774ae2959fe6b7a6bbb61e01a
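
A minimal usage sketch of the threadpool device API introduced by 99adca8b3, based on the interfaces in the diff below (DeviceWrapper, CoreThreadPoolDevice); illustrative only, not part of the patch:

    #include <Eigen/Core>        // now also provides DeviceWrapper.h
    #include <Eigen/ThreadPool>  // now also provides CoreThreadPoolDevice.h

    int main() {
      Eigen::ThreadPool pool(4);                 // 4 worker threads
      Eigen::CoreThreadPoolDevice device(pool);  // default cost threshold 3e-5f

      Eigen::MatrixXf a = Eigen::MatrixXf::Random(2048, 2048);
      Eigen::MatrixXf b = Eigen::MatrixXf::Random(2048, 2048);
      Eigen::MatrixXf c(2048, 2048);

      // Coefficient-wise assignment runs on the pool; expressions whose
      // estimated cost falls below the threshold stay single-threaded.
      c.device(device) = a + b;
      return 0;
    }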
diff --git a/Eigen/Core b/Eigen/Core
index ed7d353..e452e73 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -348,6 +348,7 @@
#include "src/Core/TriangularMatrix.h"
#include "src/Core/SelfAdjointView.h"
#include "src/Core/products/GeneralBlockPanelKernel.h"
+#include "src/Core/DeviceWrapper.h"
#ifdef EIGEN_GEMM_THREADPOOL
#include "ThreadPool"
#endif
diff --git a/Eigen/ThreadPool b/Eigen/ThreadPool
index f79880a..74da328 100644
--- a/Eigen/ThreadPool
+++ b/Eigen/ThreadPool
@@ -89,6 +89,7 @@
#include "src/ThreadPool/ThreadEnvironment.h"
#include "src/ThreadPool/Barrier.h"
#include "src/ThreadPool/NonBlockingThreadPool.h"
+#include "src/ThreadPool/CoreThreadPoolDevice.h"
// IWYU pragma: end_exports
#include "src/Core/util/ReenableStupidWarnings.h"
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 5e1cbf6..5da9c57 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -167,7 +167,7 @@
};
template <typename Derived>
-struct evaluator<PlainObjectBase<Derived> > : evaluator_base<Derived> {
+struct evaluator<PlainObjectBase<Derived>> : evaluator_base<Derived> {
typedef PlainObjectBase<Derived> PlainObjectType;
typedef typename PlainObjectType::Scalar Scalar;
typedef typename PlainObjectType::CoeffReturnType CoeffReturnType;
@@ -247,31 +247,29 @@
};
template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
-struct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
- : evaluator<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > > {
+struct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>>
+ : evaluator<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>>> {
typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator() {}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& m)
- : evaluator<PlainObjectBase<XprType> >(m) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& m) : evaluator<PlainObjectBase<XprType>>(m) {}
};
template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
-struct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
- : evaluator<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > > {
+struct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols>>
+ : evaluator<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols>>> {
typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator() {}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& m)
- : evaluator<PlainObjectBase<XprType> >(m) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& m) : evaluator<PlainObjectBase<XprType>>(m) {}
};
// -------------------- Transpose --------------------
template <typename ArgType>
-struct unary_evaluator<Transpose<ArgType>, IndexBased> : evaluator_base<Transpose<ArgType> > {
+struct unary_evaluator<Transpose<ArgType>, IndexBased> : evaluator_base<Transpose<ArgType>> {
typedef Transpose<ArgType> XprType;
enum {
@@ -460,8 +458,8 @@
#endif // MSVC workaround
template <typename NullaryOp, typename PlainObjectType>
-struct evaluator<CwiseNullaryOp<NullaryOp, PlainObjectType> >
- : evaluator_base<CwiseNullaryOp<NullaryOp, PlainObjectType> > {
+struct evaluator<CwiseNullaryOp<NullaryOp, PlainObjectType>>
+ : evaluator_base<CwiseNullaryOp<NullaryOp, PlainObjectType>> {
typedef CwiseNullaryOp<NullaryOp, PlainObjectType> XprType;
typedef internal::remove_all_t<PlainObjectType> PlainObjectTypeCleaned;
@@ -509,7 +507,7 @@
// -------------------- CwiseUnaryOp --------------------
template <typename UnaryOp, typename ArgType>
-struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased> : evaluator_base<CwiseUnaryOp<UnaryOp, ArgType> > {
+struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased> : evaluator_base<CwiseUnaryOp<UnaryOp, ArgType>> {
typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
enum {
@@ -762,17 +760,17 @@
// this is a ternary expression
template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
-struct evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
- : public ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > {
+struct evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>>
+ : public ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>> {
typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
- typedef ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > Base;
+ typedef ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>> Base;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
};
template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased, IndexBased>
- : evaluator_base<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > {
+ : evaluator_base<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>> {
typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
enum {
@@ -865,16 +863,16 @@
// this is a binary expression
template <typename BinaryOp, typename Lhs, typename Rhs>
-struct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > : public binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > {
+struct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>> : public binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>> {
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
- typedef binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > Base;
+ typedef binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>> Base;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
};
template <typename BinaryOp, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBased>
- : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > {
+ : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs>> {
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
enum {
@@ -939,7 +937,7 @@
template <typename UnaryOp, typename ArgType, typename StrideType>
struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType, StrideType>, IndexBased>
- : evaluator_base<CwiseUnaryView<UnaryOp, ArgType, StrideType> > {
+ : evaluator_base<CwiseUnaryView<UnaryOp, ArgType, StrideType>> {
typedef CwiseUnaryView<UnaryOp, ArgType, StrideType> XprType;
enum {
@@ -1067,7 +1065,7 @@
};
template <typename PlainObjectType, int MapOptions, typename StrideType>
-struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
+struct evaluator<Map<PlainObjectType, MapOptions, StrideType>>
: public mapbase_evaluator<Map<PlainObjectType, MapOptions, StrideType>, PlainObjectType> {
typedef Map<PlainObjectType, MapOptions, StrideType> XprType;
typedef typename XprType::Scalar Scalar;
@@ -1100,13 +1098,13 @@
// -------------------- Ref --------------------
template <typename PlainObjectType, int RefOptions, typename StrideType>
-struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> >
+struct evaluator<Ref<PlainObjectType, RefOptions, StrideType>>
: public mapbase_evaluator<Ref<PlainObjectType, RefOptions, StrideType>, PlainObjectType> {
typedef Ref<PlainObjectType, RefOptions, StrideType> XprType;
enum {
- Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags,
- Alignment = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Alignment
+ Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType>>::Flags,
+ Alignment = evaluator<Map<PlainObjectType, RefOptions, StrideType>>::Alignment
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& ref)
@@ -1120,7 +1118,7 @@
struct block_evaluator;
template <typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
-struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
+struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>>
: block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> {
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
typedef typename XprType::Scalar Scalar;
@@ -1171,7 +1169,7 @@
// no direct-access => dispatch to a unary evaluator
template <typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAccess*/ false>
- : unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > {
+ : unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>> {
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit block_evaluator(const XprType& block)
@@ -1180,7 +1178,7 @@
template <typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBased>
- : evaluator_base<Block<ArgType, BlockRows, BlockCols, InnerPanel> > {
+ : evaluator_base<Block<ArgType, BlockRows, BlockCols, InnerPanel>> {
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& block)
@@ -1293,8 +1291,8 @@
// TODO enable vectorization for Select
template <typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
-struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
- : evaluator_base<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > {
+struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType>>
+ : evaluator_base<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType>> {
typedef Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> XprType;
enum {
CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost +
@@ -1335,8 +1333,8 @@
// -------------------- Replicate --------------------
template <typename ArgType, int RowFactor, int ColFactor>
-struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
- : evaluator_base<Replicate<ArgType, RowFactor, ColFactor> > {
+struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor>>
+ : evaluator_base<Replicate<ArgType, RowFactor, ColFactor>> {
typedef Replicate<ArgType, RowFactor, ColFactor> XprType;
typedef typename XprType::CoeffReturnType CoeffReturnType;
enum { Factor = (RowFactor == Dynamic || ColFactor == Dynamic) ? Dynamic : RowFactor * ColFactor };
@@ -1461,19 +1459,19 @@
};
template <typename TArgType>
-struct unary_evaluator<MatrixWrapper<TArgType> > : evaluator_wrapper_base<MatrixWrapper<TArgType> > {
+struct unary_evaluator<MatrixWrapper<TArgType>> : evaluator_wrapper_base<MatrixWrapper<TArgType>> {
typedef MatrixWrapper<TArgType> XprType;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& wrapper)
- : evaluator_wrapper_base<MatrixWrapper<TArgType> >(wrapper.nestedExpression()) {}
+ : evaluator_wrapper_base<MatrixWrapper<TArgType>>(wrapper.nestedExpression()) {}
};
template <typename TArgType>
-struct unary_evaluator<ArrayWrapper<TArgType> > : evaluator_wrapper_base<ArrayWrapper<TArgType> > {
+struct unary_evaluator<ArrayWrapper<TArgType>> : evaluator_wrapper_base<ArrayWrapper<TArgType>> {
typedef ArrayWrapper<TArgType> XprType;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& wrapper)
- : evaluator_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression()) {}
+ : evaluator_wrapper_base<ArrayWrapper<TArgType>>(wrapper.nestedExpression()) {}
};
// -------------------- Reverse --------------------
@@ -1483,7 +1481,7 @@
struct reverse_packet_cond;
template <typename ArgType, int Direction>
-struct unary_evaluator<Reverse<ArgType, Direction> > : evaluator_base<Reverse<ArgType, Direction> > {
+struct unary_evaluator<Reverse<ArgType, Direction>> : evaluator_base<Reverse<ArgType, Direction>> {
typedef Reverse<ArgType, Direction> XprType;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
@@ -1584,7 +1582,7 @@
// -------------------- Diagonal --------------------
template <typename ArgType, int DiagIndex>
-struct evaluator<Diagonal<ArgType, DiagIndex> > : evaluator_base<Diagonal<ArgType, DiagIndex> > {
+struct evaluator<Diagonal<ArgType, DiagIndex>> : evaluator_base<Diagonal<ArgType, DiagIndex>> {
typedef Diagonal<ArgType, DiagIndex> XprType;
enum {
@@ -1643,10 +1641,10 @@
class EvalToTemp;
template <typename ArgType>
-struct traits<EvalToTemp<ArgType> > : public traits<ArgType> {};
+struct traits<EvalToTemp<ArgType>> : public traits<ArgType> {};
template <typename ArgType>
-class EvalToTemp : public dense_xpr_base<EvalToTemp<ArgType> >::type {
+class EvalToTemp : public dense_xpr_base<EvalToTemp<ArgType>>::type {
public:
typedef typename dense_xpr_base<EvalToTemp>::type Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp)
@@ -1664,7 +1662,7 @@
};
template <typename ArgType>
-struct evaluator<EvalToTemp<ArgType> > : public evaluator<typename ArgType::PlainObject> {
+struct evaluator<EvalToTemp<ArgType>> : public evaluator<typename ArgType::PlainObject> {
typedef EvalToTemp<ArgType> XprType;
typedef typename ArgType::PlainObject PlainObject;
typedef evaluator<PlainObject> Base;
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index 5ab54ef..8c8ca73 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -621,17 +621,19 @@
protected:
EIGEN_DEFAULT_COPY_CONSTRUCTOR(DenseBase)
/** Default constructor. Do nothing. */
+#ifdef EIGEN_INTERNAL_DEBUGGING
EIGEN_DEVICE_FUNC constexpr DenseBase() {
/* Just checks for self-consistency of the flags.
* Only do it when debugging Eigen, as this borders on paranoia and could slow compilation down
*/
-#ifdef EIGEN_INTERNAL_DEBUGGING
EIGEN_STATIC_ASSERT(
(internal::check_implication(MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1, int(IsRowMajor)) &&
internal::check_implication(MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1, int(!IsRowMajor))),
INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION)
-#endif
}
+#else
+ EIGEN_DEVICE_FUNC constexpr DenseBase() = default;
+#endif
private:
EIGEN_DEVICE_FUNC explicit DenseBase(int);
diff --git a/Eigen/src/Core/DeviceWrapper.h b/Eigen/src/Core/DeviceWrapper.h
new file mode 100644
index 0000000..9fdbe60
--- /dev/null
+++ b/Eigen/src/Core/DeviceWrapper.h
@@ -0,0 +1,155 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2023 Charlie Schlosser <cs.schlosser@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_DEVICEWRAPPER_H
+#define EIGEN_DEVICEWRAPPER_H
+
+namespace Eigen {
+template <typename Derived, typename Device>
+struct DeviceWrapper {
+ using Base = EigenBase<internal::remove_all_t<Derived>>;
+ using Scalar = typename Derived::Scalar;
+
+ EIGEN_DEVICE_FUNC DeviceWrapper(Base& xpr, Device& device) : m_xpr(xpr.derived()), m_device(device) {}
+ EIGEN_DEVICE_FUNC DeviceWrapper(const Base& xpr, Device& device) : m_xpr(xpr.derived()), m_device(device) {}
+
+ template <typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived>& other) {
+ using AssignOp = internal::assign_op<Scalar, typename OtherDerived::Scalar>;
+ internal::call_assignment(*this, other.derived(), AssignOp());
+ return m_xpr;
+ }
+ template <typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator+=(const EigenBase<OtherDerived>& other) {
+ using AddAssignOp = internal::add_assign_op<Scalar, typename OtherDerived::Scalar>;
+ internal::call_assignment(*this, other.derived(), AddAssignOp());
+ return m_xpr;
+ }
+ template <typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const EigenBase<OtherDerived>& other) {
+ using SubAssignOp = internal::sub_assign_op<Scalar, typename OtherDerived::Scalar>;
+ internal::call_assignment(*this, other.derived(), SubAssignOp());
+ return m_xpr;
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& derived() { return m_xpr; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Device& device() { return m_device; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NoAlias<DeviceWrapper, EigenBase> noalias() {
+ return NoAlias<DeviceWrapper, EigenBase>(*this);
+ }
+
+ Derived& m_xpr;
+ Device& m_device;
+};
+
+namespace internal {
+
+// this is where we differentiate between lazy assignment and specialized kernels (e.g. matrix products)
+template <typename DstXprType, typename SrcXprType, typename Functor, typename Device,
+ typename Kind = typename AssignmentKind<typename evaluator_traits<DstXprType>::Shape,
+ typename evaluator_traits<SrcXprType>::Shape>::Kind,
+ typename EnableIf = void>
+struct AssignmentWithDevice;
+
+// unless otherwise specified, use the default product implementation
+template <typename DstXprType, typename Lhs, typename Rhs, int Options, typename Functor, typename Device,
+ typename Weak>
+struct AssignmentWithDevice<DstXprType, Product<Lhs, Rhs, Options>, Functor, Device, Dense2Dense, Weak> {
+ using SrcXprType = Product<Lhs, Rhs, Options>;
+ using Base = Assignment<DstXprType, SrcXprType, Functor>;
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, const Functor& func,
+ Device&) {
+ Base::run(dst, src, func);
+ };
+};
+
+// specialization for coefficient-wise assignment
+template <typename DstXprType, typename SrcXprType, typename Functor, typename Device, typename Weak>
+struct AssignmentWithDevice<DstXprType, SrcXprType, Functor, Device, Dense2Dense, Weak> {
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, const Functor& func,
+ Device& device) {
+#ifndef EIGEN_NO_DEBUG
+ internal::check_for_aliasing(dst, src);
+#endif
+
+ call_dense_assignment_loop(dst, src, func, device);
+ }
+};
+
+// this allows us to use the default evaluation scheme if it is not specialized for the device
+template <typename Kernel, typename Device, int Traversal = Kernel::AssignmentTraits::Traversal,
+ int Unrolling = Kernel::AssignmentTraits::Unrolling>
+struct dense_assignment_loop_with_device {
+ using Base = dense_assignment_loop<Kernel, Traversal, Unrolling>;
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel, Device&) { Base::run(kernel); }
+};
+
+// entry point for a generic expression with device
+template <typename Dst, typename Src, typename Func, typename Device>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment_no_alias(DeviceWrapper<Dst, Device> dst,
+ const Src& src, const Func& func) {
+ enum {
+ NeedToTranspose = ((int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) ||
+ (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)) &&
+ int(Dst::SizeAtCompileTime) != 1
+ };
+
+ using ActualDstTypeCleaned = std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst>;
+ using ActualDstType = std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst&>;
+ ActualDstType actualDst(dst.derived());
+
+ // TODO check whether this is the right place to perform these checks:
+ EIGEN_STATIC_ASSERT_LVALUE(Dst)
+ EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned, Src)
+ EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename ActualDstTypeCleaned::Scalar, typename Src::Scalar);
+
+  // this provides a mechanism for specializing simple assignments, matrix products, etc.
+ AssignmentWithDevice<ActualDstTypeCleaned, Src, Func, Device>::run(actualDst, src, func, dst.device());
+}
+
+// copied and pasted from AssignEvaluator, except the device is forwarded to the kernel
+template <typename DstXprType, typename SrcXprType, typename Functor, typename Device>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_dense_assignment_loop(DstXprType& dst,
+ const SrcXprType& src,
+ const Functor& func,
+ Device& device) {
+ using DstEvaluatorType = evaluator<DstXprType>;
+ using SrcEvaluatorType = evaluator<SrcXprType>;
+
+ SrcEvaluatorType srcEvaluator(src);
+
+ // NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
+ // we need to resize the destination after the source evaluator has been created.
+ resize_if_allowed(dst, src, func);
+
+ DstEvaluatorType dstEvaluator(dst);
+
+ using Kernel = generic_dense_assignment_kernel<DstEvaluatorType, SrcEvaluatorType, Functor>;
+
+ Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
+
+ dense_assignment_loop_with_device<Kernel, Device>::run(kernel, device);
+}
+
+} // namespace internal
+
+template <typename Derived>
+template <typename Device>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DeviceWrapper<Derived, Device> EigenBase<Derived>::device(Device& device) {
+ return DeviceWrapper<Derived, Device>(derived(), device);
+}
+
+template <typename Derived>
+template <typename Device>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DeviceWrapper<const Derived, Device> EigenBase<Derived>::device(
+ Device& device) const {
+ return DeviceWrapper<const Derived, Device>(derived(), device);
+}
+} // namespace Eigen
+#endif
\ No newline at end of file
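
Per the AssignmentWithDevice specializations above, matrix products currently ignore the device and fall back to the stock kernels; only coefficient-wise assignments are dispatched to the device. A sketch of the two paths (hypothetical usage):

    Eigen::ThreadPool pool(8);
    Eigen::CoreThreadPoolDevice dev(pool);
    Eigen::MatrixXf a(512, 512), b(512, 512), c(512, 512);
    a.setRandom();
    b.setRandom();
    c.device(dev) = a + b;            // coefficient-wise: parallel dense loop
    c.device(dev).noalias() = a * b;  // Product: default (non-device) GEMM path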
diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h
index f485016..a2d6ee2 100644
--- a/Eigen/src/Core/EigenBase.h
+++ b/Eigen/src/Core/EigenBase.h
@@ -104,6 +104,11 @@
// derived class can reimplement it in a more optimized way.
dst = this->derived() * dst;
}
+
+ template <typename Device>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DeviceWrapper<Derived, Device> device(Device& device);
+ template <typename Device>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DeviceWrapper<const Derived, Device> device(Device& device) const;
};
/***************************************************************************
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index 1220073..b7ae44f 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -243,17 +243,15 @@
template <typename Scalar, int Size, int MaxSize>
struct gemv_static_vector_if<Scalar, Size, MaxSize, true> {
#if EIGEN_MAX_STATIC_ALIGN_BYTES != 0
- internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize), 0, AlignedMax>
- m_data;
+ internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize), 0, AlignedMax> m_data;
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
#else
// Some architectures cannot align on the stack,
// => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
- internal::plain_array<
- Scalar, internal::min_size_prefer_fixed(Size, MaxSize) + EIGEN_MAX_ALIGN_BYTES, 0>
- m_data;
+ internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize) + EIGEN_MAX_ALIGN_BYTES, 0> m_data;
EIGEN_STRONG_INLINE Scalar* data() {
- return reinterpret_cast<Scalar*>((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) + EIGEN_MAX_ALIGN_BYTES);
+ return reinterpret_cast<Scalar*>((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) +
+ EIGEN_MAX_ALIGN_BYTES);
}
#endif
};
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 8a07d50..e1347b9 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -579,12 +579,6 @@
return pand(a, pnot(b));
}
-/** \internal \returns isnan(a) */
-template <typename Packet>
-EIGEN_DEVICE_FUNC inline Packet pisnan(const Packet& a) {
- return pandnot(ptrue(a), pcmp_eq(a, a));
-}
-
// In the general case, use bitwise select.
template <typename Packet, typename EnableIf = void>
struct pselect_impl {
@@ -1002,6 +996,20 @@
* Special math functions
***************************/
+/** \internal \returns isnan(a) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pisnan(const Packet& a) {
+ return pandnot(ptrue(a), pcmp_eq(a, a));
+}
+
+/** \internal \returns isinf(a) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pisinf(const Packet& a) {
+ using Scalar = typename unpacket_traits<Packet>::type;
+ constexpr Scalar inf = NumTraits<Scalar>::infinity();
+ return pcmp_eq(pabs(a), pset1<Packet>(inf));
+}
+
/** \internal \returns the sine of \a a (coeff-wise) */
template <typename Packet>
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet& a) {
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index d42fc93..feb9099 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -1249,20 +1249,40 @@
// T is assumed to be an integer type with a>=0, and b>0
template <typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE EIGEN_CONSTEXPR T div_ceil(T a, T b) {
+ using UnsignedT = typename internal::make_unsigned<T>::type;
EIGEN_STATIC_ASSERT((NumTraits<T>::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES)
eigen_assert(a >= 0);
eigen_assert(b > 0);
+ // Note: explicitly declaring a and b as non-negative values allows the compiler to use better optimizations
+ const UnsignedT ua = UnsignedT(a);
+ const UnsignedT ub = UnsignedT(b);
// Note: This form is used because it cannot overflow.
- return a == 0 ? 0 : (a - 1) / b + 1;
+ return ua == 0 ? 0 : (ua - 1) / ub + 1;
+}
+
+// Integer round down to nearest power of b
+// T is assumed to be an integer type with a>=0, and b>0
+template <typename T, typename U>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE EIGEN_CONSTEXPR T round_down(T a, U b) {
+ using UnsignedT = typename internal::make_unsigned<T>::type;
+ using UnsignedU = typename internal::make_unsigned<U>::type;
+ EIGEN_STATIC_ASSERT((NumTraits<T>::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES)
+ EIGEN_STATIC_ASSERT((NumTraits<U>::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES)
+ eigen_assert(a >= 0);
+ eigen_assert(b > 0);
+ // Note: explicitly declaring a and b as non-negative values allows the compiler to use better optimizations
+ const UnsignedT ua = UnsignedT(a);
+ const UnsignedU ub = UnsignedU(b);
+ return ub * (ua / ub);
}
/** Log base 2 for 32 bits positive integers.
* Conveniently returns 0 for x==0. */
-inline int log2(int x) {
+EIGEN_CONSTEXPR inline int log2(int x) {
eigen_assert(x >= 0);
unsigned int v(x);
- static const int table[32] = {0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
- 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31};
+ constexpr int table[32] = {0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
+ 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31};
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
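
Worked values for the two integer helpers above, derived by hand from the shown definitions:

    // div_ceil(37, 8)   == (37 - 1) / 8 + 1 == 5   (quotient rounded up; div_ceil(0, b) == 0)
    // round_down(37, 8) == 8 * (37 / 8)     == 32  (largest multiple of 8 that is <= 37)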
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index af6afaf..0d8691e 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -125,7 +125,7 @@
* coefficients.</dd>
*
* <dt><b>\anchor fixedsize Fixed-size versus dynamic-size:</b></dt>
- * <dd>Fixed-size means that the numbers of rows and columns are known are compile-time. In this case, Eigen allocates
+ * <dd>Fixed-size means that the numbers of rows and columns are known at compile-time. In this case, Eigen allocates
* the array of coefficients as a fixed-size array, as a class member. This makes sense for very small matrices,
* typically up to 4x4, sometimes up to 16x16. Larger matrices should be declared as dynamic-size even if one happens to
* know their size at compile-time.
@@ -139,7 +139,7 @@
* <dt><b>\anchor maxrows MaxRows_ and MaxCols_:</b></dt>
* <dd>In most cases, one just leaves these parameters to the default values.
* These parameters mean the maximum size of rows and columns that the matrix may have. They are useful in cases
- * when the exact numbers of rows and columns are not known are compile-time, but it is known at compile-time that they
+ * when the exact numbers of rows and columns are not known at compile-time, but it is known at compile-time that they
* cannot exceed a certain value. This happens when taking dynamic-size blocks inside fixed-size matrices: in this case
* MaxRows_ and MaxCols_ are the dimensions of the original matrix, while Rows_ and Cols_ are Dynamic.</dd>
* </dl>
diff --git a/Eigen/src/Core/Replicate.h b/Eigen/src/Core/Replicate.h
index c01c627..11d7ad1 100644
--- a/Eigen/src/Core/Replicate.h
+++ b/Eigen/src/Core/Replicate.h
@@ -80,15 +80,12 @@
template <typename OriginalMatrixType>
EIGEN_DEVICE_FUNC inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor)
- : m_matrix(matrix),
- m_rowFactor(rowFactor),
- m_colFactor(colFactor){
- EIGEN_STATIC_ASSERT((internal::is_same<std::remove_const_t<MatrixType>, OriginalMatrixType>::value),
- THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)}
-
- EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index rows() const {
- return m_matrix.rows() * m_rowFactor.value();
+ : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor) {
+ EIGEN_STATIC_ASSERT((internal::is_same<std::remove_const_t<MatrixType>, OriginalMatrixType>::value),
+ THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
}
+
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); }
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); }
EIGEN_DEVICE_FUNC const MatrixTypeNested_& nestedExpression() const { return m_matrix; }
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 4c92e05..6c472cd 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -2437,13 +2437,11 @@
template <>
EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a) {
- Packet4i sum;
- sum = vec_sums(a, p4i_ZERO);
-#ifdef _BIG_ENDIAN
- sum = vec_sld(sum, p4i_ZERO, 12);
-#else
- sum = vec_sld(p4i_ZERO, sum, 4);
-#endif
+ Packet4i b, sum;
+ b = vec_sld(a, a, 8);
+ sum = a + b;
+ b = vec_sld(sum, sum, 4);
+ sum += b;
return pfirst(sum);
}
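
A hand trace of the new non-saturating predux, assuming lanes {a0, a1, a2, a3}:

    // b   = vec_sld(a, a, 8)     -> {a2, a3, a0, a1}          (rotate by 8 bytes)
    // sum = a + b                -> {a0+a2, a1+a3, a0+a2, a1+a3}
    // b   = vec_sld(sum, sum, 4) -> {a1+a3, a0+a2, a1+a3, a0+a2}
    // sum += b                   -> every lane holds a0+a1+a2+a3
    // pfirst(sum)                -> lane 0; overflow wraps instead of saturating (vec_sums saturated)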
diff --git a/Eigen/src/Core/arch/Default/BFloat16.h b/Eigen/src/Core/arch/Default/BFloat16.h
index f31c6ce..9e79a39 100644
--- a/Eigen/src/Core/arch/Default/BFloat16.h
+++ b/Eigen/src/Core/arch/Default/BFloat16.h
@@ -139,8 +139,15 @@
static EIGEN_CONSTEXPR const bool has_infinity = true;
static EIGEN_CONSTEXPR const bool has_quiet_NaN = true;
static EIGEN_CONSTEXPR const bool has_signaling_NaN = true;
+#if __cplusplus >= 202302L
+ EIGEN_DIAGNOSTICS(push)
+ EIGEN_DISABLE_DEPRECATED_WARNING
+#endif
static EIGEN_CONSTEXPR const std::float_denorm_style has_denorm = std::denorm_present;
static EIGEN_CONSTEXPR const bool has_denorm_loss = false;
+#if __cplusplus >= 202302L
+ EIGEN_DIAGNOSTICS(pop)
+#endif
static EIGEN_CONSTEXPR const std::float_round_style round_style = std::numeric_limits<float>::round_style;
static EIGEN_CONSTEXPR const bool is_iec559 = true;
// The C++ standard defines this as "true if the set of values representable
@@ -187,10 +194,17 @@
EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::has_quiet_NaN;
template <typename T>
EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::has_signaling_NaN;
+#if __cplusplus >= 202302L
+EIGEN_DIAGNOSTICS(push)
+EIGEN_DISABLE_DEPRECATED_WARNING
+#endif
template <typename T>
EIGEN_CONSTEXPR const std::float_denorm_style numeric_limits_bfloat16_impl<T>::has_denorm;
template <typename T>
EIGEN_CONSTEXPR const bool numeric_limits_bfloat16_impl<T>::has_denorm_loss;
+#if __cplusplus >= 202302L
+EIGEN_DIAGNOSTICS(pop)
+#endif
template <typename T>
EIGEN_CONSTEXPR const std::float_round_style numeric_limits_bfloat16_impl<T>::round_style;
template <typename T>
diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h
index 9c195c1..7754e8f 100644
--- a/Eigen/src/Core/arch/Default/Half.h
+++ b/Eigen/src/Core/arch/Default/Half.h
@@ -208,8 +208,15 @@
static EIGEN_CONSTEXPR const bool has_infinity = true;
static EIGEN_CONSTEXPR const bool has_quiet_NaN = true;
static EIGEN_CONSTEXPR const bool has_signaling_NaN = true;
+#if __cplusplus >= 202302L
+ EIGEN_DIAGNOSTICS(push)
+ EIGEN_DISABLE_DEPRECATED_WARNING
+#endif
static EIGEN_CONSTEXPR const std::float_denorm_style has_denorm = std::denorm_present;
static EIGEN_CONSTEXPR const bool has_denorm_loss = false;
+#if __cplusplus >= 202302L
+ EIGEN_DIAGNOSTICS(pop)
+#endif
static EIGEN_CONSTEXPR const std::float_round_style round_style = std::round_to_nearest;
static EIGEN_CONSTEXPR const bool is_iec559 = true;
// The C++ standard defines this as "true if the set of values representable
@@ -256,10 +263,17 @@
EIGEN_CONSTEXPR const bool numeric_limits_half_impl<T>::has_quiet_NaN;
template <typename T>
EIGEN_CONSTEXPR const bool numeric_limits_half_impl<T>::has_signaling_NaN;
+#if __cplusplus >= 202302L
+EIGEN_DIAGNOSTICS(push)
+EIGEN_DISABLE_DEPRECATED_WARNING
+#endif
template <typename T>
EIGEN_CONSTEXPR const std::float_denorm_style numeric_limits_half_impl<T>::has_denorm;
template <typename T>
EIGEN_CONSTEXPR const bool numeric_limits_half_impl<T>::has_denorm_loss;
+#if __cplusplus >= 202302L
+EIGEN_DIAGNOSTICS(pop)
+#endif
template <typename T>
EIGEN_CONSTEXPR const std::float_round_style numeric_limits_half_impl<T>::round_style;
template <typename T>
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index c1bbc7c..5059a54 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -989,10 +989,9 @@
* \brief Template functor to check whether a scalar is +/-inf
* \sa class CwiseUnaryOp, ArrayBase::isinf()
*/
-template <typename Scalar>
+template <typename Scalar, bool UseTypedPredicate = false>
struct scalar_isinf_op {
- typedef bool result_type;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator()(const Scalar& a) const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a) const {
#if defined(SYCL_DEVICE_ONLY)
return numext::isinf(a);
#else
@@ -1000,19 +999,33 @@
#endif
}
};
+
template <typename Scalar>
-struct functor_traits<scalar_isinf_op<Scalar>> {
- enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = false };
+struct scalar_isinf_op<Scalar, true> {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const Scalar& a) const {
+#if defined(SYCL_DEVICE_ONLY)
+ return (numext::isinf(a) ? ptrue(a) : pzero(a));
+#else
+ return (numext::isinf EIGEN_NOT_A_MACRO(a) ? ptrue(a) : pzero(a));
+#endif
+ }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const {
+ return pisinf(a);
+ }
+};
+template <typename Scalar, bool UseTypedPredicate>
+struct functor_traits<scalar_isinf_op<Scalar, UseTypedPredicate>> {
+ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasCmp && UseTypedPredicate };
};
/** \internal
* \brief Template functor to check whether a scalar has a finite value
* \sa class CwiseUnaryOp, ArrayBase::isfinite()
*/
-template <typename Scalar>
+template <typename Scalar, bool UseTypedPredicate = false>
struct scalar_isfinite_op {
- typedef bool result_type;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator()(const Scalar& a) const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a) const {
#if defined(SYCL_DEVICE_ONLY)
return numext::isfinite(a);
#else
@@ -1020,9 +1033,25 @@
#endif
}
};
+
template <typename Scalar>
-struct functor_traits<scalar_isfinite_op<Scalar>> {
- enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = false };
+struct scalar_isfinite_op<Scalar, true> {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const Scalar& a) const {
+#if defined(SYCL_DEVICE_ONLY)
+ return (numext::isfinite(a) ? ptrue(a) : pzero(a));
+#else
+ return (numext::isfinite EIGEN_NOT_A_MACRO(a) ? ptrue(a) : pzero(a));
+#endif
+ }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const {
+ constexpr Scalar inf = NumTraits<Scalar>::infinity();
+ return pcmp_lt(pabs(a), pset1<Packet>(inf));
+ }
+};
+template <typename Scalar, bool UseTypedPredicate>
+struct functor_traits<scalar_isfinite_op<Scalar, UseTypedPredicate>> {
+ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasCmp && UseTypedPredicate };
};
/** \internal
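
The public isInf()/isFinite() API is unchanged; the new UseTypedPredicate=true specializations return a Scalar mask (ptrue/pzero) instead of bool, so the loops can vectorize wherever Eigen instantiates them and packet compares are available. A sketch of the unchanged user-level calls (<limits> and Eigen headers assumed):

    Eigen::ArrayXf x(3);
    x << 1.0f, std::numeric_limits<float>::infinity(),
         std::numeric_limits<float>::quiet_NaN();
    bool anyInf = x.isInf().any();         // true
    bool allFinite = x.isFinite().all();   // false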
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h
index 2122af9..8244758 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -57,7 +57,7 @@
Index rs = size - k - 1; // remaining size
Index s = TriStorageOrder == RowMajor ? (IsLower ? 0 : i + 1) : IsLower ? i + 1 : i - rs;
- Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(Scalar(1)/conj(tri(i,i)));
+ Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(Scalar(1) / conj(tri(i, i)));
for (Index j = 0; j < otherSize; ++j) {
if (TriStorageOrder == RowMajor) {
Scalar b(0);
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index 2f2ba9b..cf25359 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -502,6 +502,9 @@
};
} // namespace internal
+template <typename XprType, typename Device>
+struct DeviceWrapper;
+
} // end namespace Eigen
#endif // EIGEN_FORWARDDECLARATIONS_H
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 6253454..7edd0a1 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -762,12 +762,22 @@
#ifdef EIGEN_ALLOCA
#if EIGEN_DEFAULT_ALIGN_BYTES > 0
- // We always manually re-align the result of EIGEN_ALLOCA.
+// We always manually re-align the result of EIGEN_ALLOCA.
// If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
-#define EIGEN_ALIGNED_ALLOCA(SIZE) \
- reinterpret_cast<void*>( \
- (std::uintptr_t(EIGEN_ALLOCA(SIZE + EIGEN_DEFAULT_ALIGN_BYTES - 1)) + EIGEN_DEFAULT_ALIGN_BYTES - 1) & \
- ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES - 1)))
+
+#if (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG)
+#define EIGEN_ALIGNED_ALLOCA(SIZE) __builtin_alloca_with_align(SIZE, CHAR_BIT* EIGEN_DEFAULT_ALIGN_BYTES)
+#else
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* eigen_aligned_alloca_helper(void* ptr) {
+ constexpr std::uintptr_t mask = EIGEN_DEFAULT_ALIGN_BYTES - 1;
+ std::uintptr_t ptr_int = std::uintptr_t(ptr);
+ std::uintptr_t aligned_ptr_int = (ptr_int + mask) & ~mask;
+ std::uintptr_t offset = aligned_ptr_int - ptr_int;
+ return static_cast<void*>(static_cast<uint8_t*>(ptr) + offset);
+}
+#define EIGEN_ALIGNED_ALLOCA(SIZE) eigen_aligned_alloca_helper(EIGEN_ALLOCA(SIZE + EIGEN_DEFAULT_ALIGN_BYTES - 1))
+#endif
+
#else
#define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
#endif
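
Note on the GCC/Clang branch above: __builtin_alloca_with_align takes its alignment argument in bits, hence the CHAR_BIT factor. Rough equivalence of the two branches:

    // EIGEN_ALIGNED_ALLOCA(n)
    //   GCC/Clang: __builtin_alloca_with_align(n, CHAR_BIT * EIGEN_DEFAULT_ALIGN_BYTES)
    //   otherwise: eigen_aligned_alloca_helper(EIGEN_ALLOCA(n + EIGEN_DEFAULT_ALIGN_BYTES - 1)),
    //              which rounds the pointer up to the next EIGEN_DEFAULT_ALIGN_BYTES boundary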
diff --git a/Eigen/src/Core/util/SymbolicIndex.h b/Eigen/src/Core/util/SymbolicIndex.h
index 9668f1e..dc204af 100644
--- a/Eigen/src/Core/util/SymbolicIndex.h
+++ b/Eigen/src/Core/util/SymbolicIndex.h
@@ -96,17 +96,17 @@
return AddExpr<Derived, ValueExpr<>>(derived(), -a);
}
constexpr ProductExpr<Derived, ValueExpr<>> operator*(Index a) const {
- return ProductExpr<Derived, ValueExpr<> >(derived(), a);
+ return ProductExpr<Derived, ValueExpr<>>(derived(), a);
}
constexpr QuotientExpr<Derived, ValueExpr<>> operator/(Index a) const {
- return QuotientExpr<Derived, ValueExpr<> >(derived(), a);
+ return QuotientExpr<Derived, ValueExpr<>>(derived(), a);
}
friend constexpr AddExpr<Derived, ValueExpr<>> operator+(Index a, const BaseExpr& b) {
- return AddExpr<Derived, ValueExpr<> >(b.derived(), a);
+ return AddExpr<Derived, ValueExpr<>>(b.derived(), a);
}
friend constexpr AddExpr<NegateExpr<Derived>, ValueExpr<>> operator-(Index a, const BaseExpr& b) {
- return AddExpr<NegateExpr<Derived>, ValueExpr<> >(-b.derived(), a);
+ return AddExpr<NegateExpr<Derived>, ValueExpr<>>(-b.derived(), a);
}
friend constexpr ProductExpr<ValueExpr<>, Derived> operator*(Index a, const BaseExpr& b) {
return ProductExpr<ValueExpr<>, Derived>(a, b.derived());
@@ -117,41 +117,41 @@
template <int N>
constexpr AddExpr<Derived, ValueExpr<internal::FixedInt<N>>> operator+(internal::FixedInt<N>) const {
- return AddExpr<Derived, ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >());
+ return AddExpr<Derived, ValueExpr<internal::FixedInt<N>>>(derived(), ValueExpr<internal::FixedInt<N>>());
}
template <int N>
constexpr AddExpr<Derived, ValueExpr<internal::FixedInt<-N>>> operator-(internal::FixedInt<N>) const {
- return AddExpr<Derived, ValueExpr<internal::FixedInt<-N> > >(derived(), ValueExpr<internal::FixedInt<-N> >());
+ return AddExpr<Derived, ValueExpr<internal::FixedInt<-N>>>(derived(), ValueExpr<internal::FixedInt<-N>>());
}
template <int N>
constexpr ProductExpr<Derived, ValueExpr<internal::FixedInt<N>>> operator*(internal::FixedInt<N>) const {
- return ProductExpr<Derived, ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >());
+ return ProductExpr<Derived, ValueExpr<internal::FixedInt<N>>>(derived(), ValueExpr<internal::FixedInt<N>>());
}
template <int N>
constexpr QuotientExpr<Derived, ValueExpr<internal::FixedInt<N>>> operator/(internal::FixedInt<N>) const {
- return QuotientExpr<Derived, ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >());
+ return QuotientExpr<Derived, ValueExpr<internal::FixedInt<N>>>(derived(), ValueExpr<internal::FixedInt<N>>());
}
template <int N>
friend constexpr AddExpr<Derived, ValueExpr<internal::FixedInt<N>>> operator+(internal::FixedInt<N>,
const BaseExpr& b) {
- return AddExpr<Derived, ValueExpr<internal::FixedInt<N> > >(b.derived(), ValueExpr<internal::FixedInt<N> >());
+ return AddExpr<Derived, ValueExpr<internal::FixedInt<N>>>(b.derived(), ValueExpr<internal::FixedInt<N>>());
}
template <int N>
friend constexpr AddExpr<NegateExpr<Derived>, ValueExpr<internal::FixedInt<N>>> operator-(internal::FixedInt<N>,
const BaseExpr& b) {
- return AddExpr<NegateExpr<Derived>, ValueExpr<internal::FixedInt<N> > >(-b.derived(),
- ValueExpr<internal::FixedInt<N> >());
+ return AddExpr<NegateExpr<Derived>, ValueExpr<internal::FixedInt<N>>>(-b.derived(),
+ ValueExpr<internal::FixedInt<N>>());
}
template <int N>
friend constexpr ProductExpr<ValueExpr<internal::FixedInt<N>>, Derived> operator*(internal::FixedInt<N>,
const BaseExpr& b) {
- return ProductExpr<ValueExpr<internal::FixedInt<N> >, Derived>(ValueExpr<internal::FixedInt<N> >(), b.derived());
+ return ProductExpr<ValueExpr<internal::FixedInt<N>>, Derived>(ValueExpr<internal::FixedInt<N>>(), b.derived());
}
template <int N>
friend constexpr QuotientExpr<ValueExpr<internal::FixedInt<N>>, Derived> operator/(internal::FixedInt<N>,
const BaseExpr& b) {
- return QuotientExpr<ValueExpr<internal::FixedInt<N> >, Derived>(ValueExpr<internal::FixedInt<N> >(), b.derived());
+ return QuotientExpr<ValueExpr<internal::FixedInt<N>>, Derived>(ValueExpr<internal::FixedInt<N>>(), b.derived());
}
template <typename OtherDerived>
@@ -161,7 +161,7 @@
template <typename OtherDerived>
constexpr AddExpr<Derived, NegateExpr<OtherDerived>> operator-(const BaseExpr<OtherDerived>& b) const {
- return AddExpr<Derived, NegateExpr<OtherDerived> >(derived(), -b.derived());
+ return AddExpr<Derived, NegateExpr<OtherDerived>>(derived(), -b.derived());
}
template <typename OtherDerived>
@@ -179,7 +179,7 @@
struct is_symbolic {
// BaseExpr has no conversion ctor, so we only have to check whether T can be statically cast to its base class
// BaseExpr<T>.
- enum { value = internal::is_convertible<T, BaseExpr<T> >::value };
+ enum { value = internal::is_convertible<T, BaseExpr<T>>::value };
};
// A simple wrapper around an integral value to provide the eval method.
@@ -317,7 +317,7 @@
/** Expression of a symbol uniquely identified by the template parameter type \c tag */
template <typename tag>
-class SymbolExpr : public BaseExpr<SymbolExpr<tag> > {
+class SymbolExpr : public BaseExpr<SymbolExpr<tag>> {
public:
/** Alias to the template parameter \c tag */
typedef tag Tag;
@@ -349,7 +349,7 @@
};
template <typename Arg0>
-class NegateExpr : public BaseExpr<NegateExpr<Arg0> > {
+class NegateExpr : public BaseExpr<NegateExpr<Arg0>> {
public:
constexpr NegateExpr() = default;
constexpr NegateExpr(const Arg0& arg0) : m_arg0(arg0) {}
@@ -370,7 +370,7 @@
};
template <typename Arg0, typename Arg1>
-class AddExpr : public BaseExpr<AddExpr<Arg0, Arg1> > {
+class AddExpr : public BaseExpr<AddExpr<Arg0, Arg1>> {
public:
constexpr AddExpr() = default;
constexpr AddExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
@@ -393,7 +393,7 @@
};
template <typename Arg0, typename Arg1>
-class ProductExpr : public BaseExpr<ProductExpr<Arg0, Arg1> > {
+class ProductExpr : public BaseExpr<ProductExpr<Arg0, Arg1>> {
public:
constexpr ProductExpr() = default;
constexpr ProductExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
@@ -416,7 +416,7 @@
};
template <typename Arg0, typename Arg1>
-class QuotientExpr : public BaseExpr<QuotientExpr<Arg0, Arg1> > {
+class QuotientExpr : public BaseExpr<QuotientExpr<Arg0, Arg1>> {
public:
constexpr QuotientExpr() = default;
constexpr QuotientExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index a6a7d3f..cecbee8 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -206,6 +206,64 @@
enum { Cost = 10, PacketAccess = false, IsRepeatable = false };
};
+// estimates the cost of lazily evaluating a generic functor by unwinding the expression
+template <typename Xpr>
+struct nested_functor_cost {
+ static constexpr Index Cost = static_cast<Index>(functor_traits<Xpr>::Cost);
+};
+
+template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+struct nested_functor_cost<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>> {
+ static constexpr Index Cost = 1;
+};
+
+template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+struct nested_functor_cost<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols>> {
+ static constexpr Index Cost = 1;
+};
+
+// TODO: assign a cost to the stride type?
+template <typename PlainObjectType, int MapOptions, typename StrideType>
+struct nested_functor_cost<Map<PlainObjectType, MapOptions, StrideType>> : nested_functor_cost<PlainObjectType> {};
+
+template <typename Func, typename Xpr>
+struct nested_functor_cost<CwiseUnaryOp<Func, Xpr>> {
+ using XprCleaned = remove_all_t<Xpr>;
+ using FuncCleaned = remove_all_t<Func>;
+ static constexpr Index Cost = nested_functor_cost<FuncCleaned>::Cost + nested_functor_cost<XprCleaned>::Cost;
+};
+
+template <typename Func, typename Xpr>
+struct nested_functor_cost<CwiseNullaryOp<Func, Xpr>> {
+ using XprCleaned = remove_all_t<Xpr>;
+ using FuncCleaned = remove_all_t<Func>;
+ static constexpr Index Cost = nested_functor_cost<FuncCleaned>::Cost + nested_functor_cost<XprCleaned>::Cost;
+};
+
+template <typename Func, typename LhsXpr, typename RhsXpr>
+struct nested_functor_cost<CwiseBinaryOp<Func, LhsXpr, RhsXpr>> {
+ using LhsXprCleaned = remove_all_t<LhsXpr>;
+ using RhsXprCleaned = remove_all_t<RhsXpr>;
+ using FuncCleaned = remove_all_t<Func>;
+ static constexpr Index Cost = nested_functor_cost<FuncCleaned>::Cost + nested_functor_cost<LhsXprCleaned>::Cost +
+ nested_functor_cost<RhsXprCleaned>::Cost;
+};
+
+template <typename Func, typename LhsXpr, typename MidXpr, typename RhsXpr>
+struct nested_functor_cost<CwiseTernaryOp<Func, LhsXpr, MidXpr, RhsXpr>> {
+ using LhsXprCleaned = remove_all_t<LhsXpr>;
+ using MidXprCleaned = remove_all_t<MidXpr>;
+ using RhsXprCleaned = remove_all_t<RhsXpr>;
+ using FuncCleaned = remove_all_t<Func>;
+ static constexpr Index Cost = nested_functor_cost<FuncCleaned>::Cost + nested_functor_cost<LhsXprCleaned>::Cost +
+ nested_functor_cost<MidXprCleaned>::Cost + nested_functor_cost<RhsXprCleaned>::Cost;
+};
+
+template <typename Xpr>
+struct functor_cost {
+ static constexpr Index Cost = plain_enum_max(nested_functor_cost<Xpr>::Cost, 1);
+};
+
template <typename T>
struct packet_traits;
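
How nested_functor_cost unwinds an expression (leaf and functor costs per the specializations above; the operator costs come from their functor_traits):

    // For MatrixXf a, b and the expression (a + b).cwiseAbs():
    //   Cost = cost(scalar_abs_op) + cost(scalar_sum_op)
    //        + 1 /* Matrix leaf a */ + 1 /* Matrix leaf b */
    // functor_cost<Xpr> then clamps the total to at least 1.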
diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h
index f80ddc0..52f3564 100644
--- a/Eigen/src/SVD/BDCSVD.h
+++ b/Eigen/src/SVD/BDCSVD.h
@@ -1233,7 +1233,7 @@
using std::conj;
using std::pow;
using std::sqrt;
-
+
RealScalar s = m_computed(firstColm + i, firstColm);
RealScalar c = m_computed(firstColm + j, firstColm);
RealScalar r = numext::hypot(c, s);
@@ -1424,8 +1424,7 @@
if ((diag(i) - diag(i - 1)) < epsilon_strict) {
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
std::cout << "deflation 4.4 with i = " << i << " because " << diag(i) << " - " << diag(i - 1)
- << " == " << (diag(i) - diag(i - 1)) << " < "
- << epsilon_strict << "\n";
+ << " == " << (diag(i) - diag(i - 1)) << " < " << epsilon_strict << "\n";
#endif
eigen_internal_assert(abs(diag(i) - diag(i - 1)) < epsilon_coarse &&
" diagonal entries are not properly sorted");
diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h
index db70810..17ce596 100644
--- a/Eigen/src/SparseCore/SparseDenseProduct.h
+++ b/Eigen/src/SparseCore/SparseDenseProduct.h
@@ -45,7 +45,6 @@
Index n = lhs.outerSize();
#ifdef EIGEN_HAS_OPENMP
- Eigen::initParallel();
Index threads = Eigen::nbThreads();
#endif
@@ -125,7 +124,6 @@
LhsEval lhsEval(lhs);
#ifdef EIGEN_HAS_OPENMP
- Eigen::initParallel();
Index threads = Eigen::nbThreads();
// This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.
// It basically represents the minimal amount of work to be done to be worth it.
diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h
index 849970a..fd92ab0 100644
--- a/Eigen/src/SparseCore/SparseMatrix.h
+++ b/Eigen/src/SparseCore/SparseMatrix.h
@@ -563,6 +563,8 @@
/** \internal
* same as insert(Index,Index) except that the indices are given relative to the storage order */
Scalar& insertByOuterInner(Index j, Index i) {
+ eigen_assert(j >= 0 && j < m_outerSize && "invalid outer index");
+ eigen_assert(i >= 0 && i < m_innerSize && "invalid inner index");
Index start = m_outerIndex[j];
Index end = isCompressed() ? m_outerIndex[j + 1] : start + m_innerNonZeros[j];
Index dst = start == end ? end : m_data.searchLowerIndex(start, end, i);
diff --git a/Eigen/src/ThreadPool/CoreThreadPoolDevice.h b/Eigen/src/ThreadPool/CoreThreadPoolDevice.h
new file mode 100644
index 0000000..acf1d62
--- /dev/null
+++ b/Eigen/src/ThreadPool/CoreThreadPoolDevice.h
@@ -0,0 +1,327 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2023 Charlie Schlosser <cs.schlosser@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CORE_THREAD_POOL_DEVICE_H
+#define EIGEN_CORE_THREAD_POOL_DEVICE_H
+
+namespace Eigen {
+
+// CoreThreadPoolDevice provides an easy-to-understand Device for parallelizing Eigen Core expressions with
+// a ThreadPool. Expressions are recursively split evenly until the evaluation cost is less than the threshold for
+// delegating the task to a thread.
+/*
+ a
+ / \
+ / \
+ / \
+ / \
+ / \
+ / \
+ / \
+ a e
+ / \ / \
+ / \ / \
+ / \ / \
+ a c e g
+ / \ / \ / \ / \
+ / \ / \ / \ / \
+ a b c d e f g h
+*/
+// Each task descends the binary tree to the left, delegates the right task to a new thread, and continues to the
+// left. This ensures that work is evenly distributed to the thread pool as quickly as possible and minimizes the number
+// of tasks created during the evaluation. Consider an expression that is divided into 8 chunks. The
+// primary task 'a' creates tasks 'e', 'c', and 'b', and executes its portion of the expression at the bottom of the
+// tree. Likewise, task 'e' creates tasks 'g' and 'f', and executes its portion of the expression.
+
+struct CoreThreadPoolDevice {
+ using Task = std::function<void()>;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoreThreadPoolDevice(ThreadPool& pool, float threadCostThreshold = 3e-5f)
+ : m_pool(pool) {
+ eigen_assert(threadCostThreshold >= 0.0f && "threadCostThreshold must be non-negative");
+ m_costFactor = threadCostThreshold;
+ }
+
+ template <int PacketSize>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int calculateLevels(Index size, float cost) const {
+ eigen_assert(cost >= 0.0f && "cost must be non-negative");
+ Index numOps = size / PacketSize;
+ int actualThreads = numOps < m_pool.NumThreads() ? static_cast<int>(numOps) : m_pool.NumThreads();
+ float totalCost = static_cast<float>(numOps) * cost;
+ float idealThreads = totalCost * m_costFactor;
+ if (idealThreads < static_cast<float>(actualThreads)) {
+ idealThreads = numext::maxi(idealThreads, 1.0f);
+ actualThreads = numext::mini(actualThreads, static_cast<int>(idealThreads));
+ }
+ int maxLevel = internal::log2_ceil(actualThreads);
+ return maxLevel;
+ }
+
+// MSVC does not like inlining parallelForImpl
+#if EIGEN_COMP_MSVC && !EIGEN_COMP_CLANG
+#define EIGEN_PARALLEL_FOR_INLINE
+#else
+#define EIGEN_PARALLEL_FOR_INLINE EIGEN_STRONG_INLINE
+#endif
+
+ template <typename UnaryFunctor, int PacketSize>
+ EIGEN_DEVICE_FUNC EIGEN_PARALLEL_FOR_INLINE void parallelForImpl(Index begin, Index end, UnaryFunctor& f,
+ Barrier& barrier, int level) {
+ while (level > 0) {
+ level--;
+ Index size = end - begin;
+ eigen_assert(size % PacketSize == 0 && "this function assumes size is a multiple of PacketSize");
+ Index mid = begin + numext::round_down(size >> 1, PacketSize);
+ Task right = [this, mid, end, &f, &barrier, level]() {
+ parallelForImpl<UnaryFunctor, PacketSize>(mid, end, f, barrier, level);
+ };
+ m_pool.Schedule(std::move(right));
+ end = mid;
+ }
+ for (Index i = begin; i < end; i += PacketSize) f(i);
+ barrier.Notify();
+ }
+
+ template <typename BinaryFunctor, int PacketSize>
+ EIGEN_DEVICE_FUNC EIGEN_PARALLEL_FOR_INLINE void parallelForImpl(Index outerBegin, Index outerEnd, Index innerBegin,
+ Index innerEnd, BinaryFunctor& f, Barrier& barrier,
+ int level) {
+ while (level > 0) {
+ level--;
+ Index outerSize = outerEnd - outerBegin;
+ if (outerSize > 1) {
+ Index outerMid = outerBegin + (outerSize >> 1);
+ Task right = [this, &f, &barrier, outerMid, outerEnd, innerBegin, innerEnd, level]() {
+ parallelForImpl<BinaryFunctor, PacketSize>(outerMid, outerEnd, innerBegin, innerEnd, f, barrier, level);
+ };
+ m_pool.Schedule(std::move(right));
+ outerEnd = outerMid;
+ } else {
+ Index innerSize = innerEnd - innerBegin;
+ eigen_assert(innerSize % PacketSize == 0 && "this function assumes innerSize is a multiple of PacketSize");
+ Index innerMid = innerBegin + numext::round_down(innerSize >> 1, PacketSize);
+ Task right = [this, &f, &barrier, outerBegin, outerEnd, innerMid, innerEnd, level]() {
+ parallelForImpl<BinaryFunctor, PacketSize>(outerBegin, outerEnd, innerMid, innerEnd, f, barrier, level);
+ };
+ m_pool.Schedule(std::move(right));
+ innerEnd = innerMid;
+ }
+ }
+ for (Index outer = outerBegin; outer < outerEnd; outer++)
+ for (Index inner = innerBegin; inner < innerEnd; inner += PacketSize) f(outer, inner);
+ barrier.Notify();
+ }
+
+#undef EIGEN_PARALLEL_FOR_INLINE
+
+ template <typename UnaryFunctor, int PacketSize>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void parallelFor(Index begin, Index end, UnaryFunctor& f, float cost) {
+ Index size = end - begin;
+ int maxLevel = calculateLevels<PacketSize>(size, cost);
+ Barrier barrier(1 << maxLevel);
+ parallelForImpl<UnaryFunctor, PacketSize>(begin, end, f, barrier, maxLevel);
+ barrier.Wait();
+ }
+
+ template <typename BinaryFunctor, int PacketSize>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void parallelFor(Index outerBegin, Index outerEnd, Index innerBegin,
+ Index innerEnd, BinaryFunctor& f, float cost) {
+ Index outerSize = outerEnd - outerBegin;
+ Index innerSize = innerEnd - innerBegin;
+ Index size = outerSize * innerSize;
+ int maxLevel = calculateLevels<PacketSize>(size, cost);
+ Barrier barrier(1 << maxLevel);
+ parallelForImpl<BinaryFunctor, PacketSize>(outerBegin, outerEnd, innerBegin, innerEnd, f, barrier, maxLevel);
+ barrier.Wait();
+ }
+
+ ThreadPool& m_pool;
+  // m_costFactor is the inverse of the cost of delegating a task to a thread;
+  // the inverse is stored so calculateLevels() can multiply instead of divide.
+ float m_costFactor;
+};
+
+// Specializations of the coefficient-wise dense assignment loops for CoreThreadPoolDevice.
+
+namespace internal {
+
+template <typename Kernel>
+struct cost_helper {
+ using SrcEvaluatorType = typename Kernel::SrcEvaluatorType;
+ using DstEvaluatorType = typename Kernel::DstEvaluatorType;
+ using SrcXprType = typename SrcEvaluatorType::XprType;
+ using DstXprType = typename DstEvaluatorType::XprType;
+ static constexpr Index Cost = functor_cost<SrcXprType>::Cost + functor_cost<DstXprType>::Cost;
+};
+
+template <typename Kernel>
+struct dense_assignment_loop_with_device<Kernel, CoreThreadPoolDevice, DefaultTraversal, NoUnrolling> {
+ static constexpr Index XprEvaluationCost = cost_helper<Kernel>::Cost;
+ struct AssignmentFunctor : public Kernel {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AssignmentFunctor(Kernel& kernel) : Kernel(kernel) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index outer, Index inner) {
+ this->assignCoeffByOuterInner(outer, inner);
+ }
+ };
+
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel& kernel, CoreThreadPoolDevice& device) {
+ const Index innerSize = kernel.innerSize();
+ const Index outerSize = kernel.outerSize();
+ constexpr float cost = static_cast<float>(XprEvaluationCost);
+ AssignmentFunctor functor(kernel);
+ device.template parallelFor<AssignmentFunctor, 1>(0, outerSize, 0, innerSize, functor, cost);
+ }
+};
+
+template <typename Kernel>
+struct dense_assignment_loop_with_device<Kernel, CoreThreadPoolDevice, DefaultTraversal, InnerUnrolling> {
+ using DstXprType = typename Kernel::DstEvaluatorType::XprType;
+ static constexpr Index XprEvaluationCost = cost_helper<Kernel>::Cost, InnerSize = DstXprType::InnerSizeAtCompileTime;
+ struct AssignmentFunctor : public Kernel {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AssignmentFunctor(Kernel& kernel) : Kernel(kernel) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index outer) {
+ copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, InnerSize>::run(*this, outer);
+ }
+ };
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel& kernel, CoreThreadPoolDevice& device) {
+ const Index outerSize = kernel.outerSize();
+ AssignmentFunctor functor(kernel);
+ constexpr float cost = static_cast<float>(XprEvaluationCost) * static_cast<float>(InnerSize);
+ device.template parallelFor<AssignmentFunctor, 1>(0, outerSize, functor, cost);
+ }
+};
+
+template <typename Kernel>
+struct dense_assignment_loop_with_device<Kernel, CoreThreadPoolDevice, InnerVectorizedTraversal, NoUnrolling> {
+ using PacketType = typename Kernel::PacketType;
+ static constexpr Index XprEvaluationCost = cost_helper<Kernel>::Cost, PacketSize = unpacket_traits<PacketType>::size,
+ SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
+ DstAlignment = Kernel::AssignmentTraits::DstAlignment;
+ struct AssignmentFunctor : public Kernel {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AssignmentFunctor(Kernel& kernel) : Kernel(kernel) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index outer, Index inner) {
+ this->template assignPacketByOuterInner<Unaligned, Unaligned, PacketType>(outer, inner);
+ }
+ };
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel& kernel, CoreThreadPoolDevice& device) {
+ const Index innerSize = kernel.innerSize();
+ const Index outerSize = kernel.outerSize();
+ const float cost = static_cast<float>(XprEvaluationCost) * static_cast<float>(innerSize);
+ AssignmentFunctor functor(kernel);
+ device.template parallelFor<AssignmentFunctor, PacketSize>(0, outerSize, 0, innerSize, functor, cost);
+ }
+};
+
+template <typename Kernel>
+struct dense_assignment_loop_with_device<Kernel, CoreThreadPoolDevice, InnerVectorizedTraversal, InnerUnrolling> {
+ using PacketType = typename Kernel::PacketType;
+ using DstXprType = typename Kernel::DstEvaluatorType::XprType;
+ static constexpr Index XprEvaluationCost = cost_helper<Kernel>::Cost, PacketSize = unpacket_traits<PacketType>::size,
+ SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
+ DstAlignment = Kernel::AssignmentTraits::DstAlignment,
+ InnerSize = DstXprType::InnerSizeAtCompileTime;
+ struct AssignmentFunctor : public Kernel {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AssignmentFunctor(Kernel& kernel) : Kernel(kernel) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index outer) {
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, InnerSize, SrcAlignment, DstAlignment>::run(*this, outer);
+ }
+ };
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel& kernel, CoreThreadPoolDevice& device) {
+ const Index outerSize = kernel.outerSize();
+ constexpr float cost = static_cast<float>(XprEvaluationCost) * static_cast<float>(InnerSize);
+ AssignmentFunctor functor(kernel);
+ device.template parallelFor<AssignmentFunctor, PacketSize>(0, outerSize, functor, cost);
+ }
+};
+
+template <typename Kernel>
+struct dense_assignment_loop_with_device<Kernel, CoreThreadPoolDevice, SliceVectorizedTraversal, NoUnrolling> {
+ using Scalar = typename Kernel::Scalar;
+ using PacketType = typename Kernel::PacketType;
+ static constexpr Index XprEvaluationCost = cost_helper<Kernel>::Cost, PacketSize = unpacket_traits<PacketType>::size;
+ struct PacketAssignmentFunctor : public Kernel {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketAssignmentFunctor(Kernel& kernel) : Kernel(kernel) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index outer, Index inner) {
+ this->template assignPacketByOuterInner<Unaligned, Unaligned, PacketType>(outer, inner);
+ }
+ };
+ struct ScalarAssignmentFunctor : public Kernel {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ScalarAssignmentFunctor(Kernel& kernel) : Kernel(kernel) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index outer) {
+ const Index innerSize = this->innerSize();
+ const Index packetAccessSize = numext::round_down(innerSize, PacketSize);
+ for (Index inner = packetAccessSize; inner < innerSize; inner++) this->assignCoeffByOuterInner(outer, inner);
+ }
+ };
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel& kernel, CoreThreadPoolDevice& device) {
+ const Index outerSize = kernel.outerSize();
+ const Index innerSize = kernel.innerSize();
+ const Index packetAccessSize = numext::round_down(innerSize, PacketSize);
+ constexpr float packetCost = static_cast<float>(XprEvaluationCost);
+ const float scalarCost = static_cast<float>(XprEvaluationCost) * static_cast<float>(innerSize - packetAccessSize);
+ PacketAssignmentFunctor packetFunctor(kernel);
+ ScalarAssignmentFunctor scalarFunctor(kernel);
+ device.template parallelFor<PacketAssignmentFunctor, PacketSize>(0, outerSize, 0, packetAccessSize, packetFunctor,
+ packetCost);
+ device.template parallelFor<ScalarAssignmentFunctor, 1>(0, outerSize, scalarFunctor, scalarCost);
+  }
+};
+
+template <typename Kernel>
+struct dense_assignment_loop_with_device<Kernel, CoreThreadPoolDevice, LinearTraversal, NoUnrolling> {
+ static constexpr Index XprEvaluationCost = cost_helper<Kernel>::Cost;
+ struct AssignmentFunctor : public Kernel {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AssignmentFunctor(Kernel& kernel) : Kernel(kernel) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index index) { this->assignCoeff(index); }
+ };
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel& kernel, CoreThreadPoolDevice& device) {
+ const Index size = kernel.size();
+ constexpr float cost = static_cast<float>(XprEvaluationCost);
+ AssignmentFunctor functor(kernel);
+ device.template parallelFor<AssignmentFunctor, 1>(0, size, functor, cost);
+ }
+};
+
+template <typename Kernel>
+struct dense_assignment_loop_with_device<Kernel, CoreThreadPoolDevice, LinearVectorizedTraversal, NoUnrolling> {
+ using Scalar = typename Kernel::Scalar;
+ using PacketType = typename Kernel::PacketType;
+ static constexpr Index XprEvaluationCost = cost_helper<Kernel>::Cost,
+ RequestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
+ PacketSize = unpacket_traits<PacketType>::size,
+ DstIsAligned = Kernel::AssignmentTraits::DstAlignment >= RequestedAlignment,
+ DstAlignment = packet_traits<Scalar>::AlignedOnScalar ? RequestedAlignment
+ : Kernel::AssignmentTraits::DstAlignment,
+ SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
+ struct AssignmentFunctor : public Kernel {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AssignmentFunctor(Kernel& kernel) : Kernel(kernel) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index index) {
+ this->template assignPacket<DstAlignment, SrcAlignment, PacketType>(index);
+ }
+ };
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel& kernel, CoreThreadPoolDevice& device) {
+ const Index size = kernel.size();
+ const Index alignedStart =
+ DstIsAligned ? 0 : internal::first_aligned<RequestedAlignment>(kernel.dstDataPtr(), size);
+ const Index alignedEnd = alignedStart + numext::round_down(size - alignedStart, PacketSize);
+
+ unaligned_dense_assignment_loop<DstIsAligned != 0>::run(kernel, 0, alignedStart);
+
+ constexpr float cost = static_cast<float>(XprEvaluationCost);
+ AssignmentFunctor functor(kernel);
+ device.template parallelFor<AssignmentFunctor, PacketSize>(alignedStart, alignedEnd, functor, cost);
+
+ unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
+ }
+};
+
+} // namespace internal
+
+} // namespace Eigen
+
+#endif // EIGEN_CORE_THREAD_POOL_DEVICE_H
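The new device plugs into the .device() assignment entry point for dense Core expressions. A minimal usage sketch, mirroring the test added further down in this change (pool size and matrix dimensions are arbitrary):

#define EIGEN_USE_THREADS
#include <Eigen/Dense>
#include <Eigen/ThreadPool>

int main() {
  Eigen::ThreadPool pool(4);                 // 4 worker threads
  Eigen::CoreThreadPoolDevice device(pool);  // default threadCostThreshold = 3e-5f
  Eigen::MatrixXf a = Eigen::MatrixXf::Random(512, 512);
  Eigen::MatrixXf b(512, 512);
  // Evaluated through the dense_assignment_loop_with_device specializations
  // above, which split the work across the pool when the cost model says the
  // expression is expensive enough to be worth delegating.
  b.device(device) = a.cwiseAbs2() + a;
  return 0;
}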
diff --git a/Eigen/src/ThreadPool/NonBlockingThreadPool.h b/Eigen/src/ThreadPool/NonBlockingThreadPool.h
index efa6ef5..66fd63c 100644
--- a/Eigen/src/ThreadPool/NonBlockingThreadPool.h
+++ b/Eigen/src/ThreadPool/NonBlockingThreadPool.h
@@ -343,7 +343,7 @@
}
victim += inc;
if (victim >= size) {
- victim -= size;
+ victim -= static_cast<unsigned int>(size);
}
}
return Task();
@@ -431,7 +431,7 @@
}
victim += inc;
if (victim >= size) {
- victim -= size;
+ victim -= static_cast<unsigned int>(size);
}
}
return -1;
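Both hunks above silence an implicit-narrowing warning rather than change behavior: victim is an unsigned index while size has a wider integer type, so the compound subtraction converts back down implicitly. A reduced illustration with hypothetical names (compile with -Wconversion, or /W4 on MSVC):

#include <cstddef>

unsigned wrap_around(unsigned victim, std::size_t size) {
  // victim -= size;                          // warns: size_t -> unsigned may narrow
  victim -= static_cast<unsigned int>(size);  // explicit cast, warning-free
  return victim;
}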
diff --git a/test/array_cwise.cpp b/test/array_cwise.cpp
index 3b36328..fdc5b48 100644
--- a/test/array_cwise.cpp
+++ b/test/array_cwise.cpp
@@ -960,9 +960,9 @@
// Check for larger magnitude complex numbers that expm1 matches exp - 1.
VERIFY_IS_APPROX(expm1(10. * m1), exp(10. * m1) - 1.);
- VERIFY_IS_APPROX(sinh(m1), 0.5*(exp(m1)-exp(-m1)));
- VERIFY_IS_APPROX(cosh(m1), 0.5*(exp(m1)+exp(-m1)));
- VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1))));
+ VERIFY_IS_APPROX(sinh(m1), 0.5 * (exp(m1) - exp(-m1)));
+ VERIFY_IS_APPROX(cosh(m1), 0.5 * (exp(m1) + exp(-m1)));
+ VERIFY_IS_APPROX(tanh(m1), (0.5 * (exp(m1) - exp(-m1))) / (0.5 * (exp(m1) + exp(-m1))));
VERIFY_IS_APPROX(logistic(m1), (1.0 / (1.0 + exp(-m1))));
if (m1.size() > 0) {
// Complex exponential overflow edge-case.
@@ -1098,12 +1098,16 @@
}
};
+namespace Eigen {
+namespace internal {
template <int N, typename Scalar>
-struct internal::functor_traits<logical_left_shift_op<N, Scalar>> : shift_imm_traits<Scalar> {};
+struct functor_traits<logical_left_shift_op<N, Scalar>> : shift_imm_traits<Scalar> {};
template <int N, typename Scalar>
-struct internal::functor_traits<logical_right_shift_op<N, Scalar>> : shift_imm_traits<Scalar> {};
+struct functor_traits<logical_right_shift_op<N, Scalar>> : shift_imm_traits<Scalar> {};
template <int N, typename Scalar>
-struct internal::functor_traits<arithmetic_right_shift_op<N, Scalar>> : shift_imm_traits<Scalar> {};
+struct functor_traits<arithmetic_right_shift_op<N, Scalar>> : shift_imm_traits<Scalar> {};
+} // namespace internal
+} // namespace Eigen
template <typename ArrayType>
struct shift_test_impl {
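The functor_traits hunk above is a portability fix: an explicit specialization spelled with a qualified name (struct internal::functor_traits<...>) from outside the internal namespace is only well-formed since C++17, and older GCC releases (such as GCC 6) reject it. Reopening the namespace works everywhere. A reduced example with hypothetical names:

namespace lib {
template <typename T>
struct traits;  // primary template
}  // namespace lib

struct my_type {};

// Pre-C++17 compilers reject this qualified form at global scope:
//   template <>
//   struct lib::traits<my_type> {};

// Portable form, as used in the fix above:
namespace lib {
template <>
struct traits<my_type> {};
}  // namespace lib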
diff --git a/test/array_for_matrix.cpp b/test/array_for_matrix.cpp
index 237ac67..e94398a 100644
--- a/test/array_for_matrix.cpp
+++ b/test/array_for_matrix.cpp
@@ -26,7 +26,7 @@
// Prevent overflows for integer types.
if (Eigen::NumTraits<Scalar>::IsInteger) {
- Scalar kMaxVal = Scalar(10000);
+ Scalar kMaxVal = Scalar(1000);
m1.array() = m1.array() - kMaxVal * (m1.array() / kMaxVal);
m2.array() = m2.array() - kMaxVal * (m2.array() / kMaxVal);
}
@@ -123,7 +123,7 @@
// test Select
VERIFY_IS_APPROX((m1.array() < m2.array()).select(m1, m2), m1.cwiseMin(m2));
VERIFY_IS_APPROX((m1.array() > m2.array()).select(m1, m2), m1.cwiseMax(m2));
- Scalar mid = (m1.cwiseAbs().minCoeff() + m1.cwiseAbs().maxCoeff()) / Scalar(2);
+ Scalar mid = m1.cwiseAbs().minCoeff() / Scalar(2) + m1.cwiseAbs().maxCoeff() / Scalar(2);
for (int j = 0; j < cols; ++j)
for (int i = 0; i < rows; ++i) m3(i, j) = abs(m1(i, j)) < mid ? 0 : m1(i, j);
VERIFY_IS_APPROX(
@@ -140,8 +140,8 @@
// and/or
VERIFY(((m1.array() < RealScalar(0)).matrix() && (m1.array() > RealScalar(0)).matrix()).count() == 0);
VERIFY(((m1.array() < RealScalar(0)).matrix() || (m1.array() >= RealScalar(0)).matrix()).count() == rows * cols);
- RealScalar a = m1.cwiseAbs().mean();
- VERIFY(((m1.array() < -a).matrix() || (m1.array() > a).matrix()).count() == (m1.cwiseAbs().array() > a).count());
+ VERIFY(((m1.array() < -mid).matrix() || (m1.array() > mid).matrix()).count() ==
+ (m1.cwiseAbs().array() > mid).count());
typedef Matrix<Index, Dynamic, 1> VectorOfIndices;
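The hunks above guard the integer instantiations against overflow: halving each operand before adding keeps every intermediate value in range (at the price of a rounding difference of at most one, which is irrelevant for a threshold), and reusing mid instead of computing a mean avoids another potentially overflowing reduction over all coefficients. A reduced illustration:

// (lo + hi) / 2 overflows when lo + hi exceeds the type's range,
// even though the midpoint itself is representable:
int mid_unsafe(int lo, int hi) { return (lo + hi) / 2; }

// Halving first keeps every intermediate in range, matching the fix above:
int mid_safe(int lo, int hi) { return lo / 2 + hi / 2; }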
diff --git a/test/assignment_threaded.cpp b/test/assignment_threaded.cpp
new file mode 100644
index 0000000..7505a10
--- /dev/null
+++ b/test/assignment_threaded.cpp
@@ -0,0 +1,84 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2023 Charlie Schlosser <cs.schlosser@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_USE_THREADS 1
+
+#include "main.h"
+#include <Eigen/ThreadPool>
+
+namespace Eigen {
+namespace internal {
+// pass-through unary functor whose traits let the test toggle vectorization on and off
+template <typename Scalar, bool Vectorize>
+struct scalar_dummy_op {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const Scalar& a) const { return a; }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const {
+ return a;
+ }
+};
+template <typename Scalar, bool Vectorize>
+struct functor_traits<scalar_dummy_op<Scalar, Vectorize>> {
+ enum { Cost = 1'000'000, PacketAccess = Vectorize && packet_traits<Scalar>::Vectorizable };
+};
+} // namespace internal
+} // namespace Eigen
+
+template <typename PlainObject>
+void test_threaded_assignment(const PlainObject&, Index rows = PlainObject::RowsAtCompileTime,
+ Index cols = PlainObject::ColsAtCompileTime) {
+ using Scalar = typename PlainObject::Scalar;
+ using VectorizationOff = internal::scalar_dummy_op<Scalar, false>;
+ using VectorizationOn = internal::scalar_dummy_op<Scalar, true>;
+
+ int threads = 4;
+ ThreadPool pool(threads);
+ CoreThreadPoolDevice threadPoolDevice(pool);
+
+ PlainObject dst(rows, cols), ref(rows, cols), rhs(rows, cols);
+ rhs.setRandom();
+ const auto rhs_xpr = rhs.cwiseAbs2();
+
+ // linear access
+ dst.setRandom();
+ ref.setRandom();
+ ref = rhs_xpr.unaryExpr(VectorizationOff());
+ dst.device(threadPoolDevice) = rhs_xpr.unaryExpr(VectorizationOff());
+ VERIFY_IS_CWISE_EQUAL(ref, dst);
+
+ ref = rhs_xpr.unaryExpr(VectorizationOn());
+ dst.device(threadPoolDevice) = rhs_xpr.unaryExpr(VectorizationOn());
+ VERIFY_IS_CWISE_EQUAL(ref, dst);
+
+ // outer-inner access
+ Index blockRows = numext::maxi(Index(1), rows - 1);
+ Index blockCols = numext::maxi(Index(1), cols - 1);
+ dst.setRandom();
+ ref.setRandom();
+ ref.bottomRightCorner(blockRows, blockCols) =
+ rhs_xpr.bottomRightCorner(blockRows, blockCols).unaryExpr(VectorizationOff());
+ dst.bottomRightCorner(blockRows, blockCols).device(threadPoolDevice) =
+ rhs_xpr.bottomRightCorner(blockRows, blockCols).unaryExpr(VectorizationOff());
+ VERIFY_IS_CWISE_EQUAL(ref.bottomRightCorner(blockRows, blockCols), dst.bottomRightCorner(blockRows, blockCols));
+
+ ref.setZero();
+ dst.setZero();
+ ref.bottomRightCorner(blockRows, blockCols) =
+ rhs_xpr.bottomRightCorner(blockRows, blockCols).unaryExpr(VectorizationOn());
+ dst.bottomRightCorner(blockRows, blockCols).device(threadPoolDevice) =
+ rhs_xpr.bottomRightCorner(blockRows, blockCols).unaryExpr(VectorizationOn());
+ VERIFY_IS_CWISE_EQUAL(ref.bottomRightCorner(blockRows, blockCols), dst.bottomRightCorner(blockRows, blockCols));
+}
+
+EIGEN_DECLARE_TEST(assignment_threaded) {
+ for (int i = 0; i < g_repeat; i++) {
+ CALL_SUBTEST(test_threaded_assignment(MatrixXd(), 123, 123));
+ CALL_SUBTEST(test_threaded_assignment(Matrix<float, 16, 16>()));
+ }
+}
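For intuition on how the device sizes its fan-out in this test: with PacketSize 1, the 123x123 destination gives numOps = 15129; the functor advertises Cost = 1'000'000, so totalCost is about 1.5e10 and idealThreads = totalCost * 3e-5 is roughly 4.5e5, far above the pool's 4 threads. calculateLevels() therefore returns log2_ceil(4) = 2 and the assignment runs as 1 << 2 = 4 leaf tasks. A standalone sketch of that arithmetic (illustrative only, not part of the test):

#include <algorithm>
#include <cmath>

// Mirrors CoreThreadPoolDevice::calculateLevels for PacketSize == 1.
int levels(long size, float cost, int numThreads, float costFactor = 3e-5f) {
  int actual = static_cast<int>(std::min<long>(size, numThreads));
  float ideal = static_cast<float>(size) * cost * costFactor;
  if (ideal < static_cast<float>(actual))
    actual = std::min(actual, std::max(1, static_cast<int>(ideal)));
  return static_cast<int>(std::ceil(std::log2(actual)));  // log2_ceil
}
// levels(123 * 123, 1e6f, 4) == 2, i.e. the barrier waits on 4 notifications.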
diff --git a/test/constexpr.cpp b/test/constexpr.cpp
index ee154f8..9fdf447 100644
--- a/test/constexpr.cpp
+++ b/test/constexpr.cpp
@@ -7,6 +7,7 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#define EIGEN_TESTING_CONSTEXPR
#include "main.h"
EIGEN_DECLARE_TEST(constexpr) {
diff --git a/test/main.h b/test/main.h
index bce6736..e86c584 100644
--- a/test/main.h
+++ b/test/main.h
@@ -52,9 +52,6 @@
#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
#include <random>
#include <chrono>
-#ifdef EIGEN_USE_THREADS
-#include <future>
-#endif
#endif
#if __cplusplus > 201703L
// libstdc++ 9's <memory> indirectly uses max() via <bit>.
@@ -219,7 +216,6 @@
} // namespace Eigen
#define TRACK std::cerr << __FILE__ << " " << __LINE__ << std::endl
-// #define TRACK while()
#define EIGEN_DEFAULT_IO_FORMAT IOFormat(4, 0, " ", "\n", "", "", "", "")
@@ -336,7 +332,9 @@
#endif // EIGEN_NO_ASSERTION_CHECKING
+#ifndef EIGEN_TESTING_CONSTEXPR
#define EIGEN_INTERNAL_DEBUGGING
+#endif
#include <Eigen/QR> // required for createRandomPIMatrixOfRank and generateRandomMatrixSvs
inline void verify_impl(bool condition, const char* testname, const char* file, int line,
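The EIGEN_TESTING_CONSTEXPR guard exists because EIGEN_INTERNAL_DEBUGGING promotes eigen_internal_assert to a full test-harness assertion, and some of those internal checks are not usable during constant evaluation; constexpr.cpp therefore opts out before including main.h. One illustrative way such a check can break constant evaluation (hypothetical code, not Eigen's):

#include <cassert>
#include <cstdint>

struct FixedVec {
  float data[4];
  constexpr FixedVec() : data{} {
    // An alignment check like this is fine at runtime, but reinterpret_cast
    // is never allowed in a constant expression, so a constexpr FixedVec
    // would fail to compile with the check enabled:
    // assert(reinterpret_cast<std::uintptr_t>(data) % 16u == 0);
  }
};

constexpr FixedVec v{};  // compiles only while the check stays disabled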
diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
index 9417469..9dc9591 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
@@ -239,18 +239,16 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(const Self& other) : Base(other), m_storage(other.m_storage) {}
- template<typename... IndexTypes>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index firstDimension, IndexTypes... otherDimensions)
- : m_storage(firstDimension, otherDimensions...)
- {
- // The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
- EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
- }
+ template <typename... IndexTypes>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index firstDimension, IndexTypes... otherDimensions)
+ : m_storage(firstDimension, otherDimensions...) {
+ // The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
+ EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
+ }
- /** Normal Dimension */
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(const array<Index, NumIndices>& dimensions)
- : m_storage(internal::array_prod(dimensions), dimensions)
- {
+ /** Normal Dimension */
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(const array<Index, NumIndices>& dimensions)
+ : m_storage(internal::array_prod(dimensions), dimensions) {
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index f88793e..2c2c781 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -618,16 +618,15 @@
(isnan)() const {
return unaryExpr(internal::scalar_isnan_op<Scalar, true>()).template cast<bool>();
}
-
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_isinf_op<Scalar>, const Derived>
+ EIGEN_STRONG_INLINE const TensorConversionOp<bool, const TensorCwiseUnaryOp<internal::scalar_isinf_op<Scalar, true>, const Derived>>
(isinf)() const {
- return unaryExpr(internal::scalar_isinf_op<Scalar>());
+ return unaryExpr(internal::scalar_isinf_op<Scalar, true>()).template cast<bool>();
}
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_isfinite_op<Scalar>, const Derived>
+ EIGEN_STRONG_INLINE const TensorConversionOp<bool, const TensorCwiseUnaryOp<internal::scalar_isfinite_op<Scalar, true>, const Derived>>
(isfinite)() const {
- return unaryExpr(internal::scalar_isfinite_op<Scalar>());
+ return unaryExpr(internal::scalar_isfinite_op<Scalar, true>()).template cast<bool>();
}
// Coefficient-wise ternary operators.
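User-visible effect of the two signature changes above: isinf() and isfinite() now match isnan() in returning a bool-converted, vectorizable expression, so the result assigns directly into a Tensor of bool (exercised by the new subtests at the end of this change). A minimal sketch:

#include <limits>
#include <unsupported/Eigen/CXX11/Tensor>

void masks() {
  Eigen::Tensor<float, 2> t(4, 4);
  t.setRandom();
  t(0, 0) = std::numeric_limits<float>::infinity();
  Eigen::Tensor<bool, 2> inf_mask = (t.isinf)();        // TensorConversionOp<bool, ...>
  Eigen::Tensor<bool, 2> finite_mask = (t.isfinite)();  // uses the vectorized op
}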
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
index ae8f25f..06d4dfd3 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
@@ -215,14 +215,13 @@
}
template <typename... IndexTypes>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit DSizes(DenseIndex firstDimension, DenseIndex secondDimension,
- IndexTypes... otherDimensions)
- : Base({{firstDimension, secondDimension, otherDimensions...}}){EIGEN_STATIC_ASSERT(
- sizeof...(otherDimensions) + 2 == NumDims, YOU_MADE_A_PROGRAMMING_MISTAKE)}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit DSizes(DenseIndex firstDimension, DenseIndex secondDimension,
+ IndexTypes... otherDimensions)
+ : Base({{firstDimension, secondDimension, otherDimensions...}}) {
+ EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 2 == NumDims, YOU_MADE_A_PROGRAMMING_MISTAKE)
+ }
- EIGEN_DEVICE_FUNC DSizes
- &
- operator=(const array<DenseIndex, NumDims>& other) {
+ EIGEN_DEVICE_FUNC DSizes& operator=(const array<DenseIndex, NumDims>& other) {
*static_cast<Base*>(this) = other;
return *this;
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
index 2605219..0bdb1ab 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
@@ -367,7 +367,8 @@
const IndexType total_size = internal::array_prod(tensor.dimensions());
if (total_size > 0) {
const IndexType first_dim = Eigen::internal::array_get<0>(tensor.dimensions());
- Map<const Array<Scalar, Dynamic, Dynamic, Tensor::Layout>> matrix(tensor.data(), first_dim, total_size / first_dim);
+ Map<const Array<Scalar, Dynamic, Dynamic, Tensor::Layout>> matrix(tensor.data(), first_dim,
+ total_size / first_dim);
s << matrix;
return;
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
index c201707..191413b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
@@ -79,14 +79,14 @@
template <typename... IndexTypes>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr, Index firstDimension,
IndexTypes... otherDimensions)
- : m_data(dataPtr),
- m_dimensions(firstDimension, otherDimensions...){
- // The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
- EIGEN_STATIC_ASSERT((sizeof...(otherDimensions) + 1 == NumIndices || NumIndices == Dynamic),
- YOU_MADE_A_PROGRAMMING_MISTAKE)}
+ : m_data(dataPtr), m_dimensions(firstDimension, otherDimensions...) {
+ // The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
+ EIGEN_STATIC_ASSERT((sizeof...(otherDimensions) + 1 == NumIndices || NumIndices == Dynamic),
+ YOU_MADE_A_PROGRAMMING_MISTAKE)
+ }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr,
- const array<Index, NumIndices>& dimensions)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(StoragePointerType dataPtr,
+ const array<Index, NumIndices>& dimensions)
: m_data(dataPtr), m_dimensions(dimensions) {}
template <typename Dimensions>
diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT
index 557fdf6..f32ad36 100644
--- a/unsupported/Eigen/FFT
+++ b/unsupported/Eigen/FFT
@@ -231,7 +231,7 @@
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)
if (nfft < 1) nfft = src.size();
-
+
Index dst_size = nfft;
if (NumTraits<src_type>::IsComplex == 0 && HasFlag(HalfSpectrum)) {
dst_size = (nfft >> 1) + 1;
diff --git a/unsupported/test/cxx11_tensor_comparisons.cpp b/unsupported/test/cxx11_tensor_comparisons.cpp
index 3565b62..17a1776 100644
--- a/unsupported/test/cxx11_tensor_comparisons.cpp
+++ b/unsupported/test/cxx11_tensor_comparisons.cpp
@@ -132,8 +132,61 @@
}
}
+static void test_isinf() {
+ Tensor<Scalar, 3> mat(2, 3, 7);
+
+ mat.setRandom();
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ if (internal::random<bool>()) {
+ mat(i, j, k) = std::numeric_limits<Scalar>::infinity();
+ }
+ }
+ }
+ }
+ Tensor<bool, 3> inf(2, 3, 7);
+ inf = (mat.isinf)();
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_EQUAL(inf(i, j, k), (std::isinf)(mat(i, j, k)));
+ }
+ }
+ }
+}
+
+static void test_isfinite() {
+ Tensor<Scalar, 3> mat(2, 3, 7);
+
+ mat.setRandom();
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ if (internal::random<bool>()) {
+ mat(i, j, k) = std::numeric_limits<Scalar>::infinity();
+ }
+ if (internal::random<bool>()) {
+ mat(i, j, k) = std::numeric_limits<Scalar>::quiet_NaN();
+ }
+ }
+ }
+ }
+  Tensor<bool, 3> finite(2, 3, 7);
+  finite = (mat.isfinite)();
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+        VERIFY_IS_EQUAL(finite(i, j, k), (std::isfinite)(mat(i, j, k)));
+ }
+ }
+ }
+}
+
EIGEN_DECLARE_TEST(cxx11_tensor_comparisons) {
CALL_SUBTEST(test_orderings());
CALL_SUBTEST(test_equality());
CALL_SUBTEST(test_isnan());
+ CALL_SUBTEST(test_isinf());
+ CALL_SUBTEST(test_isfinite());
}