Update Eigen to commit:66f7f51b7e069d0a03a21157fa60b24aece69aeb
CHANGELOG
=========
66f7f51b7 - Disable fno-check-new on clang.
151f6127d - Fix Warray-bounds warning for fixed-size assignments
1d8b82b07 - Fix power builds for no VSX and no POWER8.
eb3f9f443 - refactor AssignmentEvaluator
9c211430b - Fix TensorRef details
22cd7307d - Remove assumption of std::complex for complex scalar types.
6b4881ad4 - Eliminate type-punning UB in Eigen::half.
420d891de - Add missing mathjax/latex configuration.
becefd59e - Returns condition number of zero if matrix is not invertible.
809d266b4 - Fix numerical issues with BiCGSTAB.
ef475f277 - Add missing graphviz to doc build.
a0591cbc9 - Fix doxygen-generated pages
715deac18 - Add EIGEN_CI_CTEST_ARGS to allow for custom timeout.
PiperOrigin-RevId: 729330781
Change-Id: I16aadc1cb1ddf4816604ce120e933f9660da17f8
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 8954841..5a2a3ac 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -27,125 +27,115 @@
// copy_using_evaluator_traits is based on assign_traits
-template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = -1>
+template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = Dynamic>
struct copy_using_evaluator_traits {
- typedef typename DstEvaluator::XprType Dst;
- typedef typename Dst::Scalar DstScalar;
+ using Src = typename SrcEvaluator::XprType;
+ using Dst = typename DstEvaluator::XprType;
+ using DstScalar = typename Dst::Scalar;
- enum { DstFlags = DstEvaluator::Flags, SrcFlags = SrcEvaluator::Flags };
+ static constexpr int DstFlags = DstEvaluator::Flags;
+ static constexpr int SrcFlags = SrcEvaluator::Flags;
public:
- enum {
- DstAlignment = DstEvaluator::Alignment,
- SrcAlignment = SrcEvaluator::Alignment,
- DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
- JointAlignment = plain_enum_min(DstAlignment, SrcAlignment)
- };
-
- private:
- enum {
- InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
- : int(DstFlags) & RowMajorBit ? int(Dst::ColsAtCompileTime)
- : int(Dst::RowsAtCompileTime),
- InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
- : int(DstFlags) & RowMajorBit ? int(Dst::MaxColsAtCompileTime)
- : int(Dst::MaxRowsAtCompileTime),
- RestrictedInnerSize = min_size_prefer_fixed(InnerSize, MaxPacketSize),
- RestrictedLinearSize = min_size_prefer_fixed(Dst::SizeAtCompileTime, MaxPacketSize),
- OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
- MaxSizeAtCompileTime = Dst::SizeAtCompileTime
- };
+ static constexpr int DstAlignment = DstEvaluator::Alignment;
+ static constexpr int SrcAlignment = SrcEvaluator::Alignment;
+ static constexpr int JointAlignment = plain_enum_min(DstAlignment, SrcAlignment);
+ static constexpr bool DstHasDirectAccess = bool(DstFlags & DirectAccessBit);
+ static constexpr bool SrcIsRowMajor = bool(SrcFlags & RowMajorBit);
+ static constexpr bool DstIsRowMajor = bool(DstFlags & RowMajorBit);
+ static constexpr bool IsVectorAtCompileTime = Dst::IsVectorAtCompileTime;
+ static constexpr int RowsAtCompileTime = size_prefer_fixed(Src::RowsAtCompileTime, Dst::RowsAtCompileTime);
+ static constexpr int ColsAtCompileTime = size_prefer_fixed(Src::ColsAtCompileTime, Dst::ColsAtCompileTime);
+ static constexpr int SizeAtCompileTime = size_at_compile_time(RowsAtCompileTime, ColsAtCompileTime);
+ static constexpr int MaxRowsAtCompileTime =
+ min_size_prefer_fixed(Src::MaxRowsAtCompileTime, Dst::MaxRowsAtCompileTime);
+ static constexpr int MaxColsAtCompileTime =
+ min_size_prefer_fixed(Src::MaxColsAtCompileTime, Dst::MaxColsAtCompileTime);
+ static constexpr int MaxSizeAtCompileTime =
+ min_size_prefer_fixed(Src::MaxSizeAtCompileTime, Dst::MaxSizeAtCompileTime);
+ static constexpr int InnerSizeAtCompileTime = IsVectorAtCompileTime ? SizeAtCompileTime
+ : DstIsRowMajor ? ColsAtCompileTime
+ : RowsAtCompileTime;
+ static constexpr int MaxInnerSizeAtCompileTime = IsVectorAtCompileTime ? MaxSizeAtCompileTime
+ : DstIsRowMajor ? MaxColsAtCompileTime
+ : MaxRowsAtCompileTime;
+ static constexpr int RestrictedInnerSize = min_size_prefer_fixed(MaxInnerSizeAtCompileTime, MaxPacketSize);
+ static constexpr int RestrictedLinearSize = min_size_prefer_fixed(MaxSizeAtCompileTime, MaxPacketSize);
+ static constexpr int OuterStride = outer_stride_at_compile_time<Dst>::ret;
// TODO distinguish between linear traversal and inner-traversals
- typedef typename find_best_packet<DstScalar, RestrictedLinearSize>::type LinearPacketType;
- typedef typename find_best_packet<DstScalar, RestrictedInnerSize>::type InnerPacketType;
+ using LinearPacketType = typename find_best_packet<DstScalar, RestrictedLinearSize>::type;
+ using InnerPacketType = typename find_best_packet<DstScalar, RestrictedInnerSize>::type;
- enum {
- LinearPacketSize = unpacket_traits<LinearPacketType>::size,
- InnerPacketSize = unpacket_traits<InnerPacketType>::size
- };
+ static constexpr int LinearPacketSize = unpacket_traits<LinearPacketType>::size;
+ static constexpr int InnerPacketSize = unpacket_traits<InnerPacketType>::size;
public:
- enum {
- LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
- InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
- };
+ static constexpr int LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment;
+ static constexpr int InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment;
private:
- enum {
- DstIsRowMajor = DstFlags & RowMajorBit,
- SrcIsRowMajor = SrcFlags & RowMajorBit,
- StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
- MightVectorize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) &&
- bool(functor_traits<AssignFunc>::PacketAccess),
- MayInnerVectorize = MightVectorize && int(InnerSize) != Dynamic && int(InnerSize) % int(InnerPacketSize) == 0 &&
- int(OuterStride) != Dynamic && int(OuterStride) % int(InnerPacketSize) == 0 &&
- (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment) >= int(InnerRequiredAlignment)),
- MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
- MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess) &&
- (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment) >= int(LinearRequiredAlignment)) ||
- MaxSizeAtCompileTime == Dynamic),
- /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
- so it's only good for large enough sizes. */
- MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess) &&
- (int(InnerMaxSize) == Dynamic ||
- int(InnerMaxSize) >= (EIGEN_UNALIGNED_VECTORIZE ? InnerPacketSize : (3 * InnerPacketSize)))
- /* slice vectorization can be slow, so we only want it if the slices are big, which is
- indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
- in a fixed-size matrix
- However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
- };
+ static constexpr bool StorageOrdersAgree = DstIsRowMajor == SrcIsRowMajor;
+ static constexpr bool MightVectorize = StorageOrdersAgree && bool(DstFlags & SrcFlags & ActualPacketAccessBit) &&
+ bool(functor_traits<AssignFunc>::PacketAccess);
+ static constexpr bool MayInnerVectorize = MightVectorize && (InnerSizeAtCompileTime != Dynamic) &&
+ (InnerSizeAtCompileTime % InnerPacketSize == 0) &&
+ (OuterStride != Dynamic) && (OuterStride % InnerPacketSize == 0) &&
+ (EIGEN_UNALIGNED_VECTORIZE || JointAlignment >= InnerRequiredAlignment);
+ static constexpr bool MayLinearize = StorageOrdersAgree && (DstFlags & SrcFlags & LinearAccessBit);
+ static constexpr bool MayLinearVectorize =
+ MightVectorize && MayLinearize && DstHasDirectAccess &&
+ (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment) || MaxSizeAtCompileTime == Dynamic) &&
+ (MaxSizeAtCompileTime == Dynamic || MaxSizeAtCompileTime >= LinearPacketSize);
+ /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
+ so it's only good for large enough sizes. */
+ static constexpr int InnerSizeThreshold = (EIGEN_UNALIGNED_VECTORIZE ? 1 : 3) * InnerPacketSize;
+ static constexpr bool MaySliceVectorize =
+ MightVectorize && DstHasDirectAccess &&
+ (MaxInnerSizeAtCompileTime == Dynamic || MaxInnerSizeAtCompileTime >= InnerSizeThreshold);
+ /* slice vectorization can be slow, so we only want it if the slices are big, which is
+ indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
+ in a fixed-size matrix
+ However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
public:
- enum {
- Traversal = int(Dst::SizeAtCompileTime) == 0
- ? int(AllAtOnceTraversal) // If compile-size is zero, traversing will fail at compile-time.
- : (int(MayLinearVectorize) && (LinearPacketSize > InnerPacketSize)) ? int(LinearVectorizedTraversal)
- : int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
- : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
- : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
- : int(MayLinearize) ? int(LinearTraversal)
- : int(DefaultTraversal),
- Vectorized = int(Traversal) == InnerVectorizedTraversal || int(Traversal) == LinearVectorizedTraversal ||
- int(Traversal) == SliceVectorizedTraversal
- };
+ static constexpr int Traversal = SizeAtCompileTime == 0 ? AllAtOnceTraversal
+ : (MayLinearVectorize && (LinearPacketSize > InnerPacketSize))
+ ? LinearVectorizedTraversal
+ : MayInnerVectorize ? InnerVectorizedTraversal
+ : MayLinearVectorize ? LinearVectorizedTraversal
+ : MaySliceVectorize ? SliceVectorizedTraversal
+ : MayLinearize ? LinearTraversal
+ : DefaultTraversal;
+ static constexpr bool Vectorized = Traversal == InnerVectorizedTraversal || Traversal == LinearVectorizedTraversal ||
+ Traversal == SliceVectorizedTraversal;
- typedef std::conditional_t<int(Traversal) == LinearVectorizedTraversal, LinearPacketType, InnerPacketType> PacketType;
+ using PacketType = std::conditional_t<Traversal == LinearVectorizedTraversal, LinearPacketType, InnerPacketType>;
private:
- enum {
- ActualPacketSize = int(Traversal) == LinearVectorizedTraversal ? LinearPacketSize
- : Vectorized ? InnerPacketSize
- : 1,
- UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
- MayUnrollCompletely =
- int(Dst::SizeAtCompileTime) != Dynamic &&
- int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost)) <=
- int(UnrollingLimit),
- MayUnrollInner =
- int(InnerSize) != Dynamic &&
- int(InnerSize) * (int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
- };
+ static constexpr int ActualPacketSize = Vectorized ? unpacket_traits<PacketType>::size : 1;
+ static constexpr int UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize;
+ static constexpr int CoeffReadCost = int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost);
+ static constexpr bool MayUnrollCompletely =
+ (SizeAtCompileTime != Dynamic) && (SizeAtCompileTime * CoeffReadCost <= UnrollingLimit);
+ static constexpr bool MayUnrollInner =
+ (InnerSizeAtCompileTime != Dynamic) && (InnerSizeAtCompileTime * CoeffReadCost <= UnrollingLimit);
public:
- enum {
- Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
- ? (int(MayUnrollCompletely) ? int(CompleteUnrolling)
- : int(MayUnrollInner) ? int(InnerUnrolling)
- : int(NoUnrolling))
- : int(Traversal) == int(LinearVectorizedTraversal)
- ? (bool(MayUnrollCompletely) &&
- (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment) >= int(LinearRequiredAlignment)))
- ? int(CompleteUnrolling)
- : int(NoUnrolling))
- : int(Traversal) == int(LinearTraversal)
- ? (bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling))
+ static constexpr int Unrolling =
+ (Traversal == InnerVectorizedTraversal || Traversal == DefaultTraversal)
+ ? (MayUnrollCompletely ? CompleteUnrolling
+ : MayUnrollInner ? InnerUnrolling
+ : NoUnrolling)
+ : Traversal == LinearVectorizedTraversal
+ ? (MayUnrollCompletely && (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment))
+ ? CompleteUnrolling
+ : NoUnrolling)
+ : Traversal == LinearTraversal ? (MayUnrollCompletely ? CompleteUnrolling : NoUnrolling)
#if EIGEN_UNALIGNED_VECTORIZE
- : int(Traversal) == int(SliceVectorizedTraversal)
- ? (bool(MayUnrollInner) ? int(InnerUnrolling) : int(NoUnrolling))
+ : Traversal == SliceVectorizedTraversal ? (MayUnrollInner ? InnerUnrolling : NoUnrolling)
#endif
- : int(NoUnrolling)
- };
+ : NoUnrolling;
#ifdef EIGEN_DEBUG_ASSIGN
static void debug() {
@@ -162,8 +152,8 @@
EIGEN_DEBUG_VAR(LinearRequiredAlignment)
EIGEN_DEBUG_VAR(InnerRequiredAlignment)
EIGEN_DEBUG_VAR(JointAlignment)
- EIGEN_DEBUG_VAR(InnerSize)
- EIGEN_DEBUG_VAR(InnerMaxSize)
+ EIGEN_DEBUG_VAR(InnerSizeAtCompileTime)
+ EIGEN_DEBUG_VAR(MaxInnerSizeAtCompileTime)
EIGEN_DEBUG_VAR(LinearPacketSize)
EIGEN_DEBUG_VAR(InnerPacketSize)
EIGEN_DEBUG_VAR(ActualPacketSize)
@@ -196,17 +186,14 @@
*** Default traversal ***
************************/
-template <typename Kernel, int Index, int Stop>
+template <typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling {
- // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
- typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
- typedef typename DstEvaluatorType::XprType DstXprType;
-
- enum { outer = Index / DstXprType::InnerSizeAtCompileTime, inner = Index % DstXprType::InnerSizeAtCompileTime };
+ static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime;
+ static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime;
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
- kernel.assignCoeffByOuterInner(outer, inner);
- copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index + 1, Stop>::run(kernel);
+ kernel.assignCoeffByOuterInner(Outer, Inner);
+ copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index_ + 1, Stop>::run(kernel);
}
};
@@ -232,11 +219,11 @@
*** Linear traversal ***
***********************/
-template <typename Kernel, int Index, int Stop>
+template <typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling {
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
- kernel.assignCoeff(Index);
- copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index + 1, Stop>::run(kernel);
+ kernel.assignCoeff(Index_);
+ copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index_ + 1, Stop>::run(kernel);
}
};
@@ -249,23 +236,17 @@
*** Inner vectorization ***
**************************/
-template <typename Kernel, int Index, int Stop>
+template <typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling {
- // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
- typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
- typedef typename DstEvaluatorType::XprType DstXprType;
- typedef typename Kernel::PacketType PacketType;
-
- enum {
- outer = Index / DstXprType::InnerSizeAtCompileTime,
- inner = Index % DstXprType::InnerSizeAtCompileTime,
- SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
- DstAlignment = Kernel::AssignmentTraits::DstAlignment
- };
+ using PacketType = typename Kernel::PacketType;
+ static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime;
+ static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime;
+ static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
+ static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
- kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
- enum { NextIndex = Index + unpacket_traits<PacketType>::size };
+ kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(Outer, Inner);
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
}
};
@@ -277,10 +258,11 @@
template <typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling {
- typedef typename Kernel::PacketType PacketType;
+ using PacketType = typename Kernel::PacketType;
+ static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) {
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
- enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel,
outer);
}
@@ -308,9 +290,10 @@
// Zero-sized assignment is a no-op.
template <typename Kernel, int Unrolling>
struct dense_assignment_loop<Kernel, AllAtOnceTraversal, Unrolling> {
+ static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
+
EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE EIGEN_CONSTEXPR run(Kernel& /*kernel*/) {
- EIGEN_STATIC_ASSERT(int(Kernel::DstEvaluatorType::XprType::SizeAtCompileTime) == 0,
- EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
+ EIGEN_STATIC_ASSERT(SizeAtCompileTime == 0, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
}
};
@@ -331,21 +314,21 @@
template <typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling> {
+ static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
- copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+ copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, SizeAtCompileTime>::run(kernel);
}
};
template <typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling> {
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ static constexpr int InnerSizeAtCompileTime = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
const Index outerSize = kernel.outerSize();
for (Index outer = 0; outer < outerSize; ++outer)
- copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel,
- outer);
+ copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, InnerSizeAtCompileTime>::run(kernel, outer);
}
};
@@ -380,18 +363,15 @@
}
};
-template <typename Kernel, int Index, int Stop>
+template <typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_linearvec_CompleteUnrolling {
- // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
- typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
- typedef typename DstEvaluatorType::XprType DstXprType;
- typedef typename Kernel::PacketType PacketType;
-
- enum { SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, DstAlignment = Kernel::AssignmentTraits::DstAlignment };
+ using PacketType = typename Kernel::PacketType;
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
+ static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
+ static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
- kernel.template assignPacket<DstAlignment, SrcAlignment, PacketType>(Index);
- enum { NextIndex = Index + unpacket_traits<PacketType>::size };
+ kernel.template assignPacket<DstAlignment, SrcAlignment, PacketType>(Index_);
copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
}
};
@@ -403,26 +383,24 @@
template <typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling> {
+ using Scalar = typename Kernel::Scalar;
+ using PacketType = typename Kernel::PacketType;
+ static constexpr int PacketSize = unpacket_traits<PacketType>::size;
+ static constexpr int RequestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment;
+ static constexpr bool DstIsAligned = Kernel::AssignmentTraits::DstAlignment >= RequestedAlignment;
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
+ static constexpr int DstAlignment =
+ packet_traits<Scalar>::AlignedOnScalar ? RequestedAlignment : Kernel::AssignmentTraits::DstAlignment;
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
const Index size = kernel.size();
- typedef typename Kernel::Scalar Scalar;
- typedef typename Kernel::PacketType PacketType;
- enum {
- requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
- packetSize = unpacket_traits<PacketType>::size,
- dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment) >= int(requestedAlignment),
- dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
- : int(Kernel::AssignmentTraits::DstAlignment),
- srcAlignment = Kernel::AssignmentTraits::JointAlignment
- };
- const Index alignedStart =
- dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
- const Index alignedEnd = alignedStart + ((size - alignedStart) / packetSize) * packetSize;
+ const Index alignedStart = DstIsAligned ? 0 : first_aligned<RequestedAlignment>(kernel.dstDataPtr(), size);
+ const Index alignedEnd = alignedStart + numext::round_down(size - alignedStart, PacketSize);
- unaligned_dense_assignment_loop<dstIsAligned != 0>::run(kernel, 0, alignedStart);
+ unaligned_dense_assignment_loop<DstIsAligned>::run(kernel, 0, alignedStart);
- for (Index index = alignedStart; index < alignedEnd; index += packetSize)
- kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
+ for (Index index = alignedStart; index < alignedEnd; index += PacketSize)
+ kernel.template assignPacket<DstAlignment, SrcAlignment, PacketType>(index);
unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
}
@@ -430,18 +408,14 @@
template <typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling> {
+ using PacketType = typename Kernel::PacketType;
+ static constexpr int PacketSize = unpacket_traits<PacketType>::size;
+ static constexpr int Size = Kernel::AssignmentTraits::SizeAtCompileTime;
+ static constexpr int AlignedSize = numext::round_down(Size, PacketSize);
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
- typedef typename Kernel::PacketType PacketType;
-
- enum {
- size = DstXprType::SizeAtCompileTime,
- packetSize = unpacket_traits<PacketType>::size,
- alignedSize = (int(size) / packetSize) * packetSize
- };
-
- copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
- copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
+ copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, 0, AlignedSize>::run(kernel);
+ copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, AlignedSize, Size>::run(kernel);
}
};
@@ -451,35 +425,40 @@
template <typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling> {
- typedef typename Kernel::PacketType PacketType;
- enum { SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, DstAlignment = Kernel::AssignmentTraits::DstAlignment };
+ using PacketType = typename Kernel::PacketType;
+ static constexpr int PacketSize = unpacket_traits<PacketType>::size;
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
+ static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
const Index innerSize = kernel.innerSize();
const Index outerSize = kernel.outerSize();
- const Index packetSize = unpacket_traits<PacketType>::size;
for (Index outer = 0; outer < outerSize; ++outer)
- for (Index inner = 0; inner < innerSize; inner += packetSize)
+ for (Index inner = 0; inner < innerSize; inner += PacketSize)
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
}
};
template <typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling> {
+ static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
- copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+ copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, SizeAtCompileTime>::run(kernel);
}
};
template <typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling> {
+ static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
+ static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
+ static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
- typedef typename Kernel::AssignmentTraits Traits;
const Index outerSize = kernel.outerSize();
for (Index outer = 0; outer < outerSize; ++outer)
- copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime, Traits::SrcAlignment,
- Traits::DstAlignment>::run(kernel, outer);
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, InnerSize, SrcAlignment, DstAlignment>::run(kernel,
+ outer);
}
};
@@ -498,8 +477,8 @@
template <typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling> {
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
- copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+ copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, Kernel::AssignmentTraits::SizeAtCompileTime>::run(
+ kernel);
}
};
@@ -509,42 +488,40 @@
template <typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling> {
+ using Scalar = typename Kernel::Scalar;
+ using PacketType = typename Kernel::PacketType;
+ static constexpr int PacketSize = unpacket_traits<PacketType>::size;
+ static constexpr int RequestedAlignment = Kernel::AssignmentTraits::InnerRequiredAlignment;
+ static constexpr bool Alignable =
+ packet_traits<Scalar>::AlignedOnScalar || Kernel::AssignmentTraits::DstAlignment >= sizeof(Scalar);
+ static constexpr bool DstIsAligned = Kernel::AssignmentTraits::DstAlignment >= RequestedAlignment;
+ static constexpr int DstAlignment = Alignable ? RequestedAlignment : Kernel::AssignmentTraits::DstAlignment;
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
- typedef typename Kernel::Scalar Scalar;
- typedef typename Kernel::PacketType PacketType;
- enum {
- packetSize = unpacket_traits<PacketType>::size,
- requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
- alignable =
- packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment) >= sizeof(Scalar),
- dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment) >= int(requestedAlignment),
- dstAlignment = alignable ? int(requestedAlignment) : int(Kernel::AssignmentTraits::DstAlignment)
- };
const Scalar* dst_ptr = kernel.dstDataPtr();
- if ((!bool(dstIsAligned)) && (std::uintptr_t(dst_ptr) % sizeof(Scalar)) > 0) {
+ if ((!DstIsAligned) && (std::uintptr_t(dst_ptr) % sizeof(Scalar)) > 0) {
// the pointer is not aligned-on scalar, so alignment is not possible
return dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>::run(kernel);
}
- const Index packetAlignedMask = packetSize - 1;
const Index innerSize = kernel.innerSize();
const Index outerSize = kernel.outerSize();
- const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
+ const Index alignedStep = Alignable ? (PacketSize - kernel.outerStride() % PacketSize) % PacketSize : 0;
Index alignedStart =
- ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
+ ((!Alignable) || DstIsAligned) ? 0 : internal::first_aligned<RequestedAlignment>(dst_ptr, innerSize);
for (Index outer = 0; outer < outerSize; ++outer) {
- const Index alignedEnd = alignedStart + ((innerSize - alignedStart) & ~packetAlignedMask);
+ const Index alignedEnd = alignedStart + numext::round_down(innerSize - alignedStart, PacketSize);
// do the non-vectorizable part of the assignment
for (Index inner = 0; inner < alignedStart; ++inner) kernel.assignCoeffByOuterInner(outer, inner);
// do the vectorizable part of the assignment
- for (Index inner = alignedStart; inner < alignedEnd; inner += packetSize)
- kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
+ for (Index inner = alignedStart; inner < alignedEnd; inner += PacketSize)
+ kernel.template assignPacketByOuterInner<DstAlignment, Unaligned, PacketType>(outer, inner);
// do the non-vectorizable part of the assignment
for (Index inner = alignedEnd; inner < innerSize; ++inner) kernel.assignCoeffByOuterInner(outer, inner);
- alignedStart = numext::mini((alignedStart + alignedStep) % packetSize, innerSize);
+ alignedStart = numext::mini((alignedStart + alignedStep) % PacketSize, innerSize);
}
}
};
@@ -552,20 +529,15 @@
#if EIGEN_UNALIGNED_VECTORIZE
template <typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling> {
+ using PacketType = typename Kernel::PacketType;
+ static constexpr int PacketSize = unpacket_traits<PacketType>::size;
+ static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
+ static constexpr int VectorizableSize = numext::round_down(InnerSize, PacketSize);
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
- typedef typename Kernel::PacketType PacketType;
-
- enum {
- innerSize = DstXprType::InnerSizeAtCompileTime,
- packetSize = unpacket_traits<PacketType>::size,
- vectorizableSize = (int(innerSize) / int(packetSize)) * int(packetSize),
- size = DstXprType::SizeAtCompileTime
- };
-
for (Index outer = 0; outer < kernel.outerSize(); ++outer) {
- copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
- copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, innerSize>::run(kernel, outer);
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, VectorizableSize, 0, 0>::run(kernel, outer);
+ copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, VectorizableSize, InnerSize>::run(kernel, outer);
}
}
};
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index c081499..528aed2 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -182,6 +182,35 @@
typedef typename NumTraits<Scalar>::Real& type;
};
+} // namespace internal
+
+namespace numext {
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline internal::add_const_on_value_type_t<EIGEN_MATHFUNC_RETVAL(real_ref, Scalar)> real_ref(
+ const Scalar& x) {
+ return internal::real_ref_impl<Scalar>::run(x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x);
+}
+
+} // namespace numext
+
+namespace internal {
+
/****************************************************************************
* Implementation of conj *
****************************************************************************/
@@ -221,7 +250,9 @@
struct abs2_impl_default<Scalar, true> // IsComplex
{
typedef typename NumTraits<Scalar>::Real RealScalar;
- EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { return x.real() * x.real() + x.imag() * x.imag(); }
+ EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) {
+ return numext::real(x) * numext::real(x) + numext::imag(x) * numext::imag(x);
+ }
};
template <typename Scalar>
@@ -250,16 +281,14 @@
};
// Complex sqrt defined in MathFunctionsImpl.h.
-template <typename T>
-EIGEN_DEVICE_FUNC std::complex<T> complex_sqrt(const std::complex<T>& a_x);
+template <typename ComplexT>
+EIGEN_DEVICE_FUNC ComplexT complex_sqrt(const ComplexT& a_x);
// Custom implementation is faster than `std::sqrt`, works on
// GPU, and correctly handles special cases (unlike MSVC).
template <typename T>
struct sqrt_impl<std::complex<T>> {
- EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& x) {
- return complex_sqrt<T>(x);
- }
+ EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& x) { return complex_sqrt(x); }
};
template <typename Scalar>
@@ -272,13 +301,13 @@
struct rsqrt_impl;
// Complex rsqrt defined in MathFunctionsImpl.h.
-template <typename T>
-EIGEN_DEVICE_FUNC std::complex<T> complex_rsqrt(const std::complex<T>& a_x);
+template <typename ComplexT>
+EIGEN_DEVICE_FUNC ComplexT complex_rsqrt(const ComplexT& a_x);
template <typename T>
struct rsqrt_impl<std::complex<T>> {
EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& x) {
- return complex_rsqrt<T>(x);
+ return complex_rsqrt(x);
}
};
@@ -299,7 +328,7 @@
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) {
EIGEN_USING_STD(abs);
- return abs(x.real()) + abs(x.imag());
+ return abs(numext::real(x)) + abs(numext::imag(x));
}
};
@@ -469,8 +498,8 @@
****************************************************************************/
// Complex log defined in MathFunctionsImpl.h.
-template <typename T>
-EIGEN_DEVICE_FUNC std::complex<T> complex_log(const std::complex<T>& z);
+template <typename ComplexT>
+EIGEN_DEVICE_FUNC ComplexT complex_log(const ComplexT& z);
template <typename Scalar>
struct log_impl {
@@ -846,7 +875,7 @@
real_type aa = abs(a);
if (aa == real_type(0)) return Scalar(0);
aa = real_type(1) / aa;
- return Scalar(a.real() * aa, a.imag() * aa);
+ return Scalar(numext::real(a) * aa, numext::imag(a) * aa);
}
};
@@ -1043,27 +1072,6 @@
#endif
template <typename Scalar>
-EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x) {
- return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);
-}
-
-template <typename Scalar>
-EIGEN_DEVICE_FUNC inline internal::add_const_on_value_type_t<EIGEN_MATHFUNC_RETVAL(real_ref, Scalar)> real_ref(
- const Scalar& x) {
- return internal::real_ref_impl<Scalar>::run(x);
-}
-
-template <typename Scalar>
-EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x) {
- return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x);
-}
-
-template <typename Scalar>
-EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x) {
- return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x);
-}
-
-template <typename Scalar>
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x) {
return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x);
}
diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h
index 10ddabd..8e2705b 100644
--- a/Eigen/src/Core/MathFunctionsImpl.h
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -171,8 +171,8 @@
// Generic complex sqrt implementation that correctly handles corner cases
// according to https://en.cppreference.com/w/cpp/numeric/complex/sqrt
-template <typename T>
-EIGEN_DEVICE_FUNC std::complex<T> complex_sqrt(const std::complex<T>& z) {
+template <typename ComplexT>
+EIGEN_DEVICE_FUNC ComplexT complex_sqrt(const ComplexT& z) {
// Computes the principal sqrt of the input.
//
// For a complex square root of the number x + i*y. We want to find real
@@ -194,21 +194,21 @@
// if x == 0: u = w, v = sign(y) * w
// if x > 0: u = w, v = y / (2 * w)
// if x < 0: u = |y| / (2 * w), v = sign(y) * w
-
+ using T = typename NumTraits<ComplexT>::Real;
const T x = numext::real(z);
const T y = numext::imag(z);
const T zero = T(0);
const T w = numext::sqrt(T(0.5) * (numext::abs(x) + numext::hypot(x, y)));
- return (numext::isinf)(y) ? std::complex<T>(NumTraits<T>::infinity(), y)
- : numext::is_exactly_zero(x) ? std::complex<T>(w, y < zero ? -w : w)
- : x > zero ? std::complex<T>(w, y / (2 * w))
- : std::complex<T>(numext::abs(y) / (2 * w), y < zero ? -w : w);
+ return (numext::isinf)(y) ? ComplexT(NumTraits<T>::infinity(), y)
+ : numext::is_exactly_zero(x) ? ComplexT(w, y < zero ? -w : w)
+ : x > zero ? ComplexT(w, y / (2 * w))
+ : ComplexT(numext::abs(y) / (2 * w), y < zero ? -w : w);
}
// Generic complex rsqrt implementation.
-template <typename T>
-EIGEN_DEVICE_FUNC std::complex<T> complex_rsqrt(const std::complex<T>& z) {
+template <typename ComplexT>
+EIGEN_DEVICE_FUNC ComplexT complex_rsqrt(const ComplexT& z) {
// Computes the principal reciprocal sqrt of the input.
//
// For a complex reciprocal square root of the number z = x + i*y. We want to
@@ -230,7 +230,7 @@
// if x == 0: u = w / |z|, v = -sign(y) * w / |z|
// if x > 0: u = w / |z|, v = -y / (2 * w * |z|)
// if x < 0: u = |y| / (2 * w * |z|), v = -sign(y) * w / |z|
-
+ using T = typename NumTraits<ComplexT>::Real;
const T x = numext::real(z);
const T y = numext::imag(z);
const T zero = T(0);
@@ -239,20 +239,21 @@
const T w = numext::sqrt(T(0.5) * (numext::abs(x) + abs_z));
const T woz = w / abs_z;
// Corner cases consistent with 1/sqrt(z) on gcc/clang.
- return numext::is_exactly_zero(abs_z) ? std::complex<T>(NumTraits<T>::infinity(), NumTraits<T>::quiet_NaN())
- : ((numext::isinf)(x) || (numext::isinf)(y)) ? std::complex<T>(zero, zero)
- : numext::is_exactly_zero(x) ? std::complex<T>(woz, y < zero ? woz : -woz)
- : x > zero ? std::complex<T>(woz, -y / (2 * w * abs_z))
- : std::complex<T>(numext::abs(y) / (2 * w * abs_z), y < zero ? woz : -woz);
+ return numext::is_exactly_zero(abs_z) ? ComplexT(NumTraits<T>::infinity(), NumTraits<T>::quiet_NaN())
+ : ((numext::isinf)(x) || (numext::isinf)(y)) ? ComplexT(zero, zero)
+ : numext::is_exactly_zero(x) ? ComplexT(woz, y < zero ? woz : -woz)
+ : x > zero ? ComplexT(woz, -y / (2 * w * abs_z))
+ : ComplexT(numext::abs(y) / (2 * w * abs_z), y < zero ? woz : -woz);
}
-template <typename T>
-EIGEN_DEVICE_FUNC std::complex<T> complex_log(const std::complex<T>& z) {
+template <typename ComplexT>
+EIGEN_DEVICE_FUNC ComplexT complex_log(const ComplexT& z) {
// Computes complex log.
+ using T = typename NumTraits<ComplexT>::Real;
T a = numext::abs(z);
EIGEN_USING_STD(atan2);
T b = atan2(z.imag(), z.real());
- return std::complex<T>(numext::log(a), b);
+ return ComplexT(numext::log(a), b);
}
} // end namespace internal
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 5466a57..8d5c47e 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -112,7 +112,7 @@
ConstTransposeReturnType>
AdjointReturnType;
/** \internal Return type of eigenvalues() */
- typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor>
+ typedef Matrix<internal::make_complex_t<Scalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor>
EigenvaluesReturnType;
/** \internal the return type of identity */
typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>, PlainObject> IdentityReturnType;
@@ -468,7 +468,7 @@
EIGEN_MATRIX_FUNCTION(MatrixSquareRootReturnValue, sqrt, square root)
EIGEN_MATRIX_FUNCTION(MatrixLogarithmReturnValue, log, logarithm)
EIGEN_MATRIX_FUNCTION_1(MatrixPowerReturnValue, pow, power to \c p, const RealScalar& p)
- EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const std::complex<RealScalar>& p)
+ EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const internal::make_complex_t<Scalar>& p)
protected:
EIGEN_DEFAULT_COPY_CONSTRUCTOR(MatrixBase)
diff --git a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
index df5a0ef..038e233 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
@@ -1,877 +1,877 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-//
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_PACKET_MATH_FP16_AVX512_H
-#define EIGEN_PACKET_MATH_FP16_AVX512_H
-
-// IWYU pragma: private
-#include "../../InternalHeaderCheck.h"
-
-namespace Eigen {
-
-namespace internal {
-
-typedef __m512h Packet32h;
-typedef eigen_packet_wrapper<__m256i, 1> Packet16h;
-typedef eigen_packet_wrapper<__m128i, 2> Packet8h;
-
-template <>
-struct is_arithmetic<Packet8h> {
- enum { value = true };
-};
-
-template <>
-struct packet_traits<half> : default_packet_traits {
- typedef Packet32h type;
- typedef Packet16h half;
- enum {
- Vectorizable = 1,
- AlignedOnScalar = 1,
- size = 32,
-
- HasCmp = 1,
- HasAdd = 1,
- HasSub = 1,
- HasMul = 1,
- HasDiv = 1,
- HasNegate = 1,
- HasAbs = 1,
- HasAbs2 = 0,
- HasMin = 1,
- HasMax = 1,
- HasConj = 1,
- HasSetLinear = 0,
- HasLog = 1,
- HasLog1p = 1,
- HasExp = 1,
- HasExpm1 = 1,
- HasSqrt = 1,
- HasRsqrt = 1,
- // These ones should be implemented in future
- HasBessel = 0,
- HasNdtri = 0,
- HasSin = EIGEN_FAST_MATH,
- HasCos = EIGEN_FAST_MATH,
- HasTanh = EIGEN_FAST_MATH,
- HasErf = 0, // EIGEN_FAST_MATH,
- HasBlend = 0
- };
-};
-
-template <>
-struct unpacket_traits<Packet32h> {
- typedef Eigen::half type;
- typedef Packet16h half;
- enum {
- size = 32,
- alignment = Aligned64,
- vectorizable = true,
- masked_load_available = false,
- masked_store_available = false
- };
-};
-
-template <>
-struct unpacket_traits<Packet16h> {
- typedef Eigen::half type;
- typedef Packet8h half;
- enum {
- size = 16,
- alignment = Aligned32,
- vectorizable = true,
- masked_load_available = false,
- masked_store_available = false
- };
-};
-
-template <>
-struct unpacket_traits<Packet8h> {
- typedef Eigen::half type;
- typedef Packet8h half;
- enum {
- size = 8,
- alignment = Aligned16,
- vectorizable = true,
- masked_load_available = false,
- masked_store_available = false
- };
-};
-
-// Memory functions
-
-// pset1
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pset1<Packet32h>(const Eigen::half& from) {
- // half/half_raw is bit compatible
- return _mm512_set1_ph(numext::bit_cast<_Float16>(from));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pzero(const Packet32h& /*a*/) {
- return _mm512_setzero_ph();
-}
-
-// pset1frombits
-template <>
-EIGEN_STRONG_INLINE Packet32h pset1frombits<Packet32h>(unsigned short from) {
- return _mm512_castsi512_ph(_mm512_set1_epi16(from));
-}
-
-// pfirst
-
-template <>
-EIGEN_STRONG_INLINE Eigen::half pfirst<Packet32h>(const Packet32h& from) {
-#ifdef EIGEN_VECTORIZE_AVX512DQ
- return half_impl::raw_uint16_to_half(
- static_cast<unsigned short>(_mm256_extract_epi16(_mm512_extracti32x8_epi32(_mm512_castph_si512(from), 0), 0)));
-#else
- Eigen::half dest[32];
- _mm512_storeu_ph(dest, from);
- return dest[0];
-#endif
-}
-
-// pload
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pload<Packet32h>(const Eigen::half* from) {
- EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ph(from);
-}
-
-// ploadu
-
-template <>
-EIGEN_STRONG_INLINE Packet32h ploadu<Packet32h>(const Eigen::half* from) {
- EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ph(from);
-}
-
-// pstore
-
-template <>
-EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet32h& from) {
- EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ph(to, from);
-}
-
-// pstoreu
-
-template <>
-EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet32h& from) {
- EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ph(to, from);
-}
-
-// ploaddup
-template <>
-EIGEN_STRONG_INLINE Packet32h ploaddup<Packet32h>(const Eigen::half* from) {
- __m512h a = _mm512_castph256_ph512(_mm256_loadu_ph(from));
- return _mm512_permutexvar_ph(_mm512_set_epi16(15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6,
- 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0),
- a);
-}
-
-// ploadquad
-template <>
-EIGEN_STRONG_INLINE Packet32h ploadquad<Packet32h>(const Eigen::half* from) {
- __m512h a = _mm512_castph128_ph512(_mm_loadu_ph(from));
- return _mm512_permutexvar_ph(
- _mm512_set_epi16(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0),
- a);
-}
-
-// pabs
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pabs<Packet32h>(const Packet32h& a) {
- return _mm512_abs_ph(a);
-}
-
-// psignbit
-
-template <>
-EIGEN_STRONG_INLINE Packet32h psignbit<Packet32h>(const Packet32h& a) {
- return _mm512_castsi512_ph(_mm512_srai_epi16(_mm512_castph_si512(a), 15));
-}
-
-// pmin
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmin<Packet32h>(const Packet32h& a, const Packet32h& b) {
- return _mm512_min_ph(a, b);
-}
-
-// pmax
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmax<Packet32h>(const Packet32h& a, const Packet32h& b) {
- return _mm512_max_ph(a, b);
-}
-
-// plset
-template <>
-EIGEN_STRONG_INLINE Packet32h plset<Packet32h>(const half& a) {
- return _mm512_add_ph(pset1<Packet32h>(a), _mm512_set_ph(31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
- 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
-}
-
-// por
-
-template <>
-EIGEN_STRONG_INLINE Packet32h por(const Packet32h& a, const Packet32h& b) {
- return _mm512_castsi512_ph(_mm512_or_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
-}
-
-// pxor
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pxor(const Packet32h& a, const Packet32h& b) {
- return _mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
-}
-
-// pand
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pand(const Packet32h& a, const Packet32h& b) {
- return _mm512_castsi512_ph(_mm512_and_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
-}
-
-// pandnot
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pandnot(const Packet32h& a, const Packet32h& b) {
- return _mm512_castsi512_ph(_mm512_andnot_si512(_mm512_castph_si512(b), _mm512_castph_si512(a)));
-}
-
-// pselect
-
-template <>
-EIGEN_DEVICE_FUNC inline Packet32h pselect(const Packet32h& mask, const Packet32h& a, const Packet32h& b) {
- __mmask32 mask32 = _mm512_cmp_epi16_mask(_mm512_castph_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ);
- return _mm512_mask_blend_ph(mask32, a, b);
-}
-
-// pcmp_eq
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pcmp_eq(const Packet32h& a, const Packet32h& b) {
- __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_EQ_OQ);
- return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, static_cast<short>(0xffffu)));
-}
-
-// pcmp_le
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pcmp_le(const Packet32h& a, const Packet32h& b) {
- __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LE_OQ);
- return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, static_cast<short>(0xffffu)));
-}
-
-// pcmp_lt
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pcmp_lt(const Packet32h& a, const Packet32h& b) {
- __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LT_OQ);
- return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, static_cast<short>(0xffffu)));
-}
-
-// pcmp_lt_or_nan
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pcmp_lt_or_nan(const Packet32h& a, const Packet32h& b) {
- __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_NGE_UQ);
- return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi16(0), mask, static_cast<short>(0xffffu)));
-}
-
-// padd
-
-template <>
-EIGEN_STRONG_INLINE Packet32h padd<Packet32h>(const Packet32h& a, const Packet32h& b) {
- return _mm512_add_ph(a, b);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) {
- return _mm256_castph_si256(_mm256_add_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {
- return _mm_castph_si128(_mm_add_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
-}
-
-// psub
-
-template <>
-EIGEN_STRONG_INLINE Packet32h psub<Packet32h>(const Packet32h& a, const Packet32h& b) {
- return _mm512_sub_ph(a, b);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h psub<Packet16h>(const Packet16h& a, const Packet16h& b) {
- return _mm256_castph_si256(_mm256_sub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h psub<Packet8h>(const Packet8h& a, const Packet8h& b) {
- return _mm_castph_si128(_mm_sub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
-}
-
-// pmul
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmul<Packet32h>(const Packet32h& a, const Packet32h& b) {
- return _mm512_mul_ph(a, b);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) {
- return _mm256_castph_si256(_mm256_mul_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) {
- return _mm_castph_si128(_mm_mul_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
-}
-
-// pdiv
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pdiv<Packet32h>(const Packet32h& a, const Packet32h& b) {
- return _mm512_div_ph(a, b);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pdiv<Packet16h>(const Packet16h& a, const Packet16h& b) {
- return _mm256_castph_si256(_mm256_div_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const Packet8h& b) {
- return _mm_castph_si128(_mm_div_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
-}
-
-// pround
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pround<Packet32h>(const Packet32h& a) {
- // Work-around for default std::round rounding mode.
-
- // Mask for the sign bit
- const Packet32h signMask = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x8000u));
- // The largest half-preicision float less than 0.5
- const Packet32h prev0dot5 = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x37FFu));
-
- return _mm512_roundscale_ph(padd(por(pand(a, signMask), prev0dot5), a), _MM_FROUND_TO_ZERO);
-}
-
-// print
-
-template <>
-EIGEN_STRONG_INLINE Packet32h print<Packet32h>(const Packet32h& a) {
- return _mm512_roundscale_ph(a, _MM_FROUND_CUR_DIRECTION);
-}
-
-// pceil
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pceil<Packet32h>(const Packet32h& a) {
- return _mm512_roundscale_ph(a, _MM_FROUND_TO_POS_INF);
-}
-
-// pfloor
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pfloor<Packet32h>(const Packet32h& a) {
- return _mm512_roundscale_ph(a, _MM_FROUND_TO_NEG_INF);
-}
-
-// ptrunc
-
-template <>
-EIGEN_STRONG_INLINE Packet32h ptrunc<Packet32h>(const Packet32h& a) {
- return _mm512_roundscale_ph(a, _MM_FROUND_TO_ZERO);
-}
-
-// predux
-template <>
-EIGEN_STRONG_INLINE half predux<Packet32h>(const Packet32h& a) {
- return (half)_mm512_reduce_add_ph(a);
-}
-
-template <>
-EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& a) {
- return (half)_mm256_reduce_add_ph(_mm256_castsi256_ph(a));
-}
-
-template <>
-EIGEN_STRONG_INLINE half predux<Packet8h>(const Packet8h& a) {
- return (half)_mm_reduce_add_ph(_mm_castsi128_ph(a));
-}
-
-// predux_half_dowto4
-template <>
-EIGEN_STRONG_INLINE Packet16h predux_half_dowto4<Packet32h>(const Packet32h& a) {
-#ifdef EIGEN_VECTORIZE_AVX512DQ
- __m256i lowHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 0));
- __m256i highHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 1));
-
- return Packet16h(padd<Packet16h>(lowHalf, highHalf));
-#else
- Eigen::half data[32];
- _mm512_storeu_ph(data, a);
-
- __m256i lowHalf = _mm256_castph_si256(_mm256_loadu_ph(data));
- __m256i highHalf = _mm256_castph_si256(_mm256_loadu_ph(data + 16));
-
- return Packet16h(padd<Packet16h>(lowHalf, highHalf));
-#endif
-}
-
-// predux_max
-
-// predux_min
-
-// predux_mul
-
-#ifdef EIGEN_VECTORIZE_FMA
-
-// pmadd
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
- return _mm512_fmadd_ph(a, b, c);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
- return _mm256_castph_si256(_mm256_fmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
- return _mm_castph_si128(_mm_fmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
-}
-
-// pmsub
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
- return _mm512_fmsub_ph(a, b, c);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
- return _mm256_castph_si256(_mm256_fmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
- return _mm_castph_si128(_mm_fmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
-}
-
-// pnmadd
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pnmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
- return _mm512_fnmadd_ph(a, b, c);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pnmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
- return _mm256_castph_si256(_mm256_fnmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pnmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
- return _mm_castph_si128(_mm_fnmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
-}
-
-// pnmsub
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pnmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
- return _mm512_fnmsub_ph(a, b, c);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pnmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
- return _mm256_castph_si256(_mm256_fnmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pnmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
- return _mm_castph_si128(_mm_fnmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
-}
-
-#endif
-
-// pnegate
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pnegate<Packet32h>(const Packet32h& a) {
- return psub(pzero(a), a);
-}
-
-// pconj
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pconj<Packet32h>(const Packet32h& a) {
- return a;
-}
-
-// psqrt
-
-template <>
-EIGEN_STRONG_INLINE Packet32h psqrt<Packet32h>(const Packet32h& a) {
- return _mm512_sqrt_ph(a);
-}
-
-// prsqrt
-
-template <>
-EIGEN_STRONG_INLINE Packet32h prsqrt<Packet32h>(const Packet32h& a) {
- return _mm512_rsqrt_ph(a);
-}
-
-// preciprocal
-
-template <>
-EIGEN_STRONG_INLINE Packet32h preciprocal<Packet32h>(const Packet32h& a) {
- return _mm512_rcp_ph(a);
-}
-
-// ptranspose
-
-EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 32>& a) {
- __m512i t[32];
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i < 16; i++) {
- t[2 * i] = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));
- t[2 * i + 1] =
- _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));
- }
-
- __m512i p[32];
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i < 8; i++) {
- p[4 * i] = _mm512_unpacklo_epi32(t[4 * i], t[4 * i + 2]);
- p[4 * i + 1] = _mm512_unpackhi_epi32(t[4 * i], t[4 * i + 2]);
- p[4 * i + 2] = _mm512_unpacklo_epi32(t[4 * i + 1], t[4 * i + 3]);
- p[4 * i + 3] = _mm512_unpackhi_epi32(t[4 * i + 1], t[4 * i + 3]);
- }
-
- __m512i q[32];
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i < 4; i++) {
- q[8 * i] = _mm512_unpacklo_epi64(p[8 * i], p[8 * i + 4]);
- q[8 * i + 1] = _mm512_unpackhi_epi64(p[8 * i], p[8 * i + 4]);
- q[8 * i + 2] = _mm512_unpacklo_epi64(p[8 * i + 1], p[8 * i + 5]);
- q[8 * i + 3] = _mm512_unpackhi_epi64(p[8 * i + 1], p[8 * i + 5]);
- q[8 * i + 4] = _mm512_unpacklo_epi64(p[8 * i + 2], p[8 * i + 6]);
- q[8 * i + 5] = _mm512_unpackhi_epi64(p[8 * i + 2], p[8 * i + 6]);
- q[8 * i + 6] = _mm512_unpacklo_epi64(p[8 * i + 3], p[8 * i + 7]);
- q[8 * i + 7] = _mm512_unpackhi_epi64(p[8 * i + 3], p[8 * i + 7]);
- }
-
- __m512i f[32];
-
-#define PACKET32H_TRANSPOSE_HELPER(X, Y) \
- do { \
- f[Y * 8] = _mm512_inserti32x4(f[Y * 8], _mm512_extracti32x4_epi32(q[X * 8], Y), X); \
- f[Y * 8 + 1] = _mm512_inserti32x4(f[Y * 8 + 1], _mm512_extracti32x4_epi32(q[X * 8 + 1], Y), X); \
- f[Y * 8 + 2] = _mm512_inserti32x4(f[Y * 8 + 2], _mm512_extracti32x4_epi32(q[X * 8 + 2], Y), X); \
- f[Y * 8 + 3] = _mm512_inserti32x4(f[Y * 8 + 3], _mm512_extracti32x4_epi32(q[X * 8 + 3], Y), X); \
- f[Y * 8 + 4] = _mm512_inserti32x4(f[Y * 8 + 4], _mm512_extracti32x4_epi32(q[X * 8 + 4], Y), X); \
- f[Y * 8 + 5] = _mm512_inserti32x4(f[Y * 8 + 5], _mm512_extracti32x4_epi32(q[X * 8 + 5], Y), X); \
- f[Y * 8 + 6] = _mm512_inserti32x4(f[Y * 8 + 6], _mm512_extracti32x4_epi32(q[X * 8 + 6], Y), X); \
- f[Y * 8 + 7] = _mm512_inserti32x4(f[Y * 8 + 7], _mm512_extracti32x4_epi32(q[X * 8 + 7], Y), X); \
- } while (false);
-
- PACKET32H_TRANSPOSE_HELPER(0, 0);
- PACKET32H_TRANSPOSE_HELPER(1, 1);
- PACKET32H_TRANSPOSE_HELPER(2, 2);
- PACKET32H_TRANSPOSE_HELPER(3, 3);
-
- PACKET32H_TRANSPOSE_HELPER(1, 0);
- PACKET32H_TRANSPOSE_HELPER(2, 0);
- PACKET32H_TRANSPOSE_HELPER(3, 0);
- PACKET32H_TRANSPOSE_HELPER(2, 1);
- PACKET32H_TRANSPOSE_HELPER(3, 1);
- PACKET32H_TRANSPOSE_HELPER(3, 2);
-
- PACKET32H_TRANSPOSE_HELPER(0, 1);
- PACKET32H_TRANSPOSE_HELPER(0, 2);
- PACKET32H_TRANSPOSE_HELPER(0, 3);
- PACKET32H_TRANSPOSE_HELPER(1, 2);
- PACKET32H_TRANSPOSE_HELPER(1, 3);
- PACKET32H_TRANSPOSE_HELPER(2, 3);
-
-#undef PACKET32H_TRANSPOSE_HELPER
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i < 32; i++) {
- a.packet[i] = _mm512_castsi512_ph(f[i]);
- }
-}
-
-EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 4>& a) {
- __m512i p0, p1, p2, p3, t0, t1, t2, t3, a0, a1, a2, a3;
- t0 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));
- t1 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));
- t2 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));
- t3 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));
-
- p0 = _mm512_unpacklo_epi32(t0, t2);
- p1 = _mm512_unpackhi_epi32(t0, t2);
- p2 = _mm512_unpacklo_epi32(t1, t3);
- p3 = _mm512_unpackhi_epi32(t1, t3);
-
- a0 = p0;
- a1 = p1;
- a2 = p2;
- a3 = p3;
-
- a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p1, 0), 1);
- a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p0, 1), 0);
-
- a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p2, 0), 2);
- a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p0, 2), 0);
-
- a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p3, 0), 3);
- a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p0, 3), 0);
-
- a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p2, 1), 2);
- a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p1, 2), 1);
-
- a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p3, 2), 3);
- a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p2, 3), 2);
-
- a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p3, 1), 3);
- a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p1, 3), 1);
-
- a.packet[0] = _mm512_castsi512_ph(a0);
- a.packet[1] = _mm512_castsi512_ph(a1);
- a.packet[2] = _mm512_castsi512_ph(a2);
- a.packet[3] = _mm512_castsi512_ph(a3);
-}
-
-// preverse
-
-template <>
-EIGEN_STRONG_INLINE Packet32h preverse(const Packet32h& a) {
- return _mm512_permutexvar_ph(_mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
- 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),
- a);
-}
-
-// pscatter
-
-template <>
-EIGEN_STRONG_INLINE void pscatter<half, Packet32h>(half* to, const Packet32h& from, Index stride) {
- EIGEN_ALIGN64 half aux[32];
- pstore(aux, from);
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i < 32; i++) {
- to[stride * i] = aux[i];
- }
-}
-
-// pgather
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pgather<Eigen::half, Packet32h>(const Eigen::half* from, Index stride) {
- return _mm512_castsi512_ph(_mm512_set_epi16(
- from[31 * stride].x, from[30 * stride].x, from[29 * stride].x, from[28 * stride].x, from[27 * stride].x,
- from[26 * stride].x, from[25 * stride].x, from[24 * stride].x, from[23 * stride].x, from[22 * stride].x,
- from[21 * stride].x, from[20 * stride].x, from[19 * stride].x, from[18 * stride].x, from[17 * stride].x,
- from[16 * stride].x, from[15 * stride].x, from[14 * stride].x, from[13 * stride].x, from[12 * stride].x,
- from[11 * stride].x, from[10 * stride].x, from[9 * stride].x, from[8 * stride].x, from[7 * stride].x,
- from[6 * stride].x, from[5 * stride].x, from[4 * stride].x, from[3 * stride].x, from[2 * stride].x,
- from[1 * stride].x, from[0 * stride].x));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pcos<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h psin<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h plog<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h plog2<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h plog1p<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h pexp<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h pexpm1<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h ptanh<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h pfrexp<Packet16h>(const Packet16h&, Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h pldexp<Packet16h>(const Packet16h&, const Packet16h&);
-
-EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h& a, const Packet16h& b) {
- __m512d result = _mm512_undefined_pd();
- result = _mm512_insertf64x4(result, _mm256_castsi256_pd(a), 0);
- result = _mm512_insertf64x4(result, _mm256_castsi256_pd(b), 1);
- return _mm512_castpd_ph(result);
-}
-
-EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h& x, Packet16h& a, Packet16h& b) {
- a = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 0));
- b = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 1));
-}
-
-// psin
-template <>
-EIGEN_STRONG_INLINE Packet32h psin<Packet32h>(const Packet32h& a) {
- Packet16h low;
- Packet16h high;
- extract2Packet16h(a, low, high);
-
- Packet16h lowOut = psin(low);
- Packet16h highOut = psin(high);
-
- return combine2Packet16h(lowOut, highOut);
-}
-
-// pcos
-template <>
-EIGEN_STRONG_INLINE Packet32h pcos<Packet32h>(const Packet32h& a) {
- Packet16h low;
- Packet16h high;
- extract2Packet16h(a, low, high);
-
- Packet16h lowOut = pcos(low);
- Packet16h highOut = pcos(high);
-
- return combine2Packet16h(lowOut, highOut);
-}
-
-// plog
-template <>
-EIGEN_STRONG_INLINE Packet32h plog<Packet32h>(const Packet32h& a) {
- Packet16h low;
- Packet16h high;
- extract2Packet16h(a, low, high);
-
- Packet16h lowOut = plog(low);
- Packet16h highOut = plog(high);
-
- return combine2Packet16h(lowOut, highOut);
-}
-
-// plog2
-template <>
-EIGEN_STRONG_INLINE Packet32h plog2<Packet32h>(const Packet32h& a) {
- Packet16h low;
- Packet16h high;
- extract2Packet16h(a, low, high);
-
- Packet16h lowOut = plog2(low);
- Packet16h highOut = plog2(high);
-
- return combine2Packet16h(lowOut, highOut);
-}
-
-// plog1p
-template <>
-EIGEN_STRONG_INLINE Packet32h plog1p<Packet32h>(const Packet32h& a) {
- Packet16h low;
- Packet16h high;
- extract2Packet16h(a, low, high);
-
- Packet16h lowOut = plog1p(low);
- Packet16h highOut = plog1p(high);
-
- return combine2Packet16h(lowOut, highOut);
-}
-
-// pexp
-template <>
-EIGEN_STRONG_INLINE Packet32h pexp<Packet32h>(const Packet32h& a) {
- Packet16h low;
- Packet16h high;
- extract2Packet16h(a, low, high);
-
- Packet16h lowOut = pexp(low);
- Packet16h highOut = pexp(high);
-
- return combine2Packet16h(lowOut, highOut);
-}
-
-// pexpm1
-template <>
-EIGEN_STRONG_INLINE Packet32h pexpm1<Packet32h>(const Packet32h& a) {
- Packet16h low;
- Packet16h high;
- extract2Packet16h(a, low, high);
-
- Packet16h lowOut = pexpm1(low);
- Packet16h highOut = pexpm1(high);
-
- return combine2Packet16h(lowOut, highOut);
-}
-
-// ptanh
-template <>
-EIGEN_STRONG_INLINE Packet32h ptanh<Packet32h>(const Packet32h& a) {
- Packet16h low;
- Packet16h high;
- extract2Packet16h(a, low, high);
-
- Packet16h lowOut = ptanh(low);
- Packet16h highOut = ptanh(high);
-
- return combine2Packet16h(lowOut, highOut);
-}
-
-// pfrexp
-template <>
-EIGEN_STRONG_INLINE Packet32h pfrexp<Packet32h>(const Packet32h& a, Packet32h& exponent) {
- Packet16h low;
- Packet16h high;
- extract2Packet16h(a, low, high);
-
- Packet16h exp1 = _mm256_undefined_si256();
- Packet16h exp2 = _mm256_undefined_si256();
-
- Packet16h lowOut = pfrexp(low, exp1);
- Packet16h highOut = pfrexp(high, exp2);
-
- exponent = combine2Packet16h(exp1, exp2);
-
- return combine2Packet16h(lowOut, highOut);
-}
-
-// pldexp
-template <>
-EIGEN_STRONG_INLINE Packet32h pldexp<Packet32h>(const Packet32h& a, const Packet32h& exponent) {
- Packet16h low;
- Packet16h high;
- extract2Packet16h(a, low, high);
-
- Packet16h exp1;
- Packet16h exp2;
- extract2Packet16h(exponent, exp1, exp2);
-
- Packet16h lowOut = pldexp(low, exp1);
- Packet16h highOut = pldexp(high, exp2);
-
- return combine2Packet16h(lowOut, highOut);
-}
-
-} // end namespace internal
-} // end namespace Eigen
-
-#endif // EIGEN_PACKET_MATH_FP16_AVX512_H
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+//
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PACKET_MATH_FP16_AVX512_H
+#define EIGEN_PACKET_MATH_FP16_AVX512_H
+
+// IWYU pragma: private
+#include "../../InternalHeaderCheck.h"
+
+namespace Eigen {
+
+namespace internal {
+
+typedef __m512h Packet32h;
+typedef eigen_packet_wrapper<__m256i, 1> Packet16h;
+typedef eigen_packet_wrapper<__m128i, 2> Packet8h;
+
+template <>
+struct is_arithmetic<Packet8h> {
+ enum { value = true };
+};
+
+template <>
+struct packet_traits<half> : default_packet_traits {
+ typedef Packet32h type;
+ typedef Packet16h half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 32,
+
+ HasCmp = 1,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 1,
+ HasAbs2 = 0,
+ HasMin = 1,
+ HasMax = 1,
+ HasConj = 1,
+ HasSetLinear = 0,
+ HasLog = 1,
+ HasLog1p = 1,
+ HasExp = 1,
+ HasExpm1 = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+    // These should be implemented in the future
+ HasBessel = 0,
+ HasNdtri = 0,
+ HasSin = EIGEN_FAST_MATH,
+ HasCos = EIGEN_FAST_MATH,
+ HasTanh = EIGEN_FAST_MATH,
+ HasErf = 0, // EIGEN_FAST_MATH,
+ HasBlend = 0
+ };
+};
+
+template <>
+struct unpacket_traits<Packet32h> {
+ typedef Eigen::half type;
+ typedef Packet16h half;
+ enum {
+ size = 32,
+ alignment = Aligned64,
+ vectorizable = true,
+ masked_load_available = false,
+ masked_store_available = false
+ };
+};
+
+template <>
+struct unpacket_traits<Packet16h> {
+ typedef Eigen::half type;
+ typedef Packet8h half;
+ enum {
+ size = 16,
+ alignment = Aligned32,
+ vectorizable = true,
+ masked_load_available = false,
+ masked_store_available = false
+ };
+};
+
+template <>
+struct unpacket_traits<Packet8h> {
+ typedef Eigen::half type;
+ typedef Packet8h half;
+ enum {
+ size = 8,
+ alignment = Aligned16,
+ vectorizable = true,
+ masked_load_available = false,
+ masked_store_available = false
+ };
+};
+
+// Memory functions
+
+// pset1
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pset1<Packet32h>(const Eigen::half& from) {
+ // half/half_raw is bit compatible
+ return _mm512_set1_ph(numext::bit_cast<_Float16>(from));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pzero(const Packet32h& /*a*/) {
+ return _mm512_setzero_ph();
+}
+
+// pset1frombits
+template <>
+EIGEN_STRONG_INLINE Packet32h pset1frombits<Packet32h>(unsigned short from) {
+ return _mm512_castsi512_ph(_mm512_set1_epi16(from));
+}
+
+// pfirst
+
+template <>
+EIGEN_STRONG_INLINE Eigen::half pfirst<Packet32h>(const Packet32h& from) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ return half_impl::raw_uint16_to_half(
+ static_cast<unsigned short>(_mm256_extract_epi16(_mm512_extracti32x8_epi32(_mm512_castph_si512(from), 0), 0)));
+#else
+ Eigen::half dest[32];
+ _mm512_storeu_ph(dest, from);
+ return dest[0];
+#endif
+}
+
+// pload
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pload<Packet32h>(const Eigen::half* from) {
+ EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ph(from);
+}
+
+// ploadu
+
+template <>
+EIGEN_STRONG_INLINE Packet32h ploadu<Packet32h>(const Eigen::half* from) {
+ EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ph(from);
+}
+
+// pstore
+
+template <>
+EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet32h& from) {
+ EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ph(to, from);
+}
+
+// pstoreu
+
+template <>
+EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet32h& from) {
+ EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ph(to, from);
+}
+
+// ploaddup
+template <>
+EIGEN_STRONG_INLINE Packet32h ploaddup<Packet32h>(const Eigen::half* from) {
+ __m512h a = _mm512_castph256_ph512(_mm256_loadu_ph(from));
+ return _mm512_permutexvar_ph(_mm512_set_epi16(15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6,
+ 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0),
+ a);
+}
+
+// ploadquad
+template <>
+EIGEN_STRONG_INLINE Packet32h ploadquad<Packet32h>(const Eigen::half* from) {
+ __m512h a = _mm512_castph128_ph512(_mm_loadu_ph(from));
+ return _mm512_permutexvar_ph(
+ _mm512_set_epi16(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0),
+ a);
+}
+
+// pabs
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pabs<Packet32h>(const Packet32h& a) {
+ return _mm512_abs_ph(a);
+}
+
+// psignbit
+
+template <>
+EIGEN_STRONG_INLINE Packet32h psignbit<Packet32h>(const Packet32h& a) {
+ return _mm512_castsi512_ph(_mm512_srai_epi16(_mm512_castph_si512(a), 15));
+}
+
+// pmin
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pmin<Packet32h>(const Packet32h& a, const Packet32h& b) {
+ return _mm512_min_ph(a, b);
+}
+
+// pmax
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pmax<Packet32h>(const Packet32h& a, const Packet32h& b) {
+ return _mm512_max_ph(a, b);
+}
+
+// plset
+template <>
+EIGEN_STRONG_INLINE Packet32h plset<Packet32h>(const half& a) {
+ return _mm512_add_ph(pset1<Packet32h>(a), _mm512_set_ph(31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+ 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
+}
+
+// por
+
+template <>
+EIGEN_STRONG_INLINE Packet32h por(const Packet32h& a, const Packet32h& b) {
+ return _mm512_castsi512_ph(_mm512_or_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
+}
+
+// pxor
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pxor(const Packet32h& a, const Packet32h& b) {
+ return _mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
+}
+
+// pand
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pand(const Packet32h& a, const Packet32h& b) {
+ return _mm512_castsi512_ph(_mm512_and_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
+}
+
+// pandnot
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pandnot(const Packet32h& a, const Packet32h& b) {
+ return _mm512_castsi512_ph(_mm512_andnot_si512(_mm512_castph_si512(b), _mm512_castph_si512(a)));
+}
+
+// pselect
+
+template <>
+EIGEN_DEVICE_FUNC inline Packet32h pselect(const Packet32h& mask, const Packet32h& a, const Packet32h& b) {
+ __mmask32 mask32 = _mm512_cmp_epi16_mask(_mm512_castph_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ);
+ return _mm512_mask_blend_ph(mask32, a, b);
+}
+
+// pcmp_eq
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pcmp_eq(const Packet32h& a, const Packet32h& b) {
+ __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_EQ_OQ);
+ return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, static_cast<short>(0xffffu)));
+}
+
+// pcmp_le
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pcmp_le(const Packet32h& a, const Packet32h& b) {
+ __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LE_OQ);
+ return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, static_cast<short>(0xffffu)));
+}
+
+// pcmp_lt
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pcmp_lt(const Packet32h& a, const Packet32h& b) {
+ __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LT_OQ);
+ return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, static_cast<short>(0xffffu)));
+}
+
+// pcmp_lt_or_nan
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pcmp_lt_or_nan(const Packet32h& a, const Packet32h& b) {
+ __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_NGE_UQ);
+ return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi16(0), mask, static_cast<short>(0xffffu)));
+}
+
+// padd
+
+template <>
+EIGEN_STRONG_INLINE Packet32h padd<Packet32h>(const Packet32h& a, const Packet32h& b) {
+ return _mm512_add_ph(a, b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) {
+ return _mm256_castph_si256(_mm256_add_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {
+ return _mm_castph_si128(_mm_add_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
+}
+
+// psub
+
+template <>
+EIGEN_STRONG_INLINE Packet32h psub<Packet32h>(const Packet32h& a, const Packet32h& b) {
+ return _mm512_sub_ph(a, b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h psub<Packet16h>(const Packet16h& a, const Packet16h& b) {
+ return _mm256_castph_si256(_mm256_sub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h psub<Packet8h>(const Packet8h& a, const Packet8h& b) {
+ return _mm_castph_si128(_mm_sub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
+}
+
+// pmul
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pmul<Packet32h>(const Packet32h& a, const Packet32h& b) {
+ return _mm512_mul_ph(a, b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) {
+ return _mm256_castph_si256(_mm256_mul_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) {
+ return _mm_castph_si128(_mm_mul_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
+}
+
+// pdiv
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pdiv<Packet32h>(const Packet32h& a, const Packet32h& b) {
+ return _mm512_div_ph(a, b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pdiv<Packet16h>(const Packet16h& a, const Packet16h& b) {
+ return _mm256_castph_si256(_mm256_div_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const Packet8h& b) {
+ return _mm_castph_si128(_mm_div_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
+}
+
+// pround
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pround<Packet32h>(const Packet32h& a) {
+ // Work-around for default std::round rounding mode.
+
+ // Mask for the sign bit
+ const Packet32h signMask = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x8000u));
+  // The largest half-precision float less than 0.5
+ const Packet32h prev0dot5 = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x37FFu));
+
+ return _mm512_roundscale_ph(padd(por(pand(a, signMask), prev0dot5), a), _MM_FROUND_TO_ZERO);
+}
+
+// print
+
+template <>
+EIGEN_STRONG_INLINE Packet32h print<Packet32h>(const Packet32h& a) {
+ return _mm512_roundscale_ph(a, _MM_FROUND_CUR_DIRECTION);
+}
+
+// pceil
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pceil<Packet32h>(const Packet32h& a) {
+ return _mm512_roundscale_ph(a, _MM_FROUND_TO_POS_INF);
+}
+
+// pfloor
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pfloor<Packet32h>(const Packet32h& a) {
+ return _mm512_roundscale_ph(a, _MM_FROUND_TO_NEG_INF);
+}
+
+// ptrunc
+
+template <>
+EIGEN_STRONG_INLINE Packet32h ptrunc<Packet32h>(const Packet32h& a) {
+ return _mm512_roundscale_ph(a, _MM_FROUND_TO_ZERO);
+}
+
+// predux
+template <>
+EIGEN_STRONG_INLINE half predux<Packet32h>(const Packet32h& a) {
+ return (half)_mm512_reduce_add_ph(a);
+}
+
+template <>
+EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& a) {
+ return (half)_mm256_reduce_add_ph(_mm256_castsi256_ph(a));
+}
+
+template <>
+EIGEN_STRONG_INLINE half predux<Packet8h>(const Packet8h& a) {
+ return (half)_mm_reduce_add_ph(_mm_castsi128_ph(a));
+}
+
+// predux_half_dowto4
+template <>
+EIGEN_STRONG_INLINE Packet16h predux_half_dowto4<Packet32h>(const Packet32h& a) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ __m256i lowHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 0));
+ __m256i highHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 1));
+
+ return Packet16h(padd<Packet16h>(lowHalf, highHalf));
+#else
+ Eigen::half data[32];
+ _mm512_storeu_ph(data, a);
+
+ __m256i lowHalf = _mm256_castph_si256(_mm256_loadu_ph(data));
+ __m256i highHalf = _mm256_castph_si256(_mm256_loadu_ph(data + 16));
+
+ return Packet16h(padd<Packet16h>(lowHalf, highHalf));
+#endif
+}
+
+// predux_max
+
+// predux_min
+
+// predux_mul
+
+#ifdef EIGEN_VECTORIZE_FMA
+
+// pmadd
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
+ return _mm512_fmadd_ph(a, b, c);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
+ return _mm256_castph_si256(_mm256_fmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
+ return _mm_castph_si128(_mm_fmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
+}
+
+// pmsub
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
+ return _mm512_fmsub_ph(a, b, c);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
+ return _mm256_castph_si256(_mm256_fmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
+ return _mm_castph_si128(_mm_fmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
+}
+
+// pnmadd
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pnmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
+ return _mm512_fnmadd_ph(a, b, c);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pnmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
+ return _mm256_castph_si256(_mm256_fnmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pnmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
+ return _mm_castph_si128(_mm_fnmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
+}
+
+// pnmsub
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pnmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
+ return _mm512_fnmsub_ph(a, b, c);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pnmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
+ return _mm256_castph_si256(_mm256_fnmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pnmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
+ return _mm_castph_si128(_mm_fnmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
+}
+
+#endif
+
+// pnegate
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pnegate<Packet32h>(const Packet32h& a) {
+ return psub(pzero(a), a);
+}
+
+// pconj
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pconj<Packet32h>(const Packet32h& a) {
+ return a;
+}
+
+// psqrt
+
+template <>
+EIGEN_STRONG_INLINE Packet32h psqrt<Packet32h>(const Packet32h& a) {
+ return _mm512_sqrt_ph(a);
+}
+
+// prsqrt
+
+template <>
+EIGEN_STRONG_INLINE Packet32h prsqrt<Packet32h>(const Packet32h& a) {
+ return _mm512_rsqrt_ph(a);
+}
+
+// preciprocal
+
+template <>
+EIGEN_STRONG_INLINE Packet32h preciprocal<Packet32h>(const Packet32h& a) {
+ return _mm512_rcp_ph(a);
+}
+
+// ptranspose
+
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 32>& a) {
+ __m512i t[32];
+
+ EIGEN_UNROLL_LOOP
+ for (int i = 0; i < 16; i++) {
+ t[2 * i] = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));
+ t[2 * i + 1] =
+ _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));
+ }
+
+ __m512i p[32];
+
+ EIGEN_UNROLL_LOOP
+ for (int i = 0; i < 8; i++) {
+ p[4 * i] = _mm512_unpacklo_epi32(t[4 * i], t[4 * i + 2]);
+ p[4 * i + 1] = _mm512_unpackhi_epi32(t[4 * i], t[4 * i + 2]);
+ p[4 * i + 2] = _mm512_unpacklo_epi32(t[4 * i + 1], t[4 * i + 3]);
+ p[4 * i + 3] = _mm512_unpackhi_epi32(t[4 * i + 1], t[4 * i + 3]);
+ }
+
+ __m512i q[32];
+
+ EIGEN_UNROLL_LOOP
+ for (int i = 0; i < 4; i++) {
+ q[8 * i] = _mm512_unpacklo_epi64(p[8 * i], p[8 * i + 4]);
+ q[8 * i + 1] = _mm512_unpackhi_epi64(p[8 * i], p[8 * i + 4]);
+ q[8 * i + 2] = _mm512_unpacklo_epi64(p[8 * i + 1], p[8 * i + 5]);
+ q[8 * i + 3] = _mm512_unpackhi_epi64(p[8 * i + 1], p[8 * i + 5]);
+ q[8 * i + 4] = _mm512_unpacklo_epi64(p[8 * i + 2], p[8 * i + 6]);
+ q[8 * i + 5] = _mm512_unpackhi_epi64(p[8 * i + 2], p[8 * i + 6]);
+ q[8 * i + 6] = _mm512_unpacklo_epi64(p[8 * i + 3], p[8 * i + 7]);
+ q[8 * i + 7] = _mm512_unpackhi_epi64(p[8 * i + 3], p[8 * i + 7]);
+ }
+
+ __m512i f[32];
+
+#define PACKET32H_TRANSPOSE_HELPER(X, Y) \
+ do { \
+ f[Y * 8] = _mm512_inserti32x4(f[Y * 8], _mm512_extracti32x4_epi32(q[X * 8], Y), X); \
+ f[Y * 8 + 1] = _mm512_inserti32x4(f[Y * 8 + 1], _mm512_extracti32x4_epi32(q[X * 8 + 1], Y), X); \
+ f[Y * 8 + 2] = _mm512_inserti32x4(f[Y * 8 + 2], _mm512_extracti32x4_epi32(q[X * 8 + 2], Y), X); \
+ f[Y * 8 + 3] = _mm512_inserti32x4(f[Y * 8 + 3], _mm512_extracti32x4_epi32(q[X * 8 + 3], Y), X); \
+ f[Y * 8 + 4] = _mm512_inserti32x4(f[Y * 8 + 4], _mm512_extracti32x4_epi32(q[X * 8 + 4], Y), X); \
+ f[Y * 8 + 5] = _mm512_inserti32x4(f[Y * 8 + 5], _mm512_extracti32x4_epi32(q[X * 8 + 5], Y), X); \
+ f[Y * 8 + 6] = _mm512_inserti32x4(f[Y * 8 + 6], _mm512_extracti32x4_epi32(q[X * 8 + 6], Y), X); \
+ f[Y * 8 + 7] = _mm512_inserti32x4(f[Y * 8 + 7], _mm512_extracti32x4_epi32(q[X * 8 + 7], Y), X); \
+ } while (false);
+
+ PACKET32H_TRANSPOSE_HELPER(0, 0);
+ PACKET32H_TRANSPOSE_HELPER(1, 1);
+ PACKET32H_TRANSPOSE_HELPER(2, 2);
+ PACKET32H_TRANSPOSE_HELPER(3, 3);
+
+ PACKET32H_TRANSPOSE_HELPER(1, 0);
+ PACKET32H_TRANSPOSE_HELPER(2, 0);
+ PACKET32H_TRANSPOSE_HELPER(3, 0);
+ PACKET32H_TRANSPOSE_HELPER(2, 1);
+ PACKET32H_TRANSPOSE_HELPER(3, 1);
+ PACKET32H_TRANSPOSE_HELPER(3, 2);
+
+ PACKET32H_TRANSPOSE_HELPER(0, 1);
+ PACKET32H_TRANSPOSE_HELPER(0, 2);
+ PACKET32H_TRANSPOSE_HELPER(0, 3);
+ PACKET32H_TRANSPOSE_HELPER(1, 2);
+ PACKET32H_TRANSPOSE_HELPER(1, 3);
+ PACKET32H_TRANSPOSE_HELPER(2, 3);
+
+#undef PACKET32H_TRANSPOSE_HELPER
+
+ EIGEN_UNROLL_LOOP
+ for (int i = 0; i < 32; i++) {
+ a.packet[i] = _mm512_castsi512_ph(f[i]);
+ }
+}
+
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 4>& a) {
+ __m512i p0, p1, p2, p3, t0, t1, t2, t3, a0, a1, a2, a3;
+ t0 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));
+ t1 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));
+ t2 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));
+ t3 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));
+
+ p0 = _mm512_unpacklo_epi32(t0, t2);
+ p1 = _mm512_unpackhi_epi32(t0, t2);
+ p2 = _mm512_unpacklo_epi32(t1, t3);
+ p3 = _mm512_unpackhi_epi32(t1, t3);
+
+ a0 = p0;
+ a1 = p1;
+ a2 = p2;
+ a3 = p3;
+
+ a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p1, 0), 1);
+ a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p0, 1), 0);
+
+ a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p2, 0), 2);
+ a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p0, 2), 0);
+
+ a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p3, 0), 3);
+ a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p0, 3), 0);
+
+ a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p2, 1), 2);
+ a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p1, 2), 1);
+
+ a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p3, 2), 3);
+ a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p2, 3), 2);
+
+ a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p3, 1), 3);
+ a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p1, 3), 1);
+
+ a.packet[0] = _mm512_castsi512_ph(a0);
+ a.packet[1] = _mm512_castsi512_ph(a1);
+ a.packet[2] = _mm512_castsi512_ph(a2);
+ a.packet[3] = _mm512_castsi512_ph(a3);
+}
+
+// preverse
+
+template <>
+EIGEN_STRONG_INLINE Packet32h preverse(const Packet32h& a) {
+ return _mm512_permutexvar_ph(_mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),
+ a);
+}
+
+// pscatter
+
+template <>
+EIGEN_STRONG_INLINE void pscatter<half, Packet32h>(half* to, const Packet32h& from, Index stride) {
+ EIGEN_ALIGN64 half aux[32];
+ pstore(aux, from);
+
+ EIGEN_UNROLL_LOOP
+ for (int i = 0; i < 32; i++) {
+ to[stride * i] = aux[i];
+ }
+}
+
+// pgather
+
+template <>
+EIGEN_STRONG_INLINE Packet32h pgather<Eigen::half, Packet32h>(const Eigen::half* from, Index stride) {
+ return _mm512_castsi512_ph(_mm512_set_epi16(
+ from[31 * stride].x, from[30 * stride].x, from[29 * stride].x, from[28 * stride].x, from[27 * stride].x,
+ from[26 * stride].x, from[25 * stride].x, from[24 * stride].x, from[23 * stride].x, from[22 * stride].x,
+ from[21 * stride].x, from[20 * stride].x, from[19 * stride].x, from[18 * stride].x, from[17 * stride].x,
+ from[16 * stride].x, from[15 * stride].x, from[14 * stride].x, from[13 * stride].x, from[12 * stride].x,
+ from[11 * stride].x, from[10 * stride].x, from[9 * stride].x, from[8 * stride].x, from[7 * stride].x,
+ from[6 * stride].x, from[5 * stride].x, from[4 * stride].x, from[3 * stride].x, from[2 * stride].x,
+ from[1 * stride].x, from[0 * stride].x));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16h pcos<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h psin<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h plog<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h plog2<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h plog1p<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h pexp<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h pexpm1<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h ptanh<Packet16h>(const Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h pfrexp<Packet16h>(const Packet16h&, Packet16h&);
+template <>
+EIGEN_STRONG_INLINE Packet16h pldexp<Packet16h>(const Packet16h&, const Packet16h&);
+
+EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h& a, const Packet16h& b) {
+ __m512d result = _mm512_undefined_pd();
+ result = _mm512_insertf64x4(result, _mm256_castsi256_pd(a), 0);
+ result = _mm512_insertf64x4(result, _mm256_castsi256_pd(b), 1);
+ return _mm512_castpd_ph(result);
+}
+
+EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h& x, Packet16h& a, Packet16h& b) {
+ a = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 0));
+ b = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 1));
+}
+
+// psin
+template <>
+EIGEN_STRONG_INLINE Packet32h psin<Packet32h>(const Packet32h& a) {
+ Packet16h low;
+ Packet16h high;
+ extract2Packet16h(a, low, high);
+
+ Packet16h lowOut = psin(low);
+ Packet16h highOut = psin(high);
+
+ return combine2Packet16h(lowOut, highOut);
+}
+
+// pcos
+template <>
+EIGEN_STRONG_INLINE Packet32h pcos<Packet32h>(const Packet32h& a) {
+ Packet16h low;
+ Packet16h high;
+ extract2Packet16h(a, low, high);
+
+ Packet16h lowOut = pcos(low);
+ Packet16h highOut = pcos(high);
+
+ return combine2Packet16h(lowOut, highOut);
+}
+
+// plog
+template <>
+EIGEN_STRONG_INLINE Packet32h plog<Packet32h>(const Packet32h& a) {
+ Packet16h low;
+ Packet16h high;
+ extract2Packet16h(a, low, high);
+
+ Packet16h lowOut = plog(low);
+ Packet16h highOut = plog(high);
+
+ return combine2Packet16h(lowOut, highOut);
+}
+
+// plog2
+template <>
+EIGEN_STRONG_INLINE Packet32h plog2<Packet32h>(const Packet32h& a) {
+ Packet16h low;
+ Packet16h high;
+ extract2Packet16h(a, low, high);
+
+ Packet16h lowOut = plog2(low);
+ Packet16h highOut = plog2(high);
+
+ return combine2Packet16h(lowOut, highOut);
+}
+
+// plog1p
+template <>
+EIGEN_STRONG_INLINE Packet32h plog1p<Packet32h>(const Packet32h& a) {
+ Packet16h low;
+ Packet16h high;
+ extract2Packet16h(a, low, high);
+
+ Packet16h lowOut = plog1p(low);
+ Packet16h highOut = plog1p(high);
+
+ return combine2Packet16h(lowOut, highOut);
+}
+
+// pexp
+template <>
+EIGEN_STRONG_INLINE Packet32h pexp<Packet32h>(const Packet32h& a) {
+ Packet16h low;
+ Packet16h high;
+ extract2Packet16h(a, low, high);
+
+ Packet16h lowOut = pexp(low);
+ Packet16h highOut = pexp(high);
+
+ return combine2Packet16h(lowOut, highOut);
+}
+
+// pexpm1
+template <>
+EIGEN_STRONG_INLINE Packet32h pexpm1<Packet32h>(const Packet32h& a) {
+ Packet16h low;
+ Packet16h high;
+ extract2Packet16h(a, low, high);
+
+ Packet16h lowOut = pexpm1(low);
+ Packet16h highOut = pexpm1(high);
+
+ return combine2Packet16h(lowOut, highOut);
+}
+
+// ptanh
+template <>
+EIGEN_STRONG_INLINE Packet32h ptanh<Packet32h>(const Packet32h& a) {
+ Packet16h low;
+ Packet16h high;
+ extract2Packet16h(a, low, high);
+
+ Packet16h lowOut = ptanh(low);
+ Packet16h highOut = ptanh(high);
+
+ return combine2Packet16h(lowOut, highOut);
+}
+
+// pfrexp
+template <>
+EIGEN_STRONG_INLINE Packet32h pfrexp<Packet32h>(const Packet32h& a, Packet32h& exponent) {
+ Packet16h low;
+ Packet16h high;
+ extract2Packet16h(a, low, high);
+
+ Packet16h exp1 = _mm256_undefined_si256();
+ Packet16h exp2 = _mm256_undefined_si256();
+
+ Packet16h lowOut = pfrexp(low, exp1);
+ Packet16h highOut = pfrexp(high, exp2);
+
+ exponent = combine2Packet16h(exp1, exp2);
+
+ return combine2Packet16h(lowOut, highOut);
+}
+
+// pldexp
+template <>
+EIGEN_STRONG_INLINE Packet32h pldexp<Packet32h>(const Packet32h& a, const Packet32h& exponent) {
+ Packet16h low;
+ Packet16h high;
+ extract2Packet16h(a, low, high);
+
+ Packet16h exp1;
+ Packet16h exp2;
+ extract2Packet16h(exponent, exp1, exp2);
+
+ Packet16h lowOut = pldexp(low, exp1);
+ Packet16h highOut = pldexp(high, exp2);
+
+ return combine2Packet16h(lowOut, highOut);
+}
+
+} // end namespace internal
+} // end namespace Eigen
+
+#endif // EIGEN_PACKET_MATH_FP16_AVX512_H
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 8027cb5..482064e 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -1514,16 +1514,13 @@
template <typename Packet>
EIGEN_STRONG_INLINE Packet ploadu_common(const __UNPACK_TYPE__(Packet) * from) {
- EIGEN_DEBUG_ALIGNED_LOAD
-#if defined(EIGEN_VECTORIZE_VSX) || !defined(_BIG_ENDIAN)
EIGEN_DEBUG_UNALIGNED_LOAD
+#if defined(EIGEN_VECTORIZE_VSX)
return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from));
#else
- Packet16uc MSQ, LSQ;
- Packet16uc mask;
- MSQ = vec_ld(0, (unsigned char*)from); // most significant quadword
- LSQ = vec_ld(15, (unsigned char*)from); // least significant quadword
- mask = vec_lvsl(0, from); // create the permute mask
+ Packet16uc MSQ = vec_ld(0, (unsigned char*)from); // most significant quadword
+ Packet16uc LSQ = vec_ld(15, (unsigned char*)from); // least significant quadword
+ Packet16uc mask = vec_lvsl(0, from); // create the permute mask
// TODO: Add static_cast here
return (Packet)vec_perm(MSQ, LSQ, mask); // align the data
#endif
@@ -1733,7 +1730,7 @@
template <typename Packet>
EIGEN_STRONG_INLINE void pstoreu_common(__UNPACK_TYPE__(Packet) * to, const Packet& from) {
EIGEN_DEBUG_UNALIGNED_STORE
-#if defined(EIGEN_VECTORIZE_VSX) || !defined(_BIG_ENDIAN)
+#if defined(EIGEN_VECTORIZE_VSX)
vec_xst(from, 0, to);
#else
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
@@ -2069,7 +2066,7 @@
input = padd<Packet4ui>(input, rounding_bias);
const EIGEN_DECLARE_CONST_FAST_Packet4ui(nan, 0x7FC00000);
-#ifdef _ARCH_PWR9
+#if defined(_ARCH_PWR9) && defined(EIGEN_VECTORIZE_VSX)
Packet4bi nan_selector = vec_test_data_class(p4f, __VEC_CLASS_FP_NAN);
input = vec_sel(input, p4ui_nan, nan_selector);
@@ -2178,7 +2175,7 @@
Packet8bi rounding_bias = vec_cmplt(lsb, p4f2);
Packet8us input = psub<Packet8us>(p4f, reinterpret_cast<Packet8us>(rounding_bias));
-#ifdef _ARCH_PWR9
+#if defined(_ARCH_PWR9) && defined(EIGEN_VECTORIZE_VSX)
Packet4bi nan_selector_lo = vec_test_data_class(lo, __VEC_CLASS_FP_NAN);
Packet4bi nan_selector_hi = vec_test_data_class(hi, __VEC_CLASS_FP_NAN);
Packet8us nan_selector =
@@ -3400,9 +3397,17 @@
return reinterpret_cast<Packet2d>(vec_cmpeq(a, b));
}
template <>
+#ifdef __POWER8_VECTOR__
EIGEN_STRONG_INLINE Packet2l pcmp_eq(const Packet2l& a, const Packet2l& b) {
return reinterpret_cast<Packet2l>(vec_cmpeq(a, b));
}
+#else
+EIGEN_STRONG_INLINE Packet2l pcmp_eq(const Packet2l& a, const Packet2l& b) {
+ Packet4i halves = reinterpret_cast<Packet4i>(vec_cmpeq(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(b)));
+ Packet4i flipped = vec_perm(halves, halves, p16uc_COMPLEX32_REV);
+ return reinterpret_cast<Packet2l>(pand(halves, flipped));
+}
+#endif
template <>
EIGEN_STRONG_INLINE Packet2d pcmp_lt_or_nan(const Packet2d& a, const Packet2d& b) {
Packet2d c = reinterpret_cast<Packet2d>(vec_cmpge(a, b));
diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h
index a8cb228..95697f3 100644
--- a/Eigen/src/Core/arch/Default/Half.h
+++ b/Eigen/src/Core/arch/Default/Half.h
@@ -522,11 +522,6 @@
#endif
}
-union float32_bits {
- unsigned int u;
- float f;
-};
-
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff) {
#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
(defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
@@ -549,46 +544,45 @@
return h;
#else
- float32_bits f;
- f.f = ff;
-
- const float32_bits f32infty = {255 << 23};
- const float32_bits f16max = {(127 + 16) << 23};
- const float32_bits denorm_magic = {((127 - 15) + (23 - 10) + 1) << 23};
- unsigned int sign_mask = 0x80000000u;
+ uint32_t f_bits = Eigen::numext::bit_cast<uint32_t>(ff);
+ const uint32_t f32infty_bits = {255 << 23};
+ const uint32_t f16max_bits = {(127 + 16) << 23};
+ const uint32_t denorm_magic_bits = {((127 - 15) + (23 - 10) + 1) << 23};
+ const uint32_t sign_mask = 0x80000000u;
__half_raw o;
- o.x = static_cast<numext::uint16_t>(0x0u);
+ o.x = static_cast<uint16_t>(0x0u);
- unsigned int sign = f.u & sign_mask;
- f.u ^= sign;
+ const uint32_t sign = f_bits & sign_mask;
+ f_bits ^= sign;
// NOTE all the integer compares in this function can be safely
// compiled into signed compares since all operands are below
// 0x80000000. Important if you want fast straight SSE2 code
// (since there's no unsigned PCMPGTD).
- if (f.u >= f16max.u) { // result is Inf or NaN (all exponent bits set)
- o.x = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
- } else { // (De)normalized number or zero
- if (f.u < (113 << 23)) { // resulting FP16 is subnormal or zero
+ if (f_bits >= f16max_bits) { // result is Inf or NaN (all exponent bits set)
+ o.x = (f_bits > f32infty_bits) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
+ } else { // (De)normalized number or zero
+ if (f_bits < (113 << 23)) { // resulting FP16 is subnormal or zero
// use a magic value to align our 10 mantissa bits at the bottom of
// the float. as long as FP addition is round-to-nearest-even this
// just works.
- f.f += denorm_magic.f;
+ f_bits = Eigen::numext::bit_cast<uint32_t>(Eigen::numext::bit_cast<float>(f_bits) +
+ Eigen::numext::bit_cast<float>(denorm_magic_bits));
// and one integer subtract of the bias later, we have our final float!
- o.x = static_cast<numext::uint16_t>(f.u - denorm_magic.u);
+ o.x = static_cast<numext::uint16_t>(f_bits - denorm_magic_bits);
} else {
- unsigned int mant_odd = (f.u >> 13) & 1; // resulting mantissa is odd
+ const uint32_t mant_odd = (f_bits >> 13) & 1; // resulting mantissa is odd
// update exponent, rounding bias part 1
// Equivalent to `f.u += ((unsigned int)(15 - 127) << 23) + 0xfff`, but
// without arithmetic overflow.
- f.u += 0xc8000fffU;
+ f_bits += 0xc8000fffU;
// rounding bias part 2
- f.u += mant_odd;
+ f_bits += mant_odd;
// take the bits!
- o.x = static_cast<numext::uint16_t>(f.u >> 13);
+ o.x = static_cast<numext::uint16_t>(f_bits >> 13);
}
}
@@ -611,24 +605,23 @@
#elif defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
return static_cast<float>(h.x);
#else
- const float32_bits magic = {113 << 23};
- const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
- float32_bits o;
-
- o.u = (h.x & 0x7fff) << 13; // exponent/mantissa bits
- unsigned int exp = shifted_exp & o.u; // just the exponent
- o.u += (127 - 15) << 23; // exponent adjust
+ const float magic = Eigen::numext::bit_cast<float>(static_cast<uint32_t>(113 << 23));
+ const uint32_t shifted_exp = 0x7c00 << 13; // exponent mask after shift
+ uint32_t o_bits = (h.x & 0x7fff) << 13; // exponent/mantissa bits
+ const uint32_t exp = shifted_exp & o_bits; // just the exponent
+ o_bits += (127 - 15) << 23; // exponent adjust
// handle exponent special cases
- if (exp == shifted_exp) { // Inf/NaN?
- o.u += (128 - 16) << 23; // extra exp adjust
- } else if (exp == 0) { // Zero/Denormal?
- o.u += 1 << 23; // extra exp adjust
- o.f -= magic.f; // renormalize
+ if (exp == shifted_exp) { // Inf/NaN?
+ o_bits += (128 - 16) << 23; // extra exp adjust
+ } else if (exp == 0) { // Zero/Denormal?
+ o_bits += 1 << 23; // extra exp adjust
+ // renormalize
+ o_bits = Eigen::numext::bit_cast<uint32_t>(Eigen::numext::bit_cast<float>(o_bits) - magic);
}
- o.u |= (h.x & 0x8000) << 16; // sign bit
- return o.f;
+ o_bits |= (h.x & 0x8000) << 16; // sign bit
+ return Eigen::numext::bit_cast<float>(o_bits);
#endif
}
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index fcde64a..2488be4 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -497,7 +497,7 @@
namespace internal {
template <typename Scalar>
struct stem_function {
- typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+ typedef internal::make_complex_t<Scalar> ComplexScalar;
typedef ComplexScalar type(ComplexScalar, int);
};
} // namespace internal
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 2e3f59e..39a117e 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -698,6 +698,12 @@
}
template <typename A, typename B>
+inline constexpr int size_prefer_fixed(A a, B b) {
+ plain_enum_asserts(a, b);
+ return int(a) == Dynamic ? int(b) : int(a);
+}
+
+template <typename A, typename B>
inline constexpr bool enum_eq_not_dynamic(A a, B b) {
plain_enum_asserts(a, b);
if ((int)a == Dynamic || (int)b == Dynamic) return false;
@@ -745,6 +751,9 @@
constexpr bool is_constant_evaluated() { return false; }
#endif
+template <typename Scalar>
+using make_complex_t = std::conditional_t<NumTraits<Scalar>::IsComplex, Scalar, std::complex<Scalar>>;
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index cecbee8..a42bb0f 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -885,8 +885,12 @@
};
template <typename T, bool Vectorized>
-struct scalar_div_cost<std::complex<T>, Vectorized> {
- enum { value = 2 * scalar_div_cost<T>::value + 6 * NumTraits<T>::MulCost + 3 * NumTraits<T>::AddCost };
+struct scalar_div_cost<T, Vectorized, std::enable_if_t<NumTraits<T>::IsComplex>> {
+ using RealScalar = typename NumTraits<T>::Real;
+ enum {
+ value =
+ 2 * scalar_div_cost<RealScalar>::value + 6 * NumTraits<RealScalar>::MulCost + 3 * NumTraits<RealScalar>::AddCost
+ };
};
template <bool Vectorized>
diff --git a/Eigen/src/Eigenvalues/ComplexEigenSolver.h b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
index 60a24a8..50fa3b8 100644
--- a/Eigen/src/Eigenvalues/ComplexEigenSolver.h
+++ b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
@@ -70,7 +70,7 @@
* \c float or \c double) and just \c Scalar if #Scalar is
* complex.
*/
- typedef std::complex<RealScalar> ComplexScalar;
+ typedef internal::make_complex_t<Scalar> ComplexScalar;
/** \brief Type for vector of eigenvalues as returned by eigenvalues().
*
diff --git a/Eigen/src/Eigenvalues/ComplexSchur.h b/Eigen/src/Eigenvalues/ComplexSchur.h
index a33e46e..22433f2 100644
--- a/Eigen/src/Eigenvalues/ComplexSchur.h
+++ b/Eigen/src/Eigenvalues/ComplexSchur.h
@@ -75,7 +75,7 @@
* \c float or \c double) and just \c Scalar if #Scalar is
* complex.
*/
- typedef std::complex<RealScalar> ComplexScalar;
+ typedef internal::make_complex_t<Scalar> ComplexScalar;
/** \brief Type for the matrices in the Schur decomposition.
*
diff --git a/Eigen/src/Eigenvalues/EigenSolver.h b/Eigen/src/Eigenvalues/EigenSolver.h
index f73d58f..9dba7bd 100644
--- a/Eigen/src/Eigenvalues/EigenSolver.h
+++ b/Eigen/src/Eigenvalues/EigenSolver.h
@@ -89,7 +89,7 @@
* \c float or \c double) and just \c Scalar if #Scalar is
* complex.
*/
- typedef std::complex<RealScalar> ComplexScalar;
+ typedef internal::make_complex_t<Scalar> ComplexScalar;
/** \brief Type for vector of eigenvalues as returned by eigenvalues().
*
diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
index b114cfa..c0a61dc 100644
--- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
+++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
@@ -83,7 +83,7 @@
* \c float or \c double) and just \c Scalar if #Scalar is
* complex.
*/
- typedef std::complex<RealScalar> ComplexScalar;
+ typedef internal::make_complex_t<Scalar> ComplexScalar;
/** \brief Type for vector of real scalar values eigenvalues as returned by betas().
*
diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h
index 3466f51..5915387 100644
--- a/Eigen/src/Eigenvalues/RealQZ.h
+++ b/Eigen/src/Eigenvalues/RealQZ.h
@@ -69,7 +69,7 @@
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
};
typedef typename MatrixType::Scalar Scalar;
- typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+ typedef internal::make_complex_t<Scalar> ComplexScalar;
typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
typedef Matrix<ComplexScalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> EigenvalueType;
diff --git a/Eigen/src/Eigenvalues/RealSchur.h b/Eigen/src/Eigenvalues/RealSchur.h
index 5cef658..54a74e2 100644
--- a/Eigen/src/Eigenvalues/RealSchur.h
+++ b/Eigen/src/Eigenvalues/RealSchur.h
@@ -66,7 +66,7 @@
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
};
typedef typename MatrixType::Scalar Scalar;
- typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+ typedef internal::make_complex_t<Scalar> ComplexScalar;
typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
typedef Matrix<ComplexScalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> EigenvalueType;
diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
index e3154b4..8fdeb84 100644
--- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
+++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
@@ -31,8 +31,6 @@
template <typename MatrixType, typename Rhs, typename Dest, typename Preconditioner>
bool bicgstab(const MatrixType& mat, const Rhs& rhs, Dest& x, const Preconditioner& precond, Index& iters,
typename Dest::RealScalar& tol_error) {
- using std::abs;
- using std::sqrt;
typedef typename Dest::RealScalar RealScalar;
typedef typename Dest::Scalar Scalar;
typedef Matrix<Scalar, Dynamic, 1> VectorType;
@@ -43,14 +41,15 @@
VectorType r = rhs - mat * x;
VectorType r0 = r;
- RealScalar r0_sqnorm = r0.squaredNorm();
- RealScalar rhs_sqnorm = rhs.squaredNorm();
- if (rhs_sqnorm == 0) {
+ RealScalar r0_norm = r0.stableNorm();
+ RealScalar r_norm = r0_norm;
+ RealScalar rhs_norm = rhs.stableNorm();
+ if (rhs_norm == 0) {
x.setZero();
return true;
}
Scalar rho(1);
- Scalar alpha(1);
+ Scalar alpha(0);
Scalar w(1);
VectorType v = VectorType::Zero(n), p = VectorType::Zero(n);
@@ -59,21 +58,22 @@
VectorType s(n), t(n);
- RealScalar tol2 = tol * tol * rhs_sqnorm;
- RealScalar eps2 = NumTraits<Scalar>::epsilon() * NumTraits<Scalar>::epsilon();
+ RealScalar eps = NumTraits<Scalar>::epsilon();
Index i = 0;
Index restarts = 0;
- while (r.squaredNorm() > tol2 && i < maxIters) {
+ while (r_norm > tol && i < maxIters) {
Scalar rho_old = rho;
-
rho = r0.dot(r);
- if (abs(rho) < eps2 * r0_sqnorm) {
+ if (Eigen::numext::abs(rho) / Eigen::numext::maxi(r0_norm, r_norm) < eps * Eigen::numext::mini(r0_norm, r_norm)) {
// The new residual vector became too orthogonal to the arbitrarily chosen direction r0
// Let's restart with a new r0:
r = rhs - mat * x;
r0 = r;
- rho = r0_sqnorm = r.squaredNorm();
+ rho = r.squaredNorm();
+ r0_norm = r.stableNorm();
+ alpha = Scalar(0);
+ w = Scalar(1);
if (restarts++ == 0) i = 0;
}
Scalar beta = (rho / rho_old) * (alpha / w);
@@ -82,23 +82,38 @@
y = precond.solve(p);
v.noalias() = mat * y;
-
- alpha = rho / r0.dot(v);
+ Scalar theta = r0.dot(v);
+ // For small angles ∠(r0, v) < eps, random restart.
+ RealScalar v_norm = v.stableNorm();
+ if (Eigen::numext::abs(theta) / Eigen::numext::maxi(r0_norm, v_norm) < eps * Eigen::numext::mini(r0_norm, v_norm)) {
+ r = rhs - mat * x;
+ r0.setRandom();
+ r0_norm = r0.stableNorm();
+ rho = Scalar(1);
+ alpha = Scalar(0);
+ w = Scalar(1);
+ if (restarts++ == 0) i = 0;
+ continue;
+ }
+ alpha = rho / theta;
s = r - alpha * v;
z = precond.solve(s);
t.noalias() = mat * z;
RealScalar tmp = t.squaredNorm();
- if (tmp > RealScalar(0))
+ if (tmp > RealScalar(0)) {
w = t.dot(s) / tmp;
- else
+ } else {
w = Scalar(0);
+ }
x += alpha * y + w * z;
r = s - w * t;
+ r_norm = r.stableNorm();
++i;
}
- tol_error = sqrt(r.squaredNorm() / rhs_sqnorm);
+
+ tol_error = r_norm / rhs_norm;
iters = i;
return true;
}
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index 466834a..3e57764 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -243,7 +243,10 @@
the LU decomposition.
*/
inline RealScalar rcond() const {
- eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
+ eigen_assert(m_isInitialized && "FullPivLU is not initialized.");
+ if (!isInvertible()) {
+ return RealScalar(0);
+ }
return internal::rcond_estimate_helper(m_l1_norm, *this);
}
diff --git a/debug/msvc/eigen.natvis b/debug/msvc/eigen.natvis
index da89857..22cf346 100644
--- a/debug/msvc/eigen.natvis
+++ b/debug/msvc/eigen.natvis
@@ -1,235 +1,235 @@
-<?xml version="1.0" encoding="utf-8"?>
-
-<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
-
- <!-- Fixed x Fixed Matrix -->
- <Type Name="Eigen::Matrix<*,*,*,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,-1,-1,*,*,*>"/>
- <DisplayString>[{$T2}, {$T3}] (fixed matrix)</DisplayString>
- <Expand>
- <ArrayItems Condition="Flags%2"> <!-- row major layout -->
- <Rank>2</Rank>
- <Size>$i==0 ? $T2 : $T3</Size>
- <ValuePointer>m_storage.m_data.array</ValuePointer>
- </ArrayItems>
- <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
- <Direction>Backward</Direction>
- <Rank>2</Rank>
- <Size>$i==0 ? $T2 : $T3</Size>
- <ValuePointer>m_storage.m_data.array</ValuePointer>
- </ArrayItems>
- </Expand>
- </Type>
-
- <!-- 2 x 2 Matrix -->
- <Type Name="Eigen::Matrix<*,2,2,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,2,2,*,*,*>"/>
- <DisplayString>[2, 2] (fixed matrix)</DisplayString>
- <Expand>
- <Synthetic Name="[row 0]" Condition="Flags%2">
- <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 0]" Condition="!(Flags%2)">
- <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 1]" Condition="Flags%2">
- <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 1]" Condition="!(Flags%2)">
- <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]})</DisplayString>
- </Synthetic>
- </Expand>
- </Type>
-
- <!-- 3 x 3 Matrix -->
- <Type Name="Eigen::Matrix<*,3,3,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,3,3,*,*,*>"/>
- <DisplayString>[3, 3] (fixed matrix)</DisplayString>
- <Expand>
- <Synthetic Name="[row 0]" Condition="Flags%2">
- <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 0]" Condition="!(Flags%2)">
- <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 1]" Condition="Flags%2">
- <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 1]" Condition="!(Flags%2)">
- <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 2]" Condition="Flags%2">
- <DisplayString>({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 2]" Condition="!(Flags%2)">
- <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]})</DisplayString>
- </Synthetic>
- </Expand>
- </Type>
-
- <!-- 4 x 4 Matrix -->
- <Type Name="Eigen::Matrix<*,4,4,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,4,4,*,*,*>"/>
- <DisplayString>[4, 4] (fixed matrix)</DisplayString>
- <Expand>
- <Synthetic Name="[row 0]" Condition="Flags%2">
- <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 0]" Condition="!(Flags%2)">
- <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 1]" Condition="Flags%2">
- <DisplayString>({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 1]" Condition="!(Flags%2)">
- <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 2]" Condition="Flags%2">
- <DisplayString>({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 2]" Condition="!(Flags%2)">
- <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 3]" Condition="Flags%2">
- <DisplayString>({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 3]" Condition="!(Flags%2)">
- <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]})</DisplayString>
- </Synthetic>
- </Expand>
- </Type>
-
- <!-- Dynamic x Dynamic Matrix -->
- <Type Name="Eigen::Matrix<*,-1,-1,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,-1,-1,*,*,*>"/>
- <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
- <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix)</DisplayString>
- <Expand>
- <ArrayItems Condition="Flags%2"> <!-- row major layout -->
- <Rank>2</Rank>
- <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
- <ValuePointer>m_storage.m_data</ValuePointer>
- </ArrayItems>
- <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
- <Direction>Backward</Direction>
- <Rank>2</Rank>
- <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
- <ValuePointer>m_storage.m_data</ValuePointer>
- </ArrayItems>
- </Expand>
- </Type>
-
- <!-- Fixed x Dynamic Matrix -->
- <Type Name="Eigen::Matrix<*,*,-1,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,*,-1,*,*,*>"/>
- <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
- <DisplayString Condition="m_storage.m_data != 0">[{$T2}, {m_storage.m_cols}] (dynamic column matrix)</DisplayString>
- <Expand>
- <ArrayItems Condition="Flags%2"> <!-- row major layout -->
- <Rank>2</Rank>
- <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
- <ValuePointer>m_storage.m_data</ValuePointer>
- </ArrayItems>
- <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
- <Direction>Backward</Direction>
- <Rank>2</Rank>
- <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
- <ValuePointer>m_storage.m_data</ValuePointer>
- </ArrayItems>
- </Expand>
- </Type>
-
- <!-- Dynamic x Fixed Matrix -->
- <Type Name="Eigen::Matrix<*,-1,*,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,-1,*,*,*,*>"/>
- <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
- <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {$T2}] (dynamic row matrix)</DisplayString>
- <Expand>
- <ArrayItems Condition="Flags%2"> <!-- row major layout -->
- <Rank>2</Rank>
- <Size>$i==0 ? m_storage.m_rows : $T2</Size>
- <ValuePointer>m_storage.m_data</ValuePointer>
- </ArrayItems>
- <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
- <Direction>Backward</Direction>
- <Rank>2</Rank>
- <Size>$i==0 ? m_storage.m_rows : $T2</Size>
- <ValuePointer>m_storage.m_data</ValuePointer>
- </ArrayItems>
- </Expand>
- </Type>
-
- <!-- Dynamic Column Vector -->
- <Type Name="Eigen::Matrix<*,1,-1,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,1,-1,*,*,*>"/>
- <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
- <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_cols}] (dynamic column vector)</DisplayString>
- <Expand>
- <Item Name="[size]">m_storage.m_cols</Item>
- <ArrayItems>
- <Size>m_storage.m_cols</Size>
- <ValuePointer>m_storage.m_data</ValuePointer>
- </ArrayItems>
- </Expand>
- </Type>
-
- <!-- Dynamic Row Vector -->
- <Type Name="Eigen::Matrix<*,-1,1,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,-1,1,*,*,*>"/>
- <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
- <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}] (dynamic row vector)</DisplayString>
- <Expand>
- <Item Name="[size]">m_storage.m_rows</Item>
- <ArrayItems>
- <Size>m_storage.m_rows</Size>
- <ValuePointer>m_storage.m_data</ValuePointer>
- </ArrayItems>
- </Expand>
- </Type>
-
- <!-- Fixed Vector -->
- <Type Name="Eigen::Matrix<*,1,1,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,1,1,*,*,*>"/>
- <DisplayString>[1] ({m_storage.m_data.array[0]})</DisplayString>
- <Expand>
- <Item Name="[x]">m_storage.m_data.array[0]</Item>
- </Expand>
- </Type>
-
- <Type Name="Eigen::Matrix<*,2,1,*,*,*>">
- <AlternativeType Name="Eigen::Matrix<*,1,2,*,*,*>"/>
- <AlternativeType Name="Eigen::Array<*,2,1,*,*,*>"/>
- <AlternativeType Name="Eigen::Array<*,1,2,*,*,*>"/>
- <DisplayString>[2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
- <Expand>
- <Item Name="[x]">m_storage.m_data.array[0]</Item>
- <Item Name="[y]">m_storage.m_data.array[1]</Item>
- </Expand>
- </Type>
-
- <Type Name="Eigen::Matrix<*,3,1,*,*,*>">
- <AlternativeType Name="Eigen::Matrix<*,1,3,*,*,*>"/>
- <AlternativeType Name="Eigen::Array<*,3,1,*,*,*>"/>
- <AlternativeType Name="Eigen::Array<*,1,3,*,*,*>"/>
- <DisplayString>[3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
- <Expand>
- <Item Name="[x]">m_storage.m_data.array[0]</Item>
- <Item Name="[y]">m_storage.m_data.array[1]</Item>
- <Item Name="[z]">m_storage.m_data.array[2]</Item>
- </Expand>
- </Type>
-
- <Type Name="Eigen::Matrix<*,4,1,*,*,*>">
- <AlternativeType Name="Eigen::Matrix<*,1,4,*,*,*>"/>
- <AlternativeType Name="Eigen::Array<*,4,1,*,*,*>"/>
- <AlternativeType Name="Eigen::Array<*,1,4,*,*,*>"/>
- <DisplayString>[4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
- <Expand>
- <Item Name="[x]">m_storage.m_data.array[0]</Item>
- <Item Name="[y]">m_storage.m_data.array[1]</Item>
- <Item Name="[z]">m_storage.m_data.array[2]</Item>
- <Item Name="[w]">m_storage.m_data.array[3]</Item>
- </Expand>
- </Type>
-
-</AutoVisualizer>
+<?xml version="1.0" encoding="utf-8"?>
+
+<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
+
+ <!-- Fixed x Fixed Matrix -->
+ <Type Name="Eigen::Matrix<*,*,*,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,-1,-1,*,*,*>"/>
+ <DisplayString>[{$T2}, {$T3}] (fixed matrix)</DisplayString>
+ <Expand>
+ <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+ <Rank>2</Rank>
+ <Size>$i==0 ? $T2 : $T3</Size>
+ <ValuePointer>m_storage.m_data.array</ValuePointer>
+ </ArrayItems>
+ <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+ <Direction>Backward</Direction>
+ <Rank>2</Rank>
+ <Size>$i==0 ? $T2 : $T3</Size>
+ <ValuePointer>m_storage.m_data.array</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <!-- 2 x 2 Matrix -->
+ <Type Name="Eigen::Matrix<*,2,2,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,2,2,*,*,*>"/>
+ <DisplayString>[2, 2] (fixed matrix)</DisplayString>
+ <Expand>
+ <Synthetic Name="[row 0]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 0]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 1]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 1]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]})</DisplayString>
+ </Synthetic>
+ </Expand>
+ </Type>
+
+ <!-- 3 x 3 Matrix -->
+ <Type Name="Eigen::Matrix<*,3,3,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,3,3,*,*,*>"/>
+ <DisplayString>[3, 3] (fixed matrix)</DisplayString>
+ <Expand>
+ <Synthetic Name="[row 0]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 0]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 1]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 1]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 2]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 2]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]})</DisplayString>
+ </Synthetic>
+ </Expand>
+ </Type>
+
+ <!-- 4 x 4 Matrix -->
+ <Type Name="Eigen::Matrix<*,4,4,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,4,4,*,*,*>"/>
+ <DisplayString>[4, 4] (fixed matrix)</DisplayString>
+ <Expand>
+ <Synthetic Name="[row 0]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 0]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 1]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 1]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 2]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 2]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 3]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 3]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]})</DisplayString>
+ </Synthetic>
+ </Expand>
+ </Type>
+
+ <!-- Dynamic x Dynamic Matrix -->
+ <Type Name="Eigen::Matrix<*,-1,-1,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,-1,-1,*,*,*>"/>
+ <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+ <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix)</DisplayString>
+ <Expand>
+ <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+ <Rank>2</Rank>
+ <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+ <Direction>Backward</Direction>
+ <Rank>2</Rank>
+ <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <!-- Fixed x Dynamic Matrix -->
+ <Type Name="Eigen::Matrix<*,*,-1,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,*,-1,*,*,*>"/>
+ <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+ <DisplayString Condition="m_storage.m_data != 0">[{$T2}, {m_storage.m_cols}] (dynamic column matrix)</DisplayString>
+ <Expand>
+ <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+ <Rank>2</Rank>
+ <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+ <Direction>Backward</Direction>
+ <Rank>2</Rank>
+ <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <!-- Dynamic x Fixed Matrix -->
+ <Type Name="Eigen::Matrix<*,-1,*,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,-1,*,*,*,*>"/>
+ <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+ <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {$T2}] (dynamic row matrix)</DisplayString>
+ <Expand>
+ <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+ <Rank>2</Rank>
+ <Size>$i==0 ? m_storage.m_rows : $T2</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+ <Direction>Backward</Direction>
+ <Rank>2</Rank>
+ <Size>$i==0 ? m_storage.m_rows : $T2</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <!-- Dynamic Column Vector -->
+ <Type Name="Eigen::Matrix<*,1,-1,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,1,-1,*,*,*>"/>
+ <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+ <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_cols}] (dynamic column vector)</DisplayString>
+ <Expand>
+ <Item Name="[size]">m_storage.m_cols</Item>
+ <ArrayItems>
+ <Size>m_storage.m_cols</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <!-- Dynamic Row Vector -->
+ <Type Name="Eigen::Matrix<*,-1,1,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,-1,1,*,*,*>"/>
+ <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+ <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}] (dynamic row vector)</DisplayString>
+ <Expand>
+ <Item Name="[size]">m_storage.m_rows</Item>
+ <ArrayItems>
+ <Size>m_storage.m_rows</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <!-- Fixed Vector -->
+ <Type Name="Eigen::Matrix<*,1,1,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,1,1,*,*,*>"/>
+ <DisplayString>[1] ({m_storage.m_data.array[0]})</DisplayString>
+ <Expand>
+ <Item Name="[x]">m_storage.m_data.array[0]</Item>
+ </Expand>
+ </Type>
+
+ <Type Name="Eigen::Matrix<*,2,1,*,*,*>">
+ <AlternativeType Name="Eigen::Matrix<*,1,2,*,*,*>"/>
+ <AlternativeType Name="Eigen::Array<*,2,1,*,*,*>"/>
+ <AlternativeType Name="Eigen::Array<*,1,2,*,*,*>"/>
+ <DisplayString>[2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
+ <Expand>
+ <Item Name="[x]">m_storage.m_data.array[0]</Item>
+ <Item Name="[y]">m_storage.m_data.array[1]</Item>
+ </Expand>
+ </Type>
+
+ <Type Name="Eigen::Matrix<*,3,1,*,*,*>">
+ <AlternativeType Name="Eigen::Matrix<*,1,3,*,*,*>"/>
+ <AlternativeType Name="Eigen::Array<*,3,1,*,*,*>"/>
+ <AlternativeType Name="Eigen::Array<*,1,3,*,*,*>"/>
+ <DisplayString>[3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
+ <Expand>
+ <Item Name="[x]">m_storage.m_data.array[0]</Item>
+ <Item Name="[y]">m_storage.m_data.array[1]</Item>
+ <Item Name="[z]">m_storage.m_data.array[2]</Item>
+ </Expand>
+ </Type>
+
+ <Type Name="Eigen::Matrix<*,4,1,*,*,*>">
+ <AlternativeType Name="Eigen::Matrix<*,1,4,*,*,*>"/>
+ <AlternativeType Name="Eigen::Array<*,4,1,*,*,*>"/>
+ <AlternativeType Name="Eigen::Array<*,1,4,*,*,*>"/>
+ <DisplayString>[4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
+ <Expand>
+ <Item Name="[x]">m_storage.m_data.array[0]</Item>
+ <Item Name="[y]">m_storage.m_data.array[1]</Item>
+ <Item Name="[z]">m_storage.m_data.array[2]</Item>
+ <Item Name="[w]">m_storage.m_data.array[3]</Item>
+ </Expand>
+ </Type>
+
+</AutoVisualizer>
diff --git a/debug/msvc/eigen_autoexp_part.dat b/debug/msvc/eigen_autoexp_part.dat
index 35ef580..273c10d 100644
--- a/debug/msvc/eigen_autoexp_part.dat
+++ b/debug/msvc/eigen_autoexp_part.dat
@@ -1,295 +1,295 @@
-; ***************************************************************
-; * Eigen Visualizer
-; *
-; * Author: Hauke Heibel <hauke.heibel@gmail.com>
-; *
-; * Support the enhanced debugging of the following Eigen
-; * types (*: any, +:fixed dimension) :
-; *
-; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*>
-; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*>
-; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*>
-; * - Eigen::Matrix<*,-1,-1,*,*,*>
-; * - Eigen::Matrix<*,+,-1,*,*,*>
-; * - Eigen::Matrix<*,-1,+,*,*,*>
-; * - Eigen::Matrix<*,+,+,*,*,*>
-; *
-; * Matrices are displayed properly independently of the memory
-; * alignment (RowMajor vs. ColMajor).
-; *
-; * This file is distributed WITHOUT ANY WARRANTY. Please ensure
-; * that your original autoexp.dat file is copied to a safe
-; * place before proceeding with its modification.
-; ***************************************************************
-
-[Visualizer]
-
-; Fixed size 4-vectors
-Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{
- children
- (
- #(
- [internals]: [$c,!],
- x : ($c.m_storage.m_data.array)[0],
- y : ($c.m_storage.m_data.array)[1],
- z : ($c.m_storage.m_data.array)[2],
- w : ($c.m_storage.m_data.array)[3]
- )
- )
-
- preview
- (
- #(
- "[",
- 4,
- "](",
- #array(expr: $e.m_storage.m_data.array[$i], size: 4),
- ")"
- )
- )
-}
-
-; Fixed size 3-vectors
-Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{
- children
- (
- #(
- [internals]: [$c,!],
- x : ($c.m_storage.m_data.array)[0],
- y : ($c.m_storage.m_data.array)[1],
- z : ($c.m_storage.m_data.array)[2]
- )
- )
-
- preview
- (
- #(
- "[",
- 3,
- "](",
- #array(expr: $e.m_storage.m_data.array[$i], size: 3),
- ")"
- )
- )
-}
-
-; Fixed size 2-vectors
-Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{
- children
- (
- #(
- [internals]: [$c,!],
- x : ($c.m_storage.m_data.array)[0],
- y : ($c.m_storage.m_data.array)[1]
- )
- )
-
- preview
- (
- #(
- "[",
- 2,
- "](",
- #array(expr: $e.m_storage.m_data.array[$i], size: 2),
- ")"
- )
- )
-}
-
-; Fixed size 1-vectors
-Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{
- children
- (
- #(
- [internals]: [$c,!],
- x : ($c.m_storage.m_data.array)[0]
- )
- )
-
- preview
- (
- #(
- "[",
- 1,
- "](",
- #array(expr: $e.m_storage.m_data.array[$i], size: 1),
- ")"
- )
- )
-}
-
-; Dynamic matrices (ColMajor and RowMajor support)
-Eigen::Matrix<*,-1,-1,*,*,*>{
- children
- (
- #(
- [internals]: [$c,!],
- rows: $c.m_storage.m_rows,
- cols: $c.m_storage.m_cols,
- ; Check for RowMajorBit
- #if ($c.Flags & 0x1) (
- #array(
- rank: 2,
- base: 0,
- expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)],
- size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
- )
- ) #else (
- #array(
- rank: 2,
- base: 0,
- expr: ($c.m_storage.m_data)[$i],
- size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
- )
- )
- )
- )
-
- preview
- (
- #(
- "[",
- $c.m_storage.m_rows,
- ",",
- $c.m_storage.m_cols,
- "](",
- #array(
- expr : [($c.m_storage.m_data)[$i],g],
- size : $c.m_storage.m_rows*$c.m_storage.m_cols
- ),
- ")"
- )
- )
-}
-
-; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support)
-Eigen::Matrix<*,*,-1,*,*,*>{
- children
- (
- #(
- [internals]: [$c,!],
- rows: $c.RowsAtCompileTime,
- cols: $c.m_storage.m_cols,
- ; Check for RowMajorBit
- #if ($c.Flags & 0x1) (
- #array(
- rank: 2,
- base: 0,
- expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)],
- size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
- )
- ) #else (
- #array(
- rank: 2,
- base: 0,
- expr: ($c.m_storage.m_data)[$i],
- size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
- )
- )
- )
- )
-
- preview
- (
- #(
- "[",
- $c.RowsAtCompileTime,
- ",",
- $c.m_storage.m_cols,
- "](",
- #array(
- expr : [($c.m_storage.m_data)[$i],g],
- size : $c.RowsAtCompileTime*$c.m_storage.m_cols
- ),
- ")"
- )
- )
-}
-
-; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support)
-Eigen::Matrix<*,-1,*,*,*,*>{
- children
- (
- #(
- [internals]: [$c,!],
- rows: $c.m_storage.m_rows,
- cols: $c.ColsAtCompileTime,
- ; Check for RowMajorBit
- #if ($c.Flags & 0x1) (
- #array(
- rank: 2,
- base: 0,
- expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)],
- size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
- )
- ) #else (
- #array(
- rank: 2,
- base: 0,
- expr: ($c.m_storage.m_data)[$i],
- size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
- )
- )
- )
- )
-
- preview
- (
- #(
- "[",
- $c.m_storage.m_rows,
- ",",
- $c.ColsAtCompileTime,
- "](",
- #array(
- expr : [($c.m_storage.m_data)[$i],g],
- size : $c.m_storage.m_rows*$c.ColsAtCompileTime
- ),
- ")"
- )
- )
-}
-
-; Fixed size matrix (ColMajor and RowMajor support)
-Eigen::Matrix<*,*,*,*,*,*>{
- children
- (
- #(
- [internals]: [$c,!],
- rows: $c.RowsAtCompileTime,
- cols: $c.ColsAtCompileTime,
- ; Check for RowMajorBit
- #if ($c.Flags & 0x1) (
- #array(
- rank: 2,
- base: 0,
- expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)],
- size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
- )
- ) #else (
- #array(
- rank: 2,
- base: 0,
- expr: ($c.m_storage.m_data.array)[$i],
- size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
- )
- )
- )
- )
-
- preview
- (
- #(
- "[",
- $c.RowsAtCompileTime,
- ",",
- $c.ColsAtCompileTime,
- "](",
- #array(
- expr : [($c.m_storage.m_data.array)[$i],g],
- size : $c.RowsAtCompileTime*$c.ColsAtCompileTime
- ),
- ")"
- )
- )
-}
+; ***************************************************************
+; * Eigen Visualizer
+; *
+; * Author: Hauke Heibel <hauke.heibel@gmail.com>
+; *
+; * Support the enhanced debugging of the following Eigen
+; * types (*: any, +:fixed dimension) :
+; *
+; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*>
+; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*>
+; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*>
+; * - Eigen::Matrix<*,-1,-1,*,*,*>
+; * - Eigen::Matrix<*,+,-1,*,*,*>
+; * - Eigen::Matrix<*,-1,+,*,*,*>
+; * - Eigen::Matrix<*,+,+,*,*,*>
+; *
+; * Matrices are displayed properly independently of the memory
+; * alignment (RowMajor vs. ColMajor).
+; *
+; * This file is distributed WITHOUT ANY WARRANTY. Please ensure
+; * that your original autoexp.dat file is copied to a safe
+; * place before proceeding with its modification.
+; ***************************************************************
+
+[Visualizer]
+
+; Fixed size 4-vectors
+Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{
+ children
+ (
+ #(
+ [internals]: [$c,!],
+ x : ($c.m_storage.m_data.array)[0],
+ y : ($c.m_storage.m_data.array)[1],
+ z : ($c.m_storage.m_data.array)[2],
+ w : ($c.m_storage.m_data.array)[3]
+ )
+ )
+
+ preview
+ (
+ #(
+ "[",
+ 4,
+ "](",
+ #array(expr: $e.m_storage.m_data.array[$i], size: 4),
+ ")"
+ )
+ )
+}
+
+; Fixed size 3-vectors
+Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{
+ children
+ (
+ #(
+ [internals]: [$c,!],
+ x : ($c.m_storage.m_data.array)[0],
+ y : ($c.m_storage.m_data.array)[1],
+ z : ($c.m_storage.m_data.array)[2]
+ )
+ )
+
+ preview
+ (
+ #(
+ "[",
+ 3,
+ "](",
+ #array(expr: $e.m_storage.m_data.array[$i], size: 3),
+ ")"
+ )
+ )
+}
+
+; Fixed size 2-vectors
+Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{
+ children
+ (
+ #(
+ [internals]: [$c,!],
+ x : ($c.m_storage.m_data.array)[0],
+ y : ($c.m_storage.m_data.array)[1]
+ )
+ )
+
+ preview
+ (
+ #(
+ "[",
+ 2,
+ "](",
+ #array(expr: $e.m_storage.m_data.array[$i], size: 2),
+ ")"
+ )
+ )
+}
+
+; Fixed size 1-vectors
+Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{
+ children
+ (
+ #(
+ [internals]: [$c,!],
+ x : ($c.m_storage.m_data.array)[0]
+ )
+ )
+
+ preview
+ (
+ #(
+ "[",
+ 1,
+ "](",
+ #array(expr: $e.m_storage.m_data.array[$i], size: 1),
+ ")"
+ )
+ )
+}
+
+; Dynamic matrices (ColMajor and RowMajor support)
+Eigen::Matrix<*,-1,-1,*,*,*>{
+ children
+ (
+ #(
+ [internals]: [$c,!],
+ rows: $c.m_storage.m_rows,
+ cols: $c.m_storage.m_cols,
+ ; Check for RowMajorBit
+ #if ($c.Flags & 0x1) (
+ #array(
+ rank: 2,
+ base: 0,
+ expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)],
+ size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
+ )
+ ) #else (
+ #array(
+ rank: 2,
+ base: 0,
+ expr: ($c.m_storage.m_data)[$i],
+ size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
+ )
+ )
+ )
+ )
+
+ preview
+ (
+ #(
+ "[",
+ $c.m_storage.m_rows,
+ ",",
+ $c.m_storage.m_cols,
+ "](",
+ #array(
+ expr : [($c.m_storage.m_data)[$i],g],
+ size : $c.m_storage.m_rows*$c.m_storage.m_cols
+ ),
+ ")"
+ )
+ )
+}
+
+; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support)
+Eigen::Matrix<*,*,-1,*,*,*>{
+ children
+ (
+ #(
+ [internals]: [$c,!],
+ rows: $c.RowsAtCompileTime,
+ cols: $c.m_storage.m_cols,
+ ; Check for RowMajorBit
+ #if ($c.Flags & 0x1) (
+ #array(
+ rank: 2,
+ base: 0,
+ expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)],
+ size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
+ )
+ ) #else (
+ #array(
+ rank: 2,
+ base: 0,
+ expr: ($c.m_storage.m_data)[$i],
+ size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
+ )
+ )
+ )
+ )
+
+ preview
+ (
+ #(
+ "[",
+ $c.RowsAtCompileTime,
+ ",",
+ $c.m_storage.m_cols,
+ "](",
+ #array(
+ expr : [($c.m_storage.m_data)[$i],g],
+ size : $c.RowsAtCompileTime*$c.m_storage.m_cols
+ ),
+ ")"
+ )
+ )
+}
+
+; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support)
+Eigen::Matrix<*,-1,*,*,*,*>{
+ children
+ (
+ #(
+ [internals]: [$c,!],
+ rows: $c.m_storage.m_rows,
+ cols: $c.ColsAtCompileTime,
+ ; Check for RowMajorBit
+ #if ($c.Flags & 0x1) (
+ #array(
+ rank: 2,
+ base: 0,
+ expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)],
+ size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
+ )
+ ) #else (
+ #array(
+ rank: 2,
+ base: 0,
+ expr: ($c.m_storage.m_data)[$i],
+ size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
+ )
+ )
+ )
+ )
+
+ preview
+ (
+ #(
+ "[",
+ $c.m_storage.m_rows,
+ ",",
+ $c.ColsAtCompileTime,
+ "](",
+ #array(
+ expr : [($c.m_storage.m_data)[$i],g],
+ size : $c.m_storage.m_rows*$c.ColsAtCompileTime
+ ),
+ ")"
+ )
+ )
+}
+
+; Fixed size matrix (ColMajor and RowMajor support)
+Eigen::Matrix<*,*,*,*,*,*>{
+ children
+ (
+ #(
+ [internals]: [$c,!],
+ rows: $c.RowsAtCompileTime,
+ cols: $c.ColsAtCompileTime,
+ ; Check for RowMajorBit
+ #if ($c.Flags & 0x1) (
+ #array(
+ rank: 2,
+ base: 0,
+ expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)],
+ size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
+ )
+ ) #else (
+ #array(
+ rank: 2,
+ base: 0,
+ expr: ($c.m_storage.m_data.array)[$i],
+ size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
+ )
+ )
+ )
+ )
+
+ preview
+ (
+ #(
+ "[",
+ $c.RowsAtCompileTime,
+ ",",
+ $c.ColsAtCompileTime,
+ "](",
+ #array(
+ expr : [($c.m_storage.m_data.array)[$i],g],
+ size : $c.RowsAtCompileTime*$c.ColsAtCompileTime
+ ),
+ ")"
+ )
+ )
+}
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
index 439336c..0e582a7 100644
--- a/doc/Doxyfile.in
+++ b/doc/Doxyfile.in
@@ -112,11 +112,13 @@
DISABLE_INDEX = YES
FULL_SIDEBAR = NO
ENUM_VALUES_PER_LINE = 1
-FORMULA_FONTSIZE = 12
+USE_MATHJAX = @EIGEN_DOXY_USE_MATHJAX@
MATHJAX_RELPATH = https://cdn.jsdelivr.net/npm/mathjax@2
MATHJAX_EXTENSIONS = TeX/AMSmath \
TeX/AMSsymbols
GENERATE_LATEX = NO
+EXTRA_PACKAGES = amssymb \
+ amsmath
MACRO_EXPANSION = YES
EXPAND_ONLY_PREDEF = YES
PREDEFINED = EIGEN_EMPTY_STRUCT \
@@ -160,6 +162,7 @@
TAGFILES = ${EIGEN_DOXY_TAGFILES}
GENERATE_TAGFILE = ${Eigen_BINARY_DIR}/doc/${EIGEN_DOXY_PROJECT_NAME}.doxytags
EXTERNAL_GROUPS = NO
+EXTERNAL_PAGES = NO
HIDE_UNDOC_RELATIONS = NO
HAVE_DOT = YES
COLLABORATION_GRAPH = NO
@@ -172,3 +175,4 @@
DOT_GRAPH_MAX_NODES = 300
GENERATE_DEPRECATEDLIST = NO
GENERATE_TODOLIST = NO
+WARN_AS_ERROR = FAIL_ON_WARNINGS_PRINT
diff --git a/test/CustomComplex.h b/test/CustomComplex.h
new file mode 100644
index 0000000..048f65b
--- /dev/null
+++ b/test/CustomComplex.h
@@ -0,0 +1,132 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2025 The Eigen Authors.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TEST_CUSTOM_COMPLEX_H
+#define EIGEN_TEST_CUSTOM_COMPLEX_H
+
+#include <ostream>
+#include <sstream>
+
+namespace custom_complex {
+
+template <typename Real>
+struct CustomComplex {
+ CustomComplex() : re{0}, im{0} {}
+ CustomComplex(const CustomComplex& other) = default;
+ CustomComplex(CustomComplex&& other) = default;
+ CustomComplex& operator=(const CustomComplex& other) = default;
+ CustomComplex& operator=(CustomComplex&& other) = default;
+ CustomComplex(Real x) : re{x}, im{0} {}
+ CustomComplex(Real x, Real y) : re{x}, im{y} {}
+
+ CustomComplex operator+(const CustomComplex& other) const { return CustomComplex(re + other.re, im + other.im); }
+
+ CustomComplex operator-() const { return CustomComplex(-re, -im); }
+
+ CustomComplex operator-(const CustomComplex& other) const { return CustomComplex(re - other.re, im - other.im); }
+
+ CustomComplex operator*(const CustomComplex& other) const {
+ return CustomComplex(re * other.re - im * other.im, re * other.im + im * other.re);
+ }
+
+ CustomComplex operator/(const CustomComplex& other) const {
+ // Smith's complex division (https://arxiv.org/pdf/1210.4539.pdf),
+ // guards against over/under-flow.
+ const bool scale_imag = numext::abs(other.im) <= numext::abs(other.re);
+ const Real rscale = scale_imag ? Real(1) : other.re / other.im;
+ const Real iscale = scale_imag ? other.im / other.re : Real(1);
+ const Real denominator = other.re * rscale + other.im * iscale;
+ return CustomComplex((re * rscale + im * iscale) / denominator, (im * rscale - re * iscale) / denominator);
+ }
+
+ CustomComplex& operator+=(const CustomComplex& other) {
+ *this = *this + other;
+ return *this;
+ }
+ CustomComplex& operator-=(const CustomComplex& other) {
+ *this = *this - other;
+ return *this;
+ }
+ CustomComplex& operator*=(const CustomComplex& other) {
+ *this = *this * other;
+ return *this;
+ }
+ CustomComplex& operator/=(const CustomComplex& other) {
+ *this = *this / other;
+ return *this;
+ }
+
+ bool operator==(const CustomComplex& other) const {
+ return numext::equal_strict(re, other.re) && numext::equal_strict(im, other.im);
+ }
+ bool operator!=(const CustomComplex& other) const { return !(*this == other); }
+
+ friend CustomComplex operator+(const Real& a, const CustomComplex& b) { return CustomComplex(a + b.re, b.im); }
+
+ friend CustomComplex operator-(const Real& a, const CustomComplex& b) { return CustomComplex(a - b.re, -b.im); }
+
+ friend CustomComplex operator*(const Real& a, const CustomComplex& b) { return CustomComplex(a * b.re, a * b.im); }
+
+ friend CustomComplex operator*(const CustomComplex& a, const Real& b) { return CustomComplex(a.re * b, a.im * b); }
+
+ friend CustomComplex operator/(const CustomComplex& a, const Real& b) { return CustomComplex(a.re / b, a.im / b); }
+
+ friend std::ostream& operator<<(std::ostream& stream, const CustomComplex& x) {
+ std::stringstream ss;
+ ss << "(" << x.re << ", " << x.im << ")";
+ stream << ss.str();
+ return stream;
+ }
+
+ Real re;
+ Real im;
+};
+
+template <typename Real>
+Real real(const CustomComplex<Real>& x) {
+ return x.re;
+}
+template <typename Real>
+Real imag(const CustomComplex<Real>& x) {
+ return x.im;
+}
+template <typename Real>
+CustomComplex<Real> conj(const CustomComplex<Real>& x) {
+ return CustomComplex<Real>(x.re, -x.im);
+}
+template <typename Real>
+CustomComplex<Real> sqrt(const CustomComplex<Real>& x) {
+ return Eigen::internal::complex_sqrt(x);
+}
+template <typename Real>
+Real abs(const CustomComplex<Real>& x) {
+ return Eigen::numext::sqrt(x.re * x.re + x.im * x.im);
+}
+
+} // namespace custom_complex
+
+template <typename Real>
+using CustomComplex = custom_complex::CustomComplex<Real>;
+
+namespace Eigen {
+template <typename Real>
+struct NumTraits<CustomComplex<Real>> : NumTraits<Real> {
+ enum { IsComplex = 1 };
+};
+
+namespace numext {
+template <typename Real>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isfinite)(const CustomComplex<Real>& x) {
+ return (numext::isfinite)(x.re) && (numext::isfinite)(x.im);
+}
+
+} // namespace numext
+} // namespace Eigen
+
+#endif // EIGEN_TEST_CUSTOM_COMPLEX_H
diff --git a/test/bicgstab.cpp b/test/bicgstab.cpp
index 3f53e3e..7ff2f3d 100644
--- a/test/bicgstab.cpp
+++ b/test/bicgstab.cpp
@@ -26,8 +26,63 @@
// CALL_SUBTEST( check_sparse_square_solving(bicgstab_colmajor_ssor) );
}
+// https://gitlab.com/libeigen/eigen/-/issues/2856
+void test_2856() {
+ Eigen::MatrixXd D = Eigen::MatrixXd::Identity(14, 14);
+ D(6, 13) = 1;
+ D(13, 12) = 1;
+ using CSRMatrix = Eigen::SparseMatrix<double, Eigen::RowMajor>;
+ CSRMatrix A = D.sparseView();
+
+ Eigen::VectorXd b = Eigen::VectorXd::Zero(14);
+ b(12) = -1001;
+
+ Eigen::BiCGSTAB<CSRMatrix> solver;
+ solver.compute(A);
+ Eigen::VectorXd x = solver.solve(b);
+ Eigen::VectorXd expected = Eigen::VectorXd::Zero(14);
+ expected(6) = -1001;
+ expected(12) = -1001;
+ expected(13) = 1001;
+ VERIFY_IS_EQUAL(x, expected);
+
+ Eigen::VectorXd residual = b - A * x;
+ VERIFY(residual.isZero());
+}
+
+// https://gitlab.com/libeigen/eigen/-/issues/2899
+void test_2899() {
+ Eigen::MatrixXd A(4, 4);
+ A(0, 0) = 1;
+ A(1, 0) = -1.0 / 6;
+ A(1, 1) = 2.0 / 3;
+ A(1, 2) = -1.0 / 6;
+ A(1, 3) = -1.0 / 3;
+ A(2, 1) = -1.0 / 3;
+ A(2, 2) = 1;
+ A(2, 3) = -2.0 / 3;
+ A(3, 1) = -1.0 / 3;
+ A(3, 2) = -1.0 / 3;
+ A(3, 3) = 2.0 / 3;
+ Eigen::VectorXd b(4);
+ b(0) = 0;
+ b(1) = 1;
+ b(2) = 1;
+ b(3) = 1;
+ Eigen::BiCGSTAB<Eigen::MatrixXd> solver;
+ solver.compute(A);
+ Eigen::VectorXd x = solver.solve(b);
+ Eigen::VectorXd expected(4);
+ expected << 0, 15, 18, 18;
+ VERIFY_IS_APPROX(x, expected);
+ Eigen::VectorXd residual = b - A * x;
+ VERIFY(residual.isZero());
+}
+
EIGEN_DECLARE_TEST(bicgstab) {
CALL_SUBTEST_1((test_bicgstab_T<double, int>()));
CALL_SUBTEST_2((test_bicgstab_T<std::complex<double>, int>()));
CALL_SUBTEST_3((test_bicgstab_T<double, long int>()));
+ CALL_SUBTEST_4(test_2856());
+ CALL_SUBTEST_5(test_2899());
}
diff --git a/test/eigensolver_complex.cpp b/test/eigensolver_complex.cpp
index afb24b9..76846a9 100644
--- a/test/eigensolver_complex.cpp
+++ b/test/eigensolver_complex.cpp
@@ -12,6 +12,7 @@
#include <limits>
#include <Eigen/Eigenvalues>
#include <Eigen/LU>
+#include "CustomComplex.h"
template <typename MatrixType>
bool find_pivot(typename MatrixType::Scalar tol, MatrixType& diffs, Index col = 0) {
@@ -165,5 +166,8 @@
// Test problem size constructors
CALL_SUBTEST_5(ComplexEigenSolver<MatrixXf> tmp(s));
+ // Test custom complex scalar type.
+ CALL_SUBTEST_6(eigensolver(Matrix<CustomComplex<double>, 5, 5>()));
+
TEST_SET_BUT_UNUSED_VARIABLE(s)
}
diff --git a/test/lu.cpp b/test/lu.cpp
index 1792c2b..b20bcfc 100644
--- a/test/lu.cpp
+++ b/test/lu.cpp
@@ -195,6 +195,29 @@
VERIFY_RAISES_ASSERT(plu.inverse())
}
+// Rank-deficient matrix returns 0.
+// https://gitlab.com/libeigen/eigen/-/issues/2889
+void test_2889() {
+ Eigen::MatrixXd A =
+ Eigen::MatrixXd({{0.0000000000000000, 0.0000000000000000, 1.0000000000000000, 0.0000000000000000,
+ 0.34149999916553497, 0.0000000000000000, 0.79877008515664061},
+ {0.0000000000000000, 1.0000000000000000, 0.0000000000000000, 0.29200000315904617,
+ 0.0000000000000000, -0.37149999849498272, -0.16425902650844920},
+ {0.0000000000000000, 0.0000000000000000, 1.0000000000000000, 0.0000000000000000,
+ 0.34149999916553497, 0.0000000000000000, 0.79877008515664061},
+ {0.0000000000000000, 1.0000000000000000, 0.0000000000000000, 0.040500000119209290,
+ 0.0000000000000000, -0.30099999904632568, -0.081170580429391403},
+ {1.0000000000000000, 0.0000000000000000, 0.0000000000000000, 0.0000000000000000,
+ 0.0000000000000000, 0.0000000000000000, -0.0000000000000000},
+ {0.0000000000000000, 0.70710689672598170, 0.70710666564709435, 0.027000000700354562,
+ 0.025455838867477515, -0.025455847186317101, -0.0068972271572272821},
+ {1.0000000000000000, 0.0000000000000000, 0.0000000000000000, 0.0000000000000000,
+ 0.0000000000000000, 0.0000000000000000, -0.0000000000000000}});
+ Eigen::FullPivLU<Eigen::MatrixXd> lu_factorization(A);
+ double rcond = lu_factorization.rcond();
+ VERIFY_IS_EQUAL(rcond, 0.0);
+}
+
EIGEN_DECLARE_TEST(lu) {
for (int i = 0; i < g_repeat; i++) {
CALL_SUBTEST_1(lu_non_invertible<Matrix3f>());
@@ -229,7 +252,9 @@
CALL_SUBTEST_7((lu_non_invertible<Matrix<float, Dynamic, 16> >()));
// Test problem size constructors
- CALL_SUBTEST_9(PartialPivLU<MatrixXf>(10));
- CALL_SUBTEST_9(FullPivLU<MatrixXf>(10, 20););
+ CALL_SUBTEST_8(PartialPivLU<MatrixXf>(10));
+ CALL_SUBTEST_8(FullPivLU<MatrixXf>(10, 20););
+
+ CALL_SUBTEST_9(test_2889());
}
}
diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp
index dc1a5c7..724fa40 100644
--- a/test/vectorization_logic.cpp
+++ b/test/vectorization_logic.cpp
@@ -251,6 +251,24 @@
(Matrix1::Flags & RowMajorBit) ? PacketSize : 2 > (1, 2), DefaultTraversal, CompleteUnrolling));
}
+ // the actual packet type used by the assignment evaluator is not necessarily PacketType for small fixed-size arrays
+ if (internal::unpacket_traits<typename internal::find_best_packet<Scalar, 2>::type>::size > 2) {
+ // the expression should not be vectorized if the size is too small
+ using Vector2 = Matrix<Scalar, 2, 1, ColMajor>;
+ using VectorMax3 = Matrix<Scalar, Dynamic, 1, ColMajor, 3, 1>;
+ VERIFY(test_assign(Vector2(), Vector2(), LinearTraversal, InnerUnrolling + CompleteUnrolling));
+ VERIFY(test_assign(VectorMax3(), Vector2(), LinearTraversal, InnerUnrolling + CompleteUnrolling));
+ VERIFY(test_assign(Vector2(), VectorMax3(), LinearTraversal, InnerUnrolling + CompleteUnrolling));
+ VERIFY(test_assign(VectorMax3(), VectorMax3(), LinearTraversal, NoUnrolling));
+ }
+
+ if (PacketSize > 1 && PacketSize < 8) {
+ // the size of the expression should be deduced at compile time by considering both the lhs and rhs
+ using Lhs = Matrix<Scalar, 7, Dynamic, ColMajor>;
+ using Rhs = Matrix<Scalar, Dynamic, 7, ColMajor>;
+ VERIFY(test_assign(Lhs(), Rhs(), -1, InnerUnrolling + CompleteUnrolling));
+ }
+
VERIFY(
test_redux(Matrix44c().template block<2 * PacketSize, 1>(1, 2), LinearVectorizedTraversal, CompleteUnrolling));
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index 2a59530..aad1647 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -27,6 +27,10 @@
static constexpr int NumDimensions = XprTraits::NumDimensions;
static constexpr int Layout = XprTraits::Layout;
typedef typename XprTraits::PointerType PointerType;
+ enum {
+ // Broadcast is read-only.
+ Flags = traits<XprType>::Flags & ~LvalueBit
+ };
};
template <typename Broadcast, typename XprType>
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
index 7521559..b9d6f37 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
@@ -15,7 +15,7 @@
namespace Eigen {
-template <bool NeedUprade>
+template <bool IsReal>
struct MakeComplex {
template <typename T>
EIGEN_DEVICE_FUNC T operator()(const T& val) const {
@@ -26,16 +26,8 @@
template <>
struct MakeComplex<true> {
template <typename T>
- EIGEN_DEVICE_FUNC std::complex<T> operator()(const T& val) const {
- return std::complex<T>(val, 0);
- }
-};
-
-template <>
-struct MakeComplex<false> {
- template <typename T>
- EIGEN_DEVICE_FUNC std::complex<T> operator()(const std::complex<T>& val) const {
- return val;
+ EIGEN_DEVICE_FUNC internal::make_complex_t<T> operator()(const T& val) const {
+ return internal::make_complex_t<T>(val, T(0));
}
};
@@ -49,17 +41,17 @@
template <>
struct PartOf<RealPart> {
- template <typename T>
- T operator()(const std::complex<T>& val) const {
- return val.real();
+ template <typename T, typename EnableIf = std::enable_if_t<NumTraits<T>::IsComplex>>
+ typename NumTraits<T>::Real operator()(const T& val) const {
+ return Eigen::numext::real(val);
}
};
template <>
struct PartOf<ImagPart> {
- template <typename T>
- T operator()(const std::complex<T>& val) const {
- return val.imag();
+ template <typename T, typename EnableIf = std::enable_if_t<NumTraits<T>::IsComplex>>
+ typename NumTraits<T>::Real operator()(const T& val) const {
+ return Eigen::numext::imag(val);
}
};
@@ -67,8 +59,9 @@
template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
struct traits<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir> > : public traits<XprType> {
typedef traits<XprType> XprTraits;
- typedef typename NumTraits<typename XprTraits::Scalar>::Real RealScalar;
- typedef typename std::complex<RealScalar> ComplexScalar;
+ typedef typename XprTraits::Scalar Scalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ typedef make_complex_t<Scalar> ComplexScalar;
typedef typename XprTraits::Scalar InputScalar;
typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>
OutputScalar;
@@ -109,7 +102,7 @@
public:
typedef typename Eigen::internal::traits<TensorFFTOp>::Scalar Scalar;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename std::complex<RealScalar> ComplexScalar;
+ typedef internal::make_complex_t<Scalar> ComplexScalar;
typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>
OutputScalar;
typedef OutputScalar CoeffReturnType;
@@ -137,7 +130,7 @@
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename std::complex<RealScalar> ComplexScalar;
+ typedef internal::make_complex_t<Scalar> ComplexScalar;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
typedef internal::traits<XprType> XprTraits;
typedef typename XprTraits::Scalar InputScalar;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h
index 7061f51..e12923d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h
@@ -52,7 +52,13 @@
typedef TensorEvaluator<Expr, Device> EvalType;
TensorLazyEvaluatorReadOnly(const Expr& expr, const Device& device) : m_impl(expr, device), m_dummy(Scalar(0)) {
- m_dims = m_impl.dimensions();
+ EIGEN_STATIC_ASSERT(
+ internal::array_size<Dimensions>::value == internal::array_size<typename EvalType::Dimensions>::value,
+ "Dimension sizes must match.");
+ const auto& other_dims = m_impl.dimensions();
+ for (std::size_t i = 0; i < m_dims.size(); ++i) {
+ m_dims[i] = other_dims[i];
+ }
m_impl.evalSubExprsIfNeeded(NULL);
}
virtual ~TensorLazyEvaluatorReadOnly() { m_impl.cleanup(); }
@@ -86,14 +92,12 @@
EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex index) { return this->m_impl.coeffRef(index); }
};
-template <typename Dimensions, typename Expr, typename Device>
-class TensorLazyEvaluator : public std::conditional_t<bool(internal::is_lvalue<Expr>::value),
- TensorLazyEvaluatorWritable<Dimensions, Expr, Device>,
- TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> > {
+template <typename Dimensions, typename Expr, typename Device, bool IsWritable>
+class TensorLazyEvaluator : public std::conditional_t<IsWritable, TensorLazyEvaluatorWritable<Dimensions, Expr, Device>,
+ TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device>> {
public:
- typedef std::conditional_t<bool(internal::is_lvalue<Expr>::value),
- TensorLazyEvaluatorWritable<Dimensions, Expr, Device>,
- TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> >
+ typedef std::conditional_t<IsWritable, TensorLazyEvaluatorWritable<Dimensions, Expr, Device>,
+ TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device>>
Base;
typedef typename Base::Scalar Scalar;
@@ -101,24 +105,15 @@
virtual ~TensorLazyEvaluator() {}
};
-} // namespace internal
-
-/** \class TensorRef
- * \ingroup CXX11_Tensor_Module
- *
- * \brief A reference to a tensor expression
- * The expression will be evaluated lazily (as much as possible).
- *
- */
-template <typename PlainObjectType>
-class TensorRef : public TensorBase<TensorRef<PlainObjectType> > {
+template <typename Derived>
+class TensorRefBase : public TensorBase<Derived> {
public:
- typedef TensorRef<PlainObjectType> Self;
+ typedef typename traits<Derived>::PlainObjectType PlainObjectType;
typedef typename PlainObjectType::Base Base;
- typedef typename Eigen::internal::nested<Self>::type Nested;
- typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind;
- typedef typename internal::traits<PlainObjectType>::Index Index;
- typedef typename internal::traits<PlainObjectType>::Scalar Scalar;
+ typedef typename Eigen::internal::nested<Derived>::type Nested;
+ typedef typename traits<PlainObjectType>::StorageKind StorageKind;
+ typedef typename traits<PlainObjectType>::Index Index;
+ typedef typename traits<PlainObjectType>::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef typename Base::CoeffReturnType CoeffReturnType;
typedef Scalar* PointerType;
@@ -138,33 +133,17 @@
};
//===- Tensor block evaluation strategy (see TensorBlock.h) -----------===//
- typedef internal::TensorBlockNotImplemented TensorBlock;
+ typedef TensorBlockNotImplemented TensorBlock;
//===------------------------------------------------------------------===//
- EIGEN_STRONG_INLINE TensorRef() : m_evaluator(NULL) {}
+ EIGEN_STRONG_INLINE TensorRefBase() : m_evaluator(NULL) {}
- template <typename Expression>
- EIGEN_STRONG_INLINE TensorRef(const Expression& expr)
- : m_evaluator(new internal::TensorLazyEvaluator<Dimensions, Expression, DefaultDevice>(expr, DefaultDevice())) {
- m_evaluator->incrRefCount();
- }
-
- template <typename Expression>
- EIGEN_STRONG_INLINE TensorRef& operator=(const Expression& expr) {
- unrefEvaluator();
- m_evaluator = new internal::TensorLazyEvaluator<Dimensions, Expression, DefaultDevice>(expr, DefaultDevice());
- m_evaluator->incrRefCount();
- return *this;
- }
-
- ~TensorRef() { unrefEvaluator(); }
-
- TensorRef(const TensorRef& other) : TensorBase<TensorRef<PlainObjectType> >(other), m_evaluator(other.m_evaluator) {
+ TensorRefBase(const TensorRefBase& other) : TensorBase<Derived>(other), m_evaluator(other.m_evaluator) {
eigen_assert(m_evaluator->refCount() > 0);
m_evaluator->incrRefCount();
}
- TensorRef& operator=(const TensorRef& other) {
+ TensorRefBase& operator=(const TensorRefBase& other) {
if (this != &other) {
unrefEvaluator();
m_evaluator = other.m_evaluator;
@@ -174,6 +153,28 @@
return *this;
}
+ template <typename Expression,
+ typename EnableIf = std::enable_if_t<!std::is_same<std::decay_t<Expression>, Derived>::value>>
+ EIGEN_STRONG_INLINE TensorRefBase(const Expression& expr)
+ : m_evaluator(new TensorLazyEvaluator<Dimensions, Expression, DefaultDevice,
+ /*IsWritable=*/!std::is_const<PlainObjectType>::value &&
+ bool(is_lvalue<Expression>::value)>(expr, DefaultDevice())) {
+ m_evaluator->incrRefCount();
+ }
+
+ template <typename Expression,
+ typename EnableIf = std::enable_if_t<!std::is_same<std::decay_t<Expression>, Derived>::value>>
+ EIGEN_STRONG_INLINE TensorRefBase& operator=(const Expression& expr) {
+ unrefEvaluator();
+ m_evaluator = new TensorLazyEvaluator < Dimensions, Expression, DefaultDevice,
+ /*IsWritable=*/!std::is_const<PlainObjectType>::value&& bool(is_lvalue<Expression>::value) >
+ (expr, DefaultDevice());
+ m_evaluator->incrRefCount();
+ return *this;
+ }
+
+ ~TensorRefBase() { unrefEvaluator(); }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return m_evaluator->dimensions().size(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(Index n) const { return m_evaluator->dimensions()[n]; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_evaluator->dimensions(); }
@@ -188,12 +189,6 @@
const array<Index, num_indices> indices{{firstIndex, otherIndices...}};
return coeff(indices);
}
- template <typename... IndexTypes>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) {
- const std::size_t num_indices = (sizeof...(otherIndices) + 1);
- const array<Index, num_indices> indices{{firstIndex, otherIndices...}};
- return coeffRef(indices);
- }
template <std::size_t NumIndices>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(const array<Index, NumIndices>& indices) const {
@@ -212,6 +207,70 @@
}
return m_evaluator->coeff(index);
}
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const { return m_evaluator->coeff(index); }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_evaluator->coeffRef(index); }
+
+ protected:
+ TensorLazyBaseEvaluator<Dimensions, Scalar>* evaluator() { return m_evaluator; }
+
+ private:
+ EIGEN_STRONG_INLINE void unrefEvaluator() {
+ if (m_evaluator) {
+ m_evaluator->decrRefCount();
+ if (m_evaluator->refCount() == 0) {
+ delete m_evaluator;
+ }
+ }
+ }
+
+ TensorLazyBaseEvaluator<Dimensions, Scalar>* m_evaluator;
+};
+
+} // namespace internal
+
+/**
+ * \ingroup CXX11_Tensor_Module
+ *
+ * \brief A reference to a tensor expression
+ * The expression will be evaluated lazily (as much as possible).
+ *
+ */
+template <typename PlainObjectType>
+class TensorRef : public internal::TensorRefBase<TensorRef<PlainObjectType>> {
+ typedef internal::TensorRefBase<TensorRef<PlainObjectType>> Base;
+
+ public:
+ using Scalar = typename Base::Scalar;
+ using Dimensions = typename Base::Dimensions;
+
+ EIGEN_STRONG_INLINE TensorRef() : Base() {}
+
+ template <typename Expression>
+ EIGEN_STRONG_INLINE TensorRef(const Expression& expr) : Base(expr) {
+ EIGEN_STATIC_ASSERT(internal::is_lvalue<Expression>::value,
+ "Expression must be mutable to create a mutable TensorRef<Expression>. Did you mean "
+ "TensorRef<const Expression>?)");
+ }
+
+ template <typename Expression>
+ EIGEN_STRONG_INLINE TensorRef& operator=(const Expression& expr) {
+ EIGEN_STATIC_ASSERT(internal::is_lvalue<Expression>::value,
+ "Expression must be mutable to create a mutable TensorRef<Expression>. Did you mean "
+ "TensorRef<const Expression>?)");
+ return Base::operator=(expr).derived();
+ }
+
+ TensorRef& operator=(const TensorRef& other) { return Base::operator=(other).derived(); }
+
+ template <typename... IndexTypes>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) {
+ const std::size_t num_indices = (sizeof...(otherIndices) + 1);
+ const array<Index, num_indices> indices{{firstIndex, otherIndices...}};
+ return coeffRef(indices);
+ }
+
template <std::size_t NumIndices>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices) {
const Dimensions& dims = this->dimensions();
@@ -227,24 +286,37 @@
index = index * dims[i] + indices[i];
}
}
- return m_evaluator->coeffRef(index);
+ return Base::evaluator()->coeffRef(index);
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const { return m_evaluator->coeff(index); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return Base::evaluator()->coeffRef(index); }
+};
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_evaluator->coeffRef(index); }
+/**
+ * \ingroup CXX11_Tensor_Module
+ *
+ * \brief A reference to a constant tensor expression
+ * The expression will be evaluated lazily (as much as possible).
+ *
+ */
+template <typename PlainObjectType>
+class TensorRef<const PlainObjectType> : public internal::TensorRefBase<TensorRef<const PlainObjectType>> {
+ typedef internal::TensorRefBase<TensorRef<const PlainObjectType>> Base;
- private:
- EIGEN_STRONG_INLINE void unrefEvaluator() {
- if (m_evaluator) {
- m_evaluator->decrRefCount();
- if (m_evaluator->refCount() == 0) {
- delete m_evaluator;
- }
- }
+ public:
+ EIGEN_STRONG_INLINE TensorRef() : Base() {}
+
+ template <typename Expression>
+ EIGEN_STRONG_INLINE TensorRef(const Expression& expr) : Base(expr) {}
+
+ template <typename Expression>
+ EIGEN_STRONG_INLINE TensorRef& operator=(const Expression& expr) {
+ return Base::operator=(expr).derived();
}
- internal::TensorLazyBaseEvaluator<Dimensions, Scalar>* m_evaluator;
+ TensorRef(const TensorRef& other) : Base(other) {}
+
+ TensorRef& operator=(const TensorRef& other) { return Base::operator=(other).derived(); }
};
// evaluator for rvalues
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
index 017b4ff..f5954d6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
@@ -95,8 +95,9 @@
typedef typename MakePointer<Scalar>::Type PointerType;
};
-template <typename PlainObjectType>
-struct traits<TensorRef<PlainObjectType> > : public traits<PlainObjectType> {
+template <typename PlainObjectType_>
+struct traits<TensorRef<PlainObjectType_> > : public traits<PlainObjectType_> {
+ typedef PlainObjectType_ PlainObjectType;
typedef traits<PlainObjectType> BaseTraits;
typedef typename BaseTraits::Scalar Scalar;
typedef typename BaseTraits::StorageKind StorageKind;
diff --git a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
index 182bd2e..6f6df3e 100644
--- a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
+++ b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
@@ -111,12 +111,13 @@
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::StorageIndex StorageIndex;
typedef typename MatrixType::RealScalar RealScalar;
+ typedef internal::make_complex_t<Scalar> ComplexScalar;
typedef Preconditioner_ Preconditioner;
typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix;
typedef Matrix<RealScalar, Dynamic, Dynamic> DenseRealMatrix;
typedef Matrix<Scalar, Dynamic, 1> DenseVector;
typedef Matrix<RealScalar, Dynamic, 1> DenseRealVector;
- typedef Matrix<std::complex<RealScalar>, Dynamic, 1> ComplexVector;
+ typedef Matrix<ComplexScalar, Dynamic, 1> ComplexVector;
/** Default constructor. */
DGMRES()
@@ -389,15 +390,15 @@
Index j = 0;
while (j < it - 1) {
if (T(j + 1, j) == Scalar(0)) {
- eig(j) = std::complex<RealScalar>(T(j, j), RealScalar(0));
+ eig(j) = ComplexScalar(T(j, j), RealScalar(0));
j++;
} else {
- eig(j) = std::complex<RealScalar>(T(j, j), T(j + 1, j));
- eig(j + 1) = std::complex<RealScalar>(T(j, j + 1), T(j + 1, j + 1));
+ eig(j) = ComplexScalar(T(j, j), T(j + 1, j));
+ eig(j + 1) = ComplexScalar(T(j, j + 1), T(j + 1, j + 1));
j++;
}
}
- if (j < it - 1) eig(j) = std::complex<RealScalar>(T(j, j), RealScalar(0));
+ if (j < it - 1) eig(j) = ComplexScalar(T(j, j), RealScalar(0));
return eig;
}
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
index ff955e1..a28aa96 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
@@ -23,8 +23,10 @@
*
* This struct is used by CwiseUnaryOp to scale a matrix by \f$ 2^{-s} \f$.
*/
-template <typename RealScalar>
+template <typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
struct MatrixExponentialScalingOp {
+ using RealScalar = typename NumTraits<Scalar>::Real;
+
/** \brief Constructor.
*
* \param[in] squarings The integer \f$ s \f$ in this document.
@@ -35,20 +37,30 @@
*
* \param[in,out] x The scalar to be scaled, becoming \f$ 2^{-s} x \f$.
*/
- inline const RealScalar operator()(const RealScalar& x) const {
+ inline const Scalar operator()(const Scalar& x) const {
using std::ldexp;
- return ldexp(x, -m_squarings);
+ return Scalar(ldexp(Eigen::numext::real(x), -m_squarings), ldexp(Eigen::numext::imag(x), -m_squarings));
}
- typedef std::complex<RealScalar> ComplexScalar;
+ private:
+ int m_squarings;
+};
+
+template <typename Scalar>
+struct MatrixExponentialScalingOp<Scalar, /*IsComplex=*/false> {
+ /** \brief Constructor.
+ *
+ * \param[in] squarings The integer \f$ s \f$ in this document.
+ */
+ MatrixExponentialScalingOp(int squarings) : m_squarings(squarings) {}
/** \brief Scale a matrix coefficient.
*
* \param[in,out] x The scalar to be scaled, becoming \f$ 2^{-s} x \f$.
*/
- inline const ComplexScalar operator()(const ComplexScalar& x) const {
+ inline const Scalar operator()(const Scalar& x) const {
using std::ldexp;
- return ComplexScalar(ldexp(x.real(), -m_squarings), ldexp(x.imag(), -m_squarings));
+ return ldexp(x, -m_squarings);
}
private:
@@ -220,6 +232,7 @@
template <typename MatrixType>
struct matrix_exp_computeUV<MatrixType, float> {
+ using Scalar = typename traits<MatrixType>::Scalar;
template <typename ArgType>
static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings) {
using std::frexp;
@@ -234,7 +247,7 @@
const float maxnorm = 3.925724783138660f;
frexp(l1norm / maxnorm, &squarings);
if (squarings < 0) squarings = 0;
- MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<float>(squarings));
+ MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<Scalar>(squarings));
matrix_exp_pade7(A, U, V);
}
}
@@ -242,12 +255,12 @@
template <typename MatrixType>
struct matrix_exp_computeUV<MatrixType, double> {
- typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar;
+ using Scalar = typename traits<MatrixType>::Scalar;
template <typename ArgType>
static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings) {
using std::frexp;
using std::pow;
- const RealScalar l1norm = arg.cwiseAbs().colwise().sum().maxCoeff();
+ const double l1norm = arg.cwiseAbs().colwise().sum().maxCoeff();
squarings = 0;
if (l1norm < 1.495585217958292e-002) {
matrix_exp_pade3(arg, U, V);
@@ -258,10 +271,10 @@
} else if (l1norm < 2.097847961257068e+000) {
matrix_exp_pade9(arg, U, V);
} else {
- const RealScalar maxnorm = 5.371920351148152;
+ const double maxnorm = 5.371920351148152;
frexp(l1norm / maxnorm, &squarings);
if (squarings < 0) squarings = 0;
- MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<RealScalar>(squarings));
+ MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<Scalar>(squarings));
matrix_exp_pade13(A, U, V);
}
}
@@ -271,6 +284,7 @@
struct matrix_exp_computeUV<MatrixType, long double> {
template <typename ArgType>
static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings) {
+ using Scalar = typename traits<MatrixType>::Scalar;
#if LDBL_MANT_DIG == 53 // double precision
matrix_exp_computeUV<MatrixType, double>::run(arg, U, V, squarings);
@@ -295,7 +309,7 @@
const long double maxnorm = 4.0246098906697353063L;
frexp(l1norm / maxnorm, &squarings);
if (squarings < 0) squarings = 0;
- MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings));
+ MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<Scalar>(squarings));
matrix_exp_pade13(A, U, V);
}
@@ -315,7 +329,7 @@
const long double maxnorm = 3.2579440895405400856599663723517L;
frexp(l1norm / maxnorm, &squarings);
if (squarings < 0) squarings = 0;
- MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings));
+ MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<Scalar>(squarings));
matrix_exp_pade17(A, U, V);
}
@@ -335,7 +349,7 @@
const long double maxnorm = 2.884233277829519311757165057717815L;
frexp(l1norm / maxnorm, &squarings);
if (squarings < 0) squarings = 0;
- MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings));
+ MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<Scalar>(squarings));
matrix_exp_pade17(A, U, V);
}
@@ -382,9 +396,7 @@
void matrix_exp_compute(const ArgType& arg, ResultType& result, false_type) // default
{
typedef typename ArgType::PlainObject MatrixType;
- typedef typename traits<MatrixType>::Scalar Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
- typedef typename std::complex<RealScalar> ComplexScalar;
+ typedef make_complex_t<typename traits<MatrixType>::Scalar> ComplexScalar;
result = arg.matrixFunction(internal::stem_function_exp<ComplexScalar>);
}
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
index 68336a5..0c18ad6 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
@@ -382,7 +382,7 @@
static const int Rows = Traits::RowsAtCompileTime, Cols = Traits::ColsAtCompileTime;
static const int MaxRows = Traits::MaxRowsAtCompileTime, MaxCols = Traits::MaxColsAtCompileTime;
- typedef std::complex<Scalar> ComplexScalar;
+ typedef internal::make_complex_t<Scalar> ComplexScalar;
typedef Matrix<ComplexScalar, Rows, Cols, 0, MaxRows, MaxCols> ComplexMatrix;
ComplexMatrix CA = A.template cast<ComplexScalar>();
@@ -476,7 +476,7 @@
typedef typename internal::nested_eval<Derived, 10>::type NestedEvalType;
typedef internal::remove_all_t<NestedEvalType> NestedEvalTypeClean;
typedef internal::traits<NestedEvalTypeClean> Traits;
- typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+ typedef internal::make_complex_t<Scalar> ComplexScalar;
typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, Traits::RowsAtCompileTime, Traits::ColsAtCompileTime>
DynMatrixType;
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
index 4228166..398971e 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
@@ -330,7 +330,7 @@
typedef typename internal::nested_eval<Derived, 10>::type DerivedEvalType;
typedef internal::remove_all_t<DerivedEvalType> DerivedEvalTypeClean;
typedef internal::traits<DerivedEvalTypeClean> Traits;
- typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+ typedef internal::make_complex_t<Scalar> ComplexScalar;
typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, Traits::RowsAtCompileTime, Traits::ColsAtCompileTime>
DynMatrixType;
typedef internal::MatrixLogarithmAtomic<DynMatrixType> AtomicType;
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h b/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
index bff619a..a420ee7 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
@@ -91,7 +91,7 @@
enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime };
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
- typedef std::complex<RealScalar> ComplexScalar;
+ typedef internal::make_complex_t<Scalar> ComplexScalar;
typedef Block<MatrixType, Dynamic, Dynamic> ResultType;
const MatrixType& m_A;
@@ -380,7 +380,7 @@
Index cols() const { return m_A.cols(); }
private:
- typedef std::complex<RealScalar> ComplexScalar;
+ typedef internal::make_complex_t<Scalar> ComplexScalar;
typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime>
ComplexMatrix;
@@ -628,7 +628,7 @@
class MatrixComplexPowerReturnValue : public ReturnByValue<MatrixComplexPowerReturnValue<Derived> > {
public:
typedef typename Derived::PlainObject PlainObject;
- typedef typename std::complex<typename Derived::RealScalar> ComplexScalar;
+ typedef internal::make_complex_t<typename Derived::Scalar> ComplexScalar;
/**
* \brief Constructor.
@@ -685,7 +685,7 @@
}
template <typename Derived>
-const MatrixComplexPowerReturnValue<Derived> MatrixBase<Derived>::pow(const std::complex<RealScalar>& p) const {
+const MatrixComplexPowerReturnValue<Derived> MatrixBase<Derived>::pow(const internal::make_complex_t<Scalar>& p) const {
return MatrixComplexPowerReturnValue<Derived>(derived(), p);
}
diff --git a/unsupported/Eigen/src/Polynomials/PolynomialSolver.h b/unsupported/Eigen/src/Polynomials/PolynomialSolver.h
index 8c0ce3b..aa357a4 100644
--- a/unsupported/Eigen/src/Polynomials/PolynomialSolver.h
+++ b/unsupported/Eigen/src/Polynomials/PolynomialSolver.h
@@ -35,7 +35,7 @@
typedef Scalar_ Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
- typedef std::complex<RealScalar> RootType;
+ typedef internal::make_complex_t<Scalar> RootType;
typedef Matrix<RootType, Deg_, 1> RootsType;
typedef DenseIndex Index;
@@ -308,7 +308,7 @@
typedef std::conditional_t<NumTraits<Scalar>::IsComplex, ComplexEigenSolver<CompanionMatrixType>,
EigenSolver<CompanionMatrixType> >
EigenSolverType;
- typedef std::conditional_t<NumTraits<Scalar>::IsComplex, Scalar, std::complex<Scalar> > ComplexScalar;
+ typedef internal::make_complex_t<Scalar_> ComplexScalar;
public:
/** Computes the complex roots of a new polynomial. */
diff --git a/unsupported/test/cxx11_tensor_morphing.cpp b/unsupported/test/cxx11_tensor_morphing.cpp
index 0672572..55d4291 100644
--- a/unsupported/test/cxx11_tensor_morphing.cpp
+++ b/unsupported/test/cxx11_tensor_morphing.cpp
@@ -138,7 +138,7 @@
TensorMap<Tensor<const T, 1>> m(b, 1);
DSizes<DenseIndex, 1> offsets;
offsets[0] = 0;
- TensorRef<Tensor<const T, 1>> slice_ref(m.slice(offsets, m.dimensions()));
+ TensorRef<const Tensor<const T, 1>> slice_ref(m.slice(offsets, m.dimensions()));
VERIFY_IS_EQUAL(slice_ref(0), 42);
}
diff --git a/unsupported/test/cxx11_tensor_ref.cpp b/unsupported/test/cxx11_tensor_ref.cpp
index d5ff196..cf09749 100644
--- a/unsupported/test/cxx11_tensor_ref.cpp
+++ b/unsupported/test/cxx11_tensor_ref.cpp
@@ -49,8 +49,8 @@
Tensor<int, 1> input2(6);
input2.setRandom();
- TensorRef<Tensor<int, 1>> ref3(input1 + input2);
- TensorRef<Tensor<int, 1>> ref4 = input1 + input2;
+ TensorRef<const Tensor<int, 1>> ref3(input1 + input2);
+ TensorRef<const Tensor<int, 1>> ref4 = input1 + input2;
VERIFY_IS_NOT_EQUAL(ref3.data(), input1.data());
VERIFY_IS_NOT_EQUAL(ref4.data(), input1.data());
@@ -144,7 +144,7 @@
Tensor<float, 3> result(3, 5, 7);
result.setRandom();
- TensorRef<Tensor<float, 3>> result_ref(result);
+ TensorRef<const Tensor<float, 3>> result_ref(result);
Tensor<float, 3> bias(3, 5, 7);
bias.setRandom();
@@ -192,7 +192,7 @@
paddings[2] = std::make_pair(3, 4);
paddings[3] = std::make_pair(0, 0);
DSizes<Eigen::DenseIndex, 4> shuffle_dims(0, 1, 2, 3);
- TensorRef<Tensor<const float, 4>> ref(m.pad(paddings));
+ TensorRef<const Tensor<const float, 4>> ref(m.pad(paddings));
array<std::pair<ptrdiff_t, ptrdiff_t>, 4> trivial;
trivial[0] = std::make_pair(0, 0);
trivial[1] = std::make_pair(0, 0);