Update Eigen to commit:66f7f51b7e069d0a03a21157fa60b24aece69aeb CHANGELOG ========= 66f7f51b7 - Disable fno-check-new on clang. 151f6127d - Fix Warray-bounds warning for fixed-size assignments 1d8b82b07 - Fix power builds for no VSX and no POWER8. eb3f9f443 - refactor AssignmentEvaluator 9c211430b - Fix TensorRef details 22cd7307d - Remove assumption of std::complex for complex scalar types. 6b4881ad4 - Eliminate type-punning UB in Eigen::half. 420d891de - Add missing mathjax/latex configuration. becefd59e - Returns condition number of zero if matrix is not invertible. 809d266b4 - Fix numerical issues with BiCGSTAB. ef475f277 - Add missing graphviz to doc build. a0591cbc9 - Fix doxygen-generated pages 715deac18 - Add EIGEN_CI_CTEST_ARGS to allow for custom timeout. PiperOrigin-RevId: 729330781 Change-Id: I16aadc1cb1ddf4816604ce120e933f9660da17f8
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 8954841..5a2a3ac 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h
@@ -27,125 +27,115 @@ // copy_using_evaluator_traits is based on assign_traits -template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = -1> +template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = Dynamic> struct copy_using_evaluator_traits { - typedef typename DstEvaluator::XprType Dst; - typedef typename Dst::Scalar DstScalar; + using Src = typename SrcEvaluator::XprType; + using Dst = typename DstEvaluator::XprType; + using DstScalar = typename Dst::Scalar; - enum { DstFlags = DstEvaluator::Flags, SrcFlags = SrcEvaluator::Flags }; + static constexpr int DstFlags = DstEvaluator::Flags; + static constexpr int SrcFlags = SrcEvaluator::Flags; public: - enum { - DstAlignment = DstEvaluator::Alignment, - SrcAlignment = SrcEvaluator::Alignment, - DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit, - JointAlignment = plain_enum_min(DstAlignment, SrcAlignment) - }; - - private: - enum { - InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) - : int(DstFlags) & RowMajorBit ? int(Dst::ColsAtCompileTime) - : int(Dst::RowsAtCompileTime), - InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) - : int(DstFlags) & RowMajorBit ? 
int(Dst::MaxColsAtCompileTime) - : int(Dst::MaxRowsAtCompileTime), - RestrictedInnerSize = min_size_prefer_fixed(InnerSize, MaxPacketSize), - RestrictedLinearSize = min_size_prefer_fixed(Dst::SizeAtCompileTime, MaxPacketSize), - OuterStride = int(outer_stride_at_compile_time<Dst>::ret), - MaxSizeAtCompileTime = Dst::SizeAtCompileTime - }; + static constexpr int DstAlignment = DstEvaluator::Alignment; + static constexpr int SrcAlignment = SrcEvaluator::Alignment; + static constexpr int JointAlignment = plain_enum_min(DstAlignment, SrcAlignment); + static constexpr bool DstHasDirectAccess = bool(DstFlags & DirectAccessBit); + static constexpr bool SrcIsRowMajor = bool(SrcFlags & RowMajorBit); + static constexpr bool DstIsRowMajor = bool(DstFlags & RowMajorBit); + static constexpr bool IsVectorAtCompileTime = Dst::IsVectorAtCompileTime; + static constexpr int RowsAtCompileTime = size_prefer_fixed(Src::RowsAtCompileTime, Dst::RowsAtCompileTime); + static constexpr int ColsAtCompileTime = size_prefer_fixed(Src::ColsAtCompileTime, Dst::ColsAtCompileTime); + static constexpr int SizeAtCompileTime = size_at_compile_time(RowsAtCompileTime, ColsAtCompileTime); + static constexpr int MaxRowsAtCompileTime = + min_size_prefer_fixed(Src::MaxRowsAtCompileTime, Dst::MaxRowsAtCompileTime); + static constexpr int MaxColsAtCompileTime = + min_size_prefer_fixed(Src::MaxColsAtCompileTime, Dst::MaxColsAtCompileTime); + static constexpr int MaxSizeAtCompileTime = + min_size_prefer_fixed(Src::MaxSizeAtCompileTime, Dst::MaxSizeAtCompileTime); + static constexpr int InnerSizeAtCompileTime = IsVectorAtCompileTime ? SizeAtCompileTime + : DstIsRowMajor ? ColsAtCompileTime + : RowsAtCompileTime; + static constexpr int MaxInnerSizeAtCompileTime = IsVectorAtCompileTime ? MaxSizeAtCompileTime + : DstIsRowMajor ? 
MaxColsAtCompileTime + : MaxRowsAtCompileTime; + static constexpr int RestrictedInnerSize = min_size_prefer_fixed(MaxInnerSizeAtCompileTime, MaxPacketSize); + static constexpr int RestrictedLinearSize = min_size_prefer_fixed(MaxSizeAtCompileTime, MaxPacketSize); + static constexpr int OuterStride = outer_stride_at_compile_time<Dst>::ret; // TODO distinguish between linear traversal and inner-traversals - typedef typename find_best_packet<DstScalar, RestrictedLinearSize>::type LinearPacketType; - typedef typename find_best_packet<DstScalar, RestrictedInnerSize>::type InnerPacketType; + using LinearPacketType = typename find_best_packet<DstScalar, RestrictedLinearSize>::type; + using InnerPacketType = typename find_best_packet<DstScalar, RestrictedInnerSize>::type; - enum { - LinearPacketSize = unpacket_traits<LinearPacketType>::size, - InnerPacketSize = unpacket_traits<InnerPacketType>::size - }; + static constexpr int LinearPacketSize = unpacket_traits<LinearPacketType>::size; + static constexpr int InnerPacketSize = unpacket_traits<InnerPacketType>::size; public: - enum { - LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment, - InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment - }; + static constexpr int LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment; + static constexpr int InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment; private: - enum { - DstIsRowMajor = DstFlags & RowMajorBit, - SrcIsRowMajor = SrcFlags & RowMajorBit, - StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)), - MightVectorize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) && - bool(functor_traits<AssignFunc>::PacketAccess), - MayInnerVectorize = MightVectorize && int(InnerSize) != Dynamic && int(InnerSize) % int(InnerPacketSize) == 0 && - int(OuterStride) != Dynamic && int(OuterStride) % int(InnerPacketSize) == 0 && - (EIGEN_UNALIGNED_VECTORIZE || 
int(JointAlignment) >= int(InnerRequiredAlignment)), - MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), - MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess) && - (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment) >= int(LinearRequiredAlignment)) || - MaxSizeAtCompileTime == Dynamic), - /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, - so it's only good for large enough sizes. */ - MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess) && - (int(InnerMaxSize) == Dynamic || - int(InnerMaxSize) >= (EIGEN_UNALIGNED_VECTORIZE ? InnerPacketSize : (3 * InnerPacketSize))) - /* slice vectorization can be slow, so we only want it if the slices are big, which is - indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block - in a fixed-size matrix - However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */ - }; + static constexpr bool StorageOrdersAgree = DstIsRowMajor == SrcIsRowMajor; + static constexpr bool MightVectorize = StorageOrdersAgree && bool(DstFlags & SrcFlags & ActualPacketAccessBit) && + bool(functor_traits<AssignFunc>::PacketAccess); + static constexpr bool MayInnerVectorize = MightVectorize && (InnerSizeAtCompileTime != Dynamic) && + (InnerSizeAtCompileTime % InnerPacketSize == 0) && + (OuterStride != Dynamic) && (OuterStride % InnerPacketSize == 0) && + (EIGEN_UNALIGNED_VECTORIZE || JointAlignment >= InnerRequiredAlignment); + static constexpr bool MayLinearize = StorageOrdersAgree && (DstFlags & SrcFlags & LinearAccessBit); + static constexpr bool MayLinearVectorize = + MightVectorize && MayLinearize && DstHasDirectAccess && + (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment) || MaxSizeAtCompileTime == Dynamic) && + (MaxSizeAtCompileTime == Dynamic || MaxSizeAtCompileTime >= LinearPacketSize); + /* If the destination isn't aligned, we 
have to do runtime checks and we don't unroll, + so it's only good for large enough sizes. */ + static constexpr int InnerSizeThreshold = (EIGEN_UNALIGNED_VECTORIZE ? 1 : 3) * InnerPacketSize; + static constexpr bool MaySliceVectorize = + MightVectorize && DstHasDirectAccess && + (MaxInnerSizeAtCompileTime == Dynamic || MaxInnerSizeAtCompileTime >= InnerSizeThreshold); + /* slice vectorization can be slow, so we only want it if the slices are big, which is + indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block + in a fixed-size matrix + However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */ public: - enum { - Traversal = int(Dst::SizeAtCompileTime) == 0 - ? int(AllAtOnceTraversal) // If compile-size is zero, traversing will fail at compile-time. - : (int(MayLinearVectorize) && (LinearPacketSize > InnerPacketSize)) ? int(LinearVectorizedTraversal) - : int(MayInnerVectorize) ? int(InnerVectorizedTraversal) - : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) - : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) - : int(MayLinearize) ? int(LinearTraversal) - : int(DefaultTraversal), - Vectorized = int(Traversal) == InnerVectorizedTraversal || int(Traversal) == LinearVectorizedTraversal || - int(Traversal) == SliceVectorizedTraversal - }; + static constexpr int Traversal = SizeAtCompileTime == 0 ? AllAtOnceTraversal + : (MayLinearVectorize && (LinearPacketSize > InnerPacketSize)) + ? LinearVectorizedTraversal + : MayInnerVectorize ? InnerVectorizedTraversal + : MayLinearVectorize ? LinearVectorizedTraversal + : MaySliceVectorize ? SliceVectorizedTraversal + : MayLinearize ? 
LinearTraversal + : DefaultTraversal; + static constexpr bool Vectorized = Traversal == InnerVectorizedTraversal || Traversal == LinearVectorizedTraversal || + Traversal == SliceVectorizedTraversal; - typedef std::conditional_t<int(Traversal) == LinearVectorizedTraversal, LinearPacketType, InnerPacketType> PacketType; + using PacketType = std::conditional_t<Traversal == LinearVectorizedTraversal, LinearPacketType, InnerPacketType>; private: - enum { - ActualPacketSize = int(Traversal) == LinearVectorizedTraversal ? LinearPacketSize - : Vectorized ? InnerPacketSize - : 1, - UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize, - MayUnrollCompletely = - int(Dst::SizeAtCompileTime) != Dynamic && - int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost)) <= - int(UnrollingLimit), - MayUnrollInner = - int(InnerSize) != Dynamic && - int(InnerSize) * (int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit) - }; + static constexpr int ActualPacketSize = Vectorized ? unpacket_traits<PacketType>::size : 1; + static constexpr int UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize; + static constexpr int CoeffReadCost = int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost); + static constexpr bool MayUnrollCompletely = + (SizeAtCompileTime != Dynamic) && (SizeAtCompileTime * CoeffReadCost <= UnrollingLimit); + static constexpr bool MayUnrollInner = + (InnerSizeAtCompileTime != Dynamic) && (InnerSizeAtCompileTime * CoeffReadCost <= UnrollingLimit); public: - enum { - Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) - ? (int(MayUnrollCompletely) ? int(CompleteUnrolling) - : int(MayUnrollInner) ? int(InnerUnrolling) - : int(NoUnrolling)) - : int(Traversal) == int(LinearVectorizedTraversal) - ? (bool(MayUnrollCompletely) && - (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment) >= int(LinearRequiredAlignment))) - ? 
int(CompleteUnrolling) - : int(NoUnrolling)) - : int(Traversal) == int(LinearTraversal) - ? (bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling)) + static constexpr int Unrolling = + (Traversal == InnerVectorizedTraversal || Traversal == DefaultTraversal) + ? (MayUnrollCompletely ? CompleteUnrolling + : MayUnrollInner ? InnerUnrolling + : NoUnrolling) + : Traversal == LinearVectorizedTraversal + ? (MayUnrollCompletely && (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment)) + ? CompleteUnrolling + : NoUnrolling) + : Traversal == LinearTraversal ? (MayUnrollCompletely ? CompleteUnrolling : NoUnrolling) #if EIGEN_UNALIGNED_VECTORIZE - : int(Traversal) == int(SliceVectorizedTraversal) - ? (bool(MayUnrollInner) ? int(InnerUnrolling) : int(NoUnrolling)) + : Traversal == SliceVectorizedTraversal ? (MayUnrollInner ? InnerUnrolling : NoUnrolling) #endif - : int(NoUnrolling) - }; + : NoUnrolling; #ifdef EIGEN_DEBUG_ASSIGN static void debug() { @@ -162,8 +152,8 @@ EIGEN_DEBUG_VAR(LinearRequiredAlignment) EIGEN_DEBUG_VAR(InnerRequiredAlignment) EIGEN_DEBUG_VAR(JointAlignment) - EIGEN_DEBUG_VAR(InnerSize) - EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(InnerSizeAtCompileTime) + EIGEN_DEBUG_VAR(MaxInnerSizeAtCompileTime) EIGEN_DEBUG_VAR(LinearPacketSize) EIGEN_DEBUG_VAR(InnerPacketSize) EIGEN_DEBUG_VAR(ActualPacketSize) @@ -196,17 +186,14 @@ *** Default traversal *** ************************/ -template <typename Kernel, int Index, int Stop> +template <typename Kernel, int Index_, int Stop> struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling { - // FIXME: this is not very clean, perhaps this information should be provided by the kernel? 
- typedef typename Kernel::DstEvaluatorType DstEvaluatorType; - typedef typename DstEvaluatorType::XprType DstXprType; - - enum { outer = Index / DstXprType::InnerSizeAtCompileTime, inner = Index % DstXprType::InnerSizeAtCompileTime }; + static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime; + static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime; EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { - kernel.assignCoeffByOuterInner(outer, inner); - copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index + 1, Stop>::run(kernel); + kernel.assignCoeffByOuterInner(Outer, Inner); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index_ + 1, Stop>::run(kernel); } }; @@ -232,11 +219,11 @@ *** Linear traversal *** ***********************/ -template <typename Kernel, int Index, int Stop> +template <typename Kernel, int Index_, int Stop> struct copy_using_evaluator_LinearTraversal_CompleteUnrolling { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { - kernel.assignCoeff(Index); - copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index + 1, Stop>::run(kernel); + kernel.assignCoeff(Index_); + copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index_ + 1, Stop>::run(kernel); } }; @@ -249,23 +236,17 @@ *** Inner vectorization *** **************************/ -template <typename Kernel, int Index, int Stop> +template <typename Kernel, int Index_, int Stop> struct copy_using_evaluator_innervec_CompleteUnrolling { - // FIXME: this is not very clean, perhaps this information should be provided by the kernel? 
- typedef typename Kernel::DstEvaluatorType DstEvaluatorType; - typedef typename DstEvaluatorType::XprType DstXprType; - typedef typename Kernel::PacketType PacketType; - - enum { - outer = Index / DstXprType::InnerSizeAtCompileTime, - inner = Index % DstXprType::InnerSizeAtCompileTime, - SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, - DstAlignment = Kernel::AssignmentTraits::DstAlignment - }; + using PacketType = typename Kernel::PacketType; + static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime; + static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime; + static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size; + static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment; + static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment; EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { - kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner); - enum { NextIndex = Index + unpacket_traits<PacketType>::size }; + kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(Outer, Inner); copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel); } }; @@ -277,10 +258,11 @@ template <typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment> struct copy_using_evaluator_innervec_InnerUnrolling { - typedef typename Kernel::PacketType PacketType; + using PacketType = typename Kernel::PacketType; + static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) { kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_); - enum { NextIndex = Index_ + unpacket_traits<PacketType>::size }; copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, 
outer); } @@ -308,9 +290,10 @@ // Zero-sized assignment is a no-op. template <typename Kernel, int Unrolling> struct dense_assignment_loop<Kernel, AllAtOnceTraversal, Unrolling> { + static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime; + EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE EIGEN_CONSTEXPR run(Kernel& /*kernel*/) { - EIGEN_STATIC_ASSERT(int(Kernel::DstEvaluatorType::XprType::SizeAtCompileTime) == 0, - EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT) + EIGEN_STATIC_ASSERT(SizeAtCompileTime == 0, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT) } }; @@ -331,21 +314,21 @@ template <typename Kernel> struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling> { + static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; - copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, SizeAtCompileTime>::run(kernel); } }; template <typename Kernel> struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling> { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + static constexpr int InnerSizeAtCompileTime = Kernel::AssignmentTraits::InnerSizeAtCompileTime; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { const Index outerSize = kernel.outerSize(); for (Index outer = 0; outer < outerSize; ++outer) - copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, - outer); + copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, InnerSizeAtCompileTime>::run(kernel, outer); } }; @@ -380,18 +363,15 @@ } }; -template <typename Kernel, int Index, int Stop> +template 
<typename Kernel, int Index_, int Stop> struct copy_using_evaluator_linearvec_CompleteUnrolling { - // FIXME: this is not very clean, perhaps this information should be provided by the kernel? - typedef typename Kernel::DstEvaluatorType DstEvaluatorType; - typedef typename DstEvaluatorType::XprType DstXprType; - typedef typename Kernel::PacketType PacketType; - - enum { SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, DstAlignment = Kernel::AssignmentTraits::DstAlignment }; + using PacketType = typename Kernel::PacketType; + static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment; + static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment; + static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size; EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { - kernel.template assignPacket<DstAlignment, SrcAlignment, PacketType>(Index); - enum { NextIndex = Index + unpacket_traits<PacketType>::size }; + kernel.template assignPacket<DstAlignment, SrcAlignment, PacketType>(Index_); copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel); } }; @@ -403,26 +383,24 @@ template <typename Kernel> struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling> { + using Scalar = typename Kernel::Scalar; + using PacketType = typename Kernel::PacketType; + static constexpr int PacketSize = unpacket_traits<PacketType>::size; + static constexpr int RequestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment; + static constexpr bool DstIsAligned = Kernel::AssignmentTraits::DstAlignment >= RequestedAlignment; + static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment; + static constexpr int DstAlignment = + packet_traits<Scalar>::AlignedOnScalar ? 
RequestedAlignment : Kernel::AssignmentTraits::DstAlignment; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) { const Index size = kernel.size(); - typedef typename Kernel::Scalar Scalar; - typedef typename Kernel::PacketType PacketType; - enum { - requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment, - packetSize = unpacket_traits<PacketType>::size, - dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment) >= int(requestedAlignment), - dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment) - : int(Kernel::AssignmentTraits::DstAlignment), - srcAlignment = Kernel::AssignmentTraits::JointAlignment - }; - const Index alignedStart = - dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size); - const Index alignedEnd = alignedStart + ((size - alignedStart) / packetSize) * packetSize; + const Index alignedStart = DstIsAligned ? 0 : first_aligned<RequestedAlignment>(kernel.dstDataPtr(), size); + const Index alignedEnd = alignedStart + numext::round_down(size - alignedStart, PacketSize); - unaligned_dense_assignment_loop<dstIsAligned != 0>::run(kernel, 0, alignedStart); + unaligned_dense_assignment_loop<DstIsAligned>::run(kernel, 0, alignedStart); - for (Index index = alignedStart; index < alignedEnd; index += packetSize) - kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index); + for (Index index = alignedStart; index < alignedEnd; index += PacketSize) + kernel.template assignPacket<DstAlignment, SrcAlignment, PacketType>(index); unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size); } @@ -430,18 +408,14 @@ template <typename Kernel> struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling> { + using PacketType = typename Kernel::PacketType; + static constexpr int PacketSize = unpacket_traits<PacketType>::size; + static constexpr int Size = Kernel::AssignmentTraits::SizeAtCompileTime; + static 
constexpr int AlignedSize = numext::round_down(Size, PacketSize); + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) { - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; - typedef typename Kernel::PacketType PacketType; - - enum { - size = DstXprType::SizeAtCompileTime, - packetSize = unpacket_traits<PacketType>::size, - alignedSize = (int(size) / packetSize) * packetSize - }; - - copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel); - copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel); + copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, 0, AlignedSize>::run(kernel); + copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, AlignedSize, Size>::run(kernel); } }; @@ -451,35 +425,40 @@ template <typename Kernel> struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling> { - typedef typename Kernel::PacketType PacketType; - enum { SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, DstAlignment = Kernel::AssignmentTraits::DstAlignment }; + using PacketType = typename Kernel::PacketType; + static constexpr int PacketSize = unpacket_traits<PacketType>::size; + static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment; + static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) { const Index innerSize = kernel.innerSize(); const Index outerSize = kernel.outerSize(); - const Index packetSize = unpacket_traits<PacketType>::size; for (Index outer = 0; outer < outerSize; ++outer) - for (Index inner = 0; inner < innerSize; inner += packetSize) + for (Index inner = 0; inner < innerSize; inner += PacketSize) kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner); } }; template <typename Kernel> struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, 
CompleteUnrolling> { + static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; - copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel); + copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, SizeAtCompileTime>::run(kernel); } }; template <typename Kernel> struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling> { + static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime; + static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment; + static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; - typedef typename Kernel::AssignmentTraits Traits; const Index outerSize = kernel.outerSize(); for (Index outer = 0; outer < outerSize; ++outer) - copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime, Traits::SrcAlignment, - Traits::DstAlignment>::run(kernel, outer); + copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, InnerSize, SrcAlignment, DstAlignment>::run(kernel, + outer); } }; @@ -498,8 +477,8 @@ template <typename Kernel> struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling> { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) { - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; - copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel); + copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, Kernel::AssignmentTraits::SizeAtCompileTime>::run( + kernel); } }; @@ -509,42 +488,40 @@ template <typename Kernel> struct dense_assignment_loop<Kernel, 
SliceVectorizedTraversal, NoUnrolling> { + using Scalar = typename Kernel::Scalar; + using PacketType = typename Kernel::PacketType; + static constexpr int PacketSize = unpacket_traits<PacketType>::size; + static constexpr int RequestedAlignment = Kernel::AssignmentTraits::InnerRequiredAlignment; + static constexpr bool Alignable = + packet_traits<Scalar>::AlignedOnScalar || Kernel::AssignmentTraits::DstAlignment >= sizeof(Scalar); + static constexpr bool DstIsAligned = Kernel::AssignmentTraits::DstAlignment >= RequestedAlignment; + static constexpr int DstAlignment = Alignable ? RequestedAlignment : Kernel::AssignmentTraits::DstAlignment; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) { - typedef typename Kernel::Scalar Scalar; - typedef typename Kernel::PacketType PacketType; - enum { - packetSize = unpacket_traits<PacketType>::size, - requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment), - alignable = - packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment) >= sizeof(Scalar), - dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment) >= int(requestedAlignment), - dstAlignment = alignable ? int(requestedAlignment) : int(Kernel::AssignmentTraits::DstAlignment) - }; const Scalar* dst_ptr = kernel.dstDataPtr(); - if ((!bool(dstIsAligned)) && (std::uintptr_t(dst_ptr) % sizeof(Scalar)) > 0) { + if ((!DstIsAligned) && (std::uintptr_t(dst_ptr) % sizeof(Scalar)) > 0) { // the pointer is not aligned-on scalar, so alignment is not possible return dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>::run(kernel); } - const Index packetAlignedMask = packetSize - 1; const Index innerSize = kernel.innerSize(); const Index outerSize = kernel.outerSize(); - const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0; + const Index alignedStep = Alignable ? 
(PacketSize - kernel.outerStride() % PacketSize) % PacketSize : 0; Index alignedStart = - ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize); + ((!Alignable) || DstIsAligned) ? 0 : internal::first_aligned<RequestedAlignment>(dst_ptr, innerSize); for (Index outer = 0; outer < outerSize; ++outer) { - const Index alignedEnd = alignedStart + ((innerSize - alignedStart) & ~packetAlignedMask); + const Index alignedEnd = alignedStart + numext::round_down(innerSize - alignedStart, PacketSize); // do the non-vectorizable part of the assignment for (Index inner = 0; inner < alignedStart; ++inner) kernel.assignCoeffByOuterInner(outer, inner); // do the vectorizable part of the assignment - for (Index inner = alignedStart; inner < alignedEnd; inner += packetSize) - kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner); + for (Index inner = alignedStart; inner < alignedEnd; inner += PacketSize) + kernel.template assignPacketByOuterInner<DstAlignment, Unaligned, PacketType>(outer, inner); // do the non-vectorizable part of the assignment for (Index inner = alignedEnd; inner < innerSize; ++inner) kernel.assignCoeffByOuterInner(outer, inner); - alignedStart = numext::mini((alignedStart + alignedStep) % packetSize, innerSize); + alignedStart = numext::mini((alignedStart + alignedStep) % PacketSize, innerSize); } } }; @@ -552,20 +529,15 @@ #if EIGEN_UNALIGNED_VECTORIZE template <typename Kernel> struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling> { + using PacketType = typename Kernel::PacketType; + static constexpr int PacketSize = unpacket_traits<PacketType>::size; + static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime; + static constexpr int VectorizableSize = numext::round_down(InnerSize, PacketSize); + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) { - typedef typename 
Kernel::DstEvaluatorType::XprType DstXprType; - typedef typename Kernel::PacketType PacketType; - - enum { - innerSize = DstXprType::InnerSizeAtCompileTime, - packetSize = unpacket_traits<PacketType>::size, - vectorizableSize = (int(innerSize) / int(packetSize)) * int(packetSize), - size = DstXprType::SizeAtCompileTime - }; - for (Index outer = 0; outer < kernel.outerSize(); ++outer) { - copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer); - copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, innerSize>::run(kernel, outer); + copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, VectorizableSize, 0, 0>::run(kernel, outer); + copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, VectorizableSize, InnerSize>::run(kernel, outer); } } };
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index c081499..528aed2 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h
@@ -182,6 +182,35 @@ typedef typename NumTraits<Scalar>::Real& type; }; +} // namespace internal + +namespace numext { + +template <typename Scalar> +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x); +} + +template <typename Scalar> +EIGEN_DEVICE_FUNC inline internal::add_const_on_value_type_t<EIGEN_MATHFUNC_RETVAL(real_ref, Scalar)> real_ref( + const Scalar& x) { + return internal::real_ref_impl<Scalar>::run(x); +} + +template <typename Scalar> +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x) { + return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x); +} + +template <typename Scalar> +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x); +} + +} // namespace numext + +namespace internal { + /**************************************************************************** * Implementation of conj * ****************************************************************************/ @@ -221,7 +250,9 @@ struct abs2_impl_default<Scalar, true> // IsComplex { typedef typename NumTraits<Scalar>::Real RealScalar; - EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { return x.real() * x.real() + x.imag() * x.imag(); } + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { + return numext::real(x) * numext::real(x) + numext::imag(x) * numext::imag(x); + } }; template <typename Scalar> @@ -250,16 +281,14 @@ }; // Complex sqrt defined in MathFunctionsImpl.h. -template <typename T> -EIGEN_DEVICE_FUNC std::complex<T> complex_sqrt(const std::complex<T>& a_x); +template <typename ComplexT> +EIGEN_DEVICE_FUNC ComplexT complex_sqrt(const ComplexT& a_x); // Custom implementation is faster than `std::sqrt`, works on // GPU, and correctly handles special cases (unlike MSVC). 
template <typename T> struct sqrt_impl<std::complex<T>> { - EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& x) { - return complex_sqrt<T>(x); - } + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& x) { return complex_sqrt(x); } }; template <typename Scalar> @@ -272,13 +301,13 @@ struct rsqrt_impl; // Complex rsqrt defined in MathFunctionsImpl.h. -template <typename T> -EIGEN_DEVICE_FUNC std::complex<T> complex_rsqrt(const std::complex<T>& a_x); +template <typename ComplexT> +EIGEN_DEVICE_FUNC ComplexT complex_rsqrt(const ComplexT& a_x); template <typename T> struct rsqrt_impl<std::complex<T>> { EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& x) { - return complex_rsqrt<T>(x); + return complex_rsqrt(x); } }; @@ -299,7 +328,7 @@ typedef typename NumTraits<Scalar>::Real RealScalar; EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { EIGEN_USING_STD(abs); - return abs(x.real()) + abs(x.imag()); + return abs(numext::real(x)) + abs(numext::imag(x)); } }; @@ -469,8 +498,8 @@ ****************************************************************************/ // Complex log defined in MathFunctionsImpl.h. 
-template <typename T> -EIGEN_DEVICE_FUNC std::complex<T> complex_log(const std::complex<T>& z); +template <typename ComplexT> +EIGEN_DEVICE_FUNC ComplexT complex_log(const ComplexT& z); template <typename Scalar> struct log_impl { @@ -846,7 +875,7 @@ real_type aa = abs(a); if (aa == real_type(0)) return Scalar(0); aa = real_type(1) / aa; - return Scalar(a.real() * aa, a.imag() * aa); + return Scalar(numext::real(a) * aa, numext::imag(a) * aa); } }; @@ -1043,27 +1072,6 @@ #endif template <typename Scalar> -EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x) { - return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x); -} - -template <typename Scalar> -EIGEN_DEVICE_FUNC inline internal::add_const_on_value_type_t<EIGEN_MATHFUNC_RETVAL(real_ref, Scalar)> real_ref( - const Scalar& x) { - return internal::real_ref_impl<Scalar>::run(x); -} - -template <typename Scalar> -EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x) { - return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x); -} - -template <typename Scalar> -EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x) { - return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x); -} - -template <typename Scalar> EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x); }
diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h index 10ddabd..8e2705b 100644 --- a/Eigen/src/Core/MathFunctionsImpl.h +++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -171,8 +171,8 @@ // Generic complex sqrt implementation that correctly handles corner cases // according to https://en.cppreference.com/w/cpp/numeric/complex/sqrt -template <typename T> -EIGEN_DEVICE_FUNC std::complex<T> complex_sqrt(const std::complex<T>& z) { +template <typename ComplexT> +EIGEN_DEVICE_FUNC ComplexT complex_sqrt(const ComplexT& z) { // Computes the principal sqrt of the input. // // For a complex square root of the number x + i*y. We want to find real @@ -194,21 +194,21 @@ // if x == 0: u = w, v = sign(y) * w // if x > 0: u = w, v = y / (2 * w) // if x < 0: u = |y| / (2 * w), v = sign(y) * w - + using T = typename NumTraits<ComplexT>::Real; const T x = numext::real(z); const T y = numext::imag(z); const T zero = T(0); const T w = numext::sqrt(T(0.5) * (numext::abs(x) + numext::hypot(x, y))); - return (numext::isinf)(y) ? std::complex<T>(NumTraits<T>::infinity(), y) - : numext::is_exactly_zero(x) ? std::complex<T>(w, y < zero ? -w : w) - : x > zero ? std::complex<T>(w, y / (2 * w)) - : std::complex<T>(numext::abs(y) / (2 * w), y < zero ? -w : w); + return (numext::isinf)(y) ? ComplexT(NumTraits<T>::infinity(), y) + : numext::is_exactly_zero(x) ? ComplexT(w, y < zero ? -w : w) + : x > zero ? ComplexT(w, y / (2 * w)) + : ComplexT(numext::abs(y) / (2 * w), y < zero ? -w : w); } // Generic complex rsqrt implementation. -template <typename T> -EIGEN_DEVICE_FUNC std::complex<T> complex_rsqrt(const std::complex<T>& z) { +template <typename ComplexT> +EIGEN_DEVICE_FUNC ComplexT complex_rsqrt(const ComplexT& z) { // Computes the principal reciprocal sqrt of the input. // // For a complex reciprocal square root of the number z = x + i*y. 
We want to @@ -230,7 +230,7 @@ // if x == 0: u = w / |z|, v = -sign(y) * w / |z| // if x > 0: u = w / |z|, v = -y / (2 * w * |z|) // if x < 0: u = |y| / (2 * w * |z|), v = -sign(y) * w / |z| - + using T = typename NumTraits<ComplexT>::Real; const T x = numext::real(z); const T y = numext::imag(z); const T zero = T(0); @@ -239,20 +239,21 @@ const T w = numext::sqrt(T(0.5) * (numext::abs(x) + abs_z)); const T woz = w / abs_z; // Corner cases consistent with 1/sqrt(z) on gcc/clang. - return numext::is_exactly_zero(abs_z) ? std::complex<T>(NumTraits<T>::infinity(), NumTraits<T>::quiet_NaN()) - : ((numext::isinf)(x) || (numext::isinf)(y)) ? std::complex<T>(zero, zero) - : numext::is_exactly_zero(x) ? std::complex<T>(woz, y < zero ? woz : -woz) - : x > zero ? std::complex<T>(woz, -y / (2 * w * abs_z)) - : std::complex<T>(numext::abs(y) / (2 * w * abs_z), y < zero ? woz : -woz); + return numext::is_exactly_zero(abs_z) ? ComplexT(NumTraits<T>::infinity(), NumTraits<T>::quiet_NaN()) + : ((numext::isinf)(x) || (numext::isinf)(y)) ? ComplexT(zero, zero) + : numext::is_exactly_zero(x) ? ComplexT(woz, y < zero ? woz : -woz) + : x > zero ? ComplexT(woz, -y / (2 * w * abs_z)) + : ComplexT(numext::abs(y) / (2 * w * abs_z), y < zero ? woz : -woz); } -template <typename T> -EIGEN_DEVICE_FUNC std::complex<T> complex_log(const std::complex<T>& z) { +template <typename ComplexT> +EIGEN_DEVICE_FUNC ComplexT complex_log(const ComplexT& z) { // Computes complex log. + using T = typename NumTraits<ComplexT>::Real; T a = numext::abs(z); EIGEN_USING_STD(atan2); T b = atan2(z.imag(), z.real()); - return std::complex<T>(numext::log(a), b); + return ComplexT(numext::log(a), b); } } // end namespace internal
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 5466a57..8d5c47e 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h
@@ -112,7 +112,7 @@ ConstTransposeReturnType> AdjointReturnType; /** \internal Return type of eigenvalues() */ - typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor> + typedef Matrix<internal::make_complex_t<Scalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType; /** \internal the return type of identity */ typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>, PlainObject> IdentityReturnType; @@ -468,7 +468,7 @@ EIGEN_MATRIX_FUNCTION(MatrixSquareRootReturnValue, sqrt, square root) EIGEN_MATRIX_FUNCTION(MatrixLogarithmReturnValue, log, logarithm) EIGEN_MATRIX_FUNCTION_1(MatrixPowerReturnValue, pow, power to \c p, const RealScalar& p) - EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const std::complex<RealScalar>& p) + EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const internal::make_complex_t<Scalar>& p) protected: EIGEN_DEFAULT_COPY_CONSTRUCTOR(MatrixBase)
diff --git a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h index df5a0ef..038e233 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +++ b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
@@ -1,877 +1,877 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_PACKET_MATH_FP16_AVX512_H -#define EIGEN_PACKET_MATH_FP16_AVX512_H - -// IWYU pragma: private -#include "../../InternalHeaderCheck.h" - -namespace Eigen { - -namespace internal { - -typedef __m512h Packet32h; -typedef eigen_packet_wrapper<__m256i, 1> Packet16h; -typedef eigen_packet_wrapper<__m128i, 2> Packet8h; - -template <> -struct is_arithmetic<Packet8h> { - enum { value = true }; -}; - -template <> -struct packet_traits<half> : default_packet_traits { - typedef Packet32h type; - typedef Packet16h half; - enum { - Vectorizable = 1, - AlignedOnScalar = 1, - size = 32, - - HasCmp = 1, - HasAdd = 1, - HasSub = 1, - HasMul = 1, - HasDiv = 1, - HasNegate = 1, - HasAbs = 1, - HasAbs2 = 0, - HasMin = 1, - HasMax = 1, - HasConj = 1, - HasSetLinear = 0, - HasLog = 1, - HasLog1p = 1, - HasExp = 1, - HasExpm1 = 1, - HasSqrt = 1, - HasRsqrt = 1, - // These ones should be implemented in future - HasBessel = 0, - HasNdtri = 0, - HasSin = EIGEN_FAST_MATH, - HasCos = EIGEN_FAST_MATH, - HasTanh = EIGEN_FAST_MATH, - HasErf = 0, // EIGEN_FAST_MATH, - HasBlend = 0 - }; -}; - -template <> -struct unpacket_traits<Packet32h> { - typedef Eigen::half type; - typedef Packet16h half; - enum { - size = 32, - alignment = Aligned64, - vectorizable = true, - masked_load_available = false, - masked_store_available = false - }; -}; - -template <> -struct unpacket_traits<Packet16h> { - typedef Eigen::half type; - typedef Packet8h half; - enum { - size = 16, - alignment = Aligned32, - vectorizable = true, - masked_load_available = false, - masked_store_available = false - }; -}; - -template <> -struct unpacket_traits<Packet8h> { - typedef Eigen::half type; - 
typedef Packet8h half; - enum { - size = 8, - alignment = Aligned16, - vectorizable = true, - masked_load_available = false, - masked_store_available = false - }; -}; - -// Memory functions - -// pset1 - -template <> -EIGEN_STRONG_INLINE Packet32h pset1<Packet32h>(const Eigen::half& from) { - // half/half_raw is bit compatible - return _mm512_set1_ph(numext::bit_cast<_Float16>(from)); -} - -template <> -EIGEN_STRONG_INLINE Packet32h pzero(const Packet32h& /*a*/) { - return _mm512_setzero_ph(); -} - -// pset1frombits -template <> -EIGEN_STRONG_INLINE Packet32h pset1frombits<Packet32h>(unsigned short from) { - return _mm512_castsi512_ph(_mm512_set1_epi16(from)); -} - -// pfirst - -template <> -EIGEN_STRONG_INLINE Eigen::half pfirst<Packet32h>(const Packet32h& from) { -#ifdef EIGEN_VECTORIZE_AVX512DQ - return half_impl::raw_uint16_to_half( - static_cast<unsigned short>(_mm256_extract_epi16(_mm512_extracti32x8_epi32(_mm512_castph_si512(from), 0), 0))); -#else - Eigen::half dest[32]; - _mm512_storeu_ph(dest, from); - return dest[0]; -#endif -} - -// pload - -template <> -EIGEN_STRONG_INLINE Packet32h pload<Packet32h>(const Eigen::half* from) { - EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ph(from); -} - -// ploadu - -template <> -EIGEN_STRONG_INLINE Packet32h ploadu<Packet32h>(const Eigen::half* from) { - EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ph(from); -} - -// pstore - -template <> -EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet32h& from) { - EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ph(to, from); -} - -// pstoreu - -template <> -EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet32h& from) { - EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ph(to, from); -} - -// ploaddup -template <> -EIGEN_STRONG_INLINE Packet32h ploaddup<Packet32h>(const Eigen::half* from) { - __m512h a = _mm512_castph256_ph512(_mm256_loadu_ph(from)); - return _mm512_permutexvar_ph(_mm512_set_epi16(15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 
7, 6, 6, - 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0), - a); -} - -// ploadquad -template <> -EIGEN_STRONG_INLINE Packet32h ploadquad<Packet32h>(const Eigen::half* from) { - __m512h a = _mm512_castph128_ph512(_mm_loadu_ph(from)); - return _mm512_permutexvar_ph( - _mm512_set_epi16(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0), - a); -} - -// pabs - -template <> -EIGEN_STRONG_INLINE Packet32h pabs<Packet32h>(const Packet32h& a) { - return _mm512_abs_ph(a); -} - -// psignbit - -template <> -EIGEN_STRONG_INLINE Packet32h psignbit<Packet32h>(const Packet32h& a) { - return _mm512_castsi512_ph(_mm512_srai_epi16(_mm512_castph_si512(a), 15)); -} - -// pmin - -template <> -EIGEN_STRONG_INLINE Packet32h pmin<Packet32h>(const Packet32h& a, const Packet32h& b) { - return _mm512_min_ph(a, b); -} - -// pmax - -template <> -EIGEN_STRONG_INLINE Packet32h pmax<Packet32h>(const Packet32h& a, const Packet32h& b) { - return _mm512_max_ph(a, b); -} - -// plset -template <> -EIGEN_STRONG_INLINE Packet32h plset<Packet32h>(const half& a) { - return _mm512_add_ph(pset1<Packet32h>(a), _mm512_set_ph(31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, - 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); -} - -// por - -template <> -EIGEN_STRONG_INLINE Packet32h por(const Packet32h& a, const Packet32h& b) { - return _mm512_castsi512_ph(_mm512_or_si512(_mm512_castph_si512(a), _mm512_castph_si512(b))); -} - -// pxor - -template <> -EIGEN_STRONG_INLINE Packet32h pxor(const Packet32h& a, const Packet32h& b) { - return _mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(a), _mm512_castph_si512(b))); -} - -// pand - -template <> -EIGEN_STRONG_INLINE Packet32h pand(const Packet32h& a, const Packet32h& b) { - return _mm512_castsi512_ph(_mm512_and_si512(_mm512_castph_si512(a), _mm512_castph_si512(b))); -} - -// pandnot - -template <> -EIGEN_STRONG_INLINE Packet32h pandnot(const Packet32h& a, const Packet32h& b) { - return 
_mm512_castsi512_ph(_mm512_andnot_si512(_mm512_castph_si512(b), _mm512_castph_si512(a))); -} - -// pselect - -template <> -EIGEN_DEVICE_FUNC inline Packet32h pselect(const Packet32h& mask, const Packet32h& a, const Packet32h& b) { - __mmask32 mask32 = _mm512_cmp_epi16_mask(_mm512_castph_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ); - return _mm512_mask_blend_ph(mask32, a, b); -} - -// pcmp_eq - -template <> -EIGEN_STRONG_INLINE Packet32h pcmp_eq(const Packet32h& a, const Packet32h& b) { - __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_EQ_OQ); - return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, static_cast<short>(0xffffu))); -} - -// pcmp_le - -template <> -EIGEN_STRONG_INLINE Packet32h pcmp_le(const Packet32h& a, const Packet32h& b) { - __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LE_OQ); - return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, static_cast<short>(0xffffu))); -} - -// pcmp_lt - -template <> -EIGEN_STRONG_INLINE Packet32h pcmp_lt(const Packet32h& a, const Packet32h& b) { - __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LT_OQ); - return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, static_cast<short>(0xffffu))); -} - -// pcmp_lt_or_nan - -template <> -EIGEN_STRONG_INLINE Packet32h pcmp_lt_or_nan(const Packet32h& a, const Packet32h& b) { - __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_NGE_UQ); - return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi16(0), mask, static_cast<short>(0xffffu))); -} - -// padd - -template <> -EIGEN_STRONG_INLINE Packet32h padd<Packet32h>(const Packet32h& a, const Packet32h& b) { - return _mm512_add_ph(a, b); -} - -template <> -EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) { - return _mm256_castph_si256(_mm256_add_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); -} - -template <> -EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) { - return 
_mm_castph_si128(_mm_add_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); -} - -// psub - -template <> -EIGEN_STRONG_INLINE Packet32h psub<Packet32h>(const Packet32h& a, const Packet32h& b) { - return _mm512_sub_ph(a, b); -} - -template <> -EIGEN_STRONG_INLINE Packet16h psub<Packet16h>(const Packet16h& a, const Packet16h& b) { - return _mm256_castph_si256(_mm256_sub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); -} - -template <> -EIGEN_STRONG_INLINE Packet8h psub<Packet8h>(const Packet8h& a, const Packet8h& b) { - return _mm_castph_si128(_mm_sub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); -} - -// pmul - -template <> -EIGEN_STRONG_INLINE Packet32h pmul<Packet32h>(const Packet32h& a, const Packet32h& b) { - return _mm512_mul_ph(a, b); -} - -template <> -EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) { - return _mm256_castph_si256(_mm256_mul_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); -} - -template <> -EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) { - return _mm_castph_si128(_mm_mul_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); -} - -// pdiv - -template <> -EIGEN_STRONG_INLINE Packet32h pdiv<Packet32h>(const Packet32h& a, const Packet32h& b) { - return _mm512_div_ph(a, b); -} - -template <> -EIGEN_STRONG_INLINE Packet16h pdiv<Packet16h>(const Packet16h& a, const Packet16h& b) { - return _mm256_castph_si256(_mm256_div_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); -} - -template <> -EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const Packet8h& b) { - return _mm_castph_si128(_mm_div_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); -} - -// pround - -template <> -EIGEN_STRONG_INLINE Packet32h pround<Packet32h>(const Packet32h& a) { - // Work-around for default std::round rounding mode. 
- - // Mask for the sign bit - const Packet32h signMask = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x8000u)); - // The largest half-preicision float less than 0.5 - const Packet32h prev0dot5 = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x37FFu)); - - return _mm512_roundscale_ph(padd(por(pand(a, signMask), prev0dot5), a), _MM_FROUND_TO_ZERO); -} - -// print - -template <> -EIGEN_STRONG_INLINE Packet32h print<Packet32h>(const Packet32h& a) { - return _mm512_roundscale_ph(a, _MM_FROUND_CUR_DIRECTION); -} - -// pceil - -template <> -EIGEN_STRONG_INLINE Packet32h pceil<Packet32h>(const Packet32h& a) { - return _mm512_roundscale_ph(a, _MM_FROUND_TO_POS_INF); -} - -// pfloor - -template <> -EIGEN_STRONG_INLINE Packet32h pfloor<Packet32h>(const Packet32h& a) { - return _mm512_roundscale_ph(a, _MM_FROUND_TO_NEG_INF); -} - -// ptrunc - -template <> -EIGEN_STRONG_INLINE Packet32h ptrunc<Packet32h>(const Packet32h& a) { - return _mm512_roundscale_ph(a, _MM_FROUND_TO_ZERO); -} - -// predux -template <> -EIGEN_STRONG_INLINE half predux<Packet32h>(const Packet32h& a) { - return (half)_mm512_reduce_add_ph(a); -} - -template <> -EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& a) { - return (half)_mm256_reduce_add_ph(_mm256_castsi256_ph(a)); -} - -template <> -EIGEN_STRONG_INLINE half predux<Packet8h>(const Packet8h& a) { - return (half)_mm_reduce_add_ph(_mm_castsi128_ph(a)); -} - -// predux_half_dowto4 -template <> -EIGEN_STRONG_INLINE Packet16h predux_half_dowto4<Packet32h>(const Packet32h& a) { -#ifdef EIGEN_VECTORIZE_AVX512DQ - __m256i lowHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 0)); - __m256i highHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 1)); - - return Packet16h(padd<Packet16h>(lowHalf, highHalf)); -#else - Eigen::half data[32]; - _mm512_storeu_ph(data, a); - - __m256i lowHalf = _mm256_castph_si256(_mm256_loadu_ph(data)); - __m256i highHalf = 
_mm256_castph_si256(_mm256_loadu_ph(data + 16)); - - return Packet16h(padd<Packet16h>(lowHalf, highHalf)); -#endif -} - -// predux_max - -// predux_min - -// predux_mul - -#ifdef EIGEN_VECTORIZE_FMA - -// pmadd - -template <> -EIGEN_STRONG_INLINE Packet32h pmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) { - return _mm512_fmadd_ph(a, b, c); -} - -template <> -EIGEN_STRONG_INLINE Packet16h pmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) { - return _mm256_castph_si256(_mm256_fmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); -} - -template <> -EIGEN_STRONG_INLINE Packet8h pmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) { - return _mm_castph_si128(_mm_fmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); -} - -// pmsub - -template <> -EIGEN_STRONG_INLINE Packet32h pmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) { - return _mm512_fmsub_ph(a, b, c); -} - -template <> -EIGEN_STRONG_INLINE Packet16h pmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) { - return _mm256_castph_si256(_mm256_fmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); -} - -template <> -EIGEN_STRONG_INLINE Packet8h pmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) { - return _mm_castph_si128(_mm_fmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); -} - -// pnmadd - -template <> -EIGEN_STRONG_INLINE Packet32h pnmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) { - return _mm512_fnmadd_ph(a, b, c); -} - -template <> -EIGEN_STRONG_INLINE Packet16h pnmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) { - return _mm256_castph_si256(_mm256_fnmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); -} - -template <> -EIGEN_STRONG_INLINE Packet8h pnmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) { - return 
_mm_castph_si128(_mm_fnmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); -} - -// pnmsub - -template <> -EIGEN_STRONG_INLINE Packet32h pnmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) { - return _mm512_fnmsub_ph(a, b, c); -} - -template <> -EIGEN_STRONG_INLINE Packet16h pnmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) { - return _mm256_castph_si256(_mm256_fnmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); -} - -template <> -EIGEN_STRONG_INLINE Packet8h pnmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) { - return _mm_castph_si128(_mm_fnmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); -} - -#endif - -// pnegate - -template <> -EIGEN_STRONG_INLINE Packet32h pnegate<Packet32h>(const Packet32h& a) { - return psub(pzero(a), a); -} - -// pconj - -template <> -EIGEN_STRONG_INLINE Packet32h pconj<Packet32h>(const Packet32h& a) { - return a; -} - -// psqrt - -template <> -EIGEN_STRONG_INLINE Packet32h psqrt<Packet32h>(const Packet32h& a) { - return _mm512_sqrt_ph(a); -} - -// prsqrt - -template <> -EIGEN_STRONG_INLINE Packet32h prsqrt<Packet32h>(const Packet32h& a) { - return _mm512_rsqrt_ph(a); -} - -// preciprocal - -template <> -EIGEN_STRONG_INLINE Packet32h preciprocal<Packet32h>(const Packet32h& a) { - return _mm512_rcp_ph(a); -} - -// ptranspose - -EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 32>& a) { - __m512i t[32]; - - EIGEN_UNROLL_LOOP - for (int i = 0; i < 16; i++) { - t[2 * i] = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1])); - t[2 * i + 1] = - _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1])); - } - - __m512i p[32]; - - EIGEN_UNROLL_LOOP - for (int i = 0; i < 8; i++) { - p[4 * i] = _mm512_unpacklo_epi32(t[4 * i], t[4 * i + 2]); - p[4 * i + 1] = _mm512_unpackhi_epi32(t[4 * i], t[4 * i + 2]); - p[4 * i + 2] = 
_mm512_unpacklo_epi32(t[4 * i + 1], t[4 * i + 3]); - p[4 * i + 3] = _mm512_unpackhi_epi32(t[4 * i + 1], t[4 * i + 3]); - } - - __m512i q[32]; - - EIGEN_UNROLL_LOOP - for (int i = 0; i < 4; i++) { - q[8 * i] = _mm512_unpacklo_epi64(p[8 * i], p[8 * i + 4]); - q[8 * i + 1] = _mm512_unpackhi_epi64(p[8 * i], p[8 * i + 4]); - q[8 * i + 2] = _mm512_unpacklo_epi64(p[8 * i + 1], p[8 * i + 5]); - q[8 * i + 3] = _mm512_unpackhi_epi64(p[8 * i + 1], p[8 * i + 5]); - q[8 * i + 4] = _mm512_unpacklo_epi64(p[8 * i + 2], p[8 * i + 6]); - q[8 * i + 5] = _mm512_unpackhi_epi64(p[8 * i + 2], p[8 * i + 6]); - q[8 * i + 6] = _mm512_unpacklo_epi64(p[8 * i + 3], p[8 * i + 7]); - q[8 * i + 7] = _mm512_unpackhi_epi64(p[8 * i + 3], p[8 * i + 7]); - } - - __m512i f[32]; - -#define PACKET32H_TRANSPOSE_HELPER(X, Y) \ - do { \ - f[Y * 8] = _mm512_inserti32x4(f[Y * 8], _mm512_extracti32x4_epi32(q[X * 8], Y), X); \ - f[Y * 8 + 1] = _mm512_inserti32x4(f[Y * 8 + 1], _mm512_extracti32x4_epi32(q[X * 8 + 1], Y), X); \ - f[Y * 8 + 2] = _mm512_inserti32x4(f[Y * 8 + 2], _mm512_extracti32x4_epi32(q[X * 8 + 2], Y), X); \ - f[Y * 8 + 3] = _mm512_inserti32x4(f[Y * 8 + 3], _mm512_extracti32x4_epi32(q[X * 8 + 3], Y), X); \ - f[Y * 8 + 4] = _mm512_inserti32x4(f[Y * 8 + 4], _mm512_extracti32x4_epi32(q[X * 8 + 4], Y), X); \ - f[Y * 8 + 5] = _mm512_inserti32x4(f[Y * 8 + 5], _mm512_extracti32x4_epi32(q[X * 8 + 5], Y), X); \ - f[Y * 8 + 6] = _mm512_inserti32x4(f[Y * 8 + 6], _mm512_extracti32x4_epi32(q[X * 8 + 6], Y), X); \ - f[Y * 8 + 7] = _mm512_inserti32x4(f[Y * 8 + 7], _mm512_extracti32x4_epi32(q[X * 8 + 7], Y), X); \ - } while (false); - - PACKET32H_TRANSPOSE_HELPER(0, 0); - PACKET32H_TRANSPOSE_HELPER(1, 1); - PACKET32H_TRANSPOSE_HELPER(2, 2); - PACKET32H_TRANSPOSE_HELPER(3, 3); - - PACKET32H_TRANSPOSE_HELPER(1, 0); - PACKET32H_TRANSPOSE_HELPER(2, 0); - PACKET32H_TRANSPOSE_HELPER(3, 0); - PACKET32H_TRANSPOSE_HELPER(2, 1); - PACKET32H_TRANSPOSE_HELPER(3, 1); - PACKET32H_TRANSPOSE_HELPER(3, 2); - - 
PACKET32H_TRANSPOSE_HELPER(0, 1); - PACKET32H_TRANSPOSE_HELPER(0, 2); - PACKET32H_TRANSPOSE_HELPER(0, 3); - PACKET32H_TRANSPOSE_HELPER(1, 2); - PACKET32H_TRANSPOSE_HELPER(1, 3); - PACKET32H_TRANSPOSE_HELPER(2, 3); - -#undef PACKET32H_TRANSPOSE_HELPER - - EIGEN_UNROLL_LOOP - for (int i = 0; i < 32; i++) { - a.packet[i] = _mm512_castsi512_ph(f[i]); - } -} - -EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 4>& a) { - __m512i p0, p1, p2, p3, t0, t1, t2, t3, a0, a1, a2, a3; - t0 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1])); - t1 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1])); - t2 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3])); - t3 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3])); - - p0 = _mm512_unpacklo_epi32(t0, t2); - p1 = _mm512_unpackhi_epi32(t0, t2); - p2 = _mm512_unpacklo_epi32(t1, t3); - p3 = _mm512_unpackhi_epi32(t1, t3); - - a0 = p0; - a1 = p1; - a2 = p2; - a3 = p3; - - a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p1, 0), 1); - a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p0, 1), 0); - - a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p2, 0), 2); - a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p0, 2), 0); - - a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p3, 0), 3); - a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p0, 3), 0); - - a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p2, 1), 2); - a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p1, 2), 1); - - a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p3, 2), 3); - a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p2, 3), 2); - - a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p3, 1), 3); - a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p1, 3), 1); - - a.packet[0] = _mm512_castsi512_ph(a0); - a.packet[1] = _mm512_castsi512_ph(a1); - 
a.packet[2] = _mm512_castsi512_ph(a2); - a.packet[3] = _mm512_castsi512_ph(a3); -} - -// preverse - -template <> -EIGEN_STRONG_INLINE Packet32h preverse(const Packet32h& a) { - return _mm512_permutexvar_ph(_mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31), - a); -} - -// pscatter - -template <> -EIGEN_STRONG_INLINE void pscatter<half, Packet32h>(half* to, const Packet32h& from, Index stride) { - EIGEN_ALIGN64 half aux[32]; - pstore(aux, from); - - EIGEN_UNROLL_LOOP - for (int i = 0; i < 32; i++) { - to[stride * i] = aux[i]; - } -} - -// pgather - -template <> -EIGEN_STRONG_INLINE Packet32h pgather<Eigen::half, Packet32h>(const Eigen::half* from, Index stride) { - return _mm512_castsi512_ph(_mm512_set_epi16( - from[31 * stride].x, from[30 * stride].x, from[29 * stride].x, from[28 * stride].x, from[27 * stride].x, - from[26 * stride].x, from[25 * stride].x, from[24 * stride].x, from[23 * stride].x, from[22 * stride].x, - from[21 * stride].x, from[20 * stride].x, from[19 * stride].x, from[18 * stride].x, from[17 * stride].x, - from[16 * stride].x, from[15 * stride].x, from[14 * stride].x, from[13 * stride].x, from[12 * stride].x, - from[11 * stride].x, from[10 * stride].x, from[9 * stride].x, from[8 * stride].x, from[7 * stride].x, - from[6 * stride].x, from[5 * stride].x, from[4 * stride].x, from[3 * stride].x, from[2 * stride].x, - from[1 * stride].x, from[0 * stride].x)); -} - -template <> -EIGEN_STRONG_INLINE Packet16h pcos<Packet16h>(const Packet16h&); -template <> -EIGEN_STRONG_INLINE Packet16h psin<Packet16h>(const Packet16h&); -template <> -EIGEN_STRONG_INLINE Packet16h plog<Packet16h>(const Packet16h&); -template <> -EIGEN_STRONG_INLINE Packet16h plog2<Packet16h>(const Packet16h&); -template <> -EIGEN_STRONG_INLINE Packet16h plog1p<Packet16h>(const Packet16h&); -template <> -EIGEN_STRONG_INLINE Packet16h pexp<Packet16h>(const Packet16h&); -template <> -EIGEN_STRONG_INLINE 
Packet16h pexpm1<Packet16h>(const Packet16h&); -template <> -EIGEN_STRONG_INLINE Packet16h ptanh<Packet16h>(const Packet16h&); -template <> -EIGEN_STRONG_INLINE Packet16h pfrexp<Packet16h>(const Packet16h&, Packet16h&); -template <> -EIGEN_STRONG_INLINE Packet16h pldexp<Packet16h>(const Packet16h&, const Packet16h&); - -EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h& a, const Packet16h& b) { - __m512d result = _mm512_undefined_pd(); - result = _mm512_insertf64x4(result, _mm256_castsi256_pd(a), 0); - result = _mm512_insertf64x4(result, _mm256_castsi256_pd(b), 1); - return _mm512_castpd_ph(result); -} - -EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h& x, Packet16h& a, Packet16h& b) { - a = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 0)); - b = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 1)); -} - -// psin -template <> -EIGEN_STRONG_INLINE Packet32h psin<Packet32h>(const Packet32h& a) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h lowOut = psin(low); - Packet16h highOut = psin(high); - - return combine2Packet16h(lowOut, highOut); -} - -// pcos -template <> -EIGEN_STRONG_INLINE Packet32h pcos<Packet32h>(const Packet32h& a) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h lowOut = pcos(low); - Packet16h highOut = pcos(high); - - return combine2Packet16h(lowOut, highOut); -} - -// plog -template <> -EIGEN_STRONG_INLINE Packet32h plog<Packet32h>(const Packet32h& a) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h lowOut = plog(low); - Packet16h highOut = plog(high); - - return combine2Packet16h(lowOut, highOut); -} - -// plog2 -template <> -EIGEN_STRONG_INLINE Packet32h plog2<Packet32h>(const Packet32h& a) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h lowOut = plog2(low); - Packet16h highOut = plog2(high); - - return combine2Packet16h(lowOut, highOut); 
-} - -// plog1p -template <> -EIGEN_STRONG_INLINE Packet32h plog1p<Packet32h>(const Packet32h& a) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h lowOut = plog1p(low); - Packet16h highOut = plog1p(high); - - return combine2Packet16h(lowOut, highOut); -} - -// pexp -template <> -EIGEN_STRONG_INLINE Packet32h pexp<Packet32h>(const Packet32h& a) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h lowOut = pexp(low); - Packet16h highOut = pexp(high); - - return combine2Packet16h(lowOut, highOut); -} - -// pexpm1 -template <> -EIGEN_STRONG_INLINE Packet32h pexpm1<Packet32h>(const Packet32h& a) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h lowOut = pexpm1(low); - Packet16h highOut = pexpm1(high); - - return combine2Packet16h(lowOut, highOut); -} - -// ptanh -template <> -EIGEN_STRONG_INLINE Packet32h ptanh<Packet32h>(const Packet32h& a) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h lowOut = ptanh(low); - Packet16h highOut = ptanh(high); - - return combine2Packet16h(lowOut, highOut); -} - -// pfrexp -template <> -EIGEN_STRONG_INLINE Packet32h pfrexp<Packet32h>(const Packet32h& a, Packet32h& exponent) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h exp1 = _mm256_undefined_si256(); - Packet16h exp2 = _mm256_undefined_si256(); - - Packet16h lowOut = pfrexp(low, exp1); - Packet16h highOut = pfrexp(high, exp2); - - exponent = combine2Packet16h(exp1, exp2); - - return combine2Packet16h(lowOut, highOut); -} - -// pldexp -template <> -EIGEN_STRONG_INLINE Packet32h pldexp<Packet32h>(const Packet32h& a, const Packet32h& exponent) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h exp1; - Packet16h exp2; - extract2Packet16h(exponent, exp1, exp2); - - Packet16h lowOut = pldexp(low, exp1); - Packet16h highOut = pldexp(high, exp2); - - return 
combine2Packet16h(lowOut, highOut); -} - -} // end namespace internal -} // end namespace Eigen - -#endif // EIGEN_PACKET_MATH_FP16_AVX512_H +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PACKET_MATH_FP16_AVX512_H +#define EIGEN_PACKET_MATH_FP16_AVX512_H + +// IWYU pragma: private +#include "../../InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +typedef __m512h Packet32h; +typedef eigen_packet_wrapper<__m256i, 1> Packet16h; +typedef eigen_packet_wrapper<__m128i, 2> Packet8h; + +template <> +struct is_arithmetic<Packet8h> { + enum { value = true }; +}; + +template <> +struct packet_traits<half> : default_packet_traits { + typedef Packet32h type; + typedef Packet16h half; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size = 32, + + HasCmp = 1, + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasNegate = 1, + HasAbs = 1, + HasAbs2 = 0, + HasMin = 1, + HasMax = 1, + HasConj = 1, + HasSetLinear = 0, + HasLog = 1, + HasLog1p = 1, + HasExp = 1, + HasExpm1 = 1, + HasSqrt = 1, + HasRsqrt = 1, + // These ones should be implemented in future + HasBessel = 0, + HasNdtri = 0, + HasSin = EIGEN_FAST_MATH, + HasCos = EIGEN_FAST_MATH, + HasTanh = EIGEN_FAST_MATH, + HasErf = 0, // EIGEN_FAST_MATH, + HasBlend = 0 + }; +}; + +template <> +struct unpacket_traits<Packet32h> { + typedef Eigen::half type; + typedef Packet16h half; + enum { + size = 32, + alignment = Aligned64, + vectorizable = true, + masked_load_available = false, + masked_store_available = false + }; +}; + +template <> +struct unpacket_traits<Packet16h> { + typedef Eigen::half type; + typedef Packet8h half; + enum { + size = 16, + alignment = Aligned32, + vectorizable = true, + masked_load_available = false, + 
masked_store_available = false + }; +}; + +template <> +struct unpacket_traits<Packet8h> { + typedef Eigen::half type; + typedef Packet8h half; + enum { + size = 8, + alignment = Aligned16, + vectorizable = true, + masked_load_available = false, + masked_store_available = false + }; +}; + +// Memory functions + +// pset1 + +template <> +EIGEN_STRONG_INLINE Packet32h pset1<Packet32h>(const Eigen::half& from) { + // half/half_raw is bit compatible + return _mm512_set1_ph(numext::bit_cast<_Float16>(from)); +} + +template <> +EIGEN_STRONG_INLINE Packet32h pzero(const Packet32h& /*a*/) { + return _mm512_setzero_ph(); +} + +// pset1frombits +template <> +EIGEN_STRONG_INLINE Packet32h pset1frombits<Packet32h>(unsigned short from) { + return _mm512_castsi512_ph(_mm512_set1_epi16(from)); +} + +// pfirst + +template <> +EIGEN_STRONG_INLINE Eigen::half pfirst<Packet32h>(const Packet32h& from) { +#ifdef EIGEN_VECTORIZE_AVX512DQ + return half_impl::raw_uint16_to_half( + static_cast<unsigned short>(_mm256_extract_epi16(_mm512_extracti32x8_epi32(_mm512_castph_si512(from), 0), 0))); +#else + Eigen::half dest[32]; + _mm512_storeu_ph(dest, from); + return dest[0]; +#endif +} + +// pload + +template <> +EIGEN_STRONG_INLINE Packet32h pload<Packet32h>(const Eigen::half* from) { + EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ph(from); +} + +// ploadu + +template <> +EIGEN_STRONG_INLINE Packet32h ploadu<Packet32h>(const Eigen::half* from) { + EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ph(from); +} + +// pstore + +template <> +EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet32h& from) { + EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ph(to, from); +} + +// pstoreu + +template <> +EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet32h& from) { + EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ph(to, from); +} + +// ploaddup +template <> +EIGEN_STRONG_INLINE Packet32h ploaddup<Packet32h>(const Eigen::half* from) { + __m512h a = 
_mm512_castph256_ph512(_mm256_loadu_ph(from)); + return _mm512_permutexvar_ph(_mm512_set_epi16(15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6, + 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0), + a); +} + +// ploadquad +template <> +EIGEN_STRONG_INLINE Packet32h ploadquad<Packet32h>(const Eigen::half* from) { + __m512h a = _mm512_castph128_ph512(_mm_loadu_ph(from)); + return _mm512_permutexvar_ph( + _mm512_set_epi16(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0), + a); +} + +// pabs + +template <> +EIGEN_STRONG_INLINE Packet32h pabs<Packet32h>(const Packet32h& a) { + return _mm512_abs_ph(a); +} + +// psignbit + +template <> +EIGEN_STRONG_INLINE Packet32h psignbit<Packet32h>(const Packet32h& a) { + return _mm512_castsi512_ph(_mm512_srai_epi16(_mm512_castph_si512(a), 15)); +} + +// pmin + +template <> +EIGEN_STRONG_INLINE Packet32h pmin<Packet32h>(const Packet32h& a, const Packet32h& b) { + return _mm512_min_ph(a, b); +} + +// pmax + +template <> +EIGEN_STRONG_INLINE Packet32h pmax<Packet32h>(const Packet32h& a, const Packet32h& b) { + return _mm512_max_ph(a, b); +} + +// plset +template <> +EIGEN_STRONG_INLINE Packet32h plset<Packet32h>(const half& a) { + return _mm512_add_ph(pset1<Packet32h>(a), _mm512_set_ph(31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, + 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); +} + +// por + +template <> +EIGEN_STRONG_INLINE Packet32h por(const Packet32h& a, const Packet32h& b) { + return _mm512_castsi512_ph(_mm512_or_si512(_mm512_castph_si512(a), _mm512_castph_si512(b))); +} + +// pxor + +template <> +EIGEN_STRONG_INLINE Packet32h pxor(const Packet32h& a, const Packet32h& b) { + return _mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(a), _mm512_castph_si512(b))); +} + +// pand + +template <> +EIGEN_STRONG_INLINE Packet32h pand(const Packet32h& a, const Packet32h& b) { + return _mm512_castsi512_ph(_mm512_and_si512(_mm512_castph_si512(a), 
_mm512_castph_si512(b))); +} + +// pandnot + +template <> +EIGEN_STRONG_INLINE Packet32h pandnot(const Packet32h& a, const Packet32h& b) { + return _mm512_castsi512_ph(_mm512_andnot_si512(_mm512_castph_si512(b), _mm512_castph_si512(a))); +} + +// pselect + +template <> +EIGEN_DEVICE_FUNC inline Packet32h pselect(const Packet32h& mask, const Packet32h& a, const Packet32h& b) { + __mmask32 mask32 = _mm512_cmp_epi16_mask(_mm512_castph_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ); + return _mm512_mask_blend_ph(mask32, a, b); +} + +// pcmp_eq + +template <> +EIGEN_STRONG_INLINE Packet32h pcmp_eq(const Packet32h& a, const Packet32h& b) { + __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_EQ_OQ); + return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, static_cast<short>(0xffffu))); +} + +// pcmp_le + +template <> +EIGEN_STRONG_INLINE Packet32h pcmp_le(const Packet32h& a, const Packet32h& b) { + __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LE_OQ); + return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, static_cast<short>(0xffffu))); +} + +// pcmp_lt + +template <> +EIGEN_STRONG_INLINE Packet32h pcmp_lt(const Packet32h& a, const Packet32h& b) { + __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LT_OQ); + return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, static_cast<short>(0xffffu))); +} + +// pcmp_lt_or_nan + +template <> +EIGEN_STRONG_INLINE Packet32h pcmp_lt_or_nan(const Packet32h& a, const Packet32h& b) { + __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_NGE_UQ); + return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi16(0), mask, static_cast<short>(0xffffu))); +} + +// padd + +template <> +EIGEN_STRONG_INLINE Packet32h padd<Packet32h>(const Packet32h& a, const Packet32h& b) { + return _mm512_add_ph(a, b); +} + +template <> +EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) { + return _mm256_castph_si256(_mm256_add_ph(_mm256_castsi256_ph(a), 
_mm256_castsi256_ph(b))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) { + return _mm_castph_si128(_mm_add_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); +} + +// psub + +template <> +EIGEN_STRONG_INLINE Packet32h psub<Packet32h>(const Packet32h& a, const Packet32h& b) { + return _mm512_sub_ph(a, b); +} + +template <> +EIGEN_STRONG_INLINE Packet16h psub<Packet16h>(const Packet16h& a, const Packet16h& b) { + return _mm256_castph_si256(_mm256_sub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h psub<Packet8h>(const Packet8h& a, const Packet8h& b) { + return _mm_castph_si128(_mm_sub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); +} + +// pmul + +template <> +EIGEN_STRONG_INLINE Packet32h pmul<Packet32h>(const Packet32h& a, const Packet32h& b) { + return _mm512_mul_ph(a, b); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) { + return _mm256_castph_si256(_mm256_mul_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) { + return _mm_castph_si128(_mm_mul_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); +} + +// pdiv + +template <> +EIGEN_STRONG_INLINE Packet32h pdiv<Packet32h>(const Packet32h& a, const Packet32h& b) { + return _mm512_div_ph(a, b); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pdiv<Packet16h>(const Packet16h& a, const Packet16h& b) { + return _mm256_castph_si256(_mm256_div_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const Packet8h& b) { + return _mm_castph_si128(_mm_div_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); +} + +// pround + +template <> +EIGEN_STRONG_INLINE Packet32h pround<Packet32h>(const Packet32h& a) { + // Work-around for default std::round rounding mode. 
+ + // Mask for the sign bit + const Packet32h signMask = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x8000u)); + // The largest half-preicision float less than 0.5 + const Packet32h prev0dot5 = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x37FFu)); + + return _mm512_roundscale_ph(padd(por(pand(a, signMask), prev0dot5), a), _MM_FROUND_TO_ZERO); +} + +// print + +template <> +EIGEN_STRONG_INLINE Packet32h print<Packet32h>(const Packet32h& a) { + return _mm512_roundscale_ph(a, _MM_FROUND_CUR_DIRECTION); +} + +// pceil + +template <> +EIGEN_STRONG_INLINE Packet32h pceil<Packet32h>(const Packet32h& a) { + return _mm512_roundscale_ph(a, _MM_FROUND_TO_POS_INF); +} + +// pfloor + +template <> +EIGEN_STRONG_INLINE Packet32h pfloor<Packet32h>(const Packet32h& a) { + return _mm512_roundscale_ph(a, _MM_FROUND_TO_NEG_INF); +} + +// ptrunc + +template <> +EIGEN_STRONG_INLINE Packet32h ptrunc<Packet32h>(const Packet32h& a) { + return _mm512_roundscale_ph(a, _MM_FROUND_TO_ZERO); +} + +// predux +template <> +EIGEN_STRONG_INLINE half predux<Packet32h>(const Packet32h& a) { + return (half)_mm512_reduce_add_ph(a); +} + +template <> +EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& a) { + return (half)_mm256_reduce_add_ph(_mm256_castsi256_ph(a)); +} + +template <> +EIGEN_STRONG_INLINE half predux<Packet8h>(const Packet8h& a) { + return (half)_mm_reduce_add_ph(_mm_castsi128_ph(a)); +} + +// predux_half_dowto4 +template <> +EIGEN_STRONG_INLINE Packet16h predux_half_dowto4<Packet32h>(const Packet32h& a) { +#ifdef EIGEN_VECTORIZE_AVX512DQ + __m256i lowHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 0)); + __m256i highHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 1)); + + return Packet16h(padd<Packet16h>(lowHalf, highHalf)); +#else + Eigen::half data[32]; + _mm512_storeu_ph(data, a); + + __m256i lowHalf = _mm256_castph_si256(_mm256_loadu_ph(data)); + __m256i highHalf = 
_mm256_castph_si256(_mm256_loadu_ph(data + 16)); + + return Packet16h(padd<Packet16h>(lowHalf, highHalf)); +#endif +} + +// predux_max + +// predux_min + +// predux_mul + +#ifdef EIGEN_VECTORIZE_FMA + +// pmadd + +template <> +EIGEN_STRONG_INLINE Packet32h pmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) { + return _mm512_fmadd_ph(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) { + return _mm256_castph_si256(_mm256_fmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h pmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) { + return _mm_castph_si128(_mm_fmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); +} + +// pmsub + +template <> +EIGEN_STRONG_INLINE Packet32h pmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) { + return _mm512_fmsub_ph(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) { + return _mm256_castph_si256(_mm256_fmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h pmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) { + return _mm_castph_si128(_mm_fmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); +} + +// pnmadd + +template <> +EIGEN_STRONG_INLINE Packet32h pnmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) { + return _mm512_fnmadd_ph(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pnmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) { + return _mm256_castph_si256(_mm256_fnmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h pnmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) { + return 
_mm_castph_si128(_mm_fnmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); +} + +// pnmsub + +template <> +EIGEN_STRONG_INLINE Packet32h pnmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) { + return _mm512_fnmsub_ph(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pnmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) { + return _mm256_castph_si256(_mm256_fnmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h pnmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) { + return _mm_castph_si128(_mm_fnmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); +} + +#endif + +// pnegate + +template <> +EIGEN_STRONG_INLINE Packet32h pnegate<Packet32h>(const Packet32h& a) { + return psub(pzero(a), a); +} + +// pconj + +template <> +EIGEN_STRONG_INLINE Packet32h pconj<Packet32h>(const Packet32h& a) { + return a; +} + +// psqrt + +template <> +EIGEN_STRONG_INLINE Packet32h psqrt<Packet32h>(const Packet32h& a) { + return _mm512_sqrt_ph(a); +} + +// prsqrt + +template <> +EIGEN_STRONG_INLINE Packet32h prsqrt<Packet32h>(const Packet32h& a) { + return _mm512_rsqrt_ph(a); +} + +// preciprocal + +template <> +EIGEN_STRONG_INLINE Packet32h preciprocal<Packet32h>(const Packet32h& a) { + return _mm512_rcp_ph(a); +} + +// ptranspose + +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 32>& a) { + __m512i t[32]; + + EIGEN_UNROLL_LOOP + for (int i = 0; i < 16; i++) { + t[2 * i] = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1])); + t[2 * i + 1] = + _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1])); + } + + __m512i p[32]; + + EIGEN_UNROLL_LOOP + for (int i = 0; i < 8; i++) { + p[4 * i] = _mm512_unpacklo_epi32(t[4 * i], t[4 * i + 2]); + p[4 * i + 1] = _mm512_unpackhi_epi32(t[4 * i], t[4 * i + 2]); + p[4 * i + 2] = 
_mm512_unpacklo_epi32(t[4 * i + 1], t[4 * i + 3]); + p[4 * i + 3] = _mm512_unpackhi_epi32(t[4 * i + 1], t[4 * i + 3]); + } + + __m512i q[32]; + + EIGEN_UNROLL_LOOP + for (int i = 0; i < 4; i++) { + q[8 * i] = _mm512_unpacklo_epi64(p[8 * i], p[8 * i + 4]); + q[8 * i + 1] = _mm512_unpackhi_epi64(p[8 * i], p[8 * i + 4]); + q[8 * i + 2] = _mm512_unpacklo_epi64(p[8 * i + 1], p[8 * i + 5]); + q[8 * i + 3] = _mm512_unpackhi_epi64(p[8 * i + 1], p[8 * i + 5]); + q[8 * i + 4] = _mm512_unpacklo_epi64(p[8 * i + 2], p[8 * i + 6]); + q[8 * i + 5] = _mm512_unpackhi_epi64(p[8 * i + 2], p[8 * i + 6]); + q[8 * i + 6] = _mm512_unpacklo_epi64(p[8 * i + 3], p[8 * i + 7]); + q[8 * i + 7] = _mm512_unpackhi_epi64(p[8 * i + 3], p[8 * i + 7]); + } + + __m512i f[32]; + +#define PACKET32H_TRANSPOSE_HELPER(X, Y) \ + do { \ + f[Y * 8] = _mm512_inserti32x4(f[Y * 8], _mm512_extracti32x4_epi32(q[X * 8], Y), X); \ + f[Y * 8 + 1] = _mm512_inserti32x4(f[Y * 8 + 1], _mm512_extracti32x4_epi32(q[X * 8 + 1], Y), X); \ + f[Y * 8 + 2] = _mm512_inserti32x4(f[Y * 8 + 2], _mm512_extracti32x4_epi32(q[X * 8 + 2], Y), X); \ + f[Y * 8 + 3] = _mm512_inserti32x4(f[Y * 8 + 3], _mm512_extracti32x4_epi32(q[X * 8 + 3], Y), X); \ + f[Y * 8 + 4] = _mm512_inserti32x4(f[Y * 8 + 4], _mm512_extracti32x4_epi32(q[X * 8 + 4], Y), X); \ + f[Y * 8 + 5] = _mm512_inserti32x4(f[Y * 8 + 5], _mm512_extracti32x4_epi32(q[X * 8 + 5], Y), X); \ + f[Y * 8 + 6] = _mm512_inserti32x4(f[Y * 8 + 6], _mm512_extracti32x4_epi32(q[X * 8 + 6], Y), X); \ + f[Y * 8 + 7] = _mm512_inserti32x4(f[Y * 8 + 7], _mm512_extracti32x4_epi32(q[X * 8 + 7], Y), X); \ + } while (false); + + PACKET32H_TRANSPOSE_HELPER(0, 0); + PACKET32H_TRANSPOSE_HELPER(1, 1); + PACKET32H_TRANSPOSE_HELPER(2, 2); + PACKET32H_TRANSPOSE_HELPER(3, 3); + + PACKET32H_TRANSPOSE_HELPER(1, 0); + PACKET32H_TRANSPOSE_HELPER(2, 0); + PACKET32H_TRANSPOSE_HELPER(3, 0); + PACKET32H_TRANSPOSE_HELPER(2, 1); + PACKET32H_TRANSPOSE_HELPER(3, 1); + PACKET32H_TRANSPOSE_HELPER(3, 2); + + 
PACKET32H_TRANSPOSE_HELPER(0, 1); + PACKET32H_TRANSPOSE_HELPER(0, 2); + PACKET32H_TRANSPOSE_HELPER(0, 3); + PACKET32H_TRANSPOSE_HELPER(1, 2); + PACKET32H_TRANSPOSE_HELPER(1, 3); + PACKET32H_TRANSPOSE_HELPER(2, 3); + +#undef PACKET32H_TRANSPOSE_HELPER + + EIGEN_UNROLL_LOOP + for (int i = 0; i < 32; i++) { + a.packet[i] = _mm512_castsi512_ph(f[i]); + } +} + +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 4>& a) { + __m512i p0, p1, p2, p3, t0, t1, t2, t3, a0, a1, a2, a3; + t0 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1])); + t1 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1])); + t2 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3])); + t3 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3])); + + p0 = _mm512_unpacklo_epi32(t0, t2); + p1 = _mm512_unpackhi_epi32(t0, t2); + p2 = _mm512_unpacklo_epi32(t1, t3); + p3 = _mm512_unpackhi_epi32(t1, t3); + + a0 = p0; + a1 = p1; + a2 = p2; + a3 = p3; + + a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p1, 0), 1); + a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p0, 1), 0); + + a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p2, 0), 2); + a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p0, 2), 0); + + a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p3, 0), 3); + a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p0, 3), 0); + + a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p2, 1), 2); + a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p1, 2), 1); + + a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p3, 2), 3); + a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p2, 3), 2); + + a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p3, 1), 3); + a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p1, 3), 1); + + a.packet[0] = _mm512_castsi512_ph(a0); + a.packet[1] = _mm512_castsi512_ph(a1); + 
a.packet[2] = _mm512_castsi512_ph(a2); + a.packet[3] = _mm512_castsi512_ph(a3); +} + +// preverse + +template <> +EIGEN_STRONG_INLINE Packet32h preverse(const Packet32h& a) { + return _mm512_permutexvar_ph(_mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31), + a); +} + +// pscatter + +template <> +EIGEN_STRONG_INLINE void pscatter<half, Packet32h>(half* to, const Packet32h& from, Index stride) { + EIGEN_ALIGN64 half aux[32]; + pstore(aux, from); + + EIGEN_UNROLL_LOOP + for (int i = 0; i < 32; i++) { + to[stride * i] = aux[i]; + } +} + +// pgather + +template <> +EIGEN_STRONG_INLINE Packet32h pgather<Eigen::half, Packet32h>(const Eigen::half* from, Index stride) { + return _mm512_castsi512_ph(_mm512_set_epi16( + from[31 * stride].x, from[30 * stride].x, from[29 * stride].x, from[28 * stride].x, from[27 * stride].x, + from[26 * stride].x, from[25 * stride].x, from[24 * stride].x, from[23 * stride].x, from[22 * stride].x, + from[21 * stride].x, from[20 * stride].x, from[19 * stride].x, from[18 * stride].x, from[17 * stride].x, + from[16 * stride].x, from[15 * stride].x, from[14 * stride].x, from[13 * stride].x, from[12 * stride].x, + from[11 * stride].x, from[10 * stride].x, from[9 * stride].x, from[8 * stride].x, from[7 * stride].x, + from[6 * stride].x, from[5 * stride].x, from[4 * stride].x, from[3 * stride].x, from[2 * stride].x, + from[1 * stride].x, from[0 * stride].x)); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pcos<Packet16h>(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h psin<Packet16h>(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h plog<Packet16h>(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h plog2<Packet16h>(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h plog1p<Packet16h>(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h pexp<Packet16h>(const Packet16h&); +template <> +EIGEN_STRONG_INLINE 
Packet16h pexpm1<Packet16h>(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h ptanh<Packet16h>(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h pfrexp<Packet16h>(const Packet16h&, Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h pldexp<Packet16h>(const Packet16h&, const Packet16h&); + +EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h& a, const Packet16h& b) { + __m512d result = _mm512_undefined_pd(); + result = _mm512_insertf64x4(result, _mm256_castsi256_pd(a), 0); + result = _mm512_insertf64x4(result, _mm256_castsi256_pd(b), 1); + return _mm512_castpd_ph(result); +} + +EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h& x, Packet16h& a, Packet16h& b) { + a = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 0)); + b = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 1)); +} + +// psin +template <> +EIGEN_STRONG_INLINE Packet32h psin<Packet32h>(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = psin(low); + Packet16h highOut = psin(high); + + return combine2Packet16h(lowOut, highOut); +} + +// pcos +template <> +EIGEN_STRONG_INLINE Packet32h pcos<Packet32h>(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = pcos(low); + Packet16h highOut = pcos(high); + + return combine2Packet16h(lowOut, highOut); +} + +// plog +template <> +EIGEN_STRONG_INLINE Packet32h plog<Packet32h>(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = plog(low); + Packet16h highOut = plog(high); + + return combine2Packet16h(lowOut, highOut); +} + +// plog2 +template <> +EIGEN_STRONG_INLINE Packet32h plog2<Packet32h>(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = plog2(low); + Packet16h highOut = plog2(high); + + return combine2Packet16h(lowOut, highOut); 
+} + +// plog1p +template <> +EIGEN_STRONG_INLINE Packet32h plog1p<Packet32h>(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = plog1p(low); + Packet16h highOut = plog1p(high); + + return combine2Packet16h(lowOut, highOut); +} + +// pexp +template <> +EIGEN_STRONG_INLINE Packet32h pexp<Packet32h>(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = pexp(low); + Packet16h highOut = pexp(high); + + return combine2Packet16h(lowOut, highOut); +} + +// pexpm1 +template <> +EIGEN_STRONG_INLINE Packet32h pexpm1<Packet32h>(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = pexpm1(low); + Packet16h highOut = pexpm1(high); + + return combine2Packet16h(lowOut, highOut); +} + +// ptanh +template <> +EIGEN_STRONG_INLINE Packet32h ptanh<Packet32h>(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = ptanh(low); + Packet16h highOut = ptanh(high); + + return combine2Packet16h(lowOut, highOut); +} + +// pfrexp +template <> +EIGEN_STRONG_INLINE Packet32h pfrexp<Packet32h>(const Packet32h& a, Packet32h& exponent) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h exp1 = _mm256_undefined_si256(); + Packet16h exp2 = _mm256_undefined_si256(); + + Packet16h lowOut = pfrexp(low, exp1); + Packet16h highOut = pfrexp(high, exp2); + + exponent = combine2Packet16h(exp1, exp2); + + return combine2Packet16h(lowOut, highOut); +} + +// pldexp +template <> +EIGEN_STRONG_INLINE Packet32h pldexp<Packet32h>(const Packet32h& a, const Packet32h& exponent) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h exp1; + Packet16h exp2; + extract2Packet16h(exponent, exp1, exp2); + + Packet16h lowOut = pldexp(low, exp1); + Packet16h highOut = pldexp(high, exp2); + + return 
combine2Packet16h(lowOut, highOut); +} + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_PACKET_MATH_FP16_AVX512_H
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 8027cb5..482064e 100644 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -1514,16 +1514,13 @@ template <typename Packet> EIGEN_STRONG_INLINE Packet ploadu_common(const __UNPACK_TYPE__(Packet) * from) { - EIGEN_DEBUG_ALIGNED_LOAD -#if defined(EIGEN_VECTORIZE_VSX) || !defined(_BIG_ENDIAN) EIGEN_DEBUG_UNALIGNED_LOAD +#if defined(EIGEN_VECTORIZE_VSX) return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from)); #else - Packet16uc MSQ, LSQ; - Packet16uc mask; - MSQ = vec_ld(0, (unsigned char*)from); // most significant quadword - LSQ = vec_ld(15, (unsigned char*)from); // least significant quadword - mask = vec_lvsl(0, from); // create the permute mask + Packet16uc MSQ = vec_ld(0, (unsigned char*)from); // most significant quadword + Packet16uc LSQ = vec_ld(15, (unsigned char*)from); // least significant quadword + Packet16uc mask = vec_lvsl(0, from); // create the permute mask // TODO: Add static_cast here return (Packet)vec_perm(MSQ, LSQ, mask); // align the data #endif @@ -1733,7 +1730,7 @@ template <typename Packet> EIGEN_STRONG_INLINE void pstoreu_common(__UNPACK_TYPE__(Packet) * to, const Packet& from) { EIGEN_DEBUG_UNALIGNED_STORE -#if defined(EIGEN_VECTORIZE_VSX) || !defined(_BIG_ENDIAN) +#if defined(EIGEN_VECTORIZE_VSX) vec_xst(from, 0, to); #else // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html @@ -2069,7 +2066,7 @@ input = padd<Packet4ui>(input, rounding_bias); const EIGEN_DECLARE_CONST_FAST_Packet4ui(nan, 0x7FC00000); -#ifdef _ARCH_PWR9 +#if defined(_ARCH_PWR9) && defined(EIGEN_VECTORIZE_VSX) Packet4bi nan_selector = vec_test_data_class(p4f, __VEC_CLASS_FP_NAN); input = vec_sel(input, p4ui_nan, nan_selector); @@ -2178,7 +2175,7 @@ Packet8bi rounding_bias = vec_cmplt(lsb, p4f2); Packet8us input = psub<Packet8us>(p4f, reinterpret_cast<Packet8us>(rounding_bias)); -#ifdef _ARCH_PWR9 +#if defined(_ARCH_PWR9) && defined(EIGEN_VECTORIZE_VSX) Packet4bi nan_selector_lo = vec_test_data_class(lo, __VEC_CLASS_FP_NAN); Packet4bi nan_selector_hi = vec_test_data_class(hi, __VEC_CLASS_FP_NAN); Packet8us nan_selector 
= @@ -3400,9 +3397,17 @@ return reinterpret_cast<Packet2d>(vec_cmpeq(a, b)); } template <> +#ifdef __POWER8_VECTOR__ EIGEN_STRONG_INLINE Packet2l pcmp_eq(const Packet2l& a, const Packet2l& b) { return reinterpret_cast<Packet2l>(vec_cmpeq(a, b)); } +#else +EIGEN_STRONG_INLINE Packet2l pcmp_eq(const Packet2l& a, const Packet2l& b) { + Packet4i halves = reinterpret_cast<Packet4i>(vec_cmpeq(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(b))); + Packet4i flipped = vec_perm(halves, halves, p16uc_COMPLEX32_REV); + return reinterpret_cast<Packet2l>(pand(halves, flipped)); +} +#endif template <> EIGEN_STRONG_INLINE Packet2d pcmp_lt_or_nan(const Packet2d& a, const Packet2d& b) { Packet2d c = reinterpret_cast<Packet2d>(vec_cmpge(a, b));
diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h index a8cb228..95697f3 100644 --- a/Eigen/src/Core/arch/Default/Half.h +++ b/Eigen/src/Core/arch/Default/Half.h
@@ -522,11 +522,6 @@ #endif } -union float32_bits { - unsigned int u; - float f; -}; - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff) { #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \ (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE)) @@ -549,46 +544,45 @@ return h; #else - float32_bits f; - f.f = ff; - - const float32_bits f32infty = {255 << 23}; - const float32_bits f16max = {(127 + 16) << 23}; - const float32_bits denorm_magic = {((127 - 15) + (23 - 10) + 1) << 23}; - unsigned int sign_mask = 0x80000000u; + uint32_t f_bits = Eigen::numext::bit_cast<uint32_t>(ff); + const uint32_t f32infty_bits = {255 << 23}; + const uint32_t f16max_bits = {(127 + 16) << 23}; + const uint32_t denorm_magic_bits = {((127 - 15) + (23 - 10) + 1) << 23}; + const uint32_t sign_mask = 0x80000000u; __half_raw o; - o.x = static_cast<numext::uint16_t>(0x0u); + o.x = static_cast<uint16_t>(0x0u); - unsigned int sign = f.u & sign_mask; - f.u ^= sign; + const uint32_t sign = f_bits & sign_mask; + f_bits ^= sign; // NOTE all the integer compares in this function can be safely // compiled into signed compares since all operands are below // 0x80000000. Important if you want fast straight SSE2 code // (since there's no unsigned PCMPGTD). - if (f.u >= f16max.u) { // result is Inf or NaN (all exponent bits set) - o.x = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf - } else { // (De)normalized number or zero - if (f.u < (113 << 23)) { // resulting FP16 is subnormal or zero + if (f_bits >= f16max_bits) { // result is Inf or NaN (all exponent bits set) + o.x = (f_bits > f32infty_bits) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf + } else { // (De)normalized number or zero + if (f_bits < (113 << 23)) { // resulting FP16 is subnormal or zero // use a magic value to align our 10 mantissa bits at the bottom of // the float. as long as FP addition is round-to-nearest-even this // just works. 
- f.f += denorm_magic.f; + f_bits = Eigen::numext::bit_cast<uint32_t>(Eigen::numext::bit_cast<float>(f_bits) + + Eigen::numext::bit_cast<float>(denorm_magic_bits)); // and one integer subtract of the bias later, we have our final float! - o.x = static_cast<numext::uint16_t>(f.u - denorm_magic.u); + o.x = static_cast<numext::uint16_t>(f_bits - denorm_magic_bits); } else { - unsigned int mant_odd = (f.u >> 13) & 1; // resulting mantissa is odd + const uint32_t mant_odd = (f_bits >> 13) & 1; // resulting mantissa is odd // update exponent, rounding bias part 1 // Equivalent to `f.u += ((unsigned int)(15 - 127) << 23) + 0xfff`, but // without arithmetic overflow. - f.u += 0xc8000fffU; + f_bits += 0xc8000fffU; // rounding bias part 2 - f.u += mant_odd; + f_bits += mant_odd; // take the bits! - o.x = static_cast<numext::uint16_t>(f.u >> 13); + o.x = static_cast<numext::uint16_t>(f_bits >> 13); } } @@ -611,24 +605,23 @@ #elif defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) return static_cast<float>(h.x); #else - const float32_bits magic = {113 << 23}; - const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift - float32_bits o; - - o.u = (h.x & 0x7fff) << 13; // exponent/mantissa bits - unsigned int exp = shifted_exp & o.u; // just the exponent - o.u += (127 - 15) << 23; // exponent adjust + const float magic = Eigen::numext::bit_cast<float>(static_cast<uint32_t>(113 << 23)); + const uint32_t shifted_exp = 0x7c00 << 13; // exponent mask after shift + uint32_t o_bits = (h.x & 0x7fff) << 13; // exponent/mantissa bits + const uint32_t exp = shifted_exp & o_bits; // just the exponent + o_bits += (127 - 15) << 23; // exponent adjust // handle exponent special cases - if (exp == shifted_exp) { // Inf/NaN? - o.u += (128 - 16) << 23; // extra exp adjust - } else if (exp == 0) { // Zero/Denormal? - o.u += 1 << 23; // extra exp adjust - o.f -= magic.f; // renormalize + if (exp == shifted_exp) { // Inf/NaN? 
+ o_bits += (128 - 16) << 23; // extra exp adjust + } else if (exp == 0) { // Zero/Denormal? + o_bits += 1 << 23; // extra exp adjust + // renormalize + o_bits = Eigen::numext::bit_cast<uint32_t>(Eigen::numext::bit_cast<float>(o_bits) - magic); } - o.u |= (h.x & 0x8000) << 16; // sign bit - return o.f; + o_bits |= (h.x & 0x8000) << 16; // sign bit + return Eigen::numext::bit_cast<float>(o_bits); #endif }
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index fcde64a..2488be4 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -497,7 +497,7 @@ namespace internal { template <typename Scalar> struct stem_function { - typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar; + typedef internal::make_complex_t<Scalar> ComplexScalar; typedef ComplexScalar type(ComplexScalar, int); }; } // namespace internal
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 2e3f59e..39a117e 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h
@@ -698,6 +698,12 @@ } template <typename A, typename B> +inline constexpr int size_prefer_fixed(A a, B b) { + plain_enum_asserts(a, b); + return int(a) == Dynamic ? int(b) : int(a); +} + +template <typename A, typename B> inline constexpr bool enum_eq_not_dynamic(A a, B b) { plain_enum_asserts(a, b); if ((int)a == Dynamic || (int)b == Dynamic) return false; @@ -745,6 +751,9 @@ constexpr bool is_constant_evaluated() { return false; } #endif +template <typename Scalar> +using make_complex_t = std::conditional_t<NumTraits<Scalar>::IsComplex, Scalar, std::complex<Scalar>>; + } // end namespace internal } // end namespace Eigen
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index cecbee8..a42bb0f 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h
@@ -885,8 +885,12 @@ }; template <typename T, bool Vectorized> -struct scalar_div_cost<std::complex<T>, Vectorized> { - enum { value = 2 * scalar_div_cost<T>::value + 6 * NumTraits<T>::MulCost + 3 * NumTraits<T>::AddCost }; +struct scalar_div_cost<T, Vectorized, std::enable_if_t<NumTraits<T>::IsComplex>> { + using RealScalar = typename NumTraits<T>::Real; + enum { + value = + 2 * scalar_div_cost<RealScalar>::value + 6 * NumTraits<RealScalar>::MulCost + 3 * NumTraits<RealScalar>::AddCost + }; }; template <bool Vectorized>
diff --git a/Eigen/src/Eigenvalues/ComplexEigenSolver.h b/Eigen/src/Eigenvalues/ComplexEigenSolver.h index 60a24a8..50fa3b8 100644 --- a/Eigen/src/Eigenvalues/ComplexEigenSolver.h +++ b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
@@ -70,7 +70,7 @@ * \c float or \c double) and just \c Scalar if #Scalar is * complex. */ - typedef std::complex<RealScalar> ComplexScalar; + typedef internal::make_complex_t<Scalar> ComplexScalar; /** \brief Type for vector of eigenvalues as returned by eigenvalues(). *
diff --git a/Eigen/src/Eigenvalues/ComplexSchur.h b/Eigen/src/Eigenvalues/ComplexSchur.h index a33e46e..22433f2 100644 --- a/Eigen/src/Eigenvalues/ComplexSchur.h +++ b/Eigen/src/Eigenvalues/ComplexSchur.h
@@ -75,7 +75,7 @@ * \c float or \c double) and just \c Scalar if #Scalar is * complex. */ - typedef std::complex<RealScalar> ComplexScalar; + typedef internal::make_complex_t<Scalar> ComplexScalar; /** \brief Type for the matrices in the Schur decomposition. *
diff --git a/Eigen/src/Eigenvalues/EigenSolver.h b/Eigen/src/Eigenvalues/EigenSolver.h index f73d58f..9dba7bd 100644 --- a/Eigen/src/Eigenvalues/EigenSolver.h +++ b/Eigen/src/Eigenvalues/EigenSolver.h
@@ -89,7 +89,7 @@ * \c float or \c double) and just \c Scalar if #Scalar is * complex. */ - typedef std::complex<RealScalar> ComplexScalar; + typedef internal::make_complex_t<Scalar> ComplexScalar; /** \brief Type for vector of eigenvalues as returned by eigenvalues(). *
diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index b114cfa..c0a61dc 100644 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
@@ -83,7 +83,7 @@ * \c float or \c double) and just \c Scalar if #Scalar is * complex. */ - typedef std::complex<RealScalar> ComplexScalar; + typedef internal::make_complex_t<Scalar> ComplexScalar; /** \brief Type for vector of real scalar values eigenvalues as returned by betas(). *
diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h index 3466f51..5915387 100644 --- a/Eigen/src/Eigenvalues/RealQZ.h +++ b/Eigen/src/Eigenvalues/RealQZ.h
@@ -69,7 +69,7 @@ MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; typedef typename MatrixType::Scalar Scalar; - typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar; + typedef internal::make_complex_t<Scalar> ComplexScalar; typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 typedef Matrix<ComplexScalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> EigenvalueType;
diff --git a/Eigen/src/Eigenvalues/RealSchur.h b/Eigen/src/Eigenvalues/RealSchur.h index 5cef658..54a74e2 100644 --- a/Eigen/src/Eigenvalues/RealSchur.h +++ b/Eigen/src/Eigenvalues/RealSchur.h
@@ -66,7 +66,7 @@ MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; typedef typename MatrixType::Scalar Scalar; - typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar; + typedef internal::make_complex_t<Scalar> ComplexScalar; typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 typedef Matrix<ComplexScalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> EigenvalueType;
diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h index e3154b4..8fdeb84 100644 --- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
@@ -31,8 +31,6 @@ template <typename MatrixType, typename Rhs, typename Dest, typename Preconditioner> bool bicgstab(const MatrixType& mat, const Rhs& rhs, Dest& x, const Preconditioner& precond, Index& iters, typename Dest::RealScalar& tol_error) { - using std::abs; - using std::sqrt; typedef typename Dest::RealScalar RealScalar; typedef typename Dest::Scalar Scalar; typedef Matrix<Scalar, Dynamic, 1> VectorType; @@ -43,14 +41,15 @@ VectorType r = rhs - mat * x; VectorType r0 = r; - RealScalar r0_sqnorm = r0.squaredNorm(); - RealScalar rhs_sqnorm = rhs.squaredNorm(); - if (rhs_sqnorm == 0) { + RealScalar r0_norm = r0.stableNorm(); + RealScalar r_norm = r0_norm; + RealScalar rhs_norm = rhs.stableNorm(); + if (rhs_norm == 0) { x.setZero(); return true; } Scalar rho(1); - Scalar alpha(1); + Scalar alpha(0); Scalar w(1); VectorType v = VectorType::Zero(n), p = VectorType::Zero(n); @@ -59,21 +58,22 @@ VectorType s(n), t(n); - RealScalar tol2 = tol * tol * rhs_sqnorm; - RealScalar eps2 = NumTraits<Scalar>::epsilon() * NumTraits<Scalar>::epsilon(); + RealScalar eps = NumTraits<Scalar>::epsilon(); Index i = 0; Index restarts = 0; - while (r.squaredNorm() > tol2 && i < maxIters) { + while (r_norm > tol && i < maxIters) { Scalar rho_old = rho; - rho = r0.dot(r); - if (abs(rho) < eps2 * r0_sqnorm) { + if (Eigen::numext::abs(rho) / Eigen::numext::maxi(r0_norm, r_norm) < eps * Eigen::numext::mini(r0_norm, r_norm)) { // The new residual vector became too orthogonal to the arbitrarily chosen direction r0 // Let's restart with a new r0: r = rhs - mat * x; r0 = r; - rho = r0_sqnorm = r.squaredNorm(); + rho = r.squaredNorm(); + r0_norm = r.stableNorm(); + alpha = Scalar(0); + w = Scalar(1); if (restarts++ == 0) i = 0; } Scalar beta = (rho / rho_old) * (alpha / w); @@ -82,23 +82,38 @@ y = precond.solve(p); v.noalias() = mat * y; - - alpha = rho / r0.dot(v); + Scalar theta = r0.dot(v); + // For small angles ∠(r0, v) < eps, random restart. 
+ RealScalar v_norm = v.stableNorm(); + if (Eigen::numext::abs(theta) / Eigen::numext::maxi(r0_norm, v_norm) < eps * Eigen::numext::mini(r0_norm, v_norm)) { + r = rhs - mat * x; + r0.setRandom(); + r0_norm = r0.stableNorm(); + rho = Scalar(1); + alpha = Scalar(0); + w = Scalar(1); + if (restarts++ == 0) i = 0; + continue; + } + alpha = rho / theta; s = r - alpha * v; z = precond.solve(s); t.noalias() = mat * z; RealScalar tmp = t.squaredNorm(); - if (tmp > RealScalar(0)) + if (tmp > RealScalar(0)) { w = t.dot(s) / tmp; - else + } else { w = Scalar(0); + } x += alpha * y + w * z; r = s - w * t; + r_norm = r.stableNorm(); ++i; } - tol_error = sqrt(r.squaredNorm() / rhs_sqnorm); + + tol_error = r_norm / rhs_norm; iters = i; return true; }
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index 466834a..3e57764 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h
@@ -243,7 +243,10 @@ the LU decomposition. */ inline RealScalar rcond() const { - eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); + eigen_assert(m_isInitialized && "FullPivLU is not initialized."); + if (!isInvertible()) { + return RealScalar(0); + } return internal::rcond_estimate_helper(m_l1_norm, *this); }
diff --git a/debug/msvc/eigen.natvis b/debug/msvc/eigen.natvis index da89857..22cf346 100644 --- a/debug/msvc/eigen.natvis +++ b/debug/msvc/eigen.natvis
@@ -1,235 +1,235 @@ -<?xml version="1.0" encoding="utf-8"?> - -<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010"> - - <!-- Fixed x Fixed Matrix --> - <Type Name="Eigen::Matrix<*,*,*,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,-1,-1,*,*,*>"/> - <DisplayString>[{$T2}, {$T3}] (fixed matrix)</DisplayString> - <Expand> - <ArrayItems Condition="Flags%2"> <!-- row major layout --> - <Rank>2</Rank> - <Size>$i==0 ? $T2 : $T3</Size> - <ValuePointer>m_storage.m_data.array</ValuePointer> - </ArrayItems> - <ArrayItems Condition="!(Flags%2)"> <!-- column major layout --> - <Direction>Backward</Direction> - <Rank>2</Rank> - <Size>$i==0 ? $T2 : $T3</Size> - <ValuePointer>m_storage.m_data.array</ValuePointer> - </ArrayItems> - </Expand> - </Type> - - <!-- 2 x 2 Matrix --> - <Type Name="Eigen::Matrix<*,2,2,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,2,2,*,*,*>"/> - <DisplayString>[2, 2] (fixed matrix)</DisplayString> - <Expand> - <Synthetic Name="[row 0]" Condition="Flags%2"> - <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 0]" Condition="!(Flags%2)"> - <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 1]" Condition="Flags%2"> - <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 1]" Condition="!(Flags%2)"> - <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]})</DisplayString> - </Synthetic> - </Expand> - </Type> - - <!-- 3 x 3 Matrix --> - <Type Name="Eigen::Matrix<*,3,3,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,3,3,*,*,*>"/> - <DisplayString>[3, 3] (fixed matrix)</DisplayString> - <Expand> - <Synthetic Name="[row 0]" Condition="Flags%2"> - <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString> - </Synthetic> - 
<Synthetic Name="[row 0]" Condition="!(Flags%2)"> - <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 1]" Condition="Flags%2"> - <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 1]" Condition="!(Flags%2)"> - <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 2]" Condition="Flags%2"> - <DisplayString>({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 2]" Condition="!(Flags%2)"> - <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]})</DisplayString> - </Synthetic> - </Expand> - </Type> - - <!-- 4 x 4 Matrix --> - <Type Name="Eigen::Matrix<*,4,4,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,4,4,*,*,*>"/> - <DisplayString>[4, 4] (fixed matrix)</DisplayString> - <Expand> - <Synthetic Name="[row 0]" Condition="Flags%2"> - <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 0]" Condition="!(Flags%2)"> - <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 1]" Condition="Flags%2"> - <DisplayString>({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 1]" Condition="!(Flags%2)"> - <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]})</DisplayString> - 
</Synthetic> - <Synthetic Name="[row 2]" Condition="Flags%2"> - <DisplayString>({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 2]" Condition="!(Flags%2)"> - <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 3]" Condition="Flags%2"> - <DisplayString>({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 3]" Condition="!(Flags%2)"> - <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]})</DisplayString> - </Synthetic> - </Expand> - </Type> - - <!-- Dynamic x Dynamic Matrix --> - <Type Name="Eigen::Matrix<*,-1,-1,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,-1,-1,*,*,*>"/> - <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> - <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix)</DisplayString> - <Expand> - <ArrayItems Condition="Flags%2"> <!-- row major layout --> - <Rank>2</Rank> - <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size> - <ValuePointer>m_storage.m_data</ValuePointer> - </ArrayItems> - <ArrayItems Condition="!(Flags%2)"> <!-- column major layout --> - <Direction>Backward</Direction> - <Rank>2</Rank> - <Size>$i==0 ? 
m_storage.m_rows : m_storage.m_cols</Size> - <ValuePointer>m_storage.m_data</ValuePointer> - </ArrayItems> - </Expand> - </Type> - - <!-- Fixed x Dynamic Matrix --> - <Type Name="Eigen::Matrix<*,*,-1,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,*,-1,*,*,*>"/> - <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> - <DisplayString Condition="m_storage.m_data != 0">[{$T2}, {m_storage.m_cols}] (dynamic column matrix)</DisplayString> - <Expand> - <ArrayItems Condition="Flags%2"> <!-- row major layout --> - <Rank>2</Rank> - <Size>$i==0 ? $T2 : m_storage.m_cols</Size> - <ValuePointer>m_storage.m_data</ValuePointer> - </ArrayItems> - <ArrayItems Condition="!(Flags%2)"> <!-- column major layout --> - <Direction>Backward</Direction> - <Rank>2</Rank> - <Size>$i==0 ? $T2 : m_storage.m_cols</Size> - <ValuePointer>m_storage.m_data</ValuePointer> - </ArrayItems> - </Expand> - </Type> - - <!-- Dynamic x Fixed Matrix --> - <Type Name="Eigen::Matrix<*,-1,*,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,-1,*,*,*,*>"/> - <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> - <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {$T2}] (dynamic row matrix)</DisplayString> - <Expand> - <ArrayItems Condition="Flags%2"> <!-- row major layout --> - <Rank>2</Rank> - <Size>$i==0 ? m_storage.m_rows : $T2</Size> - <ValuePointer>m_storage.m_data</ValuePointer> - </ArrayItems> - <ArrayItems Condition="!(Flags%2)"> <!-- column major layout --> - <Direction>Backward</Direction> - <Rank>2</Rank> - <Size>$i==0 ? 
m_storage.m_rows : $T2</Size> - <ValuePointer>m_storage.m_data</ValuePointer> - </ArrayItems> - </Expand> - </Type> - - <!-- Dynamic Column Vector --> - <Type Name="Eigen::Matrix<*,1,-1,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,1,-1,*,*,*>"/> - <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> - <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_cols}] (dynamic column vector)</DisplayString> - <Expand> - <Item Name="[size]">m_storage.m_cols</Item> - <ArrayItems> - <Size>m_storage.m_cols</Size> - <ValuePointer>m_storage.m_data</ValuePointer> - </ArrayItems> - </Expand> - </Type> - - <!-- Dynamic Row Vector --> - <Type Name="Eigen::Matrix<*,-1,1,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,-1,1,*,*,*>"/> - <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> - <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}] (dynamic row vector)</DisplayString> - <Expand> - <Item Name="[size]">m_storage.m_rows</Item> - <ArrayItems> - <Size>m_storage.m_rows</Size> - <ValuePointer>m_storage.m_data</ValuePointer> - </ArrayItems> - </Expand> - </Type> - - <!-- Fixed Vector --> - <Type Name="Eigen::Matrix<*,1,1,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,1,1,*,*,*>"/> - <DisplayString>[1] ({m_storage.m_data.array[0]})</DisplayString> - <Expand> - <Item Name="[x]">m_storage.m_data.array[0]</Item> - </Expand> - </Type> - - <Type Name="Eigen::Matrix<*,2,1,*,*,*>"> - <AlternativeType Name="Eigen::Matrix<*,1,2,*,*,*>"/> - <AlternativeType Name="Eigen::Array<*,2,1,*,*,*>"/> - <AlternativeType Name="Eigen::Array<*,1,2,*,*,*>"/> - <DisplayString>[2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString> - <Expand> - <Item Name="[x]">m_storage.m_data.array[0]</Item> - <Item Name="[y]">m_storage.m_data.array[1]</Item> - </Expand> - </Type> - - <Type Name="Eigen::Matrix<*,3,1,*,*,*>"> - <AlternativeType Name="Eigen::Matrix<*,1,3,*,*,*>"/> - <AlternativeType Name="Eigen::Array<*,3,1,*,*,*>"/> - 
<AlternativeType Name="Eigen::Array<*,1,3,*,*,*>"/> - <DisplayString>[3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString> - <Expand> - <Item Name="[x]">m_storage.m_data.array[0]</Item> - <Item Name="[y]">m_storage.m_data.array[1]</Item> - <Item Name="[z]">m_storage.m_data.array[2]</Item> - </Expand> - </Type> - - <Type Name="Eigen::Matrix<*,4,1,*,*,*>"> - <AlternativeType Name="Eigen::Matrix<*,1,4,*,*,*>"/> - <AlternativeType Name="Eigen::Array<*,4,1,*,*,*>"/> - <AlternativeType Name="Eigen::Array<*,1,4,*,*,*>"/> - <DisplayString>[4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString> - <Expand> - <Item Name="[x]">m_storage.m_data.array[0]</Item> - <Item Name="[y]">m_storage.m_data.array[1]</Item> - <Item Name="[z]">m_storage.m_data.array[2]</Item> - <Item Name="[w]">m_storage.m_data.array[3]</Item> - </Expand> - </Type> - -</AutoVisualizer> +<?xml version="1.0" encoding="utf-8"?> + +<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010"> + + <!-- Fixed x Fixed Matrix --> + <Type Name="Eigen::Matrix<*,*,*,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,-1,-1,*,*,*>"/> + <DisplayString>[{$T2}, {$T3}] (fixed matrix)</DisplayString> + <Expand> + <ArrayItems Condition="Flags%2"> <!-- row major layout --> + <Rank>2</Rank> + <Size>$i==0 ? $T2 : $T3</Size> + <ValuePointer>m_storage.m_data.array</ValuePointer> + </ArrayItems> + <ArrayItems Condition="!(Flags%2)"> <!-- column major layout --> + <Direction>Backward</Direction> + <Rank>2</Rank> + <Size>$i==0 ? 
$T2 : $T3</Size> + <ValuePointer>m_storage.m_data.array</ValuePointer> + </ArrayItems> + </Expand> + </Type> + + <!-- 2 x 2 Matrix --> + <Type Name="Eigen::Matrix<*,2,2,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,2,2,*,*,*>"/> + <DisplayString>[2, 2] (fixed matrix)</DisplayString> + <Expand> + <Synthetic Name="[row 0]" Condition="Flags%2"> + <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 0]" Condition="!(Flags%2)"> + <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 1]" Condition="Flags%2"> + <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 1]" Condition="!(Flags%2)"> + <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]})</DisplayString> + </Synthetic> + </Expand> + </Type> + + <!-- 3 x 3 Matrix --> + <Type Name="Eigen::Matrix<*,3,3,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,3,3,*,*,*>"/> + <DisplayString>[3, 3] (fixed matrix)</DisplayString> + <Expand> + <Synthetic Name="[row 0]" Condition="Flags%2"> + <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 0]" Condition="!(Flags%2)"> + <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 1]" Condition="Flags%2"> + <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 1]" Condition="!(Flags%2)"> + <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 2]" Condition="Flags%2"> + <DisplayString>({m_storage.m_data.array[6]}, 
{m_storage.m_data.array[7]}, {m_storage.m_data.array[8]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 2]" Condition="!(Flags%2)"> + <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]})</DisplayString> + </Synthetic> + </Expand> + </Type> + + <!-- 4 x 4 Matrix --> + <Type Name="Eigen::Matrix<*,4,4,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,4,4,*,*,*>"/> + <DisplayString>[4, 4] (fixed matrix)</DisplayString> + <Expand> + <Synthetic Name="[row 0]" Condition="Flags%2"> + <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 0]" Condition="!(Flags%2)"> + <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 1]" Condition="Flags%2"> + <DisplayString>({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 1]" Condition="!(Flags%2)"> + <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 2]" Condition="Flags%2"> + <DisplayString>({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 2]" Condition="!(Flags%2)"> + <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 3]" Condition="Flags%2"> + <DisplayString>({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]})</DisplayString> + 
</Synthetic> + <Synthetic Name="[row 3]" Condition="!(Flags%2)"> + <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]})</DisplayString> + </Synthetic> + </Expand> + </Type> + + <!-- Dynamic x Dynamic Matrix --> + <Type Name="Eigen::Matrix<*,-1,-1,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,-1,-1,*,*,*>"/> + <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> + <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix)</DisplayString> + <Expand> + <ArrayItems Condition="Flags%2"> <!-- row major layout --> + <Rank>2</Rank> + <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size> + <ValuePointer>m_storage.m_data</ValuePointer> + </ArrayItems> + <ArrayItems Condition="!(Flags%2)"> <!-- column major layout --> + <Direction>Backward</Direction> + <Rank>2</Rank> + <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size> + <ValuePointer>m_storage.m_data</ValuePointer> + </ArrayItems> + </Expand> + </Type> + + <!-- Fixed x Dynamic Matrix --> + <Type Name="Eigen::Matrix<*,*,-1,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,*,-1,*,*,*>"/> + <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> + <DisplayString Condition="m_storage.m_data != 0">[{$T2}, {m_storage.m_cols}] (dynamic column matrix)</DisplayString> + <Expand> + <ArrayItems Condition="Flags%2"> <!-- row major layout --> + <Rank>2</Rank> + <Size>$i==0 ? $T2 : m_storage.m_cols</Size> + <ValuePointer>m_storage.m_data</ValuePointer> + </ArrayItems> + <ArrayItems Condition="!(Flags%2)"> <!-- column major layout --> + <Direction>Backward</Direction> + <Rank>2</Rank> + <Size>$i==0 ? 
$T2 : m_storage.m_cols</Size> + <ValuePointer>m_storage.m_data</ValuePointer> + </ArrayItems> + </Expand> + </Type> + + <!-- Dynamic x Fixed Matrix --> + <Type Name="Eigen::Matrix<*,-1,*,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,-1,*,*,*,*>"/> + <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> + <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {$T2}] (dynamic row matrix)</DisplayString> + <Expand> + <ArrayItems Condition="Flags%2"> <!-- row major layout --> + <Rank>2</Rank> + <Size>$i==0 ? m_storage.m_rows : $T2</Size> + <ValuePointer>m_storage.m_data</ValuePointer> + </ArrayItems> + <ArrayItems Condition="!(Flags%2)"> <!-- column major layout --> + <Direction>Backward</Direction> + <Rank>2</Rank> + <Size>$i==0 ? m_storage.m_rows : $T2</Size> + <ValuePointer>m_storage.m_data</ValuePointer> + </ArrayItems> + </Expand> + </Type> + + <!-- Dynamic Column Vector --> + <Type Name="Eigen::Matrix<*,1,-1,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,1,-1,*,*,*>"/> + <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> + <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_cols}] (dynamic column vector)</DisplayString> + <Expand> + <Item Name="[size]">m_storage.m_cols</Item> + <ArrayItems> + <Size>m_storage.m_cols</Size> + <ValuePointer>m_storage.m_data</ValuePointer> + </ArrayItems> + </Expand> + </Type> + + <!-- Dynamic Row Vector --> + <Type Name="Eigen::Matrix<*,-1,1,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,-1,1,*,*,*>"/> + <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> + <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}] (dynamic row vector)</DisplayString> + <Expand> + <Item Name="[size]">m_storage.m_rows</Item> + <ArrayItems> + <Size>m_storage.m_rows</Size> + <ValuePointer>m_storage.m_data</ValuePointer> + </ArrayItems> + </Expand> + </Type> + + <!-- Fixed Vector --> + <Type Name="Eigen::Matrix<*,1,1,*,*,*>"> + <AlternativeType 
Name="Eigen::Array<*,1,1,*,*,*>"/> + <DisplayString>[1] ({m_storage.m_data.array[0]})</DisplayString> + <Expand> + <Item Name="[x]">m_storage.m_data.array[0]</Item> + </Expand> + </Type> + + <Type Name="Eigen::Matrix<*,2,1,*,*,*>"> + <AlternativeType Name="Eigen::Matrix<*,1,2,*,*,*>"/> + <AlternativeType Name="Eigen::Array<*,2,1,*,*,*>"/> + <AlternativeType Name="Eigen::Array<*,1,2,*,*,*>"/> + <DisplayString>[2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString> + <Expand> + <Item Name="[x]">m_storage.m_data.array[0]</Item> + <Item Name="[y]">m_storage.m_data.array[1]</Item> + </Expand> + </Type> + + <Type Name="Eigen::Matrix<*,3,1,*,*,*>"> + <AlternativeType Name="Eigen::Matrix<*,1,3,*,*,*>"/> + <AlternativeType Name="Eigen::Array<*,3,1,*,*,*>"/> + <AlternativeType Name="Eigen::Array<*,1,3,*,*,*>"/> + <DisplayString>[3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString> + <Expand> + <Item Name="[x]">m_storage.m_data.array[0]</Item> + <Item Name="[y]">m_storage.m_data.array[1]</Item> + <Item Name="[z]">m_storage.m_data.array[2]</Item> + </Expand> + </Type> + + <Type Name="Eigen::Matrix<*,4,1,*,*,*>"> + <AlternativeType Name="Eigen::Matrix<*,1,4,*,*,*>"/> + <AlternativeType Name="Eigen::Array<*,4,1,*,*,*>"/> + <AlternativeType Name="Eigen::Array<*,1,4,*,*,*>"/> + <DisplayString>[4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString> + <Expand> + <Item Name="[x]">m_storage.m_data.array[0]</Item> + <Item Name="[y]">m_storage.m_data.array[1]</Item> + <Item Name="[z]">m_storage.m_data.array[2]</Item> + <Item Name="[w]">m_storage.m_data.array[3]</Item> + </Expand> + </Type> + +</AutoVisualizer>
diff --git a/debug/msvc/eigen_autoexp_part.dat b/debug/msvc/eigen_autoexp_part.dat index 35ef580..273c10d 100644 --- a/debug/msvc/eigen_autoexp_part.dat +++ b/debug/msvc/eigen_autoexp_part.dat
@@ -1,295 +1,295 @@ -; *************************************************************** -; * Eigen Visualizer -; * -; * Author: Hauke Heibel <hauke.heibel@gmail.com> -; * -; * Support the enhanced debugging of the following Eigen -; * types (*: any, +:fixed dimension) : -; * -; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*> -; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*> -; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*> -; * - Eigen::Matrix<*,-1,-1,*,*,*> -; * - Eigen::Matrix<*,+,-1,*,*,*> -; * - Eigen::Matrix<*,-1,+,*,*,*> -; * - Eigen::Matrix<*,+,+,*,*,*> -; * -; * Matrices are displayed properly independently of the memory -; * alignment (RowMajor vs. ColMajor). -; * -; * This file is distributed WITHOUT ANY WARRANTY. Please ensure -; * that your original autoexp.dat file is copied to a safe -; * place before proceeding with its modification. -; *************************************************************** - -[Visualizer] - -; Fixed size 4-vectors -Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - x : ($c.m_storage.m_data.array)[0], - y : ($c.m_storage.m_data.array)[1], - z : ($c.m_storage.m_data.array)[2], - w : ($c.m_storage.m_data.array)[3] - ) - ) - - preview - ( - #( - "[", - 4, - "](", - #array(expr: $e.m_storage.m_data.array[$i], size: 4), - ")" - ) - ) -} - -; Fixed size 3-vectors -Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - x : ($c.m_storage.m_data.array)[0], - y : ($c.m_storage.m_data.array)[1], - z : ($c.m_storage.m_data.array)[2] - ) - ) - - preview - ( - #( - "[", - 3, - "](", - #array(expr: $e.m_storage.m_data.array[$i], size: 3), - ")" - ) - ) -} - -; Fixed size 2-vectors -Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - x : ($c.m_storage.m_data.array)[0], - y : ($c.m_storage.m_data.array)[1] - ) - ) - - preview - ( - #( - "[", - 2, - "](", - 
#array(expr: $e.m_storage.m_data.array[$i], size: 2), - ")" - ) - ) -} - -; Fixed size 1-vectors -Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - x : ($c.m_storage.m_data.array)[0] - ) - ) - - preview - ( - #( - "[", - 1, - "](", - #array(expr: $e.m_storage.m_data.array[$i], size: 1), - ")" - ) - ) -} - -; Dynamic matrices (ColMajor and RowMajor support) -Eigen::Matrix<*,-1,-1,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - rows: $c.m_storage.m_rows, - cols: $c.m_storage.m_cols, - ; Check for RowMajorBit - #if ($c.Flags & 0x1) ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], - size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols - ) - ) #else ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data)[$i], - size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols - ) - ) - ) - ) - - preview - ( - #( - "[", - $c.m_storage.m_rows, - ",", - $c.m_storage.m_cols, - "](", - #array( - expr : [($c.m_storage.m_data)[$i],g], - size : $c.m_storage.m_rows*$c.m_storage.m_cols - ), - ")" - ) - ) -} - -; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support) -Eigen::Matrix<*,*,-1,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - rows: $c.RowsAtCompileTime, - cols: $c.m_storage.m_cols, - ; Check for RowMajorBit - #if ($c.Flags & 0x1) ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], - size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols - ) - ) #else ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data)[$i], - size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols - ) - ) - ) - ) - - preview - ( - #( - "[", - $c.RowsAtCompileTime, - ",", - $c.m_storage.m_cols, - "](", - #array( - expr : [($c.m_storage.m_data)[$i],g], - size : 
$c.RowsAtCompileTime*$c.m_storage.m_cols - ), - ")" - ) - ) -} - -; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support) -Eigen::Matrix<*,-1,*,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - rows: $c.m_storage.m_rows, - cols: $c.ColsAtCompileTime, - ; Check for RowMajorBit - #if ($c.Flags & 0x1) ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], - size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime - ) - ) #else ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data)[$i], - size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime - ) - ) - ) - ) - - preview - ( - #( - "[", - $c.m_storage.m_rows, - ",", - $c.ColsAtCompileTime, - "](", - #array( - expr : [($c.m_storage.m_data)[$i],g], - size : $c.m_storage.m_rows*$c.ColsAtCompileTime - ), - ")" - ) - ) -} - -; Fixed size matrix (ColMajor and RowMajor support) -Eigen::Matrix<*,*,*,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - rows: $c.RowsAtCompileTime, - cols: $c.ColsAtCompileTime, - ; Check for RowMajorBit - #if ($c.Flags & 0x1) ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], - size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime - ) - ) #else ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data.array)[$i], - size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime - ) - ) - ) - ) - - preview - ( - #( - "[", - $c.RowsAtCompileTime, - ",", - $c.ColsAtCompileTime, - "](", - #array( - expr : [($c.m_storage.m_data.array)[$i],g], - size : $c.RowsAtCompileTime*$c.ColsAtCompileTime - ), - ")" - ) - ) -} +; *************************************************************** +; * Eigen Visualizer +; * +; * Author: Hauke Heibel <hauke.heibel@gmail.com> +; * +; * Support the enhanced debugging of the following 
Eigen +; * types (*: any, +:fixed dimension) : +; * +; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*> +; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*> +; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*> +; * - Eigen::Matrix<*,-1,-1,*,*,*> +; * - Eigen::Matrix<*,+,-1,*,*,*> +; * - Eigen::Matrix<*,-1,+,*,*,*> +; * - Eigen::Matrix<*,+,+,*,*,*> +; * +; * Matrices are displayed properly independently of the memory +; * alignment (RowMajor vs. ColMajor). +; * +; * This file is distributed WITHOUT ANY WARRANTY. Please ensure +; * that your original autoexp.dat file is copied to a safe +; * place before proceeding with its modification. +; *************************************************************** + +[Visualizer] + +; Fixed size 4-vectors +Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + x : ($c.m_storage.m_data.array)[0], + y : ($c.m_storage.m_data.array)[1], + z : ($c.m_storage.m_data.array)[2], + w : ($c.m_storage.m_data.array)[3] + ) + ) + + preview + ( + #( + "[", + 4, + "](", + #array(expr: $e.m_storage.m_data.array[$i], size: 4), + ")" + ) + ) +} + +; Fixed size 3-vectors +Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + x : ($c.m_storage.m_data.array)[0], + y : ($c.m_storage.m_data.array)[1], + z : ($c.m_storage.m_data.array)[2] + ) + ) + + preview + ( + #( + "[", + 3, + "](", + #array(expr: $e.m_storage.m_data.array[$i], size: 3), + ")" + ) + ) +} + +; Fixed size 2-vectors +Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + x : ($c.m_storage.m_data.array)[0], + y : ($c.m_storage.m_data.array)[1] + ) + ) + + preview + ( + #( + "[", + 2, + "](", + #array(expr: $e.m_storage.m_data.array[$i], size: 2), + ")" + ) + ) +} + +; Fixed size 1-vectors +Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + x : 
($c.m_storage.m_data.array)[0] + ) + ) + + preview + ( + #( + "[", + 1, + "](", + #array(expr: $e.m_storage.m_data.array[$i], size: 1), + ")" + ) + ) +} + +; Dynamic matrices (ColMajor and RowMajor support) +Eigen::Matrix<*,-1,-1,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + rows: $c.m_storage.m_rows, + cols: $c.m_storage.m_cols, + ; Check for RowMajorBit + #if ($c.Flags & 0x1) ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], + size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols + ) + ) #else ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data)[$i], + size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols + ) + ) + ) + ) + + preview + ( + #( + "[", + $c.m_storage.m_rows, + ",", + $c.m_storage.m_cols, + "](", + #array( + expr : [($c.m_storage.m_data)[$i],g], + size : $c.m_storage.m_rows*$c.m_storage.m_cols + ), + ")" + ) + ) +} + +; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support) +Eigen::Matrix<*,*,-1,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + rows: $c.RowsAtCompileTime, + cols: $c.m_storage.m_cols, + ; Check for RowMajorBit + #if ($c.Flags & 0x1) ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], + size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols + ) + ) #else ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data)[$i], + size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols + ) + ) + ) + ) + + preview + ( + #( + "[", + $c.RowsAtCompileTime, + ",", + $c.m_storage.m_cols, + "](", + #array( + expr : [($c.m_storage.m_data)[$i],g], + size : $c.RowsAtCompileTime*$c.m_storage.m_cols + ), + ")" + ) + ) +} + +; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support) +Eigen::Matrix<*,-1,*,*,*,*>{ + children + ( + #( + [internals]: 
[$c,!], + rows: $c.m_storage.m_rows, + cols: $c.ColsAtCompileTime, + ; Check for RowMajorBit + #if ($c.Flags & 0x1) ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], + size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime + ) + ) #else ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data)[$i], + size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime + ) + ) + ) + ) + + preview + ( + #( + "[", + $c.m_storage.m_rows, + ",", + $c.ColsAtCompileTime, + "](", + #array( + expr : [($c.m_storage.m_data)[$i],g], + size : $c.m_storage.m_rows*$c.ColsAtCompileTime + ), + ")" + ) + ) +} + +; Fixed size matrix (ColMajor and RowMajor support) +Eigen::Matrix<*,*,*,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + rows: $c.RowsAtCompileTime, + cols: $c.ColsAtCompileTime, + ; Check for RowMajorBit + #if ($c.Flags & 0x1) ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], + size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime + ) + ) #else ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data.array)[$i], + size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime + ) + ) + ) + ) + + preview + ( + #( + "[", + $c.RowsAtCompileTime, + ",", + $c.ColsAtCompileTime, + "](", + #array( + expr : [($c.m_storage.m_data.array)[$i],g], + size : $c.RowsAtCompileTime*$c.ColsAtCompileTime + ), + ")" + ) + ) +}
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index 439336c..0e582a7 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in
@@ -112,11 +112,13 @@ DISABLE_INDEX = YES FULL_SIDEBAR = NO ENUM_VALUES_PER_LINE = 1 -FORMULA_FONTSIZE = 12 +USE_MATHJAX = @EIGEN_DOXY_USE_MATHJAX@ MATHJAX_RELPATH = https://cdn.jsdelivr.net/npm/mathjax@2 MATHJAX_EXTENSIONS = TeX/AMSmath \ TeX/AMSsymbols GENERATE_LATEX = NO +EXTRA_PACKAGES = amssymb \ + amsmath MACRO_EXPANSION = YES EXPAND_ONLY_PREDEF = YES PREDEFINED = EIGEN_EMPTY_STRUCT \ @@ -160,6 +162,7 @@ TAGFILES = ${EIGEN_DOXY_TAGFILES} GENERATE_TAGFILE = ${Eigen_BINARY_DIR}/doc/${EIGEN_DOXY_PROJECT_NAME}.doxytags EXTERNAL_GROUPS = NO +EXTERNAL_PAGES = NO HIDE_UNDOC_RELATIONS = NO HAVE_DOT = YES COLLABORATION_GRAPH = NO @@ -172,3 +175,4 @@ DOT_GRAPH_MAX_NODES = 300 GENERATE_DEPRECATEDLIST = NO GENERATE_TODOLIST = NO +WARN_AS_ERROR = FAIL_ON_WARNINGS_PRINT
diff --git a/test/CustomComplex.h b/test/CustomComplex.h new file mode 100644 index 0000000..048f65b --- /dev/null +++ b/test/CustomComplex.h
@@ -0,0 +1,132 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2025 The Eigen Authors. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_TEST_CUSTOM_COMPLEX_H +#define EIGEN_TEST_CUSTOM_COMPLEX_H + +#include <ostream> +#include <sstream> + +namespace custom_complex { + +template <typename Real> +struct CustomComplex { + CustomComplex() : re{0}, im{0} {} + CustomComplex(const CustomComplex& other) = default; + CustomComplex(CustomComplex&& other) = default; + CustomComplex& operator=(const CustomComplex& other) = default; + CustomComplex& operator=(CustomComplex&& other) = default; + CustomComplex(Real x) : re{x}, im{0} {} + CustomComplex(Real x, Real y) : re{x}, im{y} {} + + CustomComplex operator+(const CustomComplex& other) const { return CustomComplex(re + other.re, im + other.im); } + + CustomComplex operator-() const { return CustomComplex(-re, -im); } + + CustomComplex operator-(const CustomComplex& other) const { return CustomComplex(re - other.re, im - other.im); } + + CustomComplex operator*(const CustomComplex& other) const { + return CustomComplex(re * other.re - im * other.im, re * other.im + im * other.re); + } + + CustomComplex operator/(const CustomComplex& other) const { + // Smith's complex division (https://arxiv.org/pdf/1210.4539.pdf), + // guards against over/under-flow. + const bool scale_imag = numext::abs(other.im) <= numext::abs(other.re); + const Real rscale = scale_imag ? Real(1) : other.re / other.im; + const Real iscale = scale_imag ? 
other.im / other.re : Real(1); + const Real denominator = other.re * rscale + other.im * iscale; + return CustomComplex((re * rscale + im * iscale) / denominator, (im * rscale - re * iscale) / denominator); + } + + CustomComplex& operator+=(const CustomComplex& other) { + *this = *this + other; + return *this; + } + CustomComplex& operator-=(const CustomComplex& other) { + *this = *this - other; + return *this; + } + CustomComplex& operator*=(const CustomComplex& other) { + *this = *this * other; + return *this; + } + CustomComplex& operator/=(const CustomComplex& other) { + *this = *this / other; + return *this; + } + + bool operator==(const CustomComplex& other) const { + return numext::equal_strict(re, other.re) && numext::equal_strict(im, other.im); + } + bool operator!=(const CustomComplex& other) const { return !(*this == other); } + + friend CustomComplex operator+(const Real& a, const CustomComplex& b) { return CustomComplex(a + b.re, b.im); } + + friend CustomComplex operator-(const Real& a, const CustomComplex& b) { return CustomComplex(a - b.re, -b.im); } + + friend CustomComplex operator*(const Real& a, const CustomComplex& b) { return CustomComplex(a * b.re, a * b.im); } + + friend CustomComplex operator*(const CustomComplex& a, const Real& b) { return CustomComplex(a.re * b, a.im * b); } + + friend CustomComplex operator/(const CustomComplex& a, const Real& b) { return CustomComplex(a.re / b, a.im / b); } + + friend std::ostream& operator<<(std::ostream& stream, const CustomComplex& x) { + std::stringstream ss; + ss << "(" << x.re << ", " << x.im << ")"; + stream << ss.str(); + return stream; + } + + Real re; + Real im; +}; + +template <typename Real> +Real real(const CustomComplex<Real>& x) { + return x.re; +} +template <typename Real> +Real imag(const CustomComplex<Real>& x) { + return x.im; +} +template <typename Real> +CustomComplex<Real> conj(const CustomComplex<Real>& x) { + return CustomComplex<Real>(x.re, -x.im); +} +template <typename Real> 
+CustomComplex<Real> sqrt(const CustomComplex<Real>& x) { + return Eigen::internal::complex_sqrt(x); +} +template <typename Real> +Real abs(const CustomComplex<Real>& x) { + return Eigen::numext::sqrt(x.re * x.re + x.im * x.im); +} + +} // namespace custom_complex + +template <typename Real> +using CustomComplex = custom_complex::CustomComplex<Real>; + +namespace Eigen { +template <typename Real> +struct NumTraits<CustomComplex<Real>> : NumTraits<Real> { + enum { IsComplex = 1 }; +}; + +namespace numext { +template <typename Real> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool(isfinite)(const CustomComplex<Real>& x) { + return (numext::isfinite)(x.re) && (numext::isfinite)(x.im); +} + +} // namespace numext +} // namespace Eigen + +#endif // EIGEN_TEST_CUSTOM_COMPLEX_H
diff --git a/test/bicgstab.cpp b/test/bicgstab.cpp index 3f53e3e..7ff2f3d 100644 --- a/test/bicgstab.cpp +++ b/test/bicgstab.cpp
@@ -26,8 +26,63 @@ // CALL_SUBTEST( check_sparse_square_solving(bicgstab_colmajor_ssor) ); } +// https://gitlab.com/libeigen/eigen/-/issues/2856 +void test_2856() { + Eigen::MatrixXd D = Eigen::MatrixXd::Identity(14, 14); + D(6, 13) = 1; + D(13, 12) = 1; + using CSRMatrix = Eigen::SparseMatrix<double, Eigen::RowMajor>; + CSRMatrix A = D.sparseView(); + + Eigen::VectorXd b = Eigen::VectorXd::Zero(14); + b(12) = -1001; + + Eigen::BiCGSTAB<CSRMatrix> solver; + solver.compute(A); + Eigen::VectorXd x = solver.solve(b); + Eigen::VectorXd expected = Eigen::VectorXd::Zero(14); + expected(6) = -1001; + expected(12) = -1001; + expected(13) = 1001; + VERIFY_IS_EQUAL(x, expected); + + Eigen::VectorXd residual = b - A * x; + VERIFY(residual.isZero()); +} + +// https://gitlab.com/libeigen/eigen/-/issues/2899 +void test_2899() { + Eigen::MatrixXd A(4, 4); + A(0, 0) = 1; + A(1, 0) = -1.0 / 6; + A(1, 1) = 2.0 / 3; + A(1, 2) = -1.0 / 6; + A(1, 3) = -1.0 / 3; + A(2, 1) = -1.0 / 3; + A(2, 2) = 1; + A(2, 3) = -2.0 / 3; + A(3, 1) = -1.0 / 3; + A(3, 2) = -1.0 / 3; + A(3, 3) = 2.0 / 3; + Eigen::VectorXd b(4); + b(0) = 0; + b(1) = 1; + b(2) = 1; + b(3) = 1; + Eigen::BiCGSTAB<Eigen::MatrixXd> solver; + solver.compute(A); + Eigen::VectorXd x = solver.solve(b); + Eigen::VectorXd expected(4); + expected << 0, 15, 18, 18; + VERIFY_IS_APPROX(x, expected); + Eigen::VectorXd residual = b - A * x; + VERIFY(residual.isZero()); +} + EIGEN_DECLARE_TEST(bicgstab) { CALL_SUBTEST_1((test_bicgstab_T<double, int>())); CALL_SUBTEST_2((test_bicgstab_T<std::complex<double>, int>())); CALL_SUBTEST_3((test_bicgstab_T<double, long int>())); + CALL_SUBTEST_4(test_2856()); + CALL_SUBTEST_5(test_2899()); }
diff --git a/test/eigensolver_complex.cpp b/test/eigensolver_complex.cpp index afb24b9..76846a9 100644 --- a/test/eigensolver_complex.cpp +++ b/test/eigensolver_complex.cpp
@@ -12,6 +12,7 @@ #include <limits> #include <Eigen/Eigenvalues> #include <Eigen/LU> +#include "CustomComplex.h" template <typename MatrixType> bool find_pivot(typename MatrixType::Scalar tol, MatrixType& diffs, Index col = 0) { @@ -165,5 +166,8 @@ // Test problem size constructors CALL_SUBTEST_5(ComplexEigenSolver<MatrixXf> tmp(s)); + // Test custom complex scalar type. + CALL_SUBTEST_6(eigensolver(Matrix<CustomComplex<double>, 5, 5>())); + TEST_SET_BUT_UNUSED_VARIABLE(s) }
diff --git a/test/lu.cpp b/test/lu.cpp index 1792c2b..b20bcfc 100644 --- a/test/lu.cpp +++ b/test/lu.cpp
@@ -195,6 +195,29 @@ VERIFY_RAISES_ASSERT(plu.inverse()) } +// Rank-deficient matrix returns 0. +// https://gitlab.com/libeigen/eigen/-/issues/2889 +void test_2889() { + Eigen::MatrixXd A = + Eigen::MatrixXd({{0.0000000000000000, 0.0000000000000000, 1.0000000000000000, 0.0000000000000000, + 0.34149999916553497, 0.0000000000000000, 0.79877008515664061}, + {0.0000000000000000, 1.0000000000000000, 0.0000000000000000, 0.29200000315904617, + 0.0000000000000000, -0.37149999849498272, -0.16425902650844920}, + {0.0000000000000000, 0.0000000000000000, 1.0000000000000000, 0.0000000000000000, + 0.34149999916553497, 0.0000000000000000, 0.79877008515664061}, + {0.0000000000000000, 1.0000000000000000, 0.0000000000000000, 0.040500000119209290, + 0.0000000000000000, -0.30099999904632568, -0.081170580429391403}, + {1.0000000000000000, 0.0000000000000000, 0.0000000000000000, 0.0000000000000000, + 0.0000000000000000, 0.0000000000000000, -0.0000000000000000}, + {0.0000000000000000, 0.70710689672598170, 0.70710666564709435, 0.027000000700354562, + 0.025455838867477515, -0.025455847186317101, -0.0068972271572272821}, + {1.0000000000000000, 0.0000000000000000, 0.0000000000000000, 0.0000000000000000, + 0.0000000000000000, 0.0000000000000000, -0.0000000000000000}}); + Eigen::FullPivLU<Eigen::MatrixXd> lu_factorization(A); + double rcond = lu_factorization.rcond(); + VERIFY_IS_EQUAL(rcond, 0.0); +} + EIGEN_DECLARE_TEST(lu) { for (int i = 0; i < g_repeat; i++) { CALL_SUBTEST_1(lu_non_invertible<Matrix3f>()); @@ -229,7 +252,9 @@ CALL_SUBTEST_7((lu_non_invertible<Matrix<float, Dynamic, 16> >())); // Test problem size constructors - CALL_SUBTEST_9(PartialPivLU<MatrixXf>(10)); - CALL_SUBTEST_9(FullPivLU<MatrixXf>(10, 20);); + CALL_SUBTEST_8(PartialPivLU<MatrixXf>(10)); + CALL_SUBTEST_8(FullPivLU<MatrixXf>(10, 20);); + + CALL_SUBTEST_9(test_2889()); } }
diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp index dc1a5c7..724fa40 100644 --- a/test/vectorization_logic.cpp +++ b/test/vectorization_logic.cpp
@@ -251,6 +251,24 @@ (Matrix1::Flags & RowMajorBit) ? PacketSize : 2 > (1, 2), DefaultTraversal, CompleteUnrolling)); } + // the actual packet type used by the assignment evaluator is not necessarily PacketType for small fixed-size arrays + if (internal::unpacket_traits<typename internal::find_best_packet<Scalar, 2>::type>::size > 2) { + // the expression should not be vectorized if the size is too small + using Vector2 = Matrix<Scalar, 2, 1, ColMajor>; + using VectorMax3 = Matrix<Scalar, Dynamic, 1, ColMajor, 3, 1>; + VERIFY(test_assign(Vector2(), Vector2(), LinearTraversal, InnerUnrolling + CompleteUnrolling)); + VERIFY(test_assign(VectorMax3(), Vector2(), LinearTraversal, InnerUnrolling + CompleteUnrolling)); + VERIFY(test_assign(Vector2(), VectorMax3(), LinearTraversal, InnerUnrolling + CompleteUnrolling)); + VERIFY(test_assign(VectorMax3(), VectorMax3(), LinearTraversal, NoUnrolling)); + } + + if (PacketSize > 1 && PacketSize < 8) { + // the size of the expression should be deduced at compile time by considering both the lhs and rhs + using Lhs = Matrix<Scalar, 7, Dynamic, ColMajor>; + using Rhs = Matrix<Scalar, Dynamic, 7, ColMajor>; + VERIFY(test_assign(Lhs(), Rhs(), -1, InnerUnrolling + CompleteUnrolling)); + } + VERIFY( test_redux(Matrix44c().template block<2 * PacketSize, 1>(1, 2), LinearVectorizedTraversal, CompleteUnrolling));
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index 2a59530..aad1647 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -27,6 +27,10 @@ static constexpr int NumDimensions = XprTraits::NumDimensions; static constexpr int Layout = XprTraits::Layout; typedef typename XprTraits::PointerType PointerType; + enum { + // Broadcast is read-only. + Flags = traits<XprType>::Flags & ~LvalueBit + }; }; template <typename Broadcast, typename XprType>
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h index 7521559..b9d6f37 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
@@ -15,7 +15,7 @@ namespace Eigen { -template <bool NeedUprade> +template <bool IsReal> struct MakeComplex { template <typename T> EIGEN_DEVICE_FUNC T operator()(const T& val) const { @@ -26,16 +26,8 @@ template <> struct MakeComplex<true> { template <typename T> - EIGEN_DEVICE_FUNC std::complex<T> operator()(const T& val) const { - return std::complex<T>(val, 0); - } -}; - -template <> -struct MakeComplex<false> { - template <typename T> - EIGEN_DEVICE_FUNC std::complex<T> operator()(const std::complex<T>& val) const { - return val; + EIGEN_DEVICE_FUNC internal::make_complex_t<T> operator()(const T& val) const { + return internal::make_complex_t<T>(val, T(0)); } }; @@ -49,17 +41,17 @@ template <> struct PartOf<RealPart> { - template <typename T> - T operator()(const std::complex<T>& val) const { - return val.real(); + template <typename T, typename EnableIf = std::enable_if_t<NumTraits<T>::IsComplex>> + typename NumTraits<T>::Real operator()(const T& val) const { + return Eigen::numext::real(val); } }; template <> struct PartOf<ImagPart> { - template <typename T> - T operator()(const std::complex<T>& val) const { - return val.imag(); + template <typename T, typename EnableIf = std::enable_if_t<NumTraits<T>::IsComplex>> + typename NumTraits<T>::Real operator()(const T& val) const { + return Eigen::numext::imag(val); } }; @@ -67,8 +59,9 @@ template <typename FFT, typename XprType, int FFTResultType, int FFTDir> struct traits<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir> > : public traits<XprType> { typedef traits<XprType> XprTraits; - typedef typename NumTraits<typename XprTraits::Scalar>::Real RealScalar; - typedef typename std::complex<RealScalar> ComplexScalar; + typedef typename XprTraits::Scalar Scalar; + typedef typename NumTraits<Scalar>::Real RealScalar; + typedef make_complex_t<Scalar> ComplexScalar; typedef typename XprTraits::Scalar InputScalar; typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, 
ComplexScalar> OutputScalar; @@ -109,7 +102,7 @@ public: typedef typename Eigen::internal::traits<TensorFFTOp>::Scalar Scalar; typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename std::complex<RealScalar> ComplexScalar; + typedef internal::make_complex_t<Scalar> ComplexScalar; typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar> OutputScalar; typedef OutputScalar CoeffReturnType; @@ -137,7 +130,7 @@ typedef DSizes<Index, NumDims> Dimensions; typedef typename XprType::Scalar Scalar; typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename std::complex<RealScalar> ComplexScalar; + typedef internal::make_complex_t<Scalar> ComplexScalar; typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions; typedef internal::traits<XprType> XprTraits; typedef typename XprTraits::Scalar InputScalar;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h index 7061f51..e12923d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h
@@ -52,7 +52,13 @@ typedef TensorEvaluator<Expr, Device> EvalType; TensorLazyEvaluatorReadOnly(const Expr& expr, const Device& device) : m_impl(expr, device), m_dummy(Scalar(0)) { - m_dims = m_impl.dimensions(); + EIGEN_STATIC_ASSERT( + internal::array_size<Dimensions>::value == internal::array_size<typename EvalType::Dimensions>::value, + "Dimension sizes must match."); + const auto& other_dims = m_impl.dimensions(); + for (std::size_t i = 0; i < m_dims.size(); ++i) { + m_dims[i] = other_dims[i]; + } m_impl.evalSubExprsIfNeeded(NULL); } virtual ~TensorLazyEvaluatorReadOnly() { m_impl.cleanup(); } @@ -86,14 +92,12 @@ EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex index) { return this->m_impl.coeffRef(index); } }; -template <typename Dimensions, typename Expr, typename Device> -class TensorLazyEvaluator : public std::conditional_t<bool(internal::is_lvalue<Expr>::value), - TensorLazyEvaluatorWritable<Dimensions, Expr, Device>, - TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> > { +template <typename Dimensions, typename Expr, typename Device, bool IsWritable> +class TensorLazyEvaluator : public std::conditional_t<IsWritable, TensorLazyEvaluatorWritable<Dimensions, Expr, Device>, + TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device>> { public: - typedef std::conditional_t<bool(internal::is_lvalue<Expr>::value), - TensorLazyEvaluatorWritable<Dimensions, Expr, Device>, - TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> > + typedef std::conditional_t<IsWritable, TensorLazyEvaluatorWritable<Dimensions, Expr, Device>, + TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device>> Base; typedef typename Base::Scalar Scalar; @@ -101,24 +105,15 @@ virtual ~TensorLazyEvaluator() {} }; -} // namespace internal - -/** \class TensorRef - * \ingroup CXX11_Tensor_Module - * - * \brief A reference to a tensor expression - * The expression will be evaluated lazily (as much as possible). 
- * - */ -template <typename PlainObjectType> -class TensorRef : public TensorBase<TensorRef<PlainObjectType> > { +template <typename Derived> +class TensorRefBase : public TensorBase<Derived> { public: - typedef TensorRef<PlainObjectType> Self; + typedef typename traits<Derived>::PlainObjectType PlainObjectType; typedef typename PlainObjectType::Base Base; - typedef typename Eigen::internal::nested<Self>::type Nested; - typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind; - typedef typename internal::traits<PlainObjectType>::Index Index; - typedef typename internal::traits<PlainObjectType>::Scalar Scalar; + typedef typename Eigen::internal::nested<Derived>::type Nested; + typedef typename traits<PlainObjectType>::StorageKind StorageKind; + typedef typename traits<PlainObjectType>::Index Index; + typedef typename traits<PlainObjectType>::Scalar Scalar; typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename Base::CoeffReturnType CoeffReturnType; typedef Scalar* PointerType; @@ -138,33 +133,17 @@ }; //===- Tensor block evaluation strategy (see TensorBlock.h) -----------===// - typedef internal::TensorBlockNotImplemented TensorBlock; + typedef TensorBlockNotImplemented TensorBlock; //===------------------------------------------------------------------===// - EIGEN_STRONG_INLINE TensorRef() : m_evaluator(NULL) {} + EIGEN_STRONG_INLINE TensorRefBase() : m_evaluator(NULL) {} - template <typename Expression> - EIGEN_STRONG_INLINE TensorRef(const Expression& expr) - : m_evaluator(new internal::TensorLazyEvaluator<Dimensions, Expression, DefaultDevice>(expr, DefaultDevice())) { - m_evaluator->incrRefCount(); - } - - template <typename Expression> - EIGEN_STRONG_INLINE TensorRef& operator=(const Expression& expr) { - unrefEvaluator(); - m_evaluator = new internal::TensorLazyEvaluator<Dimensions, Expression, DefaultDevice>(expr, DefaultDevice()); - m_evaluator->incrRefCount(); - return *this; - } - - ~TensorRef() { unrefEvaluator(); } - 
- TensorRef(const TensorRef& other) : TensorBase<TensorRef<PlainObjectType> >(other), m_evaluator(other.m_evaluator) { + TensorRefBase(const TensorRefBase& other) : TensorBase<Derived>(other), m_evaluator(other.m_evaluator) { eigen_assert(m_evaluator->refCount() > 0); m_evaluator->incrRefCount(); } - TensorRef& operator=(const TensorRef& other) { + TensorRefBase& operator=(const TensorRefBase& other) { if (this != &other) { unrefEvaluator(); m_evaluator = other.m_evaluator; @@ -174,6 +153,28 @@ return *this; } + template <typename Expression, + typename EnableIf = std::enable_if_t<!std::is_same<std::decay_t<Expression>, Derived>::value>> + EIGEN_STRONG_INLINE TensorRefBase(const Expression& expr) + : m_evaluator(new TensorLazyEvaluator<Dimensions, Expression, DefaultDevice, + /*IsWritable=*/!std::is_const<PlainObjectType>::value && + bool(is_lvalue<Expression>::value)>(expr, DefaultDevice())) { + m_evaluator->incrRefCount(); + } + + template <typename Expression, + typename EnableIf = std::enable_if_t<!std::is_same<std::decay_t<Expression>, Derived>::value>> + EIGEN_STRONG_INLINE TensorRefBase& operator=(const Expression& expr) { + unrefEvaluator(); + m_evaluator = new TensorLazyEvaluator < Dimensions, Expression, DefaultDevice, + /*IsWritable=*/!std::is_const<PlainObjectType>::value&& bool(is_lvalue<Expression>::value) > + (expr, DefaultDevice()); + m_evaluator->incrRefCount(); + return *this; + } + + ~TensorRefBase() { unrefEvaluator(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return m_evaluator->dimensions().size(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(Index n) const { return m_evaluator->dimensions()[n]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_evaluator->dimensions(); } @@ -188,12 +189,6 @@ const array<Index, num_indices> indices{{firstIndex, otherIndices...}}; return coeff(indices); } - template <typename... 
IndexTypes> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) { - const std::size_t num_indices = (sizeof...(otherIndices) + 1); - const array<Index, num_indices> indices{{firstIndex, otherIndices...}}; - return coeffRef(indices); - } template <std::size_t NumIndices> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(const array<Index, NumIndices>& indices) const { @@ -212,6 +207,70 @@ } return m_evaluator->coeff(index); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const { return m_evaluator->coeff(index); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_evaluator->coeffRef(index); } + + protected: + TensorLazyBaseEvaluator<Dimensions, Scalar>* evaluator() { return m_evaluator; } + + private: + EIGEN_STRONG_INLINE void unrefEvaluator() { + if (m_evaluator) { + m_evaluator->decrRefCount(); + if (m_evaluator->refCount() == 0) { + delete m_evaluator; + } + } + } + + TensorLazyBaseEvaluator<Dimensions, Scalar>* m_evaluator; +}; + +} // namespace internal + +/** + * \ingroup CXX11_Tensor_Module + * + * \brief A reference to a tensor expression + * The expression will be evaluated lazily (as much as possible). + * + */ +template <typename PlainObjectType> +class TensorRef : public internal::TensorRefBase<TensorRef<PlainObjectType>> { + typedef internal::TensorRefBase<TensorRef<PlainObjectType>> Base; + + public: + using Scalar = typename Base::Scalar; + using Dimensions = typename Base::Dimensions; + + EIGEN_STRONG_INLINE TensorRef() : Base() {} + + template <typename Expression> + EIGEN_STRONG_INLINE TensorRef(const Expression& expr) : Base(expr) { + EIGEN_STATIC_ASSERT(internal::is_lvalue<Expression>::value, + "Expression must be mutable to create a mutable TensorRef<Expression>. 
Did you mean " + "TensorRef<const Expression>?"); + } + + template <typename Expression> + EIGEN_STRONG_INLINE TensorRef& operator=(const Expression& expr) { + EIGEN_STATIC_ASSERT(internal::is_lvalue<Expression>::value, + "Expression must be mutable to create a mutable TensorRef<Expression>. Did you mean " + "TensorRef<const Expression>?"); + return Base::operator=(expr).derived(); + } + + TensorRef& operator=(const TensorRef& other) { return Base::operator=(other).derived(); } + + template <typename... IndexTypes> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) { + const std::size_t num_indices = (sizeof...(otherIndices) + 1); + const array<Index, num_indices> indices{{firstIndex, otherIndices...}}; + return coeffRef(indices); + } + template <std::size_t NumIndices> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices) { const Dimensions& dims = this->dimensions(); @@ -227,24 +286,37 @@ index = index * dims[i] + indices[i]; } } - return m_evaluator->coeffRef(index); + return Base::evaluator()->coeffRef(index); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const { return m_evaluator->coeff(index); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return Base::evaluator()->coeffRef(index); } +}; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_evaluator->coeffRef(index); } +/** + * \ingroup CXX11_Tensor_Module + * + * \brief A reference to a constant tensor expression + * The expression will be evaluated lazily (as much as possible). 
+ * + */ +template <typename PlainObjectType> +class TensorRef<const PlainObjectType> : public internal::TensorRefBase<TensorRef<const PlainObjectType>> { + typedef internal::TensorRefBase<TensorRef<const PlainObjectType>> Base; - private: - EIGEN_STRONG_INLINE void unrefEvaluator() { - if (m_evaluator) { - m_evaluator->decrRefCount(); - if (m_evaluator->refCount() == 0) { - delete m_evaluator; - } - } + public: + EIGEN_STRONG_INLINE TensorRef() : Base() {} + + template <typename Expression> + EIGEN_STRONG_INLINE TensorRef(const Expression& expr) : Base(expr) {} + + template <typename Expression> + EIGEN_STRONG_INLINE TensorRef& operator=(const Expression& expr) { + return Base::operator=(expr).derived(); } - internal::TensorLazyBaseEvaluator<Dimensions, Scalar>* m_evaluator; + TensorRef(const TensorRef& other) : Base(other) {} + + TensorRef& operator=(const TensorRef& other) { return Base::operator=(other).derived(); } }; // evaluator for rvalues
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h index 017b4ff..f5954d6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
@@ -95,8 +95,9 @@ typedef typename MakePointer<Scalar>::Type PointerType; }; -template <typename PlainObjectType> -struct traits<TensorRef<PlainObjectType> > : public traits<PlainObjectType> { +template <typename PlainObjectType_> +struct traits<TensorRef<PlainObjectType_> > : public traits<PlainObjectType_> { + typedef PlainObjectType_ PlainObjectType; typedef traits<PlainObjectType> BaseTraits; typedef typename BaseTraits::Scalar Scalar; typedef typename BaseTraits::StorageKind StorageKind;
diff --git a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h index 182bd2e..6f6df3e 100644 --- a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +++ b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
@@ -111,12 +111,13 @@ typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::StorageIndex StorageIndex; typedef typename MatrixType::RealScalar RealScalar; + typedef internal::make_complex_t<Scalar> ComplexScalar; typedef Preconditioner_ Preconditioner; typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix; typedef Matrix<RealScalar, Dynamic, Dynamic> DenseRealMatrix; typedef Matrix<Scalar, Dynamic, 1> DenseVector; typedef Matrix<RealScalar, Dynamic, 1> DenseRealVector; - typedef Matrix<std::complex<RealScalar>, Dynamic, 1> ComplexVector; + typedef Matrix<ComplexScalar, Dynamic, 1> ComplexVector; /** Default constructor. */ DGMRES() @@ -389,15 +390,15 @@ Index j = 0; while (j < it - 1) { if (T(j + 1, j) == Scalar(0)) { - eig(j) = std::complex<RealScalar>(T(j, j), RealScalar(0)); + eig(j) = ComplexScalar(T(j, j), RealScalar(0)); j++; } else { - eig(j) = std::complex<RealScalar>(T(j, j), T(j + 1, j)); - eig(j + 1) = std::complex<RealScalar>(T(j, j + 1), T(j + 1, j + 1)); + eig(j) = ComplexScalar(T(j, j), T(j + 1, j)); + eig(j + 1) = ComplexScalar(T(j, j + 1), T(j + 1, j + 1)); j++; } } - if (j < it - 1) eig(j) = std::complex<RealScalar>(T(j, j), RealScalar(0)); + if (j < it - 1) eig(j) = ComplexScalar(T(j, j), RealScalar(0)); return eig; }
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h index ff955e1..a28aa96 100644 --- a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +++ b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
@@ -23,8 +23,10 @@ * * This struct is used by CwiseUnaryOp to scale a matrix by \f$ 2^{-s} \f$. */ -template <typename RealScalar> +template <typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex> struct MatrixExponentialScalingOp { + using RealScalar = typename NumTraits<Scalar>::Real; + /** \brief Constructor. * * \param[in] squarings The integer \f$ s \f$ in this document. @@ -35,20 +37,30 @@ * * \param[in,out] x The scalar to be scaled, becoming \f$ 2^{-s} x \f$. */ - inline const RealScalar operator()(const RealScalar& x) const { + inline const Scalar operator()(const Scalar& x) const { using std::ldexp; - return ldexp(x, -m_squarings); + return Scalar(ldexp(Eigen::numext::real(x), -m_squarings), ldexp(Eigen::numext::imag(x), -m_squarings)); } - typedef std::complex<RealScalar> ComplexScalar; + private: + int m_squarings; +}; + +template <typename Scalar> +struct MatrixExponentialScalingOp<Scalar, /*IsComplex=*/false> { + /** \brief Constructor. + * + * \param[in] squarings The integer \f$ s \f$ in this document. + */ + MatrixExponentialScalingOp(int squarings) : m_squarings(squarings) {} /** \brief Scale a matrix coefficient. * * \param[in,out] x The scalar to be scaled, becoming \f$ 2^{-s} x \f$. 
*/ - inline const ComplexScalar operator()(const ComplexScalar& x) const { + inline const Scalar operator()(const Scalar& x) const { using std::ldexp; - return ComplexScalar(ldexp(x.real(), -m_squarings), ldexp(x.imag(), -m_squarings)); + return ldexp(x, -m_squarings); } private: @@ -220,6 +232,7 @@ template <typename MatrixType> struct matrix_exp_computeUV<MatrixType, float> { + using Scalar = typename traits<MatrixType>::Scalar; template <typename ArgType> static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings) { using std::frexp; @@ -234,7 +247,7 @@ const float maxnorm = 3.925724783138660f; frexp(l1norm / maxnorm, &squarings); if (squarings < 0) squarings = 0; - MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<float>(squarings)); + MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<Scalar>(squarings)); matrix_exp_pade7(A, U, V); } } @@ -242,12 +255,12 @@ template <typename MatrixType> struct matrix_exp_computeUV<MatrixType, double> { - typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar; + using Scalar = typename traits<MatrixType>::Scalar; template <typename ArgType> static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings) { using std::frexp; using std::pow; - const RealScalar l1norm = arg.cwiseAbs().colwise().sum().maxCoeff(); + const double l1norm = arg.cwiseAbs().colwise().sum().maxCoeff(); squarings = 0; if (l1norm < 1.495585217958292e-002) { matrix_exp_pade3(arg, U, V); @@ -258,10 +271,10 @@ } else if (l1norm < 2.097847961257068e+000) { matrix_exp_pade9(arg, U, V); } else { - const RealScalar maxnorm = 5.371920351148152; + const double maxnorm = 5.371920351148152; frexp(l1norm / maxnorm, &squarings); if (squarings < 0) squarings = 0; - MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<RealScalar>(squarings)); + MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<Scalar>(squarings)); matrix_exp_pade13(A, U, V); } } @@ -271,6 +284,7 @@ struct 
matrix_exp_computeUV<MatrixType, long double> { template <typename ArgType> static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings) { + using Scalar = typename traits<MatrixType>::Scalar; #if LDBL_MANT_DIG == 53 // double precision matrix_exp_computeUV<MatrixType, double>::run(arg, U, V, squarings); @@ -295,7 +309,7 @@ const long double maxnorm = 4.0246098906697353063L; frexp(l1norm / maxnorm, &squarings); if (squarings < 0) squarings = 0; - MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings)); + MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<Scalar>(squarings)); matrix_exp_pade13(A, U, V); } @@ -315,7 +329,7 @@ const long double maxnorm = 3.2579440895405400856599663723517L; frexp(l1norm / maxnorm, &squarings); if (squarings < 0) squarings = 0; - MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings)); + MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<Scalar>(squarings)); matrix_exp_pade17(A, U, V); } @@ -335,7 +349,7 @@ const long double maxnorm = 2.884233277829519311757165057717815L; frexp(l1norm / maxnorm, &squarings); if (squarings < 0) squarings = 0; - MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings)); + MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<Scalar>(squarings)); matrix_exp_pade17(A, U, V); } @@ -382,9 +396,7 @@ void matrix_exp_compute(const ArgType& arg, ResultType& result, false_type) // default { typedef typename ArgType::PlainObject MatrixType; - typedef typename traits<MatrixType>::Scalar Scalar; - typedef typename NumTraits<Scalar>::Real RealScalar; - typedef typename std::complex<RealScalar> ComplexScalar; + typedef make_complex_t<typename traits<MatrixType>::Scalar> ComplexScalar; result = arg.matrixFunction(internal::stem_function_exp<ComplexScalar>); }
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h index 68336a5..0c18ad6 100644 --- a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +++ b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
@@ -382,7 +382,7 @@ static const int Rows = Traits::RowsAtCompileTime, Cols = Traits::ColsAtCompileTime; static const int MaxRows = Traits::MaxRowsAtCompileTime, MaxCols = Traits::MaxColsAtCompileTime; - typedef std::complex<Scalar> ComplexScalar; + typedef internal::make_complex_t<Scalar> ComplexScalar; typedef Matrix<ComplexScalar, Rows, Cols, 0, MaxRows, MaxCols> ComplexMatrix; ComplexMatrix CA = A.template cast<ComplexScalar>(); @@ -476,7 +476,7 @@ typedef typename internal::nested_eval<Derived, 10>::type NestedEvalType; typedef internal::remove_all_t<NestedEvalType> NestedEvalTypeClean; typedef internal::traits<NestedEvalTypeClean> Traits; - typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar; + typedef internal::make_complex_t<Scalar> ComplexScalar; typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, Traits::RowsAtCompileTime, Traits::ColsAtCompileTime> DynMatrixType;
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h index 4228166..398971e 100644 --- a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +++ b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
@@ -330,7 +330,7 @@ typedef typename internal::nested_eval<Derived, 10>::type DerivedEvalType; typedef internal::remove_all_t<DerivedEvalType> DerivedEvalTypeClean; typedef internal::traits<DerivedEvalTypeClean> Traits; - typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar; + typedef internal::make_complex_t<Scalar> ComplexScalar; typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, Traits::RowsAtCompileTime, Traits::ColsAtCompileTime> DynMatrixType; typedef internal::MatrixLogarithmAtomic<DynMatrixType> AtomicType;
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h b/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h index bff619a..a420ee7 100644 --- a/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +++ b/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
@@ -91,7 +91,7 @@ enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime }; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; - typedef std::complex<RealScalar> ComplexScalar; + typedef internal::make_complex_t<Scalar> ComplexScalar; typedef Block<MatrixType, Dynamic, Dynamic> ResultType; const MatrixType& m_A; @@ -380,7 +380,7 @@ Index cols() const { return m_A.cols(); } private: - typedef std::complex<RealScalar> ComplexScalar; + typedef internal::make_complex_t<Scalar> ComplexScalar; typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime> ComplexMatrix; @@ -628,7 +628,7 @@ class MatrixComplexPowerReturnValue : public ReturnByValue<MatrixComplexPowerReturnValue<Derived> > { public: typedef typename Derived::PlainObject PlainObject; - typedef typename std::complex<typename Derived::RealScalar> ComplexScalar; + typedef internal::make_complex_t<typename Derived::Scalar> ComplexScalar; /** * \brief Constructor. @@ -685,7 +685,7 @@ } template <typename Derived> -const MatrixComplexPowerReturnValue<Derived> MatrixBase<Derived>::pow(const std::complex<RealScalar>& p) const { +const MatrixComplexPowerReturnValue<Derived> MatrixBase<Derived>::pow(const internal::make_complex_t<Scalar>& p) const { return MatrixComplexPowerReturnValue<Derived>(derived(), p); }
diff --git a/unsupported/Eigen/src/Polynomials/PolynomialSolver.h b/unsupported/Eigen/src/Polynomials/PolynomialSolver.h index 8c0ce3b..aa357a4 100644 --- a/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +++ b/unsupported/Eigen/src/Polynomials/PolynomialSolver.h
@@ -35,7 +35,7 @@ typedef Scalar_ Scalar; typedef typename NumTraits<Scalar>::Real RealScalar; - typedef std::complex<RealScalar> RootType; + typedef internal::make_complex_t<Scalar> RootType; typedef Matrix<RootType, Deg_, 1> RootsType; typedef DenseIndex Index; @@ -308,7 +308,7 @@ typedef std::conditional_t<NumTraits<Scalar>::IsComplex, ComplexEigenSolver<CompanionMatrixType>, EigenSolver<CompanionMatrixType> > EigenSolverType; - typedef std::conditional_t<NumTraits<Scalar>::IsComplex, Scalar, std::complex<Scalar> > ComplexScalar; + typedef internal::make_complex_t<Scalar_> ComplexScalar; public: /** Computes the complex roots of a new polynomial. */
diff --git a/unsupported/test/cxx11_tensor_morphing.cpp b/unsupported/test/cxx11_tensor_morphing.cpp index 0672572..55d4291 100644 --- a/unsupported/test/cxx11_tensor_morphing.cpp +++ b/unsupported/test/cxx11_tensor_morphing.cpp
@@ -138,7 +138,7 @@ TensorMap<Tensor<const T, 1>> m(b, 1); DSizes<DenseIndex, 1> offsets; offsets[0] = 0; - TensorRef<Tensor<const T, 1>> slice_ref(m.slice(offsets, m.dimensions())); + TensorRef<const Tensor<const T, 1>> slice_ref(m.slice(offsets, m.dimensions())); VERIFY_IS_EQUAL(slice_ref(0), 42); }
diff --git a/unsupported/test/cxx11_tensor_ref.cpp b/unsupported/test/cxx11_tensor_ref.cpp index d5ff196..cf09749 100644 --- a/unsupported/test/cxx11_tensor_ref.cpp +++ b/unsupported/test/cxx11_tensor_ref.cpp
@@ -49,8 +49,8 @@ Tensor<int, 1> input2(6); input2.setRandom(); - TensorRef<Tensor<int, 1>> ref3(input1 + input2); - TensorRef<Tensor<int, 1>> ref4 = input1 + input2; + TensorRef<const Tensor<int, 1>> ref3(input1 + input2); + TensorRef<const Tensor<int, 1>> ref4 = input1 + input2; VERIFY_IS_NOT_EQUAL(ref3.data(), input1.data()); VERIFY_IS_NOT_EQUAL(ref4.data(), input1.data()); @@ -144,7 +144,7 @@ Tensor<float, 3> result(3, 5, 7); result.setRandom(); - TensorRef<Tensor<float, 3>> result_ref(result); + TensorRef<const Tensor<float, 3>> result_ref(result); Tensor<float, 3> bias(3, 5, 7); bias.setRandom(); @@ -192,7 +192,7 @@ paddings[2] = std::make_pair(3, 4); paddings[3] = std::make_pair(0, 0); DSizes<Eigen::DenseIndex, 4> shuffle_dims(0, 1, 2, 3); - TensorRef<Tensor<const float, 4>> ref(m.pad(paddings)); + TensorRef<const Tensor<const float, 4>> ref(m.pad(paddings)); array<std::pair<ptrdiff_t, ptrdiff_t>, 4> trivial; trivial[0] = std::make_pair(0, 0); trivial[1] = std::make_pair(0, 0);