Update Eigen to: https://gitlab.com/libeigen/eigen/-/commit/853a5c4b843a3f1de5de2a25429eefd62dbd153a
BEGIN_PUBLIC
Update Eigen to: https://gitlab.com/libeigen/eigen/-/commit/853a5c4b843a3f1de5de2a25429eefd62dbd153a
Removed `std::hash<Eigen::half>` for MSVC from tensorflow, since this is properly supported in Eigen now.
This will temporarily break the `tensorflow/compiler/tests:special_math*` tests - though only due to a change in handling some edge cases. This will be fixed in a follow-up.
END_PUBLIC
Note: The special_math tests are fixed in another CL that depends on this Eigen update (cl/357925270).
PiperOrigin-RevId: 361831493
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 47ccf62..508f17d 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -450,7 +450,7 @@
enum { size = DstXprType::SizeAtCompileTime,
packetSize =unpacket_traits<PacketType>::size,
- alignedSize = (size/packetSize)*packetSize };
+ alignedSize = (int(size)/packetSize)*packetSize };
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index 767a8e2..20cc482 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -449,9 +449,23 @@
EIGEN_DEVICE_FUNC Scalar prod() const;
+ template<int NaNPropagation>
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const;
+ template<int NaNPropagation>
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
+
+ // By default, the fastest version with undefined NaN propagation semantics is
+ // used.
+ // TODO(rmlarsen): Replace with default template argument when we move to
+ // c++11 or beyond.
+ EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const {
+ return minCoeff<PropagateFast>();
+ }
+ EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const {
+ return maxCoeff<PropagateFast>();
+ }
+
template<typename IndexType> EIGEN_DEVICE_FUNC
typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
template<typename IndexType> EIGEN_DEVICE_FUNC
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index f8c7826..bc0fe39 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -684,7 +684,7 @@
/** \internal \returns the square-root of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet psqrt(const Packet& a) { EIGEN_USING_STD(sqrt); return sqrt(a); }
+Packet psqrt(const Packet& a) { return numext::sqrt(a); }
/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index f2c7b8c..595a6c1 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -975,8 +975,8 @@
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void _check_template_params()
{
- EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor)
- && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (Options&RowMajor)==0)
+ EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (int(Options)&RowMajor)==RowMajor)
+ && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (int(Options)&RowMajor)==0)
&& ((RowsAtCompileTime == Dynamic) || (RowsAtCompileTime >= 0))
&& ((ColsAtCompileTime == Dynamic) || (ColsAtCompileTime >= 0))
&& ((MaxRowsAtCompileTime == Dynamic) || (MaxRowsAtCompileTime >= 0))
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index 079189a..b766e1a 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -549,7 +549,7 @@
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
: InnerSize == Dynamic ? HugeCost
- : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
+ : InnerSize * (NumTraits<Scalar>::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost))
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
@@ -576,7 +576,7 @@
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
: (bool(RhsRowMajor) && !CanVectorizeLhs),
- Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
+ Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit)
| (EvalToRowMajor ? RowMajorBit : 0)
// TODO enable vectorization for mixed types
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
@@ -597,8 +597,8 @@
CanVectorizeInner = SameType
&& LhsRowMajor
&& (!RhsRowMajor)
- && (LhsFlags & RhsFlags & ActualPacketAccessBit)
- && (InnerSize % packet_traits<Scalar>::size == 0)
+ && (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit)
+ && (int(InnerSize) % packet_traits<Scalar>::size == 0)
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index 2eef5ab..30598f4 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -419,25 +419,33 @@
}
/** \returns the minimum of all coefficients of \c *this.
+ * In case \c *this contains NaN, NaNPropagation determines the behavior:
+ * NaNPropagation == PropagateFast : undefined
+ * NaNPropagation == PropagateNaN : result is NaN
+ * NaNPropagation == PropagateNumbers : result is minimum of elements that are not NaN
* \warning the matrix must be not empty, otherwise an assertion is triggered.
- * \warning the result is undefined if \c *this contains NaN.
*/
template<typename Derived>
+template<int NaNPropagation>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff() const
{
- return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
+ return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar, NaNPropagation>());
}
-/** \returns the maximum of all coefficients of \c *this.
+/** \returns the maximum of all coefficients of \c *this.
+ * In case \c *this contains NaN, NaNPropagation determines the behavior:
+ * NaNPropagation == PropagateFast : undefined
+ * NaNPropagation == PropagateNaN : result is NaN
+ * NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN
* \warning the matrix must be not empty, otherwise an assertion is triggered.
- * \warning the result is undefined if \c *this contains NaN.
*/
template<typename Derived>
+template<int NaNPropagation>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff() const
{
- return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
+ return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar, NaNPropagation>());
}
/** \returns the sum of all coefficients of \c *this
diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h
index 10cd551..04fe0b3 100644
--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@@ -10,10 +10,6 @@
#ifndef EIGEN_STABLENORM_H
#define EIGEN_STABLENORM_H
-#if EIGEN_HAS_CXX11_ATOMIC
-#include <atomic>
-#endif
-
namespace Eigen {
namespace internal {
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProduct.h b/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
index 9d9bbeb..03d474a 100644
--- a/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
+++ b/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
@@ -12,12 +12,21 @@
#include "MatrixProductCommon.h"
-#if __GNUC__ > 10 || \
- (__GNUC__ == 10 && (__GNUC_MINOR__ > 2 || \
- (__GNUC_MINOR__ == 2 && \
- __GNUC_PATCHLEVEL__ >= 1)))
+#if EIGEN_COMP_LLVM
+#if !defined(EIGEN_ALTIVEC_DISABLE_MMA) && !defined(EIGEN_ALTIVEC_MMA_ONLY)
+#ifdef __MMA__
+#define EIGEN_ALTIVEC_MMA_ONLY
+#else
+#define EIGEN_ALTIVEC_DISABLE_MMA
+#endif
+#endif
+#endif
+
+#ifdef __has_builtin
+#if __has_builtin(__builtin_mma_assemble_acc)
#define ALTIVEC_MMA_SUPPORT
#endif
+#endif
#if defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
#include "MatrixProductMMA.h"
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h b/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h
index 87b60c2..a1799c0 100644
--- a/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h
+++ b/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h
@@ -2,6 +2,52 @@
namespace internal {
+template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows>
+EIGEN_STRONG_INLINE void gemm_extra_col(
+ const DataMapper& res,
+ const Scalar *lhs_base,
+ const Scalar *rhs_base,
+ Index depth,
+ Index strideA,
+ Index offsetA,
+ Index row,
+ Index col,
+ Index remaining_rows,
+ Index remaining_cols,
+ const Packet& pAlpha);
+
+template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows>
+EIGEN_STRONG_INLINE void gemm_extra_row(
+ const DataMapper& res,
+ const Scalar *lhs_base,
+ const Scalar *rhs_base,
+ Index depth,
+ Index strideA,
+ Index offsetA,
+ Index row,
+ Index col,
+ Index cols,
+ Index remaining_rows,
+ const Packet& pAlpha,
+ const Packet& pMask);
+
+template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accCols>
+EIGEN_STRONG_INLINE void gemm_unrolled_col(
+ const DataMapper& res,
+ const Scalar *lhs_base,
+ const Scalar *rhs_base,
+ Index depth,
+ Index strideA,
+ Index offsetA,
+ Index& row,
+ Index rows,
+ Index col,
+ Index remaining_cols,
+ const Packet& pAlpha);
+
+template<typename Packet>
+EIGEN_STRONG_INLINE Packet bmask(const int remaining_rows);
+
const static Packet16uc p16uc_SETCOMPLEX32_FIRST = { 0, 1, 2, 3,
16, 17, 18, 19,
4, 5, 6, 7,
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h b/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h
index a67dbcc..64f1172 100644
--- a/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h
+++ b/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h
@@ -12,6 +12,12 @@
#pragma GCC target("cpu=power10")
+#ifdef __has_builtin
+#if !__has_builtin(__builtin_vsx_assemble_pair)
+#define __builtin_vsx_assemble_pair __builtin_mma_assemble_pair
+#endif
+#endif
+
namespace Eigen {
namespace internal {
@@ -193,13 +199,31 @@
template<>
EIGEN_STRONG_INLINE void pgerMMA<Packet2d, __vector_pair, false>(__vector_quad *acc, const __vector_pair& a, const Packet2d& b)
{
- __builtin_mma_xvf64gerpp(acc, a, (__vector unsigned char)b);
+ __builtin_mma_xvf64gerpp(acc, (__vector_pair)a, (__vector unsigned char)b);
}
template<>
EIGEN_STRONG_INLINE void pgerMMA<Packet2d, __vector_pair, true>(__vector_quad *acc, const __vector_pair& a, const Packet2d& b)
{
- __builtin_mma_xvf64gernp(acc, a, (__vector unsigned char)b);
+ __builtin_mma_xvf64gernp(acc, (__vector_pair)a, (__vector unsigned char)b);
+}
+
+template<>
+EIGEN_STRONG_INLINE void pgerMMA<Packet4f, __vector_pair, false>(__vector_quad *acc, const __vector_pair& a, const Packet4f& b)
+{
+ // Just for compilation
+ EIGEN_UNUSED_VARIABLE(acc)
+ EIGEN_UNUSED_VARIABLE(a)
+ EIGEN_UNUSED_VARIABLE(b)
+}
+
+template<>
+EIGEN_STRONG_INLINE void pgerMMA<Packet4f, __vector_pair, true>(__vector_quad *acc, const __vector_pair& a, const Packet4f& b)
+{
+ // Just for compilation
+ EIGEN_UNUSED_VARIABLE(acc)
+ EIGEN_UNUSED_VARIABLE(a)
+ EIGEN_UNUSED_VARIABLE(b)
}
// This is necessary because ploadRhs for double returns a pair of vectors when MMA is enabled.
@@ -219,55 +243,9 @@
template<>
EIGEN_STRONG_INLINE void ploadRhsMMA<double, __vector_pair>(const double *rhs, __vector_pair &rhsV)
{
- __builtin_mma_assemble_pair(&rhsV, (__vector unsigned char)(*(((Packet2d *)rhs) + 1)), (__vector unsigned char)(*((Packet2d *)rhs)));
+ __builtin_vsx_assemble_pair(&rhsV, (__vector unsigned char)(*(((Packet2d *)rhs) + 1)), (__vector unsigned char)(*((Packet2d *)rhs)));
}
-template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows>
-EIGEN_STRONG_INLINE void gemm_extra_col(
- const DataMapper& res,
- const Scalar *lhs_base,
- const Scalar *rhs_base,
- Index depth,
- Index strideA,
- Index offsetA,
- Index row,
- Index col,
- Index remaining_rows,
- Index remaining_cols,
- const Packet& pAlpha);
-
-template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows>
-EIGEN_STRONG_INLINE void gemm_extra_row(
- const DataMapper& res,
- const Scalar *lhs_base,
- const Scalar *rhs_base,
- Index depth,
- Index strideA,
- Index offsetA,
- Index row,
- Index col,
- Index cols,
- Index remaining_rows,
- const Packet& pAlpha,
- const Packet& pMask);
-
-template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accCols>
-EIGEN_STRONG_INLINE void gemm_unrolled_col(
- const DataMapper& res,
- const Scalar *lhs_base,
- const Scalar *rhs_base,
- Index depth,
- Index strideA,
- Index offsetA,
- Index& row,
- Index rows,
- Index col,
- Index remaining_cols,
- const Packet& pAlpha);
-
-template<typename Packet>
-EIGEN_STRONG_INLINE Packet bmask(const int remaining_rows);
-
#define MICRO_MMA_DST \
__vector_quad *accZero0, __vector_quad *accZero1, __vector_quad *accZero2, \
__vector_quad *accZero3, __vector_quad *accZero4, __vector_quad *accZero5, \
diff --git a/Eigen/src/Core/arch/Default/BFloat16.h b/Eigen/src/Core/arch/Default/BFloat16.h
index 72a489b..3c0cd39 100644
--- a/Eigen/src/Core/arch/Default/BFloat16.h
+++ b/Eigen/src/Core/arch/Default/BFloat16.h
@@ -67,7 +67,7 @@
: bfloat16_impl::bfloat16_base(bfloat16_impl::raw_uint16_to_bfloat16(b ? 0x3f80 : 0)) {}
template<class T>
- explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const T& val)
+ explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(T val)
: bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<internal::is_integral<T>::value>(static_cast<float>(val))) {}
explicit EIGEN_DEVICE_FUNC bfloat16(float f)
@@ -655,20 +655,6 @@
} // namespace Eigen
-namespace std {
-
-#if __cplusplus > 199711L
-template <>
-struct hash<Eigen::bfloat16> {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::bfloat16& a) const {
- return hash<float>()(static_cast<float>(a));
- }
-};
-#endif
-
-} // namespace std
-
-
namespace Eigen {
namespace numext {
@@ -703,4 +689,16 @@
} // namespace numext
} // namespace Eigen
+#if EIGEN_HAS_STD_HASH
+namespace std {
+template <>
+struct hash<Eigen::bfloat16> {
+ EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::bfloat16& a) const {
+ return static_cast<std::size_t>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(a));
+ }
+};
+} // namespace std
+#endif
+
+
#endif // EIGEN_BFLOAT16_H
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index e4c7e62..411640e 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -484,7 +484,7 @@
y1 = pmadd(y1, r, cst_cephes_exp_p5);
y = pmadd(y, r3, y1);
y = pmadd(y, r2, y2);
-
+
// Return 2^m * exp(r).
// TODO: replace pldexp with faster implementation since y in [-1, 1).
return pmax(pldexp(y,m), _x);
@@ -630,14 +630,6 @@
#endif
Packet psincos_float(const Packet& _x)
{
-// Workaround -ffast-math aggressive optimizations
-// See bug 1674
-#if EIGEN_COMP_CLANG && defined(EIGEN_VECTORIZE_SSE)
-#define EIGEN_SINCOS_DONT_OPT(X) __asm__ ("" : "+x" (X));
-#else
-#define EIGEN_SINCOS_DONT_OPT(X)
-#endif
-
typedef typename unpacket_traits<Packet>::integer_packet PacketI;
const Packet cst_2oPI = pset1<Packet>(0.636619746685028076171875f); // 2/PI
@@ -652,7 +644,7 @@
// Rounding trick:
Packet y_round = padd(y, cst_rounding_magic);
- EIGEN_SINCOS_DONT_OPT(y_round)
+ EIGEN_OPTIMIZATION_BARRIER(y_round)
PacketI y_int = preinterpret<PacketI>(y_round); // last 23 digits represent integer (if abs(x)<2^24)
y = psub(y_round, cst_rounding_magic); // nearest integer to x*4/pi
@@ -674,9 +666,9 @@
// and 2 ULP up to:
const float huge_th = ComputeSine ? 25966.f : 18838.f;
x = pmadd(y, pset1<Packet>(-1.5703125), x); // = 0xbfc90000
- EIGEN_SINCOS_DONT_OPT(x)
+ EIGEN_OPTIMIZATION_BARRIER(x)
x = pmadd(y, pset1<Packet>(-0.000483989715576171875), x); // = 0xb9fdc000
- EIGEN_SINCOS_DONT_OPT(x)
+ EIGEN_OPTIMIZATION_BARRIER(x)
x = pmadd(y, pset1<Packet>(1.62865035235881805419921875e-07), x); // = 0x342ee000
x = pmadd(y, pset1<Packet>(5.5644315544167710640977020375430583953857421875e-11), x); // = 0x2e74b9ee
@@ -753,8 +745,6 @@
// Update the sign and filter huge inputs
return pxor(y, sign_bit);
-
-#undef EIGEN_SINCOS_DONT_OPT
}
template<typename Packet>
@@ -1003,6 +993,20 @@
fast_twosum(r_hi, s, s_hi, s_lo);
}
+// This is a version of twosum for adding a floating point number x to
+// double word number {y_hi, y_lo} number, with the assumption
+// that |x| >= |y_hi|.
+template<typename Packet>
+EIGEN_STRONG_INLINE
+void fast_twosum(const Packet& x,
+ const Packet& y_hi, const Packet& y_lo,
+ Packet& s_hi, Packet& s_lo) {
+ Packet r_hi, r_lo;
+ fast_twosum(x, y_hi, r_hi, r_lo);
+ const Packet s = padd(y_lo, r_lo);
+ fast_twosum(r_hi, s, s_hi, s_lo);
+}
+
// This function implements the multiplication of a double word
// number represented by {x_hi, x_lo} by a floating point number y.
// It returns the result as a pair {p_hi, p_lo} such that
@@ -1024,6 +1028,50 @@
fast_twosum(t_hi, t_lo2, p_hi, p_lo);
}
+// This function implements the multiplication of two double word
+// numbers represented by {x_hi, x_lo} and {y_hi, y_lo}.
+// It returns the result as a pair {p_hi, p_lo} such that
+// (x_hi + x_lo) * (y_hi + y_lo) = p_hi + p_lo holds with a relative error
+// of less than 2*2^{-2p}, where p is the number of significand bit
+// in the floating point type.
+template<typename Packet>
+EIGEN_STRONG_INLINE
+void twoprod(const Packet& x_hi, const Packet& x_lo,
+ const Packet& y_hi, const Packet& y_lo,
+ Packet& p_hi, Packet& p_lo) {
+ Packet p_hi_hi, p_hi_lo;
+ twoprod(x_hi, x_lo, y_hi, p_hi_hi, p_hi_lo);
+ Packet p_lo_hi, p_lo_lo;
+ twoprod(x_hi, x_lo, y_lo, p_lo_hi, p_lo_lo);
+ fast_twosum(p_hi_hi, p_hi_lo, p_lo_hi, p_lo_lo, p_hi, p_lo);
+}
+
+// This function computes the reciprocal of a floating point number
+// with extra precision and returns the result as a double word.
+template <typename Packet>
+void doubleword_reciprocal(const Packet& x, Packet& recip_hi, Packet& recip_lo) {
+ typedef typename unpacket_traits<Packet>::type Scalar;
+ // 1. Approximate the reciprocal as the reciprocal of the high order element.
+ Packet approx_recip = prsqrt(x);
+ approx_recip = pmul(approx_recip, approx_recip);
+
+ // 2. Run one step of Newton-Raphson iteration in double word arithmetic
+ // to get the bottom half. The NR iteration for reciprocal of 'a' is
+ // x_{i+1} = x_i * (2 - a * x_i)
+
+ // -a*x_i
+ Packet t1_hi, t1_lo;
+ twoprod(pnegate(x), approx_recip, t1_hi, t1_lo);
+ // 2 - a*x_i
+ Packet t2_hi, t2_lo;
+ fast_twosum(pset1<Packet>(Scalar(2)), t1_hi, t2_hi, t2_lo);
+ Packet t3_hi, t3_lo;
+ fast_twosum(t2_hi, padd(t2_lo, t1_lo), t3_hi, t3_lo);
+ // x_i * (2 - a * x_i)
+ twoprod(t3_hi, t3_lo, approx_recip, recip_hi, recip_lo);
+}
+
+
// This function computes log2(x) and returns the result as a double word.
template <typename Scalar>
struct accurate_log2 {
@@ -1115,6 +1163,101 @@
}
};
+// This specialization uses a more accurate algorithm to compute log2(x) for
+// floats in [1/sqrt(2);sqrt(2)] with a relative accuracy of ~1.27e-18.
+// This additional accuracy is needed to counter the error-magnification
+// inherent in multiplying by a potentially large exponent in pow(x,y).
+// The minimax polynomial used was calculated using the Sollya tool.
+// See sollya.org.
+
+template <>
+struct accurate_log2<double> {
+ template <typename Packet>
+ EIGEN_STRONG_INLINE
+ void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) {
+ // We use a transformation of variables:
+ // r = c * (x-1) / (x+1),
+ // such that
+ // log2(x) = log2((1 + r/c) / (1 - r/c)) = f(r).
+ // The function f(r) can be approximated well using an odd polynomial
+ // of the form
+ // P(r) = ((Q(r^2) * r^2 + C) * r^2 + 1) * r,
+ // For the implementation of log2<double> here, Q is of degree 6 with
+ // coefficient represented in working precision (double), while C is a
+ // constant represented in extra precision as a double word to achieve
+ // full accuracy.
+ //
+ // The polynomial coefficients were computed by the Sollya script:
+ //
+ // c = 2 / log(2);
+ // trans = c * (x-1)/(x+1);
+ // itrans = (1+x/c)/(1-x/c);
+ // interval=[trans(sqrt(0.5)); trans(sqrt(2))];
+ // print(interval);
+ // f = log2(itrans(x));
+ // p=fpminimax(f,[|1,3,5,7,9,11,13,15,17|],[|1,DD,double...|],interval,relative,floating);
+ const Packet q12 = pset1<Packet>(2.87074255468000586e-9);
+ const Packet q10 = pset1<Packet>(2.38957980901884082e-8);
+ const Packet q8 = pset1<Packet>(2.31032094540014656e-7);
+ const Packet q6 = pset1<Packet>(2.27279857398537278e-6);
+ const Packet q4 = pset1<Packet>(2.31271023278625638e-5);
+ const Packet q2 = pset1<Packet>(2.47556738444535513e-4);
+ const Packet q0 = pset1<Packet>(2.88543873228900172e-3);
+ const Packet C_hi = pset1<Packet>(0.0400377511598501157);
+ const Packet C_lo = pset1<Packet>(-4.77726582251425391e-19);
+ const Packet one = pset1<Packet>(1.0);
+
+ const Packet cst_2_log2e_hi = pset1<Packet>(2.88539008177792677);
+ const Packet cst_2_log2e_lo = pset1<Packet>(4.07660016854549667e-17);
+ // c * (x - 1)
+ Packet num_hi, num_lo;
+ twoprod(cst_2_log2e_hi, cst_2_log2e_lo, psub(x, one), num_hi, num_lo);
+ // TODO(rmlarsen): Investigate if using the division algorithm by
+ // Muller et al. is faster/more accurate.
+ // 1 / (x + 1)
+ Packet denom_hi, denom_lo;
+ doubleword_reciprocal(padd(x, one), denom_hi, denom_lo);
+ // r = c * (x-1) / (x+1),
+ Packet r_hi, r_lo;
+ twoprod(num_hi, num_lo, denom_hi, denom_lo, r_hi, r_lo);
+ // r2 = r * r
+ Packet r2_hi, r2_lo;
+ twoprod(r_hi, r_lo, r_hi, r_lo, r2_hi, r2_lo);
+ // r4 = r2 * r2
+ Packet r4_hi, r4_lo;
+ twoprod(r2_hi, r2_lo, r2_hi, r2_lo, r4_hi, r4_lo);
+
+ // Evaluate Q(r^2) in working precision. We evaluate it in two parts
+ // (even and odd in r^2) to improve instruction level parallelism.
+ Packet q_even = pmadd(q12, r4_hi, q8);
+ Packet q_odd = pmadd(q10, r4_hi, q6);
+ q_even = pmadd(q_even, r4_hi, q4);
+ q_odd = pmadd(q_odd, r4_hi, q2);
+ q_even = pmadd(q_even, r4_hi, q0);
+ Packet q = pmadd(q_odd, r2_hi, q_even);
+
+ // Now evaluate the low order terms of P(x) in double word precision.
+ // In the following, due to the increasing magnitude of the coefficients
+ // and r being constrained to [-0.5, 0.5] we can use fast_twosum instead
+ // of the slower twosum.
+ // Q(r^2) * r^2
+ Packet p_hi, p_lo;
+ twoprod(r2_hi, r2_lo, q, p_hi, p_lo);
+ // Q(r^2) * r^2 + C
+ Packet p1_hi, p1_lo;
+ fast_twosum(C_hi, C_lo, p_hi, p_lo, p1_hi, p1_lo);
+ // (Q(r^2) * r^2 + C) * r^2
+ Packet p2_hi, p2_lo;
+ twoprod(r2_hi, r2_lo, p1_hi, p1_lo, p2_hi, p2_lo);
+ // ((Q(r^2) * r^2 + C) * r^2 + 1)
+ Packet p3_hi, p3_lo;
+ fast_twosum(one, p2_hi, p2_lo, p3_hi, p3_lo);
+
+ // log(z) ~= ((Q(r^2) * r^2 + C) * r^2 + 1) * r
+ twoprod(p3_hi, p3_lo, r_hi, r_lo, log2_x_hi, log2_x_lo);
+ }
+};
+
// This function computes exp2(x) (i.e. 2**x).
template <typename Scalar>
struct fast_accurate_exp2 {
@@ -1161,8 +1304,75 @@
// to gain some instruction level parallelism.
Packet x2 = pmul(x,x);
Packet p_even = pmadd(p4, x2, p2);
- p_even = pmadd(p_even, x2, p0);
Packet p_odd = pmadd(p3, x2, p1);
+ p_even = pmadd(p_even, x2, p0);
+ Packet p = pmadd(p_odd, x, p_even);
+
+ // Evaluate the remaining terms of Q(x) with extra precision using
+ // double word arithmetic.
+ Packet p_hi, p_lo;
+ // x * p(x)
+ twoprod(p, x, p_hi, p_lo);
+ // C + x * p(x)
+ Packet q1_hi, q1_lo;
+ twosum(p_hi, p_lo, C_hi, C_lo, q1_hi, q1_lo);
+ // x * (C + x * p(x))
+ Packet q2_hi, q2_lo;
+ twoprod(q1_hi, q1_lo, x, q2_hi, q2_lo);
+ // 1 + x * (C + x * p(x))
+ Packet q3_hi, q3_lo;
+ // Since |q2_hi| <= sqrt(2)-1 < 1, we can use fast_twosum
+ // for adding it to unity here.
+ fast_twosum(one, q2_hi, q3_hi, q3_lo);
+ return padd(q3_hi, padd(q2_lo, q3_lo));
+ }
+};
+
+// in [-0.5;0.5] with a relative accuracy of 1 ulp.
+// The minimax polynomial used was calculated using the Sollya tool.
+// See sollya.org.
+template <>
+struct fast_accurate_exp2<double> {
+ template <typename Packet>
+ EIGEN_STRONG_INLINE
+ Packet operator()(const Packet& x) {
+ // This function approximates exp2(x) by a degree 10 polynomial of the form
+ // Q(x) = 1 + x * (C + x * P(x)), where the degree 8 polynomial P(x) is evaluated in
+ // single precision, and the remaining steps are evaluated with extra precision using
+ // double word arithmetic. C is an extra precise constant stored as a double word.
+ //
+ // The polynomial coefficients were calculated using Sollya commands:
+ // > n = 11;
+ // > f = 2^x;
+ // > interval = [-0.5;0.5];
+ // > p = fpminimax(f,n,[|1,DD,double...|],interval,relative,floating);
+
+ const Packet p9 = pset1<Packet>(4.431642109085495276e-10);
+ const Packet p8 = pset1<Packet>(7.073829923303358410e-9);
+ const Packet p7 = pset1<Packet>(1.017822306737031311e-7);
+ const Packet p6 = pset1<Packet>(1.321543498017646657e-6);
+ const Packet p5 = pset1<Packet>(1.525273342728892877e-5);
+ const Packet p4 = pset1<Packet>(1.540353045780084423e-4);
+ const Packet p3 = pset1<Packet>(1.333355814685869807e-3);
+ const Packet p2 = pset1<Packet>(9.618129107593478832e-3);
+ const Packet p1 = pset1<Packet>(5.550410866481961247e-2);
+ const Packet p0 = pset1<Packet>(0.240226506959101332);
+ const Packet C_hi = pset1<Packet>(0.693147180559945286);
+ const Packet C_lo = pset1<Packet>(4.81927865669806721e-17);
+ const Packet one = pset1<Packet>(1.0);
+
+ // Evaluate P(x) in working precision.
+ // We evaluate even and odd parts of the polynomial separately
+ // to gain some instruction level parallelism.
+ Packet x2 = pmul(x,x);
+ Packet p_even = pmadd(p8, x2, p6);
+ Packet p_odd = pmadd(p9, x2, p7);
+ p_even = pmadd(p_even, x2, p4);
+ p_odd = pmadd(p_odd, x2, p5);
+ p_even = pmadd(p_even, x2, p2);
+ p_odd = pmadd(p_odd, x2, p3);
+ p_even = pmadd(p_even, x2, p0);
+ p_odd = pmadd(p_odd, x2, p1);
Packet p = pmadd(p_odd, x, p_even);
// Evaluate the remaining terms of Q(x) with extra precision using
@@ -1234,7 +1444,6 @@
// Since r_z is in [-0.5;0.5], we compute the first factor to high accuracy
// using a specialized algorithm. Multiplication by the second factor can
// be done exactly using pldexp(), since it is an integer power of 2.
- // Packet e_r = fast_accurate_exp2<Scalar>()(r_z);
const Packet e_r = fast_accurate_exp2<Scalar>()(r_z);
return pldexp(e_r, n_z);
}
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
index da8499b..637e5f4 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
@@ -35,7 +35,7 @@
// a floating-point Packet type. Used by pfrexp_generic. Override this if
// there is no unpacket_traits<Packet>::integer_packet.
template<typename Packet> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
-Packet pfrexp_generic_get_biased_exponent(const Packet& a);
+Packet pfrexp_generic_get_biased_exponent(const Packet& p);
/** Default implementation of pldexp.
* It is expected to be called by implementers of template<> pldexp.
diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h
index b273abe..eb3030a 100644
--- a/Eigen/src/Core/arch/Default/Half.h
+++ b/Eigen/src/Core/arch/Default/Half.h
@@ -63,10 +63,38 @@
namespace half_impl {
-#if !defined(EIGEN_HAS_GPU_FP16)
+// We want to use the __half_raw struct from the HIP header file only during the device compile phase.
+// This is required because of a quirk in the way TensorFlow GPU builds are done.
+// When compiling TensorFlow source code with GPU support, files that
+// * contain GPU kernels (i.e. *.cu.cc files) are compiled via hipcc
+// * do not contain GPU kernels ( i.e. *.cc files) are compiled via gcc (typically)
+//
+// Tensorflow uses the Eigen::half type as its FP16 type, and there are functions that
+// * are defined in a file that gets compiled via hipcc AND
+// * have Eigen::half as a pass-by-value argument AND
+// * are called in a file that gets compiled via gcc
+//
+// In the scenario described above the caller and callee will see different versions
+// of the Eigen::half base class __half_raw, and they will be compiled by different compilers
+//
+// There appears to be an ABI mismatch between gcc and clang (which is called by hipcc) that results in
+// the callee getting corrupted values for the Eigen::half argument.
+//
+// Making the host side compile phase of hipcc use the same Eigen::half impl, as the gcc compile, resolves
+// this error, and hence the following convoluted #if condition
+#if !defined(EIGEN_HAS_GPU_FP16) || !defined(EIGEN_GPU_COMPILE_PHASE)
// Make our own __half_raw definition that is similar to CUDA's.
struct __half_raw {
+#if (defined(EIGEN_HAS_GPU_FP16) && !defined(EIGEN_GPU_COMPILE_PHASE))
+ // Eigen::half can be used as the datatype for shared memory declarations (in Eigen and TF)
+ // The element type for shared memory cannot have non-trivial constructors
+ // and hence the following special casing (which skips the zero-initilization).
+ // Note that this check gets done even in the host compilation phase, and
+ // hence the need for this
+ EIGEN_DEVICE_FUNC __half_raw() {}
+#else
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw() : x(0) {}
+#endif
#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw(numext::uint16_t raw) : x(numext::bit_cast<__fp16>(raw)) {
}
@@ -115,7 +143,10 @@
// Writing this out as separate #if-else blocks to make the code easier to follow
// The same applies to most #if-else blocks in this file
-#if !defined(EIGEN_HAS_GPU_FP16)
+#if !defined(EIGEN_HAS_GPU_FP16) || !defined(EIGEN_GPU_COMPILE_PHASE)
+ // Use the same base class for the following two scenarios
+ // * when compiling without GPU support enabled
+ // * during host compile phase when compiling with GPU support enabled
typedef half_impl::__half_raw __half_raw;
#elif defined(EIGEN_HAS_HIP_FP16)
// Nothing to do here
@@ -147,7 +178,7 @@
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half(bool b)
: half_impl::half_base(half_impl::raw_uint16_to_half(b ? 0x3c00 : 0)) {}
template<class T>
- explicit EIGEN_DEVICE_FUNC half(const T& val)
+ explicit EIGEN_DEVICE_FUNC half(T val)
: half_impl::half_base(half_impl::float_to_half_rtne(static_cast<float>(val))) {}
explicit EIGEN_DEVICE_FUNC half(float f)
: half_impl::half_base(half_impl::float_to_half_rtne(f)) {}
@@ -161,6 +192,14 @@
EIGEN_DEVICE_FUNC operator float() const { // NOLINT: Allow implicit conversion to float, because it is lossless.
return half_impl::half_to_float(*this);
}
+
+#if defined(EIGEN_HAS_GPU_FP16) && !defined(EIGEN_GPU_COMPILE_PHASE)
+ EIGEN_DEVICE_FUNC operator __half() const {
+ ::__half_raw hr;
+ hr.x = x;
+ return __half(hr);
+ }
+#endif
};
} // end namespace Eigen
@@ -757,19 +796,6 @@
#pragma pop_macro("EIGEN_CONSTEXPR")
#endif
-namespace std {
-
-#if __cplusplus > 199711L
-template <>
-struct hash<Eigen::half> {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::half& a) const {
- return static_cast<std::size_t>(a.x);
- }
-};
-#endif
-
-} // end namespace std
-
namespace Eigen {
namespace numext {
@@ -822,19 +848,23 @@
#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDA_SDK_VER >= 90000
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_sync(unsigned mask, Eigen::half var, int srcLane, int width=warpSize) {
- return static_cast<Eigen::half>(__shfl_sync(mask, static_cast<__half>(var), srcLane, width));
+ const __half h = var;
+ return static_cast<Eigen::half>(__shfl_sync(mask, h, srcLane, width));
}
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_up_sync(unsigned mask, Eigen::half var, unsigned int delta, int width=warpSize) {
- return static_cast<Eigen::half>(__shfl_up_sync(mask, static_cast<__half>(var), delta, width));
+ const __half h = var;
+ return static_cast<Eigen::half>(__shfl_up_sync(mask, h, delta, width));
}
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_down_sync(unsigned mask, Eigen::half var, unsigned int delta, int width=warpSize) {
- return static_cast<Eigen::half>(__shfl_down_sync(mask, static_cast<__half>(var), delta, width));
+ const __half h = var;
+ return static_cast<Eigen::half>(__shfl_down_sync(mask, h, delta, width));
}
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor_sync(unsigned mask, Eigen::half var, int laneMask, int width=warpSize) {
- return static_cast<Eigen::half>(__shfl_xor_sync(mask, static_cast<__half>(var), laneMask, width));
+ const __half h = var;
+ return static_cast<Eigen::half>(__shfl_xor_sync(mask, h, laneMask, width));
}
#else // HIP or CUDA SDK < 9.0
@@ -870,4 +900,15 @@
}
#endif // __ldg
+#if EIGEN_HAS_STD_HASH
+namespace std {
+template <>
+struct hash<Eigen::half> {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::half& a) const {
+ return static_cast<std::size_t>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(a));
+ }
+};
+} // end namespace std
+#endif
+
#endif // EIGEN_HALF_H
diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h
index c16f95e..689110d 100644
--- a/Eigen/src/Core/arch/GPU/PacketMath.h
+++ b/Eigen/src/Core/arch/GPU/PacketMath.h
@@ -15,12 +15,16 @@
namespace internal {
// Read-only data cached load available.
-#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 350
+#if defined(EIGEN_HIP_DEVICE_COMPILE) || (defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350)
#define EIGEN_GPU_HAS_LDG 1
#endif
// FP16 math available.
-#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
+#if (defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530)
+#define EIGEN_CUDA_HAS_FP16_ARITHMETIC 1
+#endif
+
+#if defined(EIGEN_HIP_DEVICE_COMPILE) || defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)
#define EIGEN_GPU_HAS_FP16_ARITHMETIC 1
#endif
@@ -603,7 +607,8 @@
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_aligned(
const Eigen::half* from) {
#if defined(EIGEN_GPU_HAS_LDG)
- return __ldg((const half2*)from);
+ // Input is guaranteed to be properly aligned.
+ return __ldg(reinterpret_cast<const half2*>(from));
#else
return combine_half(*(from+0), *(from+1));
#endif
@@ -922,7 +927,7 @@
return __floats2half2_rn(r1, r2);
}
-#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530) || \
+#if (EIGEN_CUDA_SDK_VER >= 80000 && defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)) || \
defined(EIGEN_HIP_DEVICE_COMPILE)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -1033,7 +1038,7 @@
ploadt_ro<Packet4h2, Aligned>(const Eigen::half* from) {
#if defined(EIGEN_GPU_HAS_LDG)
Packet4h2 r;
- r = __ldg((const Packet4h2*)from);
+ r = __ldg(reinterpret_cast<const Packet4h2*>(from));
return r;
#else
Packet4h2 r;
@@ -1226,7 +1231,7 @@
p_alias[3] = __halves2half2(__hadd(a, __float2half(6.0f)),
__hadd(a, __float2half(7.0f)));
return r;
-#elif EIGEN_CUDA_ARCH >= 530
+#elif defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
@@ -1478,7 +1483,7 @@
predux_max(a_alias[3]));
__half first = predux_max(m0);
__half second = predux_max(m1);
-#if EIGEN_CUDA_ARCH >= 530
+#if defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)
return (__hgt(first, second) ? first : second);
#else
float ffirst = __half2float(first);
@@ -1497,7 +1502,7 @@
predux_min(a_alias[3]));
__half first = predux_min(m0);
__half second = predux_min(m1);
-#if EIGEN_CUDA_ARCH >= 530
+#if defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)
return (__hlt(first, second) ? first : second);
#else
float ffirst = __half2float(first);
@@ -1669,6 +1674,7 @@
#endif // defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)
#undef EIGEN_GPU_HAS_LDG
+#undef EIGEN_CUDA_HAS_FP16_ARITHMETIC
#undef EIGEN_GPU_HAS_FP16_ARITHMETIC
} // end namespace internal
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 2e06bef..7d69de6 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -194,13 +194,16 @@
HasBlend = 0,
HasDiv = 1,
- HasFloor = 0,
+ HasFloor = 1,
+ HasCeil = 1,
+ HasRint = 1,
HasSin = EIGEN_FAST_MATH,
HasCos = EIGEN_FAST_MATH,
HasLog = 1,
HasExp = 1,
HasSqrt = 1,
+ HasRsqrt = 1,
HasTanh = EIGEN_FAST_MATH,
HasErf = EIGEN_FAST_MATH,
HasBessel = 0, // Issues with accuracy.
@@ -1462,32 +1465,6 @@
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan<Packet4f>(const Packet4f& a, const Packet4f& b)
{ return vreinterpretq_f32_u32(vmvnq_u32(vcgeq_f32(a,b))); }
-// WARNING: this pfloor implementation makes sense for inputs that fit in
-// signed int32 integers (up to ~2.14e9), hence this is currently only used
-// by pexp and not exposed through HasFloor.
-template<> EIGEN_STRONG_INLINE Packet2f pfloor<Packet2f>(const Packet2f& a)
-{
- const Packet2f cst_1 = pset1<Packet2f>(1.0f);
- /* perform a floorf */
- Packet2f tmp = vcvt_f32_s32(vcvt_s32_f32(a));
-
- /* if greater, substract 1 */
- Packet2ui mask = vcgt_f32(tmp, a);
- mask = vand_u32(mask, vreinterpret_u32_f32(cst_1));
- return vsub_f32(tmp, vreinterpret_f32_u32(mask));
-}
-template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
-{
- const Packet4f cst_1 = pset1<Packet4f>(1.0f);
- /* perform a floorf */
- Packet4f tmp = vcvtq_f32_s32(vcvtq_s32_f32(a));
-
- /* if greater, substract 1 */
- Packet4ui mask = vcgtq_f32(tmp, a);
- mask = vandq_u32(mask, vreinterpretq_u32_f32(cst_1));
- return vsubq_f32(tmp, vreinterpretq_f32_u32(mask));
-}
-
// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics
template<> EIGEN_STRONG_INLINE Packet2f pand<Packet2f>(const Packet2f& a, const Packet2f& b)
{ return vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(a),vreinterpret_u32_f32(b))); }
@@ -3206,6 +3183,98 @@
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2ul pselect(const Packet2ul& mask, const Packet2ul& a, const Packet2ul& b)
{ return vbslq_u64(mask, a, b); }
+// Use armv8 rounding intinsics if available.
+#if EIGEN_ARCH_ARMV8
+template<> EIGEN_STRONG_INLINE Packet2f print<Packet2f>(const Packet2f& a)
+{ return vrndn_f32(a); }
+
+template<> EIGEN_STRONG_INLINE Packet4f print<Packet4f>(const Packet4f& a)
+{ return vrndnq_f32(a); }
+
+template<> EIGEN_STRONG_INLINE Packet2f pfloor<Packet2f>(const Packet2f& a)
+{ return vrndm_f32(a); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
+{ return vrndmq_f32(a); }
+
+template<> EIGEN_STRONG_INLINE Packet2f pceil<Packet2f>(const Packet2f& a)
+{ return vrndp_f32(a); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)
+{ return vrndpq_f32(a); }
+
+#else
+
+template<> EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) {
+ // Adds and subtracts signum(a) * 2^23 to force rounding.
+ const Packet4f limit = pset1<Packet4f>(static_cast<float>(1<<23));
+ const Packet4f abs_a = pabs(a);
+ Packet4f r = padd(abs_a, limit);
+ // Don't compile-away addition and subtraction.
+ EIGEN_OPTIMIZATION_BARRIER(r);
+ r = psub(r, limit);
+ // If greater than limit, simply return a. Otherwise, account for sign.
+ r = pselect(pcmp_lt(abs_a, limit),
+ pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a);
+ return r;
+}
+
+template<> EIGEN_STRONG_INLINE Packet2f print(const Packet2f& a) {
+ // Adds and subtracts signum(a) * 2^23 to force rounding.
+ const Packet2f limit = pset1<Packet2f>(static_cast<float>(1<<23));
+ const Packet2f abs_a = pabs(a);
+ Packet2f r = padd(abs_a, limit);
+ // Don't compile-away addition and subtraction.
+ EIGEN_OPTIMIZATION_BARRIER(r);
+ r = psub(r, limit);
+ // If greater than limit, simply return a. Otherwise, account for sign.
+ r = pselect(pcmp_lt(abs_a, limit),
+ pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a);
+ return r;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
+{
+ const Packet4f cst_1 = pset1<Packet4f>(1.0f);
+ Packet4f tmp = print<Packet4f>(a);
+ // If greater, subtract one.
+ Packet4f mask = pcmp_lt(a, tmp);
+ mask = pand(mask, cst_1);
+ return psub(tmp, mask);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2f pfloor<Packet2f>(const Packet2f& a)
+{
+ const Packet2f cst_1 = pset1<Packet2f>(1.0f);
+ Packet2f tmp = print<Packet2f>(a);
+ // If greater, subtract one.
+ Packet2f mask = pcmp_lt(a, tmp);
+ mask = pand(mask, cst_1);
+ return psub(tmp, mask);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)
+{
+ const Packet4f cst_1 = pset1<Packet4f>(1.0f);
+ Packet4f tmp = print<Packet4f>(a);
+ // If smaller, add one.
+ Packet4f mask = pcmp_lt(tmp, a);
+ mask = pand(mask, cst_1);
+ return padd(tmp, mask);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2f pceil<Packet2f>(const Packet2f& a)
+{
+ const Packet2f cst_1 = pset1<Packet2f>(1.0);
+ Packet2f tmp = print<Packet2f>(a);
+ // If smaller, add one.
+ Packet2f mask = pcmp_lt(tmp, a);
+ mask = pand(mask, cst_1);
+ return padd(tmp, mask);
+}
+
+#endif
+
/**
* Computes the integer square root
* @remarks The calculation is performed using an algorithm which iterates through each binary digit of the result
@@ -3297,41 +3366,41 @@
return res;
}
-#if EIGEN_FAST_MATH
-
-/* Functions for sqrt support packet2f/packet4f.*/
-// The EIGEN_FAST_MATH version uses the vrsqrte_f32 approximation and one step
-// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
-// exact solution. It does not handle +inf, or denormalized numbers correctly.
-// The main advantage of this approach is not just speed, but also the fact that
-// it can be inlined and pipelined with other computations, further reducing its
-// effective latency. This is similar to Quake3's fast inverse square root.
-// For more details see: http://www.beyond3d.com/content/articles/8/
-template<> EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f& _x){
- Packet4ui denormal_mask = vandq_u32(vcgeq_f32(_x, vdupq_n_f32(0.0f)),
- vcltq_f32(_x, pset1<Packet4f>((std::numeric_limits<float>::min)())));
+template<> EIGEN_STRONG_INLINE Packet4f prsqrt(const Packet4f& a) {
// Compute approximate reciprocal sqrt.
- Packet4f x = vrsqrteq_f32(_x);
- // Do one Newton's iteration for 1/sqrt(x).
- x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(_x, x), x), x);
- // Flush results for denormals to zero.
- return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(pmul(_x, x)), denormal_mask));
+ Packet4f x = vrsqrteq_f32(a);
+ // Do Newton iterations for 1/sqrt(x).
+ x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a, x), x), x);
+ x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a, x), x), x);
+ const Packet4f infinity = pset1<Packet4f>(NumTraits<float>::infinity());
+ return pselect(pcmp_eq(a, pzero(a)), infinity, x);
}
-template<> EIGEN_STRONG_INLINE Packet2f psqrt(const Packet2f& _x) {
- Packet2ui denormal_mask = vand_u32(vcge_f32(_x, vdup_n_f32(0.0f)),
- vclt_f32(_x, pset1<Packet2f>((std::numeric_limits<float>::min)())));
+template<> EIGEN_STRONG_INLINE Packet2f prsqrt(const Packet2f& a) {
// Compute approximate reciprocal sqrt.
- Packet2f x = vrsqrte_f32(_x);
- // Do one Newton's iteration for 1/sqrt(x).
- x = vmul_f32(vrsqrts_f32(vmul_f32(_x, x), x), x);
- // Flush results for denormals to zero.
- return vreinterpret_f32_u32(vbic_u32(vreinterpret_u32_f32(pmul(_x, x)), denormal_mask));
+ Packet2f x = vrsqrte_f32(a);
+ // Do Newton iterations for 1/sqrt(x).
+ x = vmul_f32(vrsqrts_f32(vmul_f32(a, x), x), x);
+ x = vmul_f32(vrsqrts_f32(vmul_f32(a, x), x), x);
+ const Packet2f infinity = pset1<Packet2f>(NumTraits<float>::infinity());
+ return pselect(pcmp_eq(a, pzero(a)), infinity, x);
}
-#else
+// Unfortunately vsqrt_f32 is only available for A64.
+#if EIGEN_ARCH_ARM64
template<> EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f& _x){return vsqrtq_f32(_x);}
template<> EIGEN_STRONG_INLINE Packet2f psqrt(const Packet2f& _x){return vsqrt_f32(_x); }
+#else
+template<> EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f& a) {
+ const Packet4f infinity = pset1<Packet4f>(NumTraits<float>::infinity());
+ const Packet4f is_zero_or_inf = por(pcmp_eq(a, pzero(a)), pcmp_eq(a, infinity));
+ return pselect(is_zero_or_inf, a, pmul(a, prsqrt(a)));
+}
+template<> EIGEN_STRONG_INLINE Packet2f psqrt(const Packet2f& a) {
+ const Packet2f infinity = pset1<Packet2f>(NumTraits<float>::infinity());
+ const Packet2f is_zero_or_inf = por(pcmp_eq(a, pzero(a)), pcmp_eq(a, infinity));
+ return pselect(is_zero_or_inf, a, pmul(a, prsqrt(a)));
+}
#endif
//---------- bfloat16 ----------
@@ -3370,6 +3439,8 @@
HasBlend = 0,
HasDiv = 1,
HasFloor = 1,
+ HasCeil = 1,
+ HasRint = 1,
HasSin = EIGEN_FAST_MATH,
HasCos = EIGEN_FAST_MATH,
@@ -3531,11 +3602,21 @@
return pselect<Packet4us>(mask, a, b);
}
+template<> EIGEN_STRONG_INLINE Packet4bf print<Packet4bf>(const Packet4bf& a)
+{
+ return F32ToBf16(print<Packet4f>(Bf16ToF32(a)));
+}
+
template<> EIGEN_STRONG_INLINE Packet4bf pfloor<Packet4bf>(const Packet4bf& a)
{
return F32ToBf16(pfloor<Packet4f>(Bf16ToF32(a)));
}
+template<> EIGEN_STRONG_INLINE Packet4bf pceil<Packet4bf>(const Packet4bf& a)
+{
+ return F32ToBf16(pceil<Packet4f>(Bf16ToF32(a)));
+}
+
template<> EIGEN_STRONG_INLINE Packet4bf pconj(const Packet4bf& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet4bf padd<Packet4bf>(const Packet4bf& a, const Packet4bf& b) {
@@ -3710,13 +3791,16 @@
HasBlend = 0,
HasDiv = 1,
- HasFloor = 0,
+ HasFloor = 1,
+ HasCeil = 1,
+ HasRint = 1,
HasSin = 0,
HasCos = 0,
HasLog = 1,
HasExp = 1,
HasSqrt = 1,
+ HasRsqrt = 1,
HasTanh = 0,
HasErf = 0
};
@@ -3788,21 +3872,6 @@
template<> EIGEN_STRONG_INLINE Packet2d pmax<PropagateNaN, Packet2d>(const Packet2d& a, const Packet2d& b) { return pmax<Packet2d>(a, b); }
-// WARNING: this pfloor implementation makes sense for inputs that fit in
-// signed int64 integers (up to ~9.22e18), hence this is currently only used
-// by pexp and not exposed through HasFloor.
-template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a)
-{
- const Packet2d cst_1 = pset1<Packet2d>(1.0);
- /* perform a floorf */
- const Packet2d tmp = vcvtq_f64_s64(vcvtq_s64_f64(a));
-
- /* if greater, substract 1 */
- uint64x2_t mask = vcgtq_f64(tmp, a);
- mask = vandq_u64(mask, vreinterpretq_u64_f64(cst_1));
- return vsubq_f64(tmp, vreinterpretq_f64_u64(mask));
-}
-
// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics
template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b)
{ return vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b))); }
@@ -3906,6 +3975,15 @@
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2d pselect( const Packet2d& mask, const Packet2d& a, const Packet2d& b)
{ return vbslq_f64(vreinterpretq_u64_f64(mask), a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d print<Packet2d>(const Packet2d& a)
+{ return vrndnq_f64(a); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a)
+{ return vrndmq_f64(a); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a)
+{ return vrndpq_f64(a); }
+
template<> EIGEN_STRONG_INLINE Packet2d pldexp<Packet2d>(const Packet2d& a, const Packet2d& exponent)
{ return pldexp_generic(a, exponent); }
@@ -3915,6 +3993,17 @@
template<> EIGEN_STRONG_INLINE Packet2d pset1frombits<Packet2d>(uint64_t from)
{ return vreinterpretq_f64_u64(vdupq_n_u64(from)); }
+template<> EIGEN_STRONG_INLINE Packet2d prsqrt(const Packet2d& a) {
+ // Compute approximate reciprocal sqrt.
+ Packet2d x = vrsqrteq_f64(a);
+ // Do Newton iterations for 1/sqrt(x).
+ x = vmulq_f64(vrsqrtsq_f64(vmulq_f64(a, x), x), x);
+ x = vmulq_f64(vrsqrtsq_f64(vmulq_f64(a, x), x), x);
+ x = vmulq_f64(vrsqrtsq_f64(vmulq_f64(a, x), x), x);
+ const Packet2d infinity = pset1<Packet2d>(NumTraits<double>::infinity());
+ return pselect(pcmp_eq(a, pzero(a)), infinity, x);
+}
+
template<> EIGEN_STRONG_INLINE Packet2d psqrt(const Packet2d& _x){ return vsqrtq_f64(_x); }
#endif // EIGEN_ARCH_ARM64
@@ -3954,11 +4043,14 @@
HasReduxp = 1,
HasDiv = 1,
HasFloor = 1,
+ HasCeil = 1,
+ HasRint = 1,
HasSin = 0,
HasCos = 0,
HasLog = 0,
HasExp = 0,
HasSqrt = 1,
+ HasRsqrt = 1,
HasErf = EIGEN_FAST_MATH,
HasBessel = 0, // Issues with accuracy.
HasNdtri = 0,
@@ -4164,28 +4256,28 @@
}
template <>
-EIGEN_STRONG_INLINE Packet8hf pfloor<Packet8hf>(const Packet8hf& a) {
- const Packet8hf cst_1 = pset1<Packet8hf>(Eigen::half(1.0f));
- /* perform a floorf */
- Packet8hf tmp = vcvtq_f16_s16(vcvtq_s16_f16(a));
-
- /* if greater, substract 1 */
- uint16x8_t mask = vcgtq_f16(tmp, a);
- mask = vandq_u16(mask, vreinterpretq_u16_f16(cst_1));
- return vsubq_f16(tmp, vreinterpretq_f16_u16(mask));
-}
+EIGEN_STRONG_INLINE Packet8hf print<Packet8hf>(const Packet8hf& a)
+{ return vrndnq_f16(a); }
template <>
-EIGEN_STRONG_INLINE Packet4hf pfloor<Packet4hf>(const Packet4hf& a) {
- const Packet4hf cst_1 = pset1<Packet4hf>(Eigen::half(1.0f));
- /* perform a floorf */
- Packet4hf tmp = vcvt_f16_s16(vcvt_s16_f16(a));
+EIGEN_STRONG_INLINE Packet4hf print<Packet4hf>(const Packet4hf& a)
+{ return vrndn_f16(a); }
- /* if greater, substract 1 */
- uint16x4_t mask = vcgt_f16(tmp, a);
- mask = vand_u16(mask, vreinterpret_u16_f16(cst_1));
- return vsub_f16(tmp, vreinterpret_f16_u16(mask));
-}
+template <>
+EIGEN_STRONG_INLINE Packet8hf pfloor<Packet8hf>(const Packet8hf& a)
+{ return vrndmq_f16(a); }
+
+template <>
+EIGEN_STRONG_INLINE Packet4hf pfloor<Packet4hf>(const Packet4hf& a)
+{ return vrndm_f16(a); }
+
+template <>
+EIGEN_STRONG_INLINE Packet8hf pceil<Packet8hf>(const Packet8hf& a)
+{ return vrndpq_f16(a); }
+
+template <>
+EIGEN_STRONG_INLINE Packet4hf pceil<Packet4hf>(const Packet4hf& a)
+{ return vrndp_f16(a); }
template <>
EIGEN_STRONG_INLINE Packet8hf psqrt<Packet8hf>(const Packet8hf& a) {
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 401a497..d7b8bc8 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -148,12 +148,11 @@
HasErf = EIGEN_FAST_MATH,
HasBlend = 1,
HasCeil = 1,
- HasFloor = 1
+ HasFloor = 1,
+ HasRint = 1,
#ifdef EIGEN_VECTORIZE_SSE4_1
- ,
- HasRint = 1,
- HasRound = 1
+ HasRound = 1,
#endif
};
};
@@ -175,12 +174,10 @@
HasRsqrt = 1,
HasBlend = 1,
HasFloor = 1,
- HasCeil = 1
-
-#ifdef EIGEN_VECTORIZE_SSE4_1
- ,
+ HasCeil = 1,
+ HasRint = 1,
+ #ifdef EIGEN_VECTORIZE_SSE4_1
HasRound = 1,
- HasRint = 1
#endif
};
};
@@ -598,9 +595,29 @@
return pminmax_propagate_nan(a, b, pmax<Packet2d>);
}
-template<int N> EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(Packet4i a) { return _mm_srai_epi32(a,N); }
-template<int N> EIGEN_STRONG_INLINE Packet4i plogical_shift_right(Packet4i a) { return _mm_srli_epi32(a,N); }
-template<int N> EIGEN_STRONG_INLINE Packet4i plogical_shift_left(Packet4i a) { return _mm_slli_epi32(a,N); }
+template<int N> EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i& a) { return _mm_srai_epi32(a,N); }
+template<int N> EIGEN_STRONG_INLINE Packet4i plogical_shift_right (const Packet4i& a) { return _mm_srli_epi32(a,N); }
+template<int N> EIGEN_STRONG_INLINE Packet4i plogical_shift_left (const Packet4i& a) { return _mm_slli_epi32(a,N); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
+{
+ const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
+ return _mm_and_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a)
+{
+ const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
+ return _mm_and_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
+{
+ #ifdef EIGEN_VECTORIZE_SSSE3
+ return _mm_abs_epi32(a);
+ #else
+ Packet4i aux = _mm_srai_epi32(a,31);
+ return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);
+ #endif
+}
#ifdef EIGEN_VECTORIZE_SSE4_1
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
@@ -627,25 +644,49 @@
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return _mm_floor_ps(a); }
template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return _mm_floor_pd(a); }
#else
+template<> EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) {
+ // Adds and subtracts signum(a) * 2^23 to force rounding.
+ const Packet4f limit = pset1<Packet4f>(static_cast<float>(1<<23));
+ const Packet4f abs_a = pabs(a);
+ Packet4f r = padd(abs_a, limit);
+ // Don't compile-away addition and subtraction.
+ EIGEN_OPTIMIZATION_BARRIER(r);
+ r = psub(r, limit);
+ // If greater than limit, simply return a. Otherwise, account for sign.
+ r = pselect(pcmp_lt(abs_a, limit),
+ pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a);
+ return r;
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d print(const Packet2d& a) {
+ // Adds and subtracts signum(a) * 2^52 to force rounding.
+ const Packet2d limit = pset1<Packet2d>(static_cast<double>(1ull<<52));
+ const Packet2d abs_a = pabs(a);
+ Packet2d r = padd(abs_a, limit);
+ // Don't compile-away addition and subtraction.
+ EIGEN_OPTIMIZATION_BARRIER(r);
+ r = psub(r, limit);
+ // If greater than limit, simply return a. Otherwise, account for sign.
+ r = pselect(pcmp_lt(abs_a, limit),
+ pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a);
+ return r;
+}
+
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
{
const Packet4f cst_1 = pset1<Packet4f>(1.0f);
- Packet4i emm0 = _mm_cvttps_epi32(a);
- Packet4f tmp = _mm_cvtepi32_ps(emm0);
- /* if greater, substract 1 */
+ Packet4f tmp = print<Packet4f>(a);
+ // If greater, subtract one.
Packet4f mask = _mm_cmpgt_ps(tmp, a);
mask = pand(mask, cst_1);
return psub(tmp, mask);
}
-// WARNING: this pfloor implementation makes sense for small inputs only,
-// It is currently only used by pexp and not exposed through HasFloor.
template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a)
{
const Packet2d cst_1 = pset1<Packet2d>(1.0);
- Packet4i emm0 = _mm_cvttpd_epi32(a);
- Packet2d tmp = _mm_cvtepi32_pd(emm0);
- /* if greater, substract 1 */
+ Packet2d tmp = print<Packet2d>(a);
+ // If greater, subtract one.
Packet2d mask = _mm_cmpgt_pd(tmp, a);
mask = pand(mask, cst_1);
return psub(tmp, mask);
@@ -654,22 +695,18 @@
template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)
{
const Packet4f cst_1 = pset1<Packet4f>(1.0f);
- Packet4i emm0 = _mm_cvttps_epi32(a);
- Packet4f tmp = _mm_cvtepi32_ps(emm0);
- /* if greater, substract 1 */
+ Packet4f tmp = print<Packet4f>(a);
+ // If smaller, add one.
Packet4f mask = _mm_cmplt_ps(tmp, a);
mask = pand(mask, cst_1);
return padd(tmp, mask);
}
-// WARNING: this pfloor implementation makes sense for small inputs only,
-// It is currently only used by pexp and not exposed through HasFloor.
template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a)
{
const Packet2d cst_1 = pset1<Packet2d>(1.0);
- Packet4i emm0 = _mm_cvttpd_epi32(a);
- Packet2d tmp = _mm_cvtepi32_pd(emm0);
- /* if greater, substract 1 */
+ Packet2d tmp = print<Packet2d>(a);
+ // If smaller, add one.
Packet2d mask = _mm_cmplt_pd(tmp, a);
mask = pand(mask, cst_1);
return padd(tmp, mask);
@@ -866,26 +903,6 @@
#endif
}
-template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
-{
- const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
- return _mm_and_ps(a,mask);
-}
-template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a)
-{
- const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
- return _mm_and_pd(a,mask);
-}
-template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
-{
- #ifdef EIGEN_VECTORIZE_SSSE3
- return _mm_abs_epi32(a);
- #else
- Packet4i aux = _mm_srai_epi32(a,31);
- return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);
- #endif
-}
-
template<> EIGEN_STRONG_INLINE Packet4f pfrexp<Packet4f>(const Packet4f& a, Packet4f& exponent) {
return pfrexp_generic(a,exponent);
}
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index 2716081..a182b4b 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -266,111 +266,6 @@
};
/** \internal
- * \brief Template functors for comparison of two scalars and cast the output from boolean to input datatype
- */
-template<typename LhsScalar, typename RhsScalar, ComparisonName cmp> struct scalar_cmp_with_cast_op;
-
-template<typename LhsScalar, typename RhsScalar, ComparisonName cmp>
-struct functor_traits<scalar_cmp_with_cast_op<LhsScalar,RhsScalar, cmp> > {
- enum {
- Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
- PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasCmp
- };
-};
-
-template<typename LhsScalar, typename RhsScalar>
-struct scalar_cmp_with_cast_op<LhsScalar, RhsScalar, cmp_EQ> : binary_op_base<LhsScalar,RhsScalar>
-{
- typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_cmp_with_cast_op>::ReturnType result_type;
- EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_with_cast_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const {
- if(a==b) return static_cast<result_type>(1);
- else return static_cast<result_type>(0);
- }
- template<typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
- { return internal::pselect(internal::pcmp_eq(a,b), internal::pset1<Packet>(static_cast<result_type>(1)), internal::pzero(a)); }
-};
-template<typename LhsScalar, typename RhsScalar>
-struct scalar_cmp_with_cast_op<LhsScalar, RhsScalar, cmp_LT> : binary_op_base<LhsScalar,RhsScalar>
-{
- typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_cmp_with_cast_op>::ReturnType result_type;
- EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_with_cast_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const {
- if(a<b) return static_cast<result_type>(1);
- else return static_cast<result_type>(0);
- }
- template<typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
- { return internal::pselect(internal::pcmp_lt(a,b), internal::pset1<Packet>(static_cast<result_type>(1)), internal::pzero(a)); }
-};
-template<typename LhsScalar, typename RhsScalar>
-struct scalar_cmp_with_cast_op<LhsScalar, RhsScalar, cmp_LE> : binary_op_base<LhsScalar,RhsScalar>
-{
- typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_cmp_with_cast_op>::ReturnType result_type;
- EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_with_cast_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const {
- if(a<=b) return static_cast<result_type>(1);
- else return static_cast<result_type>(0);
- }
- template<typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
- { return internal::pselect(internal::pcmp_le(a,b), internal::pset1<Packet>(static_cast<result_type>(1)), internal::pzero(a)); }
-};
-template<typename LhsScalar, typename RhsScalar>
-struct scalar_cmp_with_cast_op<LhsScalar, RhsScalar, cmp_GT> : binary_op_base<LhsScalar,RhsScalar>
-{
- typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_cmp_with_cast_op>::ReturnType result_type;
- EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_with_cast_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const {
- if(a>b) return static_cast<result_type>(1);
- else return static_cast<result_type>(0);
- }
- template<typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
- { return internal::pselect(internal::pcmp_le(a,b), internal::pzero(a), internal::pset1<Packet>(static_cast<result_type>(1))); }
-};
-template<typename LhsScalar, typename RhsScalar>
-struct scalar_cmp_with_cast_op<LhsScalar, RhsScalar, cmp_GE> : binary_op_base<LhsScalar,RhsScalar>
-{
- typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_cmp_with_cast_op>::ReturnType result_type;
- EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_with_cast_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const {
- if(a>=b) return static_cast<result_type>(1);
- else return static_cast<result_type>(0);
- }
- template<typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
- { return internal::pselect(internal::pcmp_lt(a,b), internal::pzero(a), internal::pset1<Packet>(static_cast<result_type>(1))); }
-};
-template<typename LhsScalar, typename RhsScalar>
-struct scalar_cmp_with_cast_op<LhsScalar, RhsScalar, cmp_UNORD> : binary_op_base<LhsScalar,RhsScalar>
-{
- typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_cmp_with_cast_op>::ReturnType result_type;
- EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_with_cast_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const {
- if(a<=b || b<=a) return static_cast<result_type>(0);
- else return static_cast<result_type>(1);
- }
- template<typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
- { return internal::pselect(por(internal::pcmp_le(a,b), internal::pcmp_le(b,a)), internal::pzero(a), internal::pset1<Packet>(static_cast<result_type>(1))); }
-};
-template<typename LhsScalar, typename RhsScalar>
-struct scalar_cmp_with_cast_op<LhsScalar, RhsScalar, cmp_NEQ> : binary_op_base<LhsScalar,RhsScalar>
-{
- typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_cmp_with_cast_op>::ReturnType result_type;
- EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_with_cast_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const {
- if(a!=b) return static_cast<result_type>(1);
- else return static_cast<result_type>(0);
- }
- template<typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
- { return internal::pselect(internal::pcmp_eq(a,b), internal::pzero(a), internal::pset1<Packet>(static_cast<result_type>(1))); }
-};
-
-/** \internal
* \brief Template functor to compute the hypot of two \b positive \b and \b real scalars
*
* \sa MatrixBase::stableNorm(), class Redux
diff --git a/Eigen/src/Core/functors/StlFunctors.h b/Eigen/src/Core/functors/StlFunctors.h
index 9c1d758..d2e7b5b 100644
--- a/Eigen/src/Core/functors/StlFunctors.h
+++ b/Eigen/src/Core/functors/StlFunctors.h
@@ -72,7 +72,7 @@
struct functor_traits<std::not_equal_to<T> >
{ enum { Cost = 1, PacketAccess = false }; };
-#if (__cplusplus < 201103L) && (EIGEN_COMP_MSVC <= 1900)
+#if (EIGEN_COMP_CXXVER < 11)
// std::binder* are deprecated since c++11 and will be removed in c++17
template<typename T>
struct functor_traits<std::binder2nd<T> >
@@ -83,7 +83,7 @@
{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
#endif
-#if (__cplusplus < 201703L) && (EIGEN_COMP_MSVC < 1910)
+#if (EIGEN_COMP_CXXVER < 17)
// std::unary_negate is deprecated since c++17 and will be removed in c++20
template<typename T>
struct functor_traits<std::unary_negate<T> >
diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h
index 4501d32..fe0cfec 100755
--- a/Eigen/src/Core/util/DisableStupidWarnings.h
+++ b/Eigen/src/Core/util/DisableStupidWarnings.h
@@ -44,6 +44,9 @@
#if __clang_major__ >= 3 && __clang_minor__ >= 5
#pragma clang diagnostic ignored "-Wabsolute-value"
#endif
+ #if __clang_major__ >= 10
+ #pragma clang diagnostic ignored "-Wimplicit-int-float-conversion"
+ #endif
#if ( defined(__ALTIVEC__) || defined(__VSX__) ) && __cplusplus < 201103L
// warning: generic selections are a C11-specific feature
// ignoring warnings thrown at vec_ctf in Altivec/PacketMath.h
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 8691aa1..252d2be 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -269,6 +269,14 @@
#define EIGEN_ARCH_ARM_OR_ARM64 0
#endif
+/// \internal EIGEN_ARCH_ARMV8 set to 1 if the architecture is armv8 or greater.
+#if EIGEN_ARCH_ARM_OR_ARM64 && defined(__ARM_ARCH) && __ARM_ARCH >= 8
+#define EIGEN_ARCH_ARMV8 1
+#else
+#define EIGEN_ARCH_ARMV8 0
+#endif
+
+
/// \internal EIGEN_HAS_ARM64_FP16 set to 1 if the architecture provides an IEEE
/// compliant Arm fp16 type
#if EIGEN_ARCH_ARM64
@@ -601,18 +609,36 @@
#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 0
#endif
+// The macro EIGEN_CPLUSPLUS is a replacement for __cplusplus/_MSVC_LANG that
+// works for both platforms, indicating the C++ standard version number.
+//
+// With MSVC, without defining /Zc:__cplusplus, the __cplusplus macro will
+// report 199711L regardless of the language standard specified via /std.
+// We need to rely on _MSVC_LANG instead, which is only available after
+// VS2015.3.
+#if EIGEN_COMP_MSVC_LANG > 0
+#define EIGEN_CPLUSPLUS EIGEN_COMP_MSVC_LANG
+#elif EIGEN_COMP_MSVC >= 1900
+#define EIGEN_CPLUSPLUS 201103L
+#elif defined(__cplusplus)
+#define EIGEN_CPLUSPLUS __cplusplus
+#else
+#define EIGEN_CPLUSPLUS 0
+#endif
// The macro EIGEN_COMP_CXXVER defines the c++ verson expected by the compiler.
// For instance, if compiling with gcc and -std=c++17, then EIGEN_COMP_CXXVER
// is defined to 17.
-#if (defined(__cplusplus) && (__cplusplus > 201402L) || EIGEN_COMP_MSVC_LANG > 201402L)
-#define EIGEN_COMP_CXXVER 17
-#elif (defined(__cplusplus) && (__cplusplus > 201103L) || EIGEN_COMP_MSVC >= 1910)
-#define EIGEN_COMP_CXXVER 14
-#elif (defined(__cplusplus) && (__cplusplus >= 201103L) || EIGEN_COMP_MSVC >= 1900)
-#define EIGEN_COMP_CXXVER 11
+#if EIGEN_CPLUSPLUS > 201703L
+ #define EIGEN_COMP_CXXVER 20
+#elif EIGEN_CPLUSPLUS > 201402L
+ #define EIGEN_COMP_CXXVER 17
+#elif EIGEN_CPLUSPLUS > 201103L
+ #define EIGEN_COMP_CXXVER 14
+#elif EIGEN_CPLUSPLUS >= 201103L
+ #define EIGEN_COMP_CXXVER 11
#else
-#define EIGEN_COMP_CXXVER 03
+ #define EIGEN_COMP_CXXVER 03
#endif
@@ -637,8 +663,7 @@
#ifndef EIGEN_HAS_RVALUE_REFERENCES
#if EIGEN_MAX_CPP_VER>=11 && \
(__has_feature(cxx_rvalue_references) || \
- (defined(__cplusplus) && __cplusplus >= 201103L) || \
- (EIGEN_COMP_MSVC >= 1600))
+ (EIGEN_COMP_CXXVER >= 11) || (EIGEN_COMP_MSVC >= 1600))
#define EIGEN_HAS_RVALUE_REFERENCES 1
#else
#define EIGEN_HAS_RVALUE_REFERENCES 0
@@ -664,14 +689,33 @@
// result_of was deprecated in c++17 and removed in c++ 20
#ifndef EIGEN_HAS_STD_RESULT_OF
#if EIGEN_MAX_CPP_VER >= 11 && \
- ((defined(__cplusplus) && __cplusplus >= 201103L && __cplusplus < 201703L) || \
- __has_feature(cxx_lambdas))
+ (defined(__cplusplus) && __cplusplus >= 201103L && __cplusplus < 201703L)
#define EIGEN_HAS_STD_RESULT_OF 1
#else
#define EIGEN_HAS_STD_RESULT_OF 0
#endif
#endif
+// Does the compiler support std::hash?
+#ifndef EIGEN_HAS_STD_HASH
+// The std::hash struct is defined in C++11 but is not labelled as a __device__
+// function and is not constexpr, so cannot be used on device.
+#if EIGEN_HAS_CXX11 && !defined(EIGEN_GPU_COMPILE_PHASE)
+#define EIGEN_HAS_STD_HASH 1
+#else
+#define EIGEN_HAS_STD_HASH 0
+#endif
+#endif // EIGEN_HAS_STD_HASH
+
+#ifndef EIGEN_HAS_STD_INVOKE_RESULT
+#if EIGEN_MAX_CPP_VER >= 17 && \
+ (defined(__cplusplus) && __cplusplus >= 201703L)
+#define EIGEN_HAS_STD_INVOKE_RESULT 1
+#else
+#define EIGEN_HAS_STD_INVOKE_RESULT 0
+#endif
+#endif
+
#ifndef EIGEN_HAS_ALIGNAS
#if EIGEN_MAX_CPP_VER>=11 && EIGEN_HAS_CXX11 && \
( __has_feature(cxx_alignas) \
@@ -704,12 +748,12 @@
// Does the compiler support variadic templates?
#ifndef EIGEN_HAS_VARIADIC_TEMPLATES
-#if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
+#if EIGEN_MAX_CPP_VER>=11 && (EIGEN_COMP_CXXVER >= 11) \
&& (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_COMP_NVCC >= 80000) )
// ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices:
// this prevents nvcc from crashing when compiling Eigen on Tegra X1
#define EIGEN_HAS_VARIADIC_TEMPLATES 1
-#elif EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) && defined(SYCL_DEVICE_ONLY)
+#elif EIGEN_MAX_CPP_VER>=11 && (EIGEN_COMP_CXXVER >= 11) && defined(SYCL_DEVICE_ONLY)
#define EIGEN_HAS_VARIADIC_TEMPLATES 1
#else
#define EIGEN_HAS_VARIADIC_TEMPLATES 0
@@ -720,12 +764,12 @@
#ifndef EIGEN_HAS_CONSTEXPR
#if defined(EIGEN_CUDACC)
// Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
- #if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_COMP_NVCC >= 70500))
+ #if EIGEN_MAX_CPP_VER>=14 && (EIGEN_COMP_CXXVER >= 11 && (EIGEN_COMP_CLANG || EIGEN_COMP_NVCC >= 70500))
#define EIGEN_HAS_CONSTEXPR 1
#endif
- #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
- (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L)) || \
- (EIGEN_COMP_CLANG >= 306 && (__cplusplus > 199711L)))
+ #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (EIGEN_COMP_CXXVER >= 14) || \
+ (EIGEN_GNUC_AT_LEAST(4,8) && (EIGEN_COMP_CXXVER >= 11)) || \
+ (EIGEN_COMP_CLANG >= 306 && (EIGEN_COMP_CXXVER >= 11)))
#define EIGEN_HAS_CONSTEXPR 1
#endif
@@ -744,7 +788,7 @@
// Does the compiler support C++11 math?
// Let's be conservative and enable the default C++11 implementation only if we are sure it exists
#ifndef EIGEN_HAS_CXX11_MATH
- #if EIGEN_MAX_CPP_VER>=11 && ((__cplusplus > 201103L) || (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \
+ #if EIGEN_MAX_CPP_VER>=11 && ((EIGEN_COMP_CXXVER > 11) || (EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \
&& (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC))
#define EIGEN_HAS_CXX11_MATH 1
#else
@@ -755,9 +799,8 @@
// Does the compiler support proper C++11 containers?
#ifndef EIGEN_HAS_CXX11_CONTAINERS
#if EIGEN_MAX_CPP_VER>=11 && \
- ((__cplusplus > 201103L) \
- || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \
- || EIGEN_COMP_MSVC >= 1900)
+ ((EIGEN_COMP_CXXVER > 11) \
+ || ((EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC>=1400)))
#define EIGEN_HAS_CXX11_CONTAINERS 1
#else
#define EIGEN_HAS_CXX11_CONTAINERS 0
@@ -768,9 +811,8 @@
#ifndef EIGEN_HAS_CXX11_NOEXCEPT
#if EIGEN_MAX_CPP_VER>=11 && \
(__has_feature(cxx_noexcept) \
- || (__cplusplus > 201103L) \
- || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \
- || EIGEN_COMP_MSVC >= 1900)
+ || (EIGEN_COMP_CXXVER > 11) \
+ || ((EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC>=1400)))
#define EIGEN_HAS_CXX11_NOEXCEPT 1
#else
#define EIGEN_HAS_CXX11_NOEXCEPT 0
@@ -780,8 +822,8 @@
#ifndef EIGEN_HAS_CXX11_ATOMIC
#if EIGEN_MAX_CPP_VER>=11 && \
(__has_feature(cxx_atomic) \
- || (__cplusplus > 201103L) \
- || ((__cplusplus >= 201103L) && (EIGEN_COMP_MSVC==0 || EIGEN_COMP_MSVC >= 1700)))
+ || (EIGEN_COMP_CXXVER > 11) \
+ || ((EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_MSVC==0 || EIGEN_COMP_MSVC >= 1700)))
#define EIGEN_HAS_CXX11_ATOMIC 1
#else
#define EIGEN_HAS_CXX11_ATOMIC 0
@@ -790,7 +832,7 @@
#ifndef EIGEN_HAS_CXX11_OVERRIDE_FINAL
#if EIGEN_MAX_CPP_VER>=11 && \
- (__cplusplus >= 201103L || EIGEN_COMP_MSVC >= 1700)
+ (EIGEN_COMP_CXXVER >= 11 || EIGEN_COMP_MSVC >= 1700)
#define EIGEN_HAS_CXX11_OVERRIDE_FINAL 1
#else
#define EIGEN_HAS_CXX11_OVERRIDE_FINAL 0
@@ -1040,6 +1082,64 @@
#endif
+// Acts as a barrier preventing operations involving `X` from crossing. This
+// occurs, for example, in the fast rounding trick where a magic constant is
+// added then subtracted, which is otherwise compiled away with -ffast-math.
+//
+// See bug 1674
+#if !defined(EIGEN_OPTIMIZATION_BARRIER)
+ #if EIGEN_COMP_GNUC
+ // According to https://gcc.gnu.org/onlinedocs/gcc/Constraints.html:
+ // X: Any operand whatsoever.
+ // r: A register operand is allowed provided that it is in a general
+ // register.
+ // g: Any register, memory or immediate integer operand is allowed, except
+ // for registers that are not general registers.
+ // w: (AArch32/AArch64) Floating point register, Advanced SIMD vector
+ // register or SVE vector register.
+ // x: (SSE) Any SSE register.
+ // (AArch64) Like w, but restricted to registers 0 to 15 inclusive.
+ // v: (PowerPC) An Altivec vector register.
+ // wa:(PowerPC) A VSX register.
+ //
+ // "X" (uppercase) should work for all cases, though this seems to fail for
+ // some versions of GCC for arm/aarch64 with
+ // "error: inconsistent operand constraints in an 'asm'"
+ // Clang x86_64/arm/aarch64 seems to require "g" to support both scalars and
+ // vectors, otherwise
+ // "error: non-trivial scalar-to-vector conversion, possible invalid
+ // constraint for vector type"
+ //
+ // GCC for ppc64le generates an internal compiler error with x/X/g.
+ // GCC for AVX generates an internal compiler error with X.
+ //
+ // Tested on icc/gcc/clang for sse, avx, avx2, avx512dq
+ // gcc for arm, aarch64,
+ // gcc for ppc64le,
+ // both vectors and scalars.
+ //
+ // Note that this is restricted to plain types - this will not work
+ // directly for std::complex<T>, Eigen::half, Eigen::bfloat16. For these,
+ // you will need to apply to the underlying POD type.
+ #if EIGEN_ARCH_PPC
+ // General, Altivec, VSX.
+ #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+r,v,wa" (X));
+ #elif EIGEN_ARCH_ARM_OR_ARM64
+ // General, NEON.
+ #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+g,w" (X));
+ #elif EIGEN_ARCH_i386_OR_x86_64
+ // General, SSE.
+ #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+g,x" (X));
+ #else
+ // Not implemented for other architectures.
+ #define EIGEN_OPTIMIZATION_BARRIER(X)
+ #endif
+ #else
+ // Not implemented for other compilers.
+ #define EIGEN_OPTIMIZATION_BARRIER(X)
+ #endif
+#endif
+
#if EIGEN_COMP_MSVC
// NOTE MSVC often gives C4127 warnings with compiletime if statements. See bug 1362.
// This workaround is ugly, but it does the job.
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index bb9144d..7cbe8a6 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -780,49 +780,45 @@
#else
-#if EIGEN_MAX_ALIGN_BYTES!=0
-#if !defined(EIGEN_HIPCC)
- #define EIGEN_DEVICE_FUNC_NO_HIPCC EIGEN_DEVICE_FUNC
-#else
- #define EIGEN_DEVICE_FUNC_NO_HIPCC
-#endif
+// HIP does not support new/delete on device.
+#if EIGEN_MAX_ALIGN_BYTES!=0 && !defined(EIGEN_HIP_DEVICE_COMPILE)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
- EIGEN_DEVICE_FUNC_NO_HIPCC \
+ EIGEN_DEVICE_FUNC \
void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
EIGEN_CATCH (...) { return 0; } \
}
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
- EIGEN_DEVICE_FUNC_NO_HIPCC \
+ EIGEN_DEVICE_FUNC \
void *operator new(std::size_t size) { \
return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
} \
- EIGEN_DEVICE_FUNC_NO_HIPCC \
+ EIGEN_DEVICE_FUNC \
void *operator new[](std::size_t size) { \
return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
} \
- EIGEN_DEVICE_FUNC_NO_HIPCC \
+ EIGEN_DEVICE_FUNC \
void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
- EIGEN_DEVICE_FUNC_NO_HIPCC \
+ EIGEN_DEVICE_FUNC \
void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
- EIGEN_DEVICE_FUNC_NO_HIPCC \
+ EIGEN_DEVICE_FUNC \
void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
- EIGEN_DEVICE_FUNC_NO_HIPCC \
+ EIGEN_DEVICE_FUNC \
void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
/* in-place new and delete. since (at least afaik) there is no actual */ \
/* memory allocated we can safely let the default implementation handle */ \
/* this particular case. */ \
- EIGEN_DEVICE_FUNC_NO_HIPCC \
+ EIGEN_DEVICE_FUNC \
static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
- EIGEN_DEVICE_FUNC_NO_HIPCC \
+ EIGEN_DEVICE_FUNC \
static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \
- EIGEN_DEVICE_FUNC_NO_HIPCC \
+ EIGEN_DEVICE_FUNC \
void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
- EIGEN_DEVICE_FUNC_NO_HIPCC \
+ EIGEN_DEVICE_FUNC \
void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
/* nothrow-new (returns zero instead of std::bad_alloc) */ \
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
- EIGEN_DEVICE_FUNC_NO_HIPCC \
+ EIGEN_DEVICE_FUNC \
void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
} \
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 32a03ae..cad57c3 100755
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -482,13 +482,29 @@
Index size(const T (&) [N]) { return N; }
/** \internal
- * Convenient struct to get the result type of a unary or binary functor.
- *
- * It supports both the current STL mechanism (using the result_type member) as well as
- * upcoming next STL generation (using a templated result member).
- * If none of these members is provided, then the type of the first argument is returned. FIXME, that behavior is a pretty bad hack.
+ * Convenient struct to get the result type of a nullary, unary, binary, or
+ * ternary functor.
+ *
+ * Pre C++11:
+ * Supports both a Func::result_type member and templated
+ * Func::result<Func(ArgTypes...)>::type member.
+ *
+ * If none of these members is provided, then the type of the first
+ * argument is returned.
+ *
+ * Post C++11:
+ * This uses std::result_of. However, note the `type` member removes
+ * const and converts references/pointers to their corresponding value type.
*/
-#if EIGEN_HAS_STD_RESULT_OF
+#if EIGEN_HAS_STD_INVOKE_RESULT
+template<typename T> struct result_of;
+
+template<typename F, typename... ArgTypes>
+struct result_of<F(ArgTypes...)> {
+ typedef typename std::invoke_result<F, ArgTypes...>::type type1;
+ typedef typename remove_all<type1>::type type;
+};
+#elif EIGEN_HAS_STD_RESULT_OF
template<typename T> struct result_of {
typedef typename std::result_of<T>::type type1;
typedef typename remove_all<type1>::type type;
@@ -500,6 +516,28 @@
struct has_std_result_type {int a[2];};
struct has_tr1_result {int a[3];};
+template<typename Func, int SizeOf>
+struct nullary_result_of_select {};
+
+template<typename Func>
+struct nullary_result_of_select<Func, sizeof(has_std_result_type)> {typedef typename Func::result_type type;};
+
+template<typename Func>
+struct nullary_result_of_select<Func, sizeof(has_tr1_result)> {typedef typename Func::template result<Func()>::type type;};
+
+template<typename Func>
+struct result_of<Func()> {
+ template<typename T>
+ static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
+ template<typename T>
+ static has_tr1_result testFunctor(T const *, typename T::template result<T()>::type const * = 0);
+ static has_none testFunctor(...);
+
+ // note that the following indirection is needed for gcc-3.3
+ enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
+ typedef typename nullary_result_of_select<Func, FunctorType>::type type;
+};
+
template<typename Func, typename ArgType, int SizeOf=sizeof(has_none)>
struct unary_result_of_select {typedef typename internal::remove_all<ArgType>::type type;};
@@ -569,6 +607,45 @@
enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
typedef typename ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, FunctorType>::type type;
};
+
+#endif
+
+#if EIGEN_HAS_STD_INVOKE_RESULT
+template<typename F, typename... ArgTypes>
+struct invoke_result {
+ typedef typename std::invoke_result<F, ArgTypes...>::type type1;
+ typedef typename remove_all<type1>::type type;
+};
+#elif EIGEN_HAS_CXX11
+template<typename F, typename... ArgTypes>
+struct invoke_result {
+ typedef typename result_of<F(ArgTypes...)>::type type1;
+ typedef typename remove_all<type1>::type type;
+};
+#else
+template<typename F, typename ArgType0 = void, typename ArgType1 = void, typename ArgType2 = void>
+struct invoke_result {
+ typedef typename result_of<F(ArgType0, ArgType1, ArgType2)>::type type1;
+ typedef typename remove_all<type1>::type type;
+};
+
+template<typename F>
+struct invoke_result<F, void, void, void> {
+ typedef typename result_of<F()>::type type1;
+ typedef typename remove_all<type1>::type type;
+};
+
+template<typename F, typename ArgType0>
+struct invoke_result<F, ArgType0, void, void> {
+ typedef typename result_of<F(ArgType0)>::type type1;
+ typedef typename remove_all<type1>::type type;
+};
+
+template<typename F, typename ArgType0, typename ArgType1>
+struct invoke_result<F, ArgType0, ArgType1, void> {
+ typedef typename result_of<F(ArgType0, ArgType1)>::type type1;
+ typedef typename remove_all<type1>::type type;
+};
#endif
struct meta_yes { char a[1]; };
diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h
index 2ef19a4..c45de59 100644
--- a/Eigen/src/Core/util/StaticAssert.h
+++ b/Eigen/src/Core/util/StaticAssert.h
@@ -27,7 +27,7 @@
#ifndef EIGEN_STATIC_ASSERT
#ifndef EIGEN_NO_STATIC_ASSERT
- #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600))
+ #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (EIGEN_COMP_CXXVER >= 11) || (EIGEN_COMP_MSVC >= 1600))
// if native static_assert is enabled, let's use it
#define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG);
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index fd2db56..f83bca1 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -129,7 +129,7 @@
template<typename T, int Value> class variable_if_dynamic
{
public:
- EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamic)
+ EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(variable_if_dynamic)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator T() const { return T(Value); }
diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h
index 7d258c0..1cb1d2f 100644
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@@ -223,9 +223,9 @@
/** type of the matrix used to represent the linear part of the transformation */
typedef Matrix<Scalar,Dim,Dim,Options> LinearMatrixType;
/** type of read/write reference to the linear part of the transformation */
- typedef Block<MatrixType,Dim,Dim,int(Mode)==(AffineCompact) && (Options&RowMajor)==0> LinearPart;
+ typedef Block<MatrixType,Dim,Dim,int(Mode)==(AffineCompact) && (int(Options)&RowMajor)==0> LinearPart;
/** type of read reference to the linear part of the transformation */
- typedef const Block<ConstMatrixType,Dim,Dim,int(Mode)==(AffineCompact) && (Options&RowMajor)==0> ConstLinearPart;
+ typedef const Block<ConstMatrixType,Dim,Dim,int(Mode)==(AffineCompact) && (int(Options)&RowMajor)==0> ConstLinearPart;
/** type of read/write reference to the affine part of the transformation */
typedef typename internal::conditional<int(Mode)==int(AffineCompact),
MatrixType&,
diff --git a/Eigen/src/Jacobi/Jacobi.h b/Eigen/src/Jacobi/Jacobi.h
index bfb9dcb..76668a5 100644
--- a/Eigen/src/Jacobi/Jacobi.h
+++ b/Eigen/src/Jacobi/Jacobi.h
@@ -453,7 +453,7 @@
void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j)
{
typedef typename VectorX::Scalar Scalar;
- const bool Vectorizable = (VectorX::Flags & VectorY::Flags & PacketAccessBit)
+ const bool Vectorizable = (int(VectorX::Flags) & int(VectorY::Flags) & PacketAccessBit)
&& (int(packet_traits<Scalar>::size) == int(packet_traits<OtherScalar>::size));
eigen_assert(xpr_x.size() == xpr_y.size());
diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky.h b/Eigen/src/SparseCholesky/SimplicialCholesky.h
index 06edb86..94c9f0f 100644
--- a/Eigen/src/SparseCholesky/SimplicialCholesky.h
+++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h
@@ -287,8 +287,8 @@
typedef SparseMatrix<Scalar, ColMajor, StorageIndex> CholMatrixType;
typedef TriangularView<const CholMatrixType, Eigen::Lower> MatrixL;
typedef TriangularView<const typename CholMatrixType::AdjointReturnType, Eigen::Upper> MatrixU;
- static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); }
- static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); }
+ static inline MatrixL getL(const CholMatrixType& m) { return MatrixL(m); }
+ static inline MatrixU getU(const CholMatrixType& m) { return MatrixU(m.adjoint()); }
};
template<typename _MatrixType,int _UpLo, typename _Ordering> struct traits<SimplicialLDLT<_MatrixType,_UpLo,_Ordering> >
@@ -301,8 +301,8 @@
typedef SparseMatrix<Scalar, ColMajor, StorageIndex> CholMatrixType;
typedef TriangularView<const CholMatrixType, Eigen::UnitLower> MatrixL;
typedef TriangularView<const typename CholMatrixType::AdjointReturnType, Eigen::UnitUpper> MatrixU;
- static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); }
- static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); }
+ static inline MatrixL getL(const CholMatrixType& m) { return MatrixL(m); }
+ static inline MatrixU getU(const CholMatrixType& m) { return MatrixU(m.adjoint()); }
};
template<typename _MatrixType, int _UpLo, typename _Ordering> struct traits<SimplicialCholesky<_MatrixType,_UpLo,_Ordering> >
diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h
index 6e6cab3..616b4a0 100644
--- a/Eigen/src/SparseCore/SparseMatrix.h
+++ b/Eigen/src/SparseCore/SparseMatrix.h
@@ -579,10 +579,12 @@
else if (innerChange < 0)
{
// Inner size decreased: allocate a new m_innerNonZeros
- m_innerNonZeros = static_cast<StorageIndex*>(std::malloc((m_outerSize+outerChange+1) * sizeof(StorageIndex)));
+ m_innerNonZeros = static_cast<StorageIndex*>(std::malloc((m_outerSize + outerChange) * sizeof(StorageIndex)));
if (!m_innerNonZeros) internal::throw_std_bad_alloc();
- for(Index i = 0; i < m_outerSize; i++)
+ for(Index i = 0; i < m_outerSize + (std::min)(outerChange, Index(0)); i++)
m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i];
+ for(Index i = m_outerSize; i < m_outerSize + outerChange; i++)
+ m_innerNonZeros[i] = 0;
}
// Change the m_innerNonZeros in case of a decrease of inner size
diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h
index 76117a0..85b00e1 100644
--- a/Eigen/src/SparseCore/SparseSelfAdjointView.h
+++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h
@@ -142,6 +142,9 @@
return *this = src.twistedBy(pnull);
}
+ // Since we override the copy-assignment operator, we need to explicitly re-declare the copy-constructor
+ EIGEN_DEFAULT_COPY_CONSTRUCTOR(SparseSelfAdjointView)
+
template<typename SrcMatrixType,unsigned int SrcMode>
SparseSelfAdjointView& operator=(const SparseSelfAdjointView<SrcMatrixType,SrcMode>& src)
{
diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h
index 190210d..d1d3ad7 100644
--- a/Eigen/src/SuperLUSupport/SuperLUSupport.h
+++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h
@@ -217,12 +217,12 @@
res.setScalarType<typename MatrixType::Scalar>();
// FIXME the following is not very accurate
- if (MatrixType::Flags & Upper)
+ if (int(MatrixType::Flags) & int(Upper))
res.Mtype = SLU_TRU;
- if (MatrixType::Flags & Lower)
+ if (int(MatrixType::Flags) & int(Lower))
res.Mtype = SLU_TRL;
- eigen_assert(((MatrixType::Flags & SelfAdjoint)==0) && "SelfAdjoint matrix shape not supported by SuperLU");
+ eigen_assert(((int(MatrixType::Flags) & int(SelfAdjoint))==0) && "SelfAdjoint matrix shape not supported by SuperLU");
return res;
}
diff --git a/bench/spbench/CMakeLists.txt b/bench/spbench/CMakeLists.txt
index fda8dcc..b186004 100644
--- a/bench/spbench/CMakeLists.txt
+++ b/bench/spbench/CMakeLists.txt
@@ -13,7 +13,7 @@
# set(SPARSE_LIBS ${SPARSE_LIBS} ${PARDISO_LIBRARIES})
# endif()
-find_package(Cholmod)
+find_package(CHOLMOD)
if(CHOLMOD_FOUND AND BLAS_FOUND AND LAPACK_FOUND)
add_definitions("-DEIGEN_CHOLMOD_SUPPORT")
include_directories(${CHOLMOD_INCLUDES})
@@ -21,7 +21,7 @@
set(CHOLMOD_ALL_LIBS ${CHOLMOD_LIBRARIES} ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES})
endif()
-find_package(Umfpack)
+find_package(UMFPACK)
if(UMFPACK_FOUND AND BLAS_FOUND)
add_definitions("-DEIGEN_UMFPACK_SUPPORT")
include_directories(${UMFPACK_INCLUDES})
@@ -37,7 +37,7 @@
endif()
find_package(SuperLU 4.0)
-if(SUPERLU_FOUND AND BLAS_FOUND)
+if(SuperLU_FOUND AND BLAS_FOUND)
add_definitions("-DEIGEN_SUPERLU_SUPPORT")
include_directories(${SUPERLU_INCLUDES})
set(SPARSE_LIBS ${SPARSE_LIBS} ${SUPERLU_LIBRARIES} ${BLAS_LIBRARIES})
diff --git a/cmake/FindAdolc.cmake b/cmake/FindAdolc.cmake
index 374af76..13c59fc 100644
--- a/cmake/FindAdolc.cmake
+++ b/cmake/FindAdolc.cmake
@@ -4,17 +4,17 @@
endif ()
find_path(ADOLC_INCLUDES
- NAMES
- adolc/adtl.h
- PATHS
- $ENV{ADOLCDIR}
- ${INCLUDE_INSTALL_DIR}
+ NAMES adolc/adtl.h
+ PATHS $ENV{ADOLCDIR} $ENV{ADOLCDIR}/include ${INCLUDE_INSTALL_DIR}
)
-find_library(ADOLC_LIBRARIES adolc PATHS $ENV{ADOLCDIR} ${LIB_INSTALL_DIR})
+find_library(ADOLC_LIBRARIES
+ adolc
+ PATHS $ENV{ADOLCDIR} ${LIB_INSTALL_DIR}
+ PATH_SUFFIXES lib lib64)
include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(ADOLC DEFAULT_MSG
+find_package_handle_standard_args(Adolc DEFAULT_MSG
ADOLC_INCLUDES ADOLC_LIBRARIES)
mark_as_advanced(ADOLC_INCLUDES ADOLC_LIBRARIES)
diff --git a/cmake/FindCholmod.cmake b/cmake/FindCHOLMOD.cmake
similarity index 97%
rename from cmake/FindCholmod.cmake
rename to cmake/FindCHOLMOD.cmake
index cea43ec..e470cb2 100644
--- a/cmake/FindCholmod.cmake
+++ b/cmake/FindCHOLMOD.cmake
@@ -1,4 +1,4 @@
-# Cholmod lib usually requires linking to a blas and lapack library.
+# CHOLMOD lib usually requires linking to a blas and lapack library.
# It is up to the user of this module to find a BLAS and link to it.
if (CHOLMOD_INCLUDES AND CHOLMOD_LIBRARIES)
diff --git a/cmake/FindGoogleHash.cmake b/cmake/FindGoogleHash.cmake
index f46ea9c..481eb4d 100644
--- a/cmake/FindGoogleHash.cmake
+++ b/cmake/FindGoogleHash.cmake
@@ -18,6 +18,6 @@
endif()
include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(GOOGLEHASH DEFAULT_MSG GOOGLEHASH_INCLUDES GOOGLEHASH_COMPILE)
+find_package_handle_standard_args(GoogleHash DEFAULT_MSG GOOGLEHASH_INCLUDES GOOGLEHASH_COMPILE)
mark_as_advanced(GOOGLEHASH_INCLUDES)
diff --git a/cmake/FindSuperLU.cmake b/cmake/FindSuperLU.cmake
index e86fe5c..4b779f5 100644
--- a/cmake/FindSuperLU.cmake
+++ b/cmake/FindSuperLU.cmake
@@ -90,7 +90,7 @@
endif()
include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(SUPERLU
+find_package_handle_standard_args(SuperLU
REQUIRED_VARS SUPERLU_INCLUDES SUPERLU_LIBRARIES SUPERLU_VERSION_OK
VERSION_VAR SUPERLU_VERSION_VAR)
diff --git a/cmake/FindUmfpack.cmake b/cmake/FindUMFPACK.cmake
similarity index 100%
rename from cmake/FindUmfpack.cmake
rename to cmake/FindUMFPACK.cmake
diff --git a/debug/msvc/eigen.natvis b/debug/msvc/eigen.natvis
index 22cf346..da89857 100644
--- a/debug/msvc/eigen.natvis
+++ b/debug/msvc/eigen.natvis
@@ -1,235 +1,235 @@
-<?xml version="1.0" encoding="utf-8"?>
-
-<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
-
- <!-- Fixed x Fixed Matrix -->
- <Type Name="Eigen::Matrix<*,*,*,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,-1,-1,*,*,*>"/>
- <DisplayString>[{$T2}, {$T3}] (fixed matrix)</DisplayString>
- <Expand>
- <ArrayItems Condition="Flags%2"> <!-- row major layout -->
- <Rank>2</Rank>
- <Size>$i==0 ? $T2 : $T3</Size>
- <ValuePointer>m_storage.m_data.array</ValuePointer>
- </ArrayItems>
- <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
- <Direction>Backward</Direction>
- <Rank>2</Rank>
- <Size>$i==0 ? $T2 : $T3</Size>
- <ValuePointer>m_storage.m_data.array</ValuePointer>
- </ArrayItems>
- </Expand>
- </Type>
-
- <!-- 2 x 2 Matrix -->
- <Type Name="Eigen::Matrix<*,2,2,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,2,2,*,*,*>"/>
- <DisplayString>[2, 2] (fixed matrix)</DisplayString>
- <Expand>
- <Synthetic Name="[row 0]" Condition="Flags%2">
- <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 0]" Condition="!(Flags%2)">
- <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 1]" Condition="Flags%2">
- <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 1]" Condition="!(Flags%2)">
- <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]})</DisplayString>
- </Synthetic>
- </Expand>
- </Type>
-
- <!-- 3 x 3 Matrix -->
- <Type Name="Eigen::Matrix<*,3,3,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,3,3,*,*,*>"/>
- <DisplayString>[3, 3] (fixed matrix)</DisplayString>
- <Expand>
- <Synthetic Name="[row 0]" Condition="Flags%2">
- <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 0]" Condition="!(Flags%2)">
- <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 1]" Condition="Flags%2">
- <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 1]" Condition="!(Flags%2)">
- <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 2]" Condition="Flags%2">
- <DisplayString>({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 2]" Condition="!(Flags%2)">
- <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]})</DisplayString>
- </Synthetic>
- </Expand>
- </Type>
-
- <!-- 4 x 4 Matrix -->
- <Type Name="Eigen::Matrix<*,4,4,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,4,4,*,*,*>"/>
- <DisplayString>[4, 4] (fixed matrix)</DisplayString>
- <Expand>
- <Synthetic Name="[row 0]" Condition="Flags%2">
- <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 0]" Condition="!(Flags%2)">
- <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 1]" Condition="Flags%2">
- <DisplayString>({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 1]" Condition="!(Flags%2)">
- <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 2]" Condition="Flags%2">
- <DisplayString>({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 2]" Condition="!(Flags%2)">
- <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 3]" Condition="Flags%2">
- <DisplayString>({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]})</DisplayString>
- </Synthetic>
- <Synthetic Name="[row 3]" Condition="!(Flags%2)">
- <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]})</DisplayString>
- </Synthetic>
- </Expand>
- </Type>
-
- <!-- Dynamic x Dynamic Matrix -->
- <Type Name="Eigen::Matrix<*,-1,-1,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,-1,-1,*,*,*>"/>
- <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
- <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix)</DisplayString>
- <Expand>
- <ArrayItems Condition="Flags%2"> <!-- row major layout -->
- <Rank>2</Rank>
- <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
- <ValuePointer>m_storage.m_data</ValuePointer>
- </ArrayItems>
- <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
- <Direction>Backward</Direction>
- <Rank>2</Rank>
- <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
- <ValuePointer>m_storage.m_data</ValuePointer>
- </ArrayItems>
- </Expand>
- </Type>
-
- <!-- Fixed x Dynamic Matrix -->
- <Type Name="Eigen::Matrix<*,*,-1,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,*,-1,*,*,*>"/>
- <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
- <DisplayString Condition="m_storage.m_data != 0">[{$T2}, {m_storage.m_cols}] (dynamic column matrix)</DisplayString>
- <Expand>
- <ArrayItems Condition="Flags%2"> <!-- row major layout -->
- <Rank>2</Rank>
- <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
- <ValuePointer>m_storage.m_data</ValuePointer>
- </ArrayItems>
- <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
- <Direction>Backward</Direction>
- <Rank>2</Rank>
- <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
- <ValuePointer>m_storage.m_data</ValuePointer>
- </ArrayItems>
- </Expand>
- </Type>
-
- <!-- Dynamic x Fixed Matrix -->
- <Type Name="Eigen::Matrix<*,-1,*,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,-1,*,*,*,*>"/>
- <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
- <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {$T2}] (dynamic row matrix)</DisplayString>
- <Expand>
- <ArrayItems Condition="Flags%2"> <!-- row major layout -->
- <Rank>2</Rank>
- <Size>$i==0 ? m_storage.m_rows : $T2</Size>
- <ValuePointer>m_storage.m_data</ValuePointer>
- </ArrayItems>
- <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
- <Direction>Backward</Direction>
- <Rank>2</Rank>
- <Size>$i==0 ? m_storage.m_rows : $T2</Size>
- <ValuePointer>m_storage.m_data</ValuePointer>
- </ArrayItems>
- </Expand>
- </Type>
-
- <!-- Dynamic Column Vector -->
- <Type Name="Eigen::Matrix<*,1,-1,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,1,-1,*,*,*>"/>
- <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
- <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_cols}] (dynamic column vector)</DisplayString>
- <Expand>
- <Item Name="[size]">m_storage.m_cols</Item>
- <ArrayItems>
- <Size>m_storage.m_cols</Size>
- <ValuePointer>m_storage.m_data</ValuePointer>
- </ArrayItems>
- </Expand>
- </Type>
-
- <!-- Dynamic Row Vector -->
- <Type Name="Eigen::Matrix<*,-1,1,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,-1,1,*,*,*>"/>
- <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
- <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}] (dynamic row vector)</DisplayString>
- <Expand>
- <Item Name="[size]">m_storage.m_rows</Item>
- <ArrayItems>
- <Size>m_storage.m_rows</Size>
- <ValuePointer>m_storage.m_data</ValuePointer>
- </ArrayItems>
- </Expand>
- </Type>
-
- <!-- Fixed Vector -->
- <Type Name="Eigen::Matrix<*,1,1,*,*,*>">
- <AlternativeType Name="Eigen::Array<*,1,1,*,*,*>"/>
- <DisplayString>[1] ({m_storage.m_data.array[0]})</DisplayString>
- <Expand>
- <Item Name="[x]">m_storage.m_data.array[0]</Item>
- </Expand>
- </Type>
-
- <Type Name="Eigen::Matrix<*,2,1,*,*,*>">
- <AlternativeType Name="Eigen::Matrix<*,1,2,*,*,*>"/>
- <AlternativeType Name="Eigen::Array<*,2,1,*,*,*>"/>
- <AlternativeType Name="Eigen::Array<*,1,2,*,*,*>"/>
- <DisplayString>[2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
- <Expand>
- <Item Name="[x]">m_storage.m_data.array[0]</Item>
- <Item Name="[y]">m_storage.m_data.array[1]</Item>
- </Expand>
- </Type>
-
- <Type Name="Eigen::Matrix<*,3,1,*,*,*>">
- <AlternativeType Name="Eigen::Matrix<*,1,3,*,*,*>"/>
- <AlternativeType Name="Eigen::Array<*,3,1,*,*,*>"/>
- <AlternativeType Name="Eigen::Array<*,1,3,*,*,*>"/>
- <DisplayString>[3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
- <Expand>
- <Item Name="[x]">m_storage.m_data.array[0]</Item>
- <Item Name="[y]">m_storage.m_data.array[1]</Item>
- <Item Name="[z]">m_storage.m_data.array[2]</Item>
- </Expand>
- </Type>
-
- <Type Name="Eigen::Matrix<*,4,1,*,*,*>">
- <AlternativeType Name="Eigen::Matrix<*,1,4,*,*,*>"/>
- <AlternativeType Name="Eigen::Array<*,4,1,*,*,*>"/>
- <AlternativeType Name="Eigen::Array<*,1,4,*,*,*>"/>
- <DisplayString>[4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
- <Expand>
- <Item Name="[x]">m_storage.m_data.array[0]</Item>
- <Item Name="[y]">m_storage.m_data.array[1]</Item>
- <Item Name="[z]">m_storage.m_data.array[2]</Item>
- <Item Name="[w]">m_storage.m_data.array[3]</Item>
- </Expand>
- </Type>
-
-</AutoVisualizer>
+<?xml version="1.0" encoding="utf-8"?>
+
+<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
+
+ <!-- Fixed x Fixed Matrix -->
+ <Type Name="Eigen::Matrix<*,*,*,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,-1,-1,*,*,*>"/>
+ <DisplayString>[{$T2}, {$T3}] (fixed matrix)</DisplayString>
+ <Expand>
+ <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+ <Rank>2</Rank>
+ <Size>$i==0 ? $T2 : $T3</Size>
+ <ValuePointer>m_storage.m_data.array</ValuePointer>
+ </ArrayItems>
+ <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+ <Direction>Backward</Direction>
+ <Rank>2</Rank>
+ <Size>$i==0 ? $T2 : $T3</Size>
+ <ValuePointer>m_storage.m_data.array</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <!-- 2 x 2 Matrix -->
+ <Type Name="Eigen::Matrix<*,2,2,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,2,2,*,*,*>"/>
+ <DisplayString>[2, 2] (fixed matrix)</DisplayString>
+ <Expand>
+ <Synthetic Name="[row 0]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 0]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 1]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 1]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]})</DisplayString>
+ </Synthetic>
+ </Expand>
+ </Type>
+
+ <!-- 3 x 3 Matrix -->
+ <Type Name="Eigen::Matrix<*,3,3,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,3,3,*,*,*>"/>
+ <DisplayString>[3, 3] (fixed matrix)</DisplayString>
+ <Expand>
+ <Synthetic Name="[row 0]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 0]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 1]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 1]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 2]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 2]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]})</DisplayString>
+ </Synthetic>
+ </Expand>
+ </Type>
+
+ <!-- 4 x 4 Matrix -->
+ <Type Name="Eigen::Matrix<*,4,4,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,4,4,*,*,*>"/>
+ <DisplayString>[4, 4] (fixed matrix)</DisplayString>
+ <Expand>
+ <Synthetic Name="[row 0]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 0]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 1]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 1]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 2]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 2]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 3]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 3]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]})</DisplayString>
+ </Synthetic>
+ </Expand>
+ </Type>
+
+ <!-- Dynamic x Dynamic Matrix -->
+ <Type Name="Eigen::Matrix<*,-1,-1,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,-1,-1,*,*,*>"/>
+ <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+ <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix)</DisplayString>
+ <Expand>
+ <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+ <Rank>2</Rank>
+ <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+ <Direction>Backward</Direction>
+ <Rank>2</Rank>
+ <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <!-- Fixed x Dynamic Matrix -->
+ <Type Name="Eigen::Matrix<*,*,-1,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,*,-1,*,*,*>"/>
+ <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+ <DisplayString Condition="m_storage.m_data != 0">[{$T2}, {m_storage.m_cols}] (dynamic column matrix)</DisplayString>
+ <Expand>
+ <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+ <Rank>2</Rank>
+ <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+ <Direction>Backward</Direction>
+ <Rank>2</Rank>
+ <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <!-- Dynamic x Fixed Matrix -->
+ <Type Name="Eigen::Matrix<*,-1,*,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,-1,*,*,*,*>"/>
+ <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+ <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {$T2}] (dynamic row matrix)</DisplayString>
+ <Expand>
+ <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+ <Rank>2</Rank>
+ <Size>$i==0 ? m_storage.m_rows : $T2</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+ <Direction>Backward</Direction>
+ <Rank>2</Rank>
+ <Size>$i==0 ? m_storage.m_rows : $T2</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <!-- Dynamic Column Vector -->
+ <Type Name="Eigen::Matrix<*,1,-1,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,1,-1,*,*,*>"/>
+ <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+ <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_cols}] (dynamic column vector)</DisplayString>
+ <Expand>
+ <Item Name="[size]">m_storage.m_cols</Item>
+ <ArrayItems>
+ <Size>m_storage.m_cols</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <!-- Dynamic Row Vector -->
+ <Type Name="Eigen::Matrix<*,-1,1,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,-1,1,*,*,*>"/>
+ <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+ <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}] (dynamic row vector)</DisplayString>
+ <Expand>
+ <Item Name="[size]">m_storage.m_rows</Item>
+ <ArrayItems>
+ <Size>m_storage.m_rows</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <!-- Fixed Vector -->
+ <Type Name="Eigen::Matrix<*,1,1,*,*,*>">
+ <AlternativeType Name="Eigen::Array<*,1,1,*,*,*>"/>
+ <DisplayString>[1] ({m_storage.m_data.array[0]})</DisplayString>
+ <Expand>
+ <Item Name="[x]">m_storage.m_data.array[0]</Item>
+ </Expand>
+ </Type>
+
+ <Type Name="Eigen::Matrix<*,2,1,*,*,*>">
+ <AlternativeType Name="Eigen::Matrix<*,1,2,*,*,*>"/>
+ <AlternativeType Name="Eigen::Array<*,2,1,*,*,*>"/>
+ <AlternativeType Name="Eigen::Array<*,1,2,*,*,*>"/>
+ <DisplayString>[2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
+ <Expand>
+ <Item Name="[x]">m_storage.m_data.array[0]</Item>
+ <Item Name="[y]">m_storage.m_data.array[1]</Item>
+ </Expand>
+ </Type>
+
+ <Type Name="Eigen::Matrix<*,3,1,*,*,*>">
+ <AlternativeType Name="Eigen::Matrix<*,1,3,*,*,*>"/>
+ <AlternativeType Name="Eigen::Array<*,3,1,*,*,*>"/>
+ <AlternativeType Name="Eigen::Array<*,1,3,*,*,*>"/>
+ <DisplayString>[3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
+ <Expand>
+ <Item Name="[x]">m_storage.m_data.array[0]</Item>
+ <Item Name="[y]">m_storage.m_data.array[1]</Item>
+ <Item Name="[z]">m_storage.m_data.array[2]</Item>
+ </Expand>
+ </Type>
+
+ <Type Name="Eigen::Matrix<*,4,1,*,*,*>">
+ <AlternativeType Name="Eigen::Matrix<*,1,4,*,*,*>"/>
+ <AlternativeType Name="Eigen::Array<*,4,1,*,*,*>"/>
+ <AlternativeType Name="Eigen::Array<*,1,4,*,*,*>"/>
+ <DisplayString>[4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
+ <Expand>
+ <Item Name="[x]">m_storage.m_data.array[0]</Item>
+ <Item Name="[y]">m_storage.m_data.array[1]</Item>
+ <Item Name="[z]">m_storage.m_data.array[2]</Item>
+ <Item Name="[w]">m_storage.m_data.array[3]</Item>
+ </Expand>
+ </Type>
+
+</AutoVisualizer>
diff --git a/debug/msvc/eigen_autoexp_part.dat b/debug/msvc/eigen_autoexp_part.dat
index 273c10d..35ef580 100644
--- a/debug/msvc/eigen_autoexp_part.dat
+++ b/debug/msvc/eigen_autoexp_part.dat
@@ -1,295 +1,295 @@
-; ***************************************************************
-; * Eigen Visualizer
-; *
-; * Author: Hauke Heibel <hauke.heibel@gmail.com>
-; *
-; * Support the enhanced debugging of the following Eigen
-; * types (*: any, +:fixed dimension) :
-; *
-; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*>
-; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*>
-; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*>
-; * - Eigen::Matrix<*,-1,-1,*,*,*>
-; * - Eigen::Matrix<*,+,-1,*,*,*>
-; * - Eigen::Matrix<*,-1,+,*,*,*>
-; * - Eigen::Matrix<*,+,+,*,*,*>
-; *
-; * Matrices are displayed properly independently of the memory
-; * alignment (RowMajor vs. ColMajor).
-; *
-; * This file is distributed WITHOUT ANY WARRANTY. Please ensure
-; * that your original autoexp.dat file is copied to a safe
-; * place before proceeding with its modification.
-; ***************************************************************
-
-[Visualizer]
-
-; Fixed size 4-vectors
-Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{
- children
- (
- #(
- [internals]: [$c,!],
- x : ($c.m_storage.m_data.array)[0],
- y : ($c.m_storage.m_data.array)[1],
- z : ($c.m_storage.m_data.array)[2],
- w : ($c.m_storage.m_data.array)[3]
- )
- )
-
- preview
- (
- #(
- "[",
- 4,
- "](",
- #array(expr: $e.m_storage.m_data.array[$i], size: 4),
- ")"
- )
- )
-}
-
-; Fixed size 3-vectors
-Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{
- children
- (
- #(
- [internals]: [$c,!],
- x : ($c.m_storage.m_data.array)[0],
- y : ($c.m_storage.m_data.array)[1],
- z : ($c.m_storage.m_data.array)[2]
- )
- )
-
- preview
- (
- #(
- "[",
- 3,
- "](",
- #array(expr: $e.m_storage.m_data.array[$i], size: 3),
- ")"
- )
- )
-}
-
-; Fixed size 2-vectors
-Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{
- children
- (
- #(
- [internals]: [$c,!],
- x : ($c.m_storage.m_data.array)[0],
- y : ($c.m_storage.m_data.array)[1]
- )
- )
-
- preview
- (
- #(
- "[",
- 2,
- "](",
- #array(expr: $e.m_storage.m_data.array[$i], size: 2),
- ")"
- )
- )
-}
-
-; Fixed size 1-vectors
-Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{
- children
- (
- #(
- [internals]: [$c,!],
- x : ($c.m_storage.m_data.array)[0]
- )
- )
-
- preview
- (
- #(
- "[",
- 1,
- "](",
- #array(expr: $e.m_storage.m_data.array[$i], size: 1),
- ")"
- )
- )
-}
-
-; Dynamic matrices (ColMajor and RowMajor support)
-Eigen::Matrix<*,-1,-1,*,*,*>{
- children
- (
- #(
- [internals]: [$c,!],
- rows: $c.m_storage.m_rows,
- cols: $c.m_storage.m_cols,
- ; Check for RowMajorBit
- #if ($c.Flags & 0x1) (
- #array(
- rank: 2,
- base: 0,
- expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)],
- size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
- )
- ) #else (
- #array(
- rank: 2,
- base: 0,
- expr: ($c.m_storage.m_data)[$i],
- size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
- )
- )
- )
- )
-
- preview
- (
- #(
- "[",
- $c.m_storage.m_rows,
- ",",
- $c.m_storage.m_cols,
- "](",
- #array(
- expr : [($c.m_storage.m_data)[$i],g],
- size : $c.m_storage.m_rows*$c.m_storage.m_cols
- ),
- ")"
- )
- )
-}
-
-; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support)
-Eigen::Matrix<*,*,-1,*,*,*>{
- children
- (
- #(
- [internals]: [$c,!],
- rows: $c.RowsAtCompileTime,
- cols: $c.m_storage.m_cols,
- ; Check for RowMajorBit
- #if ($c.Flags & 0x1) (
- #array(
- rank: 2,
- base: 0,
- expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)],
- size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
- )
- ) #else (
- #array(
- rank: 2,
- base: 0,
- expr: ($c.m_storage.m_data)[$i],
- size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
- )
- )
- )
- )
-
- preview
- (
- #(
- "[",
- $c.RowsAtCompileTime,
- ",",
- $c.m_storage.m_cols,
- "](",
- #array(
- expr : [($c.m_storage.m_data)[$i],g],
- size : $c.RowsAtCompileTime*$c.m_storage.m_cols
- ),
- ")"
- )
- )
-}
-
-; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support)
-Eigen::Matrix<*,-1,*,*,*,*>{
- children
- (
- #(
- [internals]: [$c,!],
- rows: $c.m_storage.m_rows,
- cols: $c.ColsAtCompileTime,
- ; Check for RowMajorBit
- #if ($c.Flags & 0x1) (
- #array(
- rank: 2,
- base: 0,
- expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)],
- size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
- )
- ) #else (
- #array(
- rank: 2,
- base: 0,
- expr: ($c.m_storage.m_data)[$i],
- size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
- )
- )
- )
- )
-
- preview
- (
- #(
- "[",
- $c.m_storage.m_rows,
- ",",
- $c.ColsAtCompileTime,
- "](",
- #array(
- expr : [($c.m_storage.m_data)[$i],g],
- size : $c.m_storage.m_rows*$c.ColsAtCompileTime
- ),
- ")"
- )
- )
-}
-
-; Fixed size matrix (ColMajor and RowMajor support)
-Eigen::Matrix<*,*,*,*,*,*>{
- children
- (
- #(
- [internals]: [$c,!],
- rows: $c.RowsAtCompileTime,
- cols: $c.ColsAtCompileTime,
- ; Check for RowMajorBit
- #if ($c.Flags & 0x1) (
- #array(
- rank: 2,
- base: 0,
- expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)],
- size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
- )
- ) #else (
- #array(
- rank: 2,
- base: 0,
- expr: ($c.m_storage.m_data.array)[$i],
- size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
- )
- )
- )
- )
-
- preview
- (
- #(
- "[",
- $c.RowsAtCompileTime,
- ",",
- $c.ColsAtCompileTime,
- "](",
- #array(
- expr : [($c.m_storage.m_data.array)[$i],g],
- size : $c.RowsAtCompileTime*$c.ColsAtCompileTime
- ),
- ")"
- )
- )
-}
+; ***************************************************************
+; * Eigen Visualizer
+; *
+; * Author: Hauke Heibel <hauke.heibel@gmail.com>
+; *
+; * Support the enhanced debugging of the following Eigen
+; * types (*: any, +:fixed dimension) :
+; *
+; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*>
+; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*>
+; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*>
+; * - Eigen::Matrix<*,-1,-1,*,*,*>
+; * - Eigen::Matrix<*,+,-1,*,*,*>
+; * - Eigen::Matrix<*,-1,+,*,*,*>
+; * - Eigen::Matrix<*,+,+,*,*,*>
+; *
+; * Matrices are displayed properly independently of the memory
+; * alignment (RowMajor vs. ColMajor).
+; *
+; * This file is distributed WITHOUT ANY WARRANTY. Please ensure
+; * that your original autoexp.dat file is copied to a safe
+; * place before proceeding with its modification.
+; ***************************************************************
+
+[Visualizer]
+
+; Fixed size 4-vectors
+Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{
+ children
+ (
+ #(
+ [internals]: [$c,!],
+ x : ($c.m_storage.m_data.array)[0],
+ y : ($c.m_storage.m_data.array)[1],
+ z : ($c.m_storage.m_data.array)[2],
+ w : ($c.m_storage.m_data.array)[3]
+ )
+ )
+
+ preview
+ (
+ #(
+ "[",
+ 4,
+ "](",
+ #array(expr: $e.m_storage.m_data.array[$i], size: 4),
+ ")"
+ )
+ )
+}
+
+; Fixed size 3-vectors
+Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{
+ children
+ (
+ #(
+ [internals]: [$c,!],
+ x : ($c.m_storage.m_data.array)[0],
+ y : ($c.m_storage.m_data.array)[1],
+ z : ($c.m_storage.m_data.array)[2]
+ )
+ )
+
+ preview
+ (
+ #(
+ "[",
+ 3,
+ "](",
+ #array(expr: $e.m_storage.m_data.array[$i], size: 3),
+ ")"
+ )
+ )
+}
+
+; Fixed size 2-vectors
+Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{
+ children
+ (
+ #(
+ [internals]: [$c,!],
+ x : ($c.m_storage.m_data.array)[0],
+ y : ($c.m_storage.m_data.array)[1]
+ )
+ )
+
+ preview
+ (
+ #(
+ "[",
+ 2,
+ "](",
+ #array(expr: $e.m_storage.m_data.array[$i], size: 2),
+ ")"
+ )
+ )
+}
+
+; Fixed size 1-vectors
+Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{
+ children
+ (
+ #(
+ [internals]: [$c,!],
+ x : ($c.m_storage.m_data.array)[0]
+ )
+ )
+
+ preview
+ (
+ #(
+ "[",
+ 1,
+ "](",
+ #array(expr: $e.m_storage.m_data.array[$i], size: 1),
+ ")"
+ )
+ )
+}
+
+; Dynamic matrices (ColMajor and RowMajor support)
+Eigen::Matrix<*,-1,-1,*,*,*>{
+ children
+ (
+ #(
+ [internals]: [$c,!],
+ rows: $c.m_storage.m_rows,
+ cols: $c.m_storage.m_cols,
+ ; Check for RowMajorBit
+ #if ($c.Flags & 0x1) (
+ #array(
+ rank: 2,
+ base: 0,
+ expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)],
+ size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
+ )
+ ) #else (
+ #array(
+ rank: 2,
+ base: 0,
+ expr: ($c.m_storage.m_data)[$i],
+ size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
+ )
+ )
+ )
+ )
+
+ preview
+ (
+ #(
+ "[",
+ $c.m_storage.m_rows,
+ ",",
+ $c.m_storage.m_cols,
+ "](",
+ #array(
+ expr : [($c.m_storage.m_data)[$i],g],
+ size : $c.m_storage.m_rows*$c.m_storage.m_cols
+ ),
+ ")"
+ )
+ )
+}
+
+; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support)
+Eigen::Matrix<*,*,-1,*,*,*>{
+ children
+ (
+ #(
+ [internals]: [$c,!],
+ rows: $c.RowsAtCompileTime,
+ cols: $c.m_storage.m_cols,
+ ; Check for RowMajorBit
+ #if ($c.Flags & 0x1) (
+ #array(
+ rank: 2,
+ base: 0,
+ expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)],
+ size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
+ )
+ ) #else (
+ #array(
+ rank: 2,
+ base: 0,
+ expr: ($c.m_storage.m_data)[$i],
+ size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
+ )
+ )
+ )
+ )
+
+ preview
+ (
+ #(
+ "[",
+ $c.RowsAtCompileTime,
+ ",",
+ $c.m_storage.m_cols,
+ "](",
+ #array(
+ expr : [($c.m_storage.m_data)[$i],g],
+ size : $c.RowsAtCompileTime*$c.m_storage.m_cols
+ ),
+ ")"
+ )
+ )
+}
+
+; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support)
+Eigen::Matrix<*,-1,*,*,*,*>{
+ children
+ (
+ #(
+ [internals]: [$c,!],
+ rows: $c.m_storage.m_rows,
+ cols: $c.ColsAtCompileTime,
+ ; Check for RowMajorBit
+ #if ($c.Flags & 0x1) (
+ #array(
+ rank: 2,
+ base: 0,
+ expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)],
+ size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
+ )
+ ) #else (
+ #array(
+ rank: 2,
+ base: 0,
+ expr: ($c.m_storage.m_data)[$i],
+ size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
+ )
+ )
+ )
+ )
+
+ preview
+ (
+ #(
+ "[",
+ $c.m_storage.m_rows,
+ ",",
+ $c.ColsAtCompileTime,
+ "](",
+ #array(
+ expr : [($c.m_storage.m_data)[$i],g],
+ size : $c.m_storage.m_rows*$c.ColsAtCompileTime
+ ),
+ ")"
+ )
+ )
+}
+
+; Fixed size matrix (ColMajor and RowMajor support)
+Eigen::Matrix<*,*,*,*,*,*>{
+ children
+ (
+ #(
+ [internals]: [$c,!],
+ rows: $c.RowsAtCompileTime,
+ cols: $c.ColsAtCompileTime,
+ ; Check for RowMajorBit
+ #if ($c.Flags & 0x1) (
+ #array(
+ rank: 2,
+ base: 0,
+ expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)],
+ size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
+ )
+ ) #else (
+ #array(
+ rank: 2,
+ base: 0,
+ expr: ($c.m_storage.m_data.array)[$i],
+ size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
+ )
+ )
+ )
+ )
+
+ preview
+ (
+ #(
+ "[",
+ $c.RowsAtCompileTime,
+ ",",
+ $c.ColsAtCompileTime,
+ "](",
+ #array(
+ expr : [($c.m_storage.m_data.array)[$i],g],
+ size : $c.RowsAtCompileTime*$c.ColsAtCompileTime
+ ),
+ ")"
+ )
+ )
+}
diff --git a/doc/QuickReference.dox b/doc/QuickReference.dox
index 9c8e6fb..c5dfce4 100644
--- a/doc/QuickReference.dox
+++ b/doc/QuickReference.dox
@@ -480,13 +480,14 @@
while the second one (based on .array()) returns an array expression.
Recall that .array() has no cost, it only changes the available API and interpretation of the data.
-It is also very simple to apply any user defined function \c foo using DenseBase::unaryExpr together with <a href="http://en.cppreference.com/w/cpp/utility/functional/ptr_fun">std::ptr_fun</a> (c++03), <a href="http://en.cppreference.com/w/cpp/utility/functional/ref">std::ref</a> (c++11), or <a href="http://en.cppreference.com/w/cpp/language/lambda">lambdas</a> (c++11):
+It is also very simple to apply any user defined function \c foo using DenseBase::unaryExpr together with <a href="http://en.cppreference.com/w/cpp/utility/functional/ptr_fun">std::ptr_fun</a> (c++03, deprecated or removed in newer C++ versions), <a href="http://en.cppreference.com/w/cpp/utility/functional/ref">std::ref</a> (c++11), or <a href="http://en.cppreference.com/w/cpp/language/lambda">lambdas</a> (c++11):
\code
mat1.unaryExpr(std::ptr_fun(foo));
mat1.unaryExpr(std::ref(foo));
mat1.unaryExpr([](double x) { return foo(x); });
\endcode
+Please note that it's not possible to pass a raw function pointer to \c unaryExpr, so please warp it as shown above.
<a href="#" class="top">top</a>
\section QuickRef_Reductions Reductions
diff --git a/doc/examples/nullary_indexing.cpp b/doc/examples/nullary_indexing.cpp
index ca17456..b74db5f 100644
--- a/doc/examples/nullary_indexing.cpp
+++ b/doc/examples/nullary_indexing.cpp
@@ -56,7 +56,7 @@
B = mat_indexing(A, ri+1, ci);
std::cout << "A(ri+1,ci) =" << std::endl;
std::cout << B << std::endl << std::endl;
-#if __cplusplus >= 201103L
+#if EIGEN_COMP_CXXVER >= 11
B = mat_indexing(A, ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3));
std::cout << "A(ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3)) =" << std::endl;
std::cout << B << std::endl << std::endl;
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index c624950..f6390e8 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -41,26 +41,26 @@
set(SPARSE_LIBS " ")
-find_package(Cholmod)
+find_package(CHOLMOD)
if(CHOLMOD_FOUND)
add_definitions("-DEIGEN_CHOLMOD_SUPPORT")
include_directories(${CHOLMOD_INCLUDES})
set(SPARSE_LIBS ${SPARSE_LIBS} ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES})
set(CHOLMOD_ALL_LIBS ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES})
- ei_add_property(EIGEN_TESTED_BACKENDS "Cholmod, ")
+ ei_add_property(EIGEN_TESTED_BACKENDS "CHOLMOD, ")
else()
- ei_add_property(EIGEN_MISSING_BACKENDS "Cholmod, ")
+ ei_add_property(EIGEN_MISSING_BACKENDS "CHOLMOD, ")
endif()
-find_package(Umfpack)
+find_package(UMFPACK)
if(UMFPACK_FOUND)
add_definitions("-DEIGEN_UMFPACK_SUPPORT")
include_directories(${UMFPACK_INCLUDES})
set(SPARSE_LIBS ${SPARSE_LIBS} ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
set(UMFPACK_ALL_LIBS ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
- ei_add_property(EIGEN_TESTED_BACKENDS "UmfPack, ")
+ ei_add_property(EIGEN_TESTED_BACKENDS "UMFPACK, ")
else()
- ei_add_property(EIGEN_MISSING_BACKENDS "UmfPack, ")
+ ei_add_property(EIGEN_MISSING_BACKENDS "UMFPACK, ")
endif()
find_package(KLU)
@@ -75,7 +75,7 @@
endif()
find_package(SuperLU 4.0)
-if(SUPERLU_FOUND)
+if(SuperLU_FOUND)
add_definitions("-DEIGEN_SUPERLU_SUPPORT")
include_directories(${SUPERLU_INCLUDES})
set(SPARSE_LIBS ${SPARSE_LIBS} ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
diff --git a/test/array_cwise.cpp b/test/array_cwise.cpp
index 7f7e44f..6a88e0e 100644
--- a/test/array_cwise.cpp
+++ b/test/array_cwise.cpp
@@ -25,7 +25,7 @@
const Scalar denorm_min = std::numeric_limits<Scalar>::denorm_min();
const Scalar min = (std::numeric_limits<Scalar>::min)();
const Scalar max = (std::numeric_limits<Scalar>::max)();
- const Scalar max_exp = (static_cast<Scalar>(std::numeric_limits<Scalar>::max_exponent) * Scalar(EIGEN_LN2)) / eps;
+ const Scalar max_exp = (static_cast<Scalar>(int(std::numeric_limits<Scalar>::max_exponent)) * Scalar(EIGEN_LN2)) / eps;
const static Scalar abs_vals[] = {zero,
denorm_min,
@@ -610,6 +610,20 @@
VERIFY_IS_APPROX(ArrayType::Constant(rows,cols, maxM1), (m1.max)( maxM1));
VERIFY_IS_APPROX(m1, (m1.max)( minM1));
+
+ // min/max with various NaN propagation options.
+ if (m1.size() > 1 && !NumTraits<Scalar>::IsInteger) {
+ m1(0,0) = std::numeric_limits<Scalar>::quiet_NaN();
+ maxM1 = m1.template maxCoeff<PropagateNaN>();
+ minM1 = m1.template minCoeff<PropagateNaN>();
+ VERIFY((numext::isnan)(maxM1));
+ VERIFY((numext::isnan)(minM1));
+
+ maxM1 = m1.template maxCoeff<PropagateNumbers>();
+ minM1 = m1.template minCoeff<PropagateNumbers>();
+ VERIFY(!(numext::isnan)(maxM1));
+ VERIFY(!(numext::isnan)(minM1));
+ }
}
EIGEN_DECLARE_TEST(array_cwise)
diff --git a/test/boostmultiprec.cpp b/test/boostmultiprec.cpp
index 50ef173..7c79ded 100644
--- a/test/boostmultiprec.cpp
+++ b/test/boostmultiprec.cpp
@@ -204,5 +204,5 @@
CALL_SUBTEST_9(( jacobisvd(Mat(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) ));
CALL_SUBTEST_10(( bdcsvd(Mat(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) ));
- CALL_SUBTEST_11(( test_simplicial_cholesky_T<Real,int>() ));
+ CALL_SUBTEST_11(( test_simplicial_cholesky_T<Real,int,ColMajor>() ));
}
diff --git a/test/geo_alignedbox.cpp b/test/geo_alignedbox.cpp
index 7ce640d..7b1684f 100644
--- a/test/geo_alignedbox.cpp
+++ b/test/geo_alignedbox.cpp
@@ -10,7 +10,6 @@
#include "main.h"
#include <Eigen/Geometry>
-#include<iostream>
using namespace std;
// NOTE the following workaround was needed on some 32 bits builds to kill extra precision of x87 registers.
@@ -26,7 +25,7 @@
}
-template<typename BoxType> void alignedbox(const BoxType& _box)
+template<typename BoxType> void alignedbox(const BoxType& box)
{
/* this test covers the following files:
AlignedBox.h
@@ -36,7 +35,7 @@
typedef typename ScalarTraits::Real RealScalar;
typedef Matrix<Scalar, BoxType::AmbientDimAtCompileTime, 1> VectorType;
- const Index dim = _box.dim();
+ const Index dim = box.dim();
VectorType p0 = VectorType::Random(dim);
VectorType p1 = VectorType::Random(dim);
@@ -87,18 +86,18 @@
}
-template<typename BoxType> void alignedboxTranslatable(const BoxType& _box)
+template<typename BoxType> void alignedboxTranslatable(const BoxType& box)
{
typedef typename BoxType::Scalar Scalar;
typedef Matrix<Scalar, BoxType::AmbientDimAtCompileTime, 1> VectorType;
typedef Transform<Scalar, BoxType::AmbientDimAtCompileTime, Isometry> IsometryTransform;
typedef Transform<Scalar, BoxType::AmbientDimAtCompileTime, Affine> AffineTransform;
- alignedbox(_box);
+ alignedbox(box);
const VectorType Ones = VectorType::Ones();
const VectorType UnitX = VectorType::UnitX();
- const Index dim = _box.dim();
+ const Index dim = box.dim();
// box((-1, -1, -1), (1, 1, 1))
BoxType a(-Ones, Ones);
@@ -175,33 +174,33 @@
}
template<typename Scalar, typename Rotation>
-Rotation rotate2D(Scalar _angle) {
- return Rotation2D<Scalar>(_angle);
+Rotation rotate2D(Scalar angle) {
+ return Rotation2D<Scalar>(angle);
}
template<typename Scalar, typename Rotation>
-Rotation rotate2DIntegral(typename NumTraits<Scalar>::NonInteger _angle) {
+Rotation rotate2DIntegral(typename NumTraits<Scalar>::NonInteger angle) {
typedef typename NumTraits<Scalar>::NonInteger NonInteger;
- return Rotation2D<NonInteger>(_angle).toRotationMatrix().
+ return Rotation2D<NonInteger>(angle).toRotationMatrix().
template cast<Scalar>();
}
template<typename Scalar, typename Rotation>
-Rotation rotate3DZAxis(Scalar _angle) {
- return AngleAxis<Scalar>(_angle, Matrix<Scalar, 3, 1>(0, 0, 1));
+Rotation rotate3DZAxis(Scalar angle) {
+ return AngleAxis<Scalar>(angle, Matrix<Scalar, 3, 1>(0, 0, 1));
}
template<typename Scalar, typename Rotation>
-Rotation rotate3DZAxisIntegral(typename NumTraits<Scalar>::NonInteger _angle) {
+Rotation rotate3DZAxisIntegral(typename NumTraits<Scalar>::NonInteger angle) {
typedef typename NumTraits<Scalar>::NonInteger NonInteger;
- return AngleAxis<NonInteger>(_angle, Matrix<NonInteger, 3, 1>(0, 0, 1)).
+ return AngleAxis<NonInteger>(angle, Matrix<NonInteger, 3, 1>(0, 0, 1)).
toRotationMatrix().template cast<Scalar>();
}
template<typename Scalar, typename Rotation>
-Rotation rotate4DZWAxis(Scalar _angle) {
+Rotation rotate4DZWAxis(Scalar angle) {
Rotation result = Matrix<Scalar, 4, 4>::Identity();
- result.block(0, 0, 3, 3) = rotate3DZAxis<Scalar, AngleAxisd>(_angle).toRotationMatrix();
+ result.block(0, 0, 3, 3) = rotate3DZAxis<Scalar, AngleAxisd>(angle).toRotationMatrix();
return result;
}
@@ -221,38 +220,22 @@
}
template <typename Scalar, int Dim>
-std::vector<Matrix<Scalar, Dim, 1> > boxGetCorners(const Matrix<Scalar, Dim, 1>& _min, const Matrix<Scalar, Dim, 1>& _max, int dim = Dim)
+Matrix<Scalar, Dim, (1<<Dim)> boxGetCorners(const Matrix<Scalar, Dim, 1>& min_, const Matrix<Scalar, Dim, 1>& max_)
{
- std::vector<Matrix<Scalar, Dim, 1> > result;
- if (dim == 1)
+ Matrix<Scalar, Dim, (1<<Dim) > result;
+ for(Index i=0; i<(1<<Dim); ++i)
{
- result.push_back(_min);
- result.push_back(_max);
- }
- else
- {
- std::vector<Matrix<Scalar, Dim, 1> > shorter = boxGetCorners(_min, _max, dim - 1);
- for (size_t i = 0; i < shorter.size(); ++i)
- {
- Matrix<Scalar, Dim , 1> vec = shorter[i];
-
- Matrix<Scalar, Dim, 1> vec1 = _min;
- vec1.block(Dim - dim, 0, dim - 1, 1) = vec.block(Dim - dim, 0, dim - 1, 1);
- result.push_back(vec1);
-
- Matrix<Scalar, Dim, 1> vec2 = _max;
- vec2.block(Dim - dim, 0, dim - 1, 1) = vec.block(Dim - dim, 0, dim - 1, 1);
- result.push_back(vec2);
- }
+ for(Index j=0; j<Dim; ++j)
+ result(j,i) = (i & (1<<j)) ? min_(j) : max_(j);
}
return result;
}
template<typename BoxType, typename Rotation> void alignedboxRotatable(
- const BoxType& _box,
- Rotation (*_rotate)(typename NumTraits<typename BoxType::Scalar>::NonInteger /*_angle*/))
+ const BoxType& box,
+ Rotation (*rotate)(typename NumTraits<typename BoxType::Scalar>::NonInteger /*_angle*/))
{
- alignedboxTranslatable(_box);
+ alignedboxTranslatable(box);
typedef typename BoxType::Scalar Scalar;
typedef typename NumTraits<Scalar>::NonInteger NonInteger;
@@ -285,7 +268,7 @@
IsometryTransform tf2 = IsometryTransform::Identity();
// for some weird reason the following statement has to be put separate from
// the following rotate call, otherwise precision problems arise...
- Rotation rot = _rotate(NonInteger(EIGEN_PI));
+ Rotation rot = rotate(NonInteger(EIGEN_PI));
tf2.rotate(rot);
c.transform(tf2);
@@ -295,7 +278,7 @@
VERIFY_IS_APPROX((c.min)(), UnitX - UnitZ * Scalar(2));
VERIFY_IS_APPROX((c.max)(), UnitX * Scalar(3) + UnitY * Scalar(2));
- rot = _rotate(NonInteger(EIGEN_PI / 2));
+ rot = rotate(NonInteger(EIGEN_PI / 2));
tf2.setIdentity();
tf2.rotate(rot);
@@ -319,18 +302,20 @@
}
template<typename BoxType, typename Rotation> void alignedboxNonIntegralRotatable(
- const BoxType& _box,
- Rotation (*_rotate)(typename NumTraits<typename BoxType::Scalar>::NonInteger /*_angle*/))
+ const BoxType& box,
+ Rotation (*rotate)(typename NumTraits<typename BoxType::Scalar>::NonInteger /*_angle*/))
{
- alignedboxRotatable(_box, _rotate);
+ alignedboxRotatable(box, rotate);
typedef typename BoxType::Scalar Scalar;
typedef typename NumTraits<Scalar>::NonInteger NonInteger;
- typedef Matrix<Scalar, BoxType::AmbientDimAtCompileTime, 1> VectorType;
- typedef Transform<Scalar, BoxType::AmbientDimAtCompileTime, Isometry> IsometryTransform;
- typedef Transform<Scalar, BoxType::AmbientDimAtCompileTime, Affine> AffineTransform;
+ enum { Dim = BoxType::AmbientDimAtCompileTime };
+ typedef Matrix<Scalar, Dim, 1> VectorType;
+ typedef Matrix<Scalar, Dim, (1 << Dim)> CornersType;
+ typedef Transform<Scalar, Dim, Isometry> IsometryTransform;
+ typedef Transform<Scalar, Dim, Affine> AffineTransform;
- const Index dim = _box.dim();
+ const Index dim = box.dim();
const VectorType Zero = VectorType::Zero();
const VectorType Ones = VectorType::Ones();
@@ -347,7 +332,7 @@
VectorType cornerTL = (c.max)(); cornerTL[0] = cornerBL[0];
NonInteger angle = NonInteger(EIGEN_PI/3);
- Rotation rot = _rotate(angle);
+ Rotation rot = rotate(angle);
IsometryTransform tf2;
tf2.setIdentity();
tf2.rotate(rot);
@@ -393,8 +378,7 @@
c = BoxType(minCorner, maxCorner);
- std::vector<VectorType> corners = boxGetCorners(minCorner, maxCorner);
- const size_t numCorners = corners.size();
+ CornersType corners = boxGetCorners(minCorner, maxCorner);
typename AffineTransform::LinearMatrixType rotation =
randomRotationMatrix<typename AffineTransform::LinearMatrixType>();
@@ -404,20 +388,10 @@
tf2.translate(VectorType::Random());
c.transform(tf2);
- for (size_t corner = 0; corner < numCorners; ++corner)
- corners[corner] = tf2 * corners[corner];
+ corners = tf2 * corners;
- for (Index d = 0; d < dim; ++d)
- {
- minCorner[d] = corners[0][d];
- maxCorner[d] = corners[0][d];
-
- for (size_t corner = 0; corner < numCorners; ++corner)
- {
- minCorner[d] = (min)(minCorner[d], corners[corner][d]);
- maxCorner[d] = (max)(maxCorner[d], corners[corner][d]);
- }
- }
+ minCorner = corners.rowwise().minCoeff();
+ maxCorner = corners.rowwise().maxCoeff();
VERIFY_IS_APPROX((c.min)(), minCorner);
VERIFY_IS_APPROX((c.max)(), maxCorner);
@@ -434,28 +408,17 @@
c = BoxType(minCorner, maxCorner);
- std::vector<VectorType> corners = boxGetCorners(minCorner, maxCorner);
- const size_t numCorners = corners.size();
+ CornersType corners = boxGetCorners(minCorner, maxCorner);
AffineTransform atf = AffineTransform::Identity();
atf.linearExt() = AffineTransform::LinearPart::Random();
atf.translate(VectorType::Random());
c.transform(atf);
- for (size_t corner = 0; corner < numCorners; ++corner)
- corners[corner] = atf * corners[corner];
+ corners = atf * corners;
- for (Index d = 0; d < dim; ++d)
- {
- minCorner[d] = corners[0][d];
- maxCorner[d] = corners[0][d];
-
- for (size_t corner = 0; corner < numCorners; ++corner)
- {
- minCorner[d] = (min)(minCorner[d], corners[corner][d]);
- maxCorner[d] = (max)(maxCorner[d], corners[corner][d]);
- }
- }
+ minCorner = corners.rowwise().minCoeff();
+ maxCorner = corners.rowwise().maxCoeff();
VERIFY_IS_APPROX((c.min)(), minCorner);
VERIFY_IS_APPROX((c.max)(), maxCorner);
@@ -463,13 +426,13 @@
}
template<typename BoxType>
-void alignedboxCastTests(const BoxType& _box)
+void alignedboxCastTests(const BoxType& box)
{
// casting
typedef typename BoxType::Scalar Scalar;
typedef Matrix<Scalar, BoxType::AmbientDimAtCompileTime, 1> VectorType;
- const Index dim = _box.dim();
+ const Index dim = box.dim();
VectorType p0 = VectorType::Random(dim);
VectorType p1 = VectorType::Random(dim);
diff --git a/test/gpu_basic.cu b/test/gpu_basic.cu
index 46e4a43..b2e657e 100644
--- a/test/gpu_basic.cu
+++ b/test/gpu_basic.cu
@@ -234,6 +234,26 @@
};
template<typename T>
+struct alloc_new_delete {
+ EIGEN_DEVICE_FUNC
+ void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
+ {
+ int offset = 2*i*T::MaxSizeAtCompileTime;
+ T* x = new T(in + offset);
+ Eigen::Map<T> u(out + offset);
+ u = *x;
+ delete x;
+
+ offset += T::MaxSizeAtCompileTime;
+ T* y = new T[1];
+ y[0] = T(in + offset);
+ Eigen::Map<T> v(out + offset);
+ v = y[0];
+ delete[] y;
+ }
+};
+
+template<typename T>
struct redux {
EIGEN_DEVICE_FUNC
void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
@@ -389,6 +409,9 @@
// (aka 'ArrayBase<Eigen::Replicate<Eigen::Array<float, 4, 1, 0, 4, 1>, -1, -1> >') has protected default constructor
CALL_SUBTEST( run_and_compare_to_gpu(replicate<Array4f>(), nthreads, in, out) );
CALL_SUBTEST( run_and_compare_to_gpu(replicate<Array33f>(), nthreads, in, out) );
+
+ // HIP does not support new/delete on device.
+ CALL_SUBTEST( run_and_compare_to_gpu(alloc_new_delete<Vector3f>(), nthreads, in, out) );
#endif
CALL_SUBTEST( run_and_compare_to_gpu(redux<Array4f>(), nthreads, in, out) );
diff --git a/test/indexed_view.cpp b/test/indexed_view.cpp
index 231f443..72c54af 100644
--- a/test/indexed_view.cpp
+++ b/test/indexed_view.cpp
@@ -26,6 +26,9 @@
#if defined(__GNUC__) && (__GNUC__ >=9)
#pragma GCC diagnostic ignored "-Wdeprecated-copy"
#endif
+#if defined(__clang__) && (__clang_major__ >= 10)
+ #pragma clang diagnostic ignored "-Wdeprecated-copy"
+#endif
#endif
diff --git a/test/main.h b/test/main.h
index 4f9887c..cf06173 100644
--- a/test/main.h
+++ b/test/main.h
@@ -45,7 +45,7 @@
#include <queue>
#include <cassert>
#include <list>
-#if __cplusplus >= 201103L
+#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
#include <random>
#include <chrono>
#ifdef EIGEN_USE_THREADS
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index 94a83c0..f5cce64 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -299,6 +299,29 @@
CHECK_CWISE2_IF(internal::packet_traits<Scalar>::HasAdd, REF_ADD, internal::padd);
}
+// Ensure optimization barrier compiles and doesn't modify contents.
+// Only applies to raw types, so will not work for std::complex, Eigen::half
+// or Eigen::bfloat16. For those you would need to refer to an underlying
+// storage element.
+template<typename Packet, typename EnableIf = void>
+struct eigen_optimization_barrier_test {
+ static void run() {}
+};
+
+template<typename Packet>
+struct eigen_optimization_barrier_test<Packet, typename internal::enable_if<
+ !NumTraits<Packet>::IsComplex &&
+ !internal::is_same<Packet, Eigen::half>::value &&
+ !internal::is_same<Packet, Eigen::bfloat16>::value
+ >::type> {
+ static void run() {
+ typedef typename internal::unpacket_traits<Packet>::type Scalar;
+ Scalar s = internal::random<Scalar>();
+ Packet barrier = internal::pset1<Packet>(s);
+ EIGEN_OPTIMIZATION_BARRIER(barrier);
+ eigen_assert(s == internal::pfirst(barrier) && "EIGEN_OPTIMIZATION_BARRIER");
+ }
+};
template <typename Scalar, typename Packet>
void packetmath() {
@@ -317,6 +340,10 @@
EIGEN_ALIGN_MAX Scalar data3[size];
EIGEN_ALIGN_MAX Scalar ref[size];
RealScalar refvalue = RealScalar(0);
+
+ eigen_optimization_barrier_test<Packet>::run();
+ eigen_optimization_barrier_test<Scalar>::run();
+
for (int i = 0; i < size; ++i) {
data1[i] = internal::random<Scalar>() / RealScalar(PacketSize);
data2[i] = internal::random<Scalar>() / RealScalar(PacketSize);
@@ -504,6 +531,7 @@
data1[i] = numext::abs(internal::random<Scalar>());
}
CHECK_CWISE1_IF(PacketTraits::HasSqrt, numext::sqrt, internal::psqrt);
+ CHECK_CWISE1_IF(PacketTraits::HasRsqrt, numext::rsqrt, internal::prsqrt);
}
// Notice that this definition works for complex types as well.
@@ -532,7 +560,7 @@
CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog);
CHECK_CWISE1_IF(PacketTraits::HasLog, log2, internal::plog2);
- CHECK_CWISE1_IF(PacketTraits::HasRsqrt, 1 / std::sqrt, internal::prsqrt);
+ CHECK_CWISE1_IF(PacketTraits::HasRsqrt, numext::rsqrt, internal::prsqrt);
for (int i = 0; i < size; ++i) {
data1[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-3, 3)));
@@ -542,18 +570,52 @@
CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos);
CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan);
- CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround);
- CHECK_CWISE1_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil);
- CHECK_CWISE1_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor);
- CHECK_CWISE1_IF(PacketTraits::HasRint, numext::rint, internal::print);
-
- // See bug 1785.
- for (int i = 0; i < size; ++i) {
- data1[i] = Scalar(-1.5 + i);
- data2[i] = Scalar(-1.5 + i);
+ CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::round, internal::pround);
+ CHECK_CWISE1_EXACT_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil);
+ CHECK_CWISE1_EXACT_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor);
+ CHECK_CWISE1_EXACT_IF(PacketTraits::HasRint, numext::rint, internal::print);
+
+ // Rounding edge cases.
+ if (PacketTraits::HasRound || PacketTraits::HasCeil || PacketTraits::HasFloor || PacketTraits::HasRint) {
+ typedef typename internal::make_integer<Scalar>::type IntType;
+ // Start with values that cannot fit inside an integer, work down to less than one.
+ Scalar val = numext::mini(
+ Scalar(2) * static_cast<Scalar>(NumTraits<IntType>::highest()),
+ NumTraits<Scalar>::highest());
+ std::vector<Scalar> values;
+ while (val > Scalar(0.25)) {
+ // Cover both even and odd, positive and negative cases.
+ values.push_back(val);
+ values.push_back(val + Scalar(0.3));
+ values.push_back(val + Scalar(0.5));
+ values.push_back(val + Scalar(0.8));
+ values.push_back(val + Scalar(1));
+ values.push_back(val + Scalar(1.3));
+ values.push_back(val + Scalar(1.5));
+ values.push_back(val + Scalar(1.8));
+ values.push_back(-val);
+ values.push_back(-val - Scalar(0.3));
+ values.push_back(-val - Scalar(0.5));
+ values.push_back(-val - Scalar(0.8));
+ values.push_back(-val - Scalar(1));
+ values.push_back(-val - Scalar(1.3));
+ values.push_back(-val - Scalar(1.5));
+ values.push_back(-val - Scalar(1.8));
+ values.push_back(Scalar(-1.5) + val); // Bug 1785.
+ val = val / Scalar(2);
+ }
+ values.push_back(NumTraits<Scalar>::infinity());
+ values.push_back(-NumTraits<Scalar>::infinity());
+ values.push_back(NumTraits<Scalar>::quiet_NaN());
+
+ for (size_t k=0; k<values.size(); ++k) {
+ data1[0] = values[k];
+ CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::round, internal::pround);
+ CHECK_CWISE1_EXACT_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil);
+ CHECK_CWISE1_EXACT_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor);
+ CHECK_CWISE1_EXACT_IF(PacketTraits::HasRint, numext::rint, internal::print);
+ }
}
- CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround);
- CHECK_CWISE1_IF(PacketTraits::HasRint, numext::rint, internal::print);
for (int i = 0; i < size; ++i) {
data1[i] = Scalar(internal::random<double>(-1, 1));
@@ -609,7 +671,7 @@
if (PacketTraits::HasExp) {
data1[0] = Scalar(-1);
// underflow to zero
- data1[PacketSize] = Scalar(std::numeric_limits<Scalar>::min_exponent-10);
+ data1[PacketSize] = Scalar(std::numeric_limits<Scalar>::min_exponent-55);
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
// overflow to inf
data1[PacketSize] = Scalar(std::numeric_limits<Scalar>::max_exponent+10);
@@ -699,7 +761,7 @@
}
}
-#if EIGEN_HAS_C99_MATH && (__cplusplus > 199711L)
+#if EIGEN_HAS_C99_MATH && (EIGEN_COMP_CXXVER >= 11)
data1[0] = std::numeric_limits<Scalar>::infinity();
data1[1] = Scalar(-1);
CHECK_CWISE1_IF(PacketTraits::HasLog1p, std::log1p, internal::plog1p);
diff --git a/test/packetmath_test_shared.h b/test/packetmath_test_shared.h
index 027715a..8624fe2 100644
--- a/test/packetmath_test_shared.h
+++ b/test/packetmath_test_shared.h
@@ -78,13 +78,18 @@
return internal::isMuchSmallerThan(a-b, refvalue);
}
+template<typename Scalar>
+inline void print_mismatch(const Scalar* ref, const Scalar* vec, int size) {
+ std::cout << "ref: [" << Map<const Matrix<Scalar,1,Dynamic> >(ref,size) << "]" << " != vec: [" << Map<const Matrix<Scalar,1,Dynamic> >(vec,size) << "]\n";
+}
+
template<typename Scalar> bool areApproxAbs(const Scalar* a, const Scalar* b, int size, const typename NumTraits<Scalar>::Real& refvalue)
{
for (int i=0; i<size; ++i)
{
if (!isApproxAbs(a[i],b[i],refvalue))
{
- std::cout << "ref: [" << Map<const Matrix<Scalar,1,Dynamic> >(a,size) << "]" << " != vec: [" << Map<const Matrix<Scalar,1,Dynamic> >(b,size) << "]\n";
+ print_mismatch(a, b, size);
return false;
}
}
@@ -95,13 +100,23 @@
{
for (int i=0; i<size; ++i)
{
- if (a[i]!=b[i] && !internal::isApprox(a[i],b[i]))
+ if ( a[i]!=b[i] && !internal::isApprox(a[i],b[i])
+ && !((numext::isnan)(a[i]) && (numext::isnan)(b[i])) )
{
- if((numext::isnan)(a[i]) && (numext::isnan)(b[i]))
- {
- continue;
- }
- std::cout << "ref: [" << Map<const Matrix<Scalar,1,Dynamic> >(a,size) << "]" << " != vec: [" << Map<const Matrix<Scalar,1,Dynamic> >(b,size) << "]\n";
+ print_mismatch(a, b, size);
+ return false;
+ }
+ }
+ return true;
+}
+
+template<typename Scalar> bool areEqual(const Scalar* a, const Scalar* b, int size)
+{
+ for (int i=0; i<size; ++i)
+ {
+ if ( (a[i] != b[i]) && !((numext::isnan)(a[i]) && (numext::isnan)(b[i])) )
+ {
+ print_mismatch(a, b, size);
return false;
}
}
@@ -178,6 +193,14 @@
VERIFY(test::areApprox(ref, data2, PacketSize) && #POP); \
}
+#define CHECK_CWISE1_EXACT_IF(COND, REFOP, POP) if(COND) { \
+ test::packet_helper<COND,Packet> h; \
+ for (int i=0; i<PacketSize; ++i) \
+ ref[i] = Scalar(REFOP(data1[i])); \
+ h.store(data2, POP(h.load(data1))); \
+ VERIFY(test::areEqual(ref, data2, PacketSize) && #POP); \
+}
+
#define CHECK_CWISE2_IF(COND, REFOP, POP) if(COND) { \
test::packet_helper<COND,Packet> h; \
for (int i=0; i<PacketSize; ++i) \
diff --git a/test/simplicial_cholesky.cpp b/test/simplicial_cholesky.cpp
index e3c31e3..538d01a 100644
--- a/test/simplicial_cholesky.cpp
+++ b/test/simplicial_cholesky.cpp
@@ -9,9 +9,9 @@
#include "sparse_solver.h"
-template<typename T, typename I_> void test_simplicial_cholesky_T()
+template<typename T, typename I_, int flag> void test_simplicial_cholesky_T()
{
- typedef SparseMatrix<T,0,I_> SparseMatrixType;
+ typedef SparseMatrix<T,flag,I_> SparseMatrixType;
SimplicialCholesky<SparseMatrixType, Lower> chol_colmajor_lower_amd;
SimplicialCholesky<SparseMatrixType, Upper> chol_colmajor_upper_amd;
SimplicialLLT< SparseMatrixType, Lower> llt_colmajor_lower_amd;
@@ -41,7 +41,10 @@
EIGEN_DECLARE_TEST(simplicial_cholesky)
{
- CALL_SUBTEST_1(( test_simplicial_cholesky_T<double,int>() ));
- CALL_SUBTEST_2(( test_simplicial_cholesky_T<std::complex<double>, int>() ));
- CALL_SUBTEST_3(( test_simplicial_cholesky_T<double,long int>() ));
+ CALL_SUBTEST_11(( test_simplicial_cholesky_T<double, int, ColMajor>() ));
+ CALL_SUBTEST_12(( test_simplicial_cholesky_T<std::complex<double>, int, ColMajor>() ));
+ CALL_SUBTEST_13(( test_simplicial_cholesky_T<double, long int, ColMajor>() ));
+ CALL_SUBTEST_21(( test_simplicial_cholesky_T<double, int, RowMajor>() ));
+ CALL_SUBTEST_22(( test_simplicial_cholesky_T<std::complex<double>, int, RowMajor>() ));
+ CALL_SUBTEST_23(( test_simplicial_cholesky_T<double, long int, RowMajor>() ));
}
diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp
index 5f87621..f7ad930 100644
--- a/test/sparse_basic.cpp
+++ b/test/sparse_basic.cpp
@@ -413,7 +413,7 @@
m.setFromTriplets(triplets.begin(), triplets.end(), std::multiplies<Scalar>());
VERIFY_IS_APPROX(m, refMat_prod);
-#if (defined(__cplusplus) && __cplusplus >= 201103L)
+#if (EIGEN_COMP_CXXVER >= 11)
m.setFromTriplets(triplets.begin(), triplets.end(), [] (Scalar,Scalar b) { return b; });
VERIFY_IS_APPROX(m, refMat_last);
#endif
@@ -587,30 +587,38 @@
inc.push_back(std::pair<StorageIndex,StorageIndex>(3,2));
inc.push_back(std::pair<StorageIndex,StorageIndex>(3,0));
inc.push_back(std::pair<StorageIndex,StorageIndex>(0,3));
-
+ inc.push_back(std::pair<StorageIndex,StorageIndex>(0,-1));
+ inc.push_back(std::pair<StorageIndex,StorageIndex>(-1,0));
+ inc.push_back(std::pair<StorageIndex,StorageIndex>(-1,-1));
+
for(size_t i = 0; i< inc.size(); i++) {
StorageIndex incRows = inc[i].first;
StorageIndex incCols = inc[i].second;
SparseMatrixType m1(rows, cols);
DenseMatrix refMat1 = DenseMatrix::Zero(rows, cols);
initSparse<Scalar>(density, refMat1, m1);
-
+
+ SparseMatrixType m2 = m1;
+ m2.makeCompressed();
+
m1.conservativeResize(rows+incRows, cols+incCols);
+ m2.conservativeResize(rows+incRows, cols+incCols);
refMat1.conservativeResize(rows+incRows, cols+incCols);
if (incRows > 0) refMat1.bottomRows(incRows).setZero();
if (incCols > 0) refMat1.rightCols(incCols).setZero();
-
+
VERIFY_IS_APPROX(m1, refMat1);
-
+ VERIFY_IS_APPROX(m2, refMat1);
+
// Insert new values
if (incRows > 0)
m1.insert(m1.rows()-1, 0) = refMat1(refMat1.rows()-1, 0) = 1;
if (incCols > 0)
m1.insert(0, m1.cols()-1) = refMat1(0, refMat1.cols()-1) = 1;
-
+
VERIFY_IS_APPROX(m1, refMat1);
-
-
+
+
}
}
diff --git a/unsupported/Eigen/AdolcForward b/unsupported/Eigen/AdolcForward
index 9b8d3cd..56caeae 100644
--- a/unsupported/Eigen/AdolcForward
+++ b/unsupported/Eigen/AdolcForward
@@ -74,6 +74,9 @@
inline adouble abs(const adouble& x) { return fabs(x); }
inline adouble abs2(const adouble& x) { return x*x; }
+inline bool (isinf)(const adouble& x) { return (Eigen::numext::isinf)(x.getValue()); }
+inline bool (isnan)(const adouble& x) { return (Eigen::numext::isnan)(x.getValue()); }
+
}
namespace Eigen {
diff --git a/unsupported/Eigen/CXX11/ThreadPool b/unsupported/Eigen/CXX11/ThreadPool
index 575c060..1af1a07 100644
--- a/unsupported/Eigen/CXX11/ThreadPool
+++ b/unsupported/Eigen/CXX11/ThreadPool
@@ -30,7 +30,7 @@
// The code depends on CXX11, so only include the module if the
// compiler supports it.
-#if __cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900
+#if (EIGEN_COMP_CXXVER >= 11)
#include <cstddef>
#include <cstring>
#include <time.h>
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 679996c..35b6458 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -298,6 +298,12 @@
}
EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_log2_op<Scalar>, const Derived>
+ log2() const {
+ return unaryExpr(internal::scalar_log2_op<Scalar>());
+ }
+
+ EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived>
abs() const {
return unaryExpr(internal::scalar_abs_op<Scalar>());
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index 605d72c..424cace 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -362,7 +362,7 @@
template<typename Derived>
-struct TensorContractionEvaluatorBase
+struct TensorContractionEvaluatorBase : internal::no_assignment_operator
{
typedef typename internal::traits<Derived>::Indices Indices;
typedef typename internal::traits<Derived>::LeftArgType LeftArgType;
@@ -934,8 +934,6 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return m_result; }
protected:
- // Prevent assignment
- TensorContractionEvaluatorBase& operator = (const TensorContractionEvaluatorBase&);
Dimensions m_dimensions;
contract_t m_k_strides;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index ceecd54..ef79c85 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -451,6 +451,7 @@
}
m_is_identity = true;
+ bool degenerate = false;
for (int i = 0; i < internal::array_size<Dimensions>::value; ++i) {
eigen_assert(m_impl.dimensions()[i] >=
op.sizes()[i] + op.startIndices()[i]);
@@ -458,6 +459,9 @@
op.startIndices()[i] != 0) {
m_is_identity = false;
}
+ if (op.sizes()[i] == 0) { // we have an empty size
+ degenerate = true;
+ }
}
// No strides for scalars.
@@ -475,8 +479,8 @@
m_outputStrides[0] = 1;
for (int i = 1; i < NumDims; ++i) {
m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
- m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
- }
+ // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
+ m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]); }
} else {
m_inputStrides[NumDims-1] = 1;
for (int i = NumDims - 2; i >= 0; --i) {
@@ -487,8 +491,8 @@
m_outputStrides[NumDims-1] = 1;
for (int i = NumDims - 2; i >= 0; --i) {
m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
- m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
- }
+ // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
+ m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]); }
}
}
diff --git a/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h b/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h
index f1c0284..056736c 100644
--- a/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h
+++ b/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h
@@ -32,7 +32,7 @@
* On the other hand, visual studio still doesn't claim to support C++11 although it's
* compliant enugh for our purpose.
*/
-#if (__cplusplus <= 199711L) && (EIGEN_COMP_MSVC < 1900)
+#if (EIGEN_COMP_CXXVER < 11)
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#pragma GCC diagnostic error "-Wfatal-errors"
#endif
diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT
index cb99bd2..c8c311a 100644
--- a/unsupported/Eigen/FFT
+++ b/unsupported/Eigen/FFT
@@ -131,8 +131,6 @@
const T_SrcMat & m_src;
T_FftIfc & m_ifc;
Index m_nfft;
-private:
- fft_fwd_proxy& operator=(const fft_fwd_proxy&);
};
template<typename T_SrcMat,typename T_FftIfc>
@@ -151,8 +149,6 @@
const T_SrcMat & m_src;
T_FftIfc & m_ifc;
Index m_nfft;
-private:
- fft_inv_proxy& operator=(const fft_inv_proxy&);
};
diff --git a/unsupported/Eigen/IterativeSolvers b/unsupported/Eigen/IterativeSolvers
index d657b4c..8930a74 100644
--- a/unsupported/Eigen/IterativeSolvers
+++ b/unsupported/Eigen/IterativeSolvers
@@ -15,11 +15,14 @@
#include "../../Eigen/Householder"
/**
- * \defgroup IterativeSolvers_Module Iterative solvers module
+ * \defgroup IterativeLinearSolvers_Module Iterative solvers module
* This module aims to provide various iterative linear and non linear solver algorithms.
* It currently provides:
* - a constrained conjugate gradient
* - a Householder GMRES implementation
+ * - an IDR(s) implementation
+ * - a DGMRES implementation
+ * - a MINRES implementation
* \code
* #include <unsupported/Eigen/IterativeSolvers>
* \endcode
@@ -33,6 +36,7 @@
#include "src/IterativeSolvers/DGMRES.h"
//#include "src/IterativeSolvers/SSORPreconditioner.h"
#include "src/IterativeSolvers/MINRES.h"
+#include "src/IterativeSolvers/IDRS.h"
#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"
diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
index 9bdc012..430953a 100644
--- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
+++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
@@ -25,16 +25,47 @@
std::vector<Complex> m_scratchBuf;
bool m_inverse;
- inline
- void make_twiddles(int nfft,bool inverse)
+ inline void make_twiddles(int nfft, bool inverse)
+ {
+ using numext::sin;
+ using numext::cos;
+ m_inverse = inverse;
+ m_twiddles.resize(nfft);
+ double phinc = 0.25 * double(EIGEN_PI) / nfft;
+ Scalar flip = inverse ? Scalar(1) : Scalar(-1);
+ m_twiddles[0] = Complex(Scalar(1), Scalar(0));
+ if ((nfft&1)==0)
+ m_twiddles[nfft/2] = Complex(Scalar(-1), Scalar(0));
+ int i=1;
+ for (;i*8<nfft;++i)
{
- using std::acos;
- m_inverse = inverse;
- m_twiddles.resize(nfft);
- Scalar phinc = (inverse?2:-2)* acos( (Scalar) -1) / nfft;
- for (int i=0;i<nfft;++i)
- m_twiddles[i] = exp( Complex(0,i*phinc) );
+ Scalar c = Scalar(cos(i*8*phinc));
+ Scalar s = Scalar(sin(i*8*phinc));
+ m_twiddles[i] = Complex(c, s*flip);
+ m_twiddles[nfft-i] = Complex(c, -s*flip);
}
+ for (;i*4<nfft;++i)
+ {
+ Scalar c = Scalar(cos((2*nfft-8*i)*phinc));
+ Scalar s = Scalar(sin((2*nfft-8*i)*phinc));
+ m_twiddles[i] = Complex(s, c*flip);
+ m_twiddles[nfft-i] = Complex(s, -c*flip);
+ }
+ for (;i*8<3*nfft;++i)
+ {
+ Scalar c = Scalar(cos((8*i-2*nfft)*phinc));
+ Scalar s = Scalar(sin((8*i-2*nfft)*phinc));
+ m_twiddles[i] = Complex(-s, c*flip);
+ m_twiddles[nfft-i] = Complex(-s, -c*flip);
+ }
+ for (;i*2<nfft;++i)
+ {
+ Scalar c = Scalar(cos((4*nfft-8*i)*phinc));
+ Scalar s = Scalar(sin((4*nfft-8*i)*phinc));
+ m_twiddles[i] = Complex(-c, s*flip);
+ m_twiddles[nfft-i] = Complex(-c, -s*flip);
+ }
+ }
void factorize(int nfft)
{
diff --git a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
index 2ab56b5..5ae011b 100644
--- a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
+++ b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
@@ -57,7 +57,7 @@
}
/**
- * \ingroup IterativeLInearSolvers_Module
+ * \ingroup IterativeLinearSolvers_Module
* \brief A Restarted GMRES with deflation.
* This class implements a modification of the GMRES solver for
* sparse linear systems. The basis is built with modified
diff --git a/unsupported/Eigen/src/IterativeSolvers/IDRS.h b/unsupported/Eigen/src/IterativeSolvers/IDRS.h
new file mode 100755
index 0000000..90d20fa
--- /dev/null
+++ b/unsupported/Eigen/src/IterativeSolvers/IDRS.h
@@ -0,0 +1,436 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2020 Chris Schoutrop <c.e.m.schoutrop@tue.nl>
+// Copyright (C) 2020 Jens Wehner <j.wehner@esciencecenter.nl>
+// Copyright (C) 2020 Jan van Dijk <j.v.dijk@tue.nl>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#ifndef EIGEN_IDRS_H
+#define EIGEN_IDRS_H
+
+namespace Eigen
+{
+
+ namespace internal
+ {
+ /** \internal Low-level Induced Dimension Reduction algoritm
+ \param A The matrix A
+ \param b The right hand side vector b
+ \param x On input and initial solution, on output the computed solution.
+ \param precond A preconditioner being able to efficiently solve for an
+ approximation of Ax=b (regardless of b)
+ \param iter On input the max number of iteration, on output the number of performed iterations.
+ \param relres On input the tolerance error, on output an estimation of the relative error.
+ \param S On input Number of the dimension of the shadow space.
+ \param smoothing switches residual smoothing on.
+ \param angle small omega lead to faster convergence at the expense of numerical stability
+ \param replacement switches on a residual replacement strategy to increase accuracy of residual at the expense of more Mat*vec products
+ \return false in the case of numerical issue, for example a break down of IDRS.
+ */
+ template<typename Vector, typename RealScalar>
+ typename Vector::Scalar omega(const Vector& t, const Vector& s, RealScalar angle)
+ {
+ using numext::abs;
+ typedef typename Vector::Scalar Scalar;
+ const RealScalar ns = s.norm();
+ const RealScalar nt = t.norm();
+ const Scalar ts = t.dot(s);
+ const RealScalar rho = abs(ts / (nt * ns));
+
+ if (rho < angle) {
+ if (ts == Scalar(0)) {
+ return Scalar(0);
+ }
+ // Original relation for om is given by
+ // om = om * angle / rho;
+ // To alleviate potential (near) division by zero this can be rewritten as
+ // om = angle * (ns / nt) * (ts / abs(ts)) = angle * (ns / nt) * sgn(ts)
+ return angle * (ns / nt) * (ts / abs(ts));
+ }
+ return ts / (nt * nt);
+ }
+
+ template <typename MatrixType, typename Rhs, typename Dest, typename Preconditioner>
+ bool idrs(const MatrixType& A, const Rhs& b, Dest& x, const Preconditioner& precond,
+ Index& iter,
+ typename Dest::RealScalar& relres, Index S, bool smoothing, typename Dest::RealScalar angle, bool replacement)
+ {
+ typedef typename Dest::RealScalar RealScalar;
+ typedef typename Dest::Scalar Scalar;
+ typedef Matrix<Scalar, Dynamic, 1> VectorType;
+ typedef Matrix<Scalar, Dynamic, Dynamic, ColMajor> DenseMatrixType;
+ const Index N = b.size();
+ S = S < x.rows() ? S : x.rows();
+ const RealScalar tol = relres;
+ const Index maxit = iter;
+
+ Index replacements = 0;
+ bool trueres = false;
+
+ FullPivLU<DenseMatrixType> lu_solver;
+
+ DenseMatrixType P;
+ {
+ HouseholderQR<DenseMatrixType> qr(DenseMatrixType::Random(N, S));
+ P = (qr.householderQ() * DenseMatrixType::Identity(N, S));
+ }
+
+ const RealScalar normb = b.norm();
+
+ if (internal::isApprox(normb, RealScalar(0)))
+ {
+ //Solution is the zero vector
+ x.setZero();
+ iter = 0;
+ relres = 0;
+ return true;
+ }
+ // from http://homepage.tudelft.nl/1w5b5/IDRS/manual.pdf
+ // A peak in the residual is considered dangerously high if‖ri‖/‖b‖> C(tol/epsilon).
+ // With epsilon the
+ // relative machine precision. The factor tol/epsilon corresponds to the size of a
+ // finite precision number that is so large that the absolute round-off error in
+ // this number, when propagated through the process, makes it impossible to
+ // achieve the required accuracy.The factor C accounts for the accumulation of
+ // round-off errors. This parameter has beenset to 10−3.
+ // mp is epsilon/C
+ // 10^3 * eps is very conservative, so normally no residual replacements will take place.
+ // It only happens if things go very wrong. Too many restarts may ruin the convergence.
+ const RealScalar mp = RealScalar(1e3) * NumTraits<Scalar>::epsilon();
+
+
+
+ //Compute initial residual
+ const RealScalar tolb = tol * normb; //Relative tolerance
+ VectorType r = b - A * x;
+
+ VectorType x_s, r_s;
+
+ if (smoothing)
+ {
+ x_s = x;
+ r_s = r;
+ }
+
+ RealScalar normr = r.norm();
+
+ if (normr <= tolb)
+ {
+ //Initial guess is a good enough solution
+ iter = 0;
+ relres = normr / normb;
+ return true;
+ }
+
+ DenseMatrixType G = DenseMatrixType::Zero(N, S);
+ DenseMatrixType U = DenseMatrixType::Zero(N, S);
+ DenseMatrixType M = DenseMatrixType::Identity(S, S);
+ VectorType t(N), v(N);
+ Scalar om = 1.;
+
+ //Main iteration loop, guild G-spaces:
+ iter = 0;
+
+ while (normr > tolb && iter < maxit)
+ {
+ //New right hand size for small system:
+ VectorType f = (r.adjoint() * P).adjoint();
+
+ for (Index k = 0; k < S; ++k)
+ {
+ //Solve small system and make v orthogonal to P:
+ //c = M(k:s,k:s)\f(k:s);
+ lu_solver.compute(M.block(k , k , S -k, S - k ));
+ VectorType c = lu_solver.solve(f.segment(k , S - k ));
+ //v = r - G(:,k:s)*c;
+ v = r - G.rightCols(S - k ) * c;
+ //Preconditioning
+ v = precond.solve(v);
+
+ //Compute new U(:,k) and G(:,k), G(:,k) is in space G_j
+ U.col(k) = U.rightCols(S - k ) * c + om * v;
+ G.col(k) = A * U.col(k );
+
+ //Bi-Orthogonalise the new basis vectors:
+ for (Index i = 0; i < k-1 ; ++i)
+ {
+ //alpha = ( P(:,i)'*G(:,k) )/M(i,i);
+ Scalar alpha = P.col(i ).dot(G.col(k )) / M(i, i );
+ G.col(k ) = G.col(k ) - alpha * G.col(i );
+ U.col(k ) = U.col(k ) - alpha * U.col(i );
+ }
+
+ //New column of M = P'*G (first k-1 entries are zero)
+ //M(k:s,k) = (G(:,k)'*P(:,k:s))';
+ M.block(k , k , S - k , 1) = (G.col(k ).adjoint() * P.rightCols(S - k )).adjoint();
+
+ if (internal::isApprox(M(k,k), Scalar(0)))
+ {
+ return false;
+ }
+
+ //Make r orthogonal to q_i, i = 0..k-1
+ Scalar beta = f(k ) / M(k , k );
+ r = r - beta * G.col(k );
+ x = x + beta * U.col(k );
+ normr = r.norm();
+
+ if (replacement && normr > tolb / mp)
+ {
+ trueres = true;
+ }
+
+ //Smoothing:
+ if (smoothing)
+ {
+ t = r_s - r;
+ //gamma is a Scalar, but the conversion is not allowed
+ Scalar gamma = t.dot(r_s) / t.norm();
+ r_s = r_s - gamma * t;
+ x_s = x_s - gamma * (x_s - x);
+ normr = r_s.norm();
+ }
+
+ if (normr < tolb || iter == maxit)
+ {
+ break;
+ }
+
+ //New f = P'*r (first k components are zero)
+ if (k < S-1)
+ {
+ f.segment(k + 1, S - (k + 1) ) = f.segment(k + 1 , S - (k + 1)) - beta * M.block(k + 1 , k , S - (k + 1), 1);
+ }
+ }//end for
+
+ if (normr < tolb || iter == maxit)
+ {
+ break;
+ }
+
+ //Now we have sufficient vectors in G_j to compute residual in G_j+1
+ //Note: r is already perpendicular to P so v = r
+ //Preconditioning
+ v = r;
+ v = precond.solve(v);
+
+ //Matrix-vector multiplication:
+ t = A * v;
+
+ //Computation of a new omega
+ om = internal::omega(t, r, angle);
+
+ if (om == RealScalar(0.0))
+ {
+ return false;
+ }
+
+ r = r - om * t;
+ x = x + om * v;
+ normr = r.norm();
+
+ if (replacement && normr > tolb / mp)
+ {
+ trueres = true;
+ }
+
+ //Residual replacement?
+ if (trueres && normr < normb)
+ {
+ r = b - A * x;
+ trueres = false;
+ replacements++;
+ }
+
+ //Smoothing:
+ if (smoothing)
+ {
+ t = r_s - r;
+ Scalar gamma = t.dot(r_s) /t.norm();
+ r_s = r_s - gamma * t;
+ x_s = x_s - gamma * (x_s - x);
+ normr = r_s.norm();
+ }
+
+ iter++;
+
+ }//end while
+
+ if (smoothing)
+ {
+ x = x_s;
+ }
+ relres=normr/normb;
+ return true;
+ }
+
+ } // namespace internal
+
+ template <typename _MatrixType, typename _Preconditioner = DiagonalPreconditioner<typename _MatrixType::Scalar> >
+ class IDRS;
+
+ namespace internal
+ {
+
+ template <typename _MatrixType, typename _Preconditioner>
+ struct traits<Eigen::IDRS<_MatrixType, _Preconditioner> >
+ {
+ typedef _MatrixType MatrixType;
+ typedef _Preconditioner Preconditioner;
+ };
+
+ } // namespace internal
+
+
+/** \ingroup IterativeLinearSolvers_Module
+ * \brief The Induced Dimension Reduction method (IDR(s)) is a short-recurrences Krylov method for sparse square problems.
+ *
+ * This class allows to solve for A.x = b sparse linear problems. The vectors x and b can be either dense or sparse.
+ * he Induced Dimension Reduction method, IDR(), is a robust and efficient short-recurrence Krylov subspace method for
+ * solving large nonsymmetric systems of linear equations.
+ *
+ * For indefinite systems IDR(S) outperforms both BiCGStab and BiCGStab(L). Additionally, IDR(S) can handle matrices
+ * with complex eigenvalues more efficiently than BiCGStab.
+ *
+ * Many problems that do not converge for BiCGSTAB converge for IDR(s) (for larger values of s). And if both methods
+ * converge the convergence for IDR(s) is typically much faster for difficult systems (for example indefinite problems).
+ *
+ * IDR(s) is a limited memory finite termination method. In exact arithmetic it converges in at most N+N/s iterations,
+ * with N the system size. It uses a fixed number of 4+3s vector. In comparison, BiCGSTAB terminates in 2N iterations
+ * and uses 7 vectors. GMRES terminates in at most N iterations, and uses I+3 vectors, with I the number of iterations.
+ * Restarting GMRES limits the memory consumption, but destroys the finite termination property.
+ *
+ * \tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix.
+ * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner
+ *
+ * \implsparsesolverconcept
+ *
+ * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations()
+ * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations
+ * and NumTraits<Scalar>::epsilon() for the tolerance.
+ *
+ * The tolerance corresponds to the relative residual error: |Ax-b|/|b|
+ *
+ * \b Performance: when using sparse matrices, best performance is achied for a row-major sparse matrix format.
+ * Moreover, in this case multi-threading can be exploited if the user code is compiled with OpenMP enabled.
+ * See \ref TopicMultiThreading for details.
+ *
+ * By default the iterations start with x=0 as an initial guess of the solution.
+ * One can control the start using the solveWithGuess() method.
+ *
+ * IDR(s) can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink.
+ *
+ * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner
+ */
+ template <typename _MatrixType, typename _Preconditioner>
+ class IDRS : public IterativeSolverBase<IDRS<_MatrixType, _Preconditioner> >
+ {
+
+ public:
+ typedef _MatrixType MatrixType;
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::RealScalar RealScalar;
+ typedef _Preconditioner Preconditioner;
+
+ private:
+ typedef IterativeSolverBase<IDRS> Base;
+ using Base::m_error;
+ using Base::m_info;
+ using Base::m_isInitialized;
+ using Base::m_iterations;
+ using Base::matrix;
+ Index m_S;
+ bool m_smoothing;
+ RealScalar m_angle;
+ bool m_residual;
+
+ public:
+ /** Default constructor. */
+ IDRS(): m_S(4), m_smoothing(false), m_angle(RealScalar(0.7)), m_residual(false) {}
+
+ /** Initialize the solver with matrix \a A for further \c Ax=b solving.
+
+ This constructor is a shortcut for the default constructor followed
+ by a call to compute().
+
+ \warning this class stores a reference to the matrix A as well as some
+ precomputed values that depend on it. Therefore, if \a A is changed
+ this class becomes invalid. Call compute() to update it with the new
+ matrix A, or modify a copy of A.
+ */
+ template <typename MatrixDerived>
+ explicit IDRS(const EigenBase<MatrixDerived>& A) : Base(A.derived()), m_S(4), m_smoothing(false),
+ m_angle(RealScalar(0.7)), m_residual(false) {}
+
+
+ /** \internal */
+ /** Loops over the number of columns of b and does the following:
+ 1. sets the tolerence and maxIterations
+ 2. Calls the function that has the core solver routine
+ */
+ template <typename Rhs, typename Dest>
+ void _solve_vector_with_guess_impl(const Rhs& b, Dest& x) const
+ {
+ m_iterations = Base::maxIterations();
+ m_error = Base::m_tolerance;
+
+ bool ret = internal::idrs(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, m_S,m_smoothing,m_angle,m_residual);
+
+ m_info = (!ret) ? NumericalIssue : m_error <= Base::m_tolerance ? Success : NoConvergence;
+ }
+
+ /** Sets the parameter S, indicating the dimension of the shadow space. Default is 4*/
+ void setS(Index S)
+ {
+ if (S < 1)
+ {
+ S = 4;
+ }
+
+ m_S = S;
+ }
+
+ /** Switches off and on smoothing.
+ Residual smoothing results in monotonically decreasing residual norms at
+ the expense of two extra vectors of storage and a few extra vector
+ operations. Although monotonic decrease of the residual norms is a
+ desirable property, the rate of convergence of the unsmoothed process and
+ the smoothed process is basically the same. Default is off */
+ void setSmoothing(bool smoothing)
+ {
+ m_smoothing=smoothing;
+ }
+
+ /** The angle must be a real scalar. In IDR(s), a value for the
+ iteration parameter omega must be chosen in every s+1th step. The most
+ natural choice is to select a value to minimize the norm of the next residual.
+ This corresponds to the parameter omega = 0. In practice, this may lead to
+ values of omega that are so small that the other iteration parameters
+ cannot be computed with sufficient accuracy. In such cases it is better to
+ increase the value of omega sufficiently such that a compromise is reached
+ between accurate computations and reduction of the residual norm. The
+ parameter angle =0.7 (”maintaining the convergence strategy”)
+ results in such a compromise. */
+ void setAngle(RealScalar angle)
+ {
+ m_angle=angle;
+ }
+
+ /** The parameter replace is a logical that determines whether a
+ residual replacement strategy is employed to increase the accuracy of the
+ solution. */
+ void setResidualUpdate(bool update)
+ {
+ m_residual=update;
+ }
+
+ };
+
+} // namespace Eigen
+
+#endif /* EIGEN_IDRS_H */
diff --git a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
index 582fa85..6a9b0be 100644
--- a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
+++ b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
@@ -235,10 +235,10 @@
MaxRowsAtCompileTime = size_at_compile_time<traits<Lhs>::MaxRowsAtCompileTime, traits<Rhs>::MaxRowsAtCompileTime>::ret,
MaxColsAtCompileTime = size_at_compile_time<traits<Lhs>::MaxColsAtCompileTime, traits<Rhs>::MaxColsAtCompileTime>::ret,
- EvalToRowMajor = (LhsFlags & RhsFlags & RowMajorBit),
+ EvalToRowMajor = (int(LhsFlags) & int(RhsFlags) & RowMajorBit),
RemovedBits = ~(EvalToRowMajor ? 0 : RowMajorBit),
- Flags = ((LhsFlags | RhsFlags) & HereditaryBits & RemovedBits)
+ Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & RemovedBits)
| EvalBeforeNestingBit,
CoeffReadCost = HugeCost
};
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
index cfc13af..f1c260e 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -241,7 +241,7 @@
Scalar p, q, nz, s, w, y;
bool negative = false;
- const Scalar maxnum = NumTraits<Scalar>::infinity();
+ const Scalar nan = NumTraits<Scalar>::quiet_NaN();
const Scalar m_pi = Scalar(EIGEN_PI);
const Scalar zero = Scalar(0);
@@ -254,7 +254,7 @@
q = x;
p = numext::floor(q);
if (p == q) {
- return maxnum;
+ return nan;
}
/* Remove the zeros of tan(m_pi x)
* by subtracting the nearest integer from x
@@ -1403,7 +1403,12 @@
{
if(q == numext::floor(q))
{
- return maxnum;
+ if (x == numext::floor(x) && long(x) % 2 == 0) {
+ return maxnum;
+ }
+ else {
+ return nan;
+ }
}
p = x;
r = numext::floor(p);
@@ -1479,11 +1484,11 @@
Scalar nplus = n + one;
const Scalar nan = NumTraits<Scalar>::quiet_NaN();
- // Check that n is an integer
- if (numext::floor(n) != n) {
+ // Check that n is a non-negative integer
+ if (numext::floor(n) != n || n < zero) {
return nan;
}
- // Just return the digamma function for n = 1
+ // Just return the digamma function for n = 0
else if (n == zero) {
return digamma_impl<Scalar>::run(x);
}
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 6bbcf50..1819193 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -13,7 +13,7 @@
find_package (Threads)
find_package(GoogleHash)
-if(GOOGLEHASH_FOUND)
+if(GoogleHash_FOUND)
add_definitions("-DEIGEN_GOOGLEHASH_SUPPORT")
include_directories(${GOOGLEHASH_INCLUDES})
ei_add_property(EIGEN_TESTED_BACKENDS "GoogleHash, ")
@@ -23,7 +23,7 @@
find_package(Adolc)
-if(ADOLC_FOUND)
+if(Adolc_FOUND)
include_directories(${ADOLC_INCLUDES})
ei_add_property(EIGEN_TESTED_BACKENDS "Adolc, ")
if(EIGEN_TEST_CXX11)
@@ -104,6 +104,7 @@
ei_add_test(gmres)
ei_add_test(dgmres)
ei_add_test(minres)
+ei_add_test(idrs)
ei_add_test(levenberg_marquardt)
ei_add_test(kronecker_product)
ei_add_test(bessel_functions)
diff --git a/unsupported/test/cxx11_tensor_morphing.cpp b/unsupported/test/cxx11_tensor_morphing.cpp
index e8c42a4..ed5d5ad 100644
--- a/unsupported/test/cxx11_tensor_morphing.cpp
+++ b/unsupported/test/cxx11_tensor_morphing.cpp
@@ -479,6 +479,66 @@
}
}
+template<typename T, int DataLayout>
+static void test_empty_slice()
+{
+ Tensor<T, 3, DataLayout> tensor(2,3,5);
+ tensor.setRandom();
+ Tensor<T, 3, DataLayout> copy = tensor;
+
+ // empty size in first dimension
+ Eigen::DSizes<ptrdiff_t, 3> indices1(1,2,3);
+ Eigen::DSizes<ptrdiff_t, 3> sizes1(0,1,2);
+ Tensor<T, 3, DataLayout> slice1(0,1,2);
+ slice1.setRandom();
+ tensor.slice(indices1, sizes1) = slice1;
+
+ // empty size in second dimension
+ Eigen::DSizes<ptrdiff_t, 3> indices2(1,2,3);
+ Eigen::DSizes<ptrdiff_t, 3> sizes2(1,0,2);
+ Tensor<T, 3, DataLayout> slice2(1,0,2);
+ slice2.setRandom();
+ tensor.slice(indices2, sizes2) = slice2;
+
+ // empty size in third dimension
+ Eigen::DSizes<ptrdiff_t, 3> indices3(1,2,3);
+ Eigen::DSizes<ptrdiff_t, 3> sizes3(1,1,0);
+ Tensor<T, 3, DataLayout> slice3(1,1,0);
+ slice3.setRandom();
+ tensor.slice(indices3, sizes3) = slice3;
+
+ // empty size in first and second dimension
+ Eigen::DSizes<ptrdiff_t, 3> indices4(1,2,3);
+ Eigen::DSizes<ptrdiff_t, 3> sizes4(0,0,2);
+ Tensor<T, 3, DataLayout> slice4(0,0,2);
+ slice4.setRandom();
+ tensor.slice(indices4, sizes4) = slice4;
+
+ // empty size in second and third dimension
+ Eigen::DSizes<ptrdiff_t, 3> indices5(1,2,3);
+ Eigen::DSizes<ptrdiff_t, 3> sizes5(1,0,0);
+ Tensor<T, 3, DataLayout> slice5(1,0,0);
+ slice5.setRandom();
+ tensor.slice(indices5, sizes5) = slice5;
+
+ // empty size in all dimensions
+ Eigen::DSizes<ptrdiff_t, 3> indices6(1,2,3);
+ Eigen::DSizes<ptrdiff_t, 3> sizes6(0,0,0);
+ Tensor<T, 3, DataLayout> slice6(0,0,0);
+ slice6.setRandom();
+ tensor.slice(indices6, sizes6) = slice6;
+
+ // none of these operations should change the tensor's components
+ // because all of the rvalue slices have at least one zero dimension
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ VERIFY_IS_EQUAL(tensor(i,j,k), copy(i,j,k));
+ }
+ }
+ }
+}
+
#define CALL_SUBTEST_PART(PART) \
CALL_SUBTEST_##PART
diff --git a/unsupported/test/idrs.cpp b/unsupported/test/idrs.cpp
new file mode 100644
index 0000000..f88c016
--- /dev/null
+++ b/unsupported/test/idrs.cpp
@@ -0,0 +1,27 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
+// Copyright (C) 2012 Kolja Brix <brix@igpm.rwth-aaachen.de>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "../../test/sparse_solver.h"
+#include <Eigen/IterativeSolvers>
+
+template<typename T> void test_idrs_T()
+{
+ IDRS<SparseMatrix<T>, DiagonalPreconditioner<T> > idrs_colmajor_diag;
+ IDRS<SparseMatrix<T>, IncompleteLUT<T> > idrs_colmajor_ilut;
+
+ CALL_SUBTEST( check_sparse_square_solving(idrs_colmajor_diag) );
+ CALL_SUBTEST( check_sparse_square_solving(idrs_colmajor_ilut) );
+}
+
+EIGEN_DECLARE_TEST(idrs)
+{
+ CALL_SUBTEST_1(test_idrs_T<double>());
+ CALL_SUBTEST_2(test_idrs_T<std::complex<double> >());
+}
diff --git a/unsupported/test/special_functions.cpp b/unsupported/test/special_functions.cpp
index 56848da..589bb76 100644
--- a/unsupported/test/special_functions.cpp
+++ b/unsupported/test/special_functions.cpp
@@ -191,10 +191,10 @@
// Check the zeta function against scipy.special.zeta
{
- ArrayType x(7), q(7), res(7), ref(7);
- x << 1.5, 4, 10.5, 10000.5, 3, 1, 0.9;
- q << 2, 1.5, 3, 1.0001, -2.5, 1.2345, 1.2345;
- ref << 1.61237534869, 0.234848505667, 1.03086757337e-5, 0.367879440865, 0.054102025820864097, plusinf, nan;
+ ArrayType x(10), q(10), res(10), ref(10);
+ x << 1.5, 4, 10.5, 10000.5, 3, 1, 0.9, 2, 3, 4;
+ q << 2, 1.5, 3, 1.0001, -2.5, 1.2345, 1.2345, -1, -2, -3;
+ ref << 1.61237534869, 0.234848505667, 1.03086757337e-5, 0.367879440865, 0.054102025820864097, plusinf, nan, plusinf, nan, plusinf;
CALL_SUBTEST( verify_component_wise(ref, ref); );
CALL_SUBTEST( res = x.zeta(q); verify_component_wise(res, ref); );
CALL_SUBTEST( res = zeta(x,q); verify_component_wise(res, ref); );
@@ -202,9 +202,9 @@
// digamma
{
- ArrayType x(7), res(7), ref(7);
- x << 1, 1.5, 4, -10.5, 10000.5, 0, -1;
- ref << -0.5772156649015329, 0.03648997397857645, 1.2561176684318, 2.398239129535781, 9.210340372392849, plusinf, plusinf;
+ ArrayType x(9), res(9), ref(9);
+ x << 1, 1.5, 4, -10.5, 10000.5, 0, -1, -2, -3;
+ ref << -0.5772156649015329, 0.03648997397857645, 1.2561176684318, 2.398239129535781, 9.210340372392849, nan, nan, nan, nan;
CALL_SUBTEST( verify_component_wise(ref, ref); );
CALL_SUBTEST( res = x.digamma(); verify_component_wise(res, ref); );
@@ -213,10 +213,10 @@
#if EIGEN_HAS_C99_MATH
{
- ArrayType n(11), x(11), res(11), ref(11);
- n << 1, 1, 1, 1.5, 17, 31, 28, 8, 42, 147, 170;
- x << 2, 3, 25.5, 1.5, 4.7, 11.8, 17.7, 30.2, 15.8, 54.1, 64;
- ref << 0.644934066848, 0.394934066848, 0.0399946696496, nan, 293.334565435, 0.445487887616, -2.47810300902e-07, -8.29668781082e-09, -0.434562276666, 0.567742190178, -0.0108615497927;
+ ArrayType n(16), x(16), res(16), ref(16);
+ n << 1, 1, 1, 1.5, 17, 31, 28, 8, 42, 147, 170, -1, 0, 1, 2, 3;
+ x << 2, 3, 25.5, 1.5, 4.7, 11.8, 17.7, 30.2, 15.8, 54.1, 64, -1, -2, -3, -4, -5;
+ ref << 0.644934066848, 0.394934066848, 0.0399946696496, nan, 293.334565435, 0.445487887616, -2.47810300902e-07, -8.29668781082e-09, -0.434562276666, 0.567742190178, -0.0108615497927, nan, nan, plusinf, nan, plusinf;
CALL_SUBTEST( verify_component_wise(ref, ref); );
if(sizeof(RealScalar)>=8) { // double
diff --git a/unsupported/test/special_packetmath.cpp b/unsupported/test/special_packetmath.cpp
index 4f426eb..31233f1 100644
--- a/unsupported/test/special_packetmath.cpp
+++ b/unsupported/test/special_packetmath.cpp
@@ -114,7 +114,7 @@
Scalar(std::pow(Scalar(10), internal::random<Scalar>(Scalar(-1),Scalar(2))));
}
-#if EIGEN_HAS_C99_MATH && (__cplusplus > 199711L)
+#if EIGEN_HAS_C99_MATH && (EIGEN_COMP_CXXVER >= 11)
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasLGamma, std::lgamma, internal::plgamma);
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasErf, std::erf, internal::perf);
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasErfc, std::erfc, internal::perfc);