Update Eigen to commit:3564668908afc66351c1c3cc47dca2fcdb91dc12
CHANGELOG
=========
356466890 - Fix overalign check.
f3929ac7e - Fix EIGEN_HAS_CXX17_OVERALIGN for icc
1b33a6374 - Fixes git add . doesn't include scripts/buildtests.in
a8bab0d8a - Patch SparseLU
910f6f65d - Adjust thresholds for bfloat16 product tests that are currently failing.
311cc0f9c - Enable NEON pcmp, plset, and complex psqrt
dbf7ae6f9 - Fix up C++ version detection macros and cmake tests.
bb6675caf - Fix incorrect NEON native fp16 multiplication.
dd85d2694 - Revert "Avoid mixing types in CompressedStorage.h"
c4fb6af24 - Enable NEON pabs for unsigned int types
400bc5cd5 - Add sparse_basic_1 to smoke tests.
04e4f0bb2 - Add missing colon in SparseMatrix.h.
3d8a8def8 - Avoid mixing types in CompressedStorage.h
4bb244679 - Add operators to CompressedStorageIterator
e1aee4ab3 - Update test of numext::signbit.
3717854a2 - Use numext::signbit instead of std::signbit, which is not defined for bfloat16.
37de43290 - Avoid using std::raise() for divide by zero
62de593c4 - Allow std::initializer_list constructors in constexpr expressions
6d3e3678b - optimize equalspace packetop
200483194 - add EqualSpaced / setEqualSpaced
273f80384 - Add BDCSVD_LAPACKE binding
03c9b4738 - Enable direct access for NestByValue.
b59f18b4f - Increase L2 and L3 cache size for Power10.
c614b2bbd - Fix index type for sparse index sorting.
44fe53915 - add sparse sort inner vectors function
d19416714 - Fix the bug using neon instruction fmla for data type half
31ab62d34 - Add support for Power10 (AltiVec) MMA instructions for bfloat16.
dcb042a87 - Fix serialization for non-compressed matrices.
2260e11eb - Fix reshape strides when input has non-zero inner stride.
23524ab6f - Changing BiCGSTAB parameters initialization so that it works with custom types
ab2b26fbc - Fix sparseLU solver when destination has a non-unit stride.
551eebc8c - Add synchronize method to all devices.
b7551bff9 - Fix a bunch of annoying compiler warnings in tests
e7b1ad031 - Add serialization for sparse matrix and sparse vector.
044f3f623 - Fix bug in handmade_aligned_realloc
672868393 - Small cleanup of IDRS.h
02805bd56 - Fix AVX2 psignbit
399ce1ed6 - Fix duplicate execution code for Power 8 Altivec in pstore_partial.
6431dfdb5 - Cross product for vectors of size 2. Fixes #1037
8588d8c74 - Correct pnegate for floating-point zero.
5eacb9e11 - Put brackets around unsigned type names.
37e40dca8 - Fix ambiguity in PPC for vec_splats call.
7dc6db75d - Fix typo in CholmodSupport
9b6d624ea - fix neon
7e398e943 - Add missing return keyword in psignbit for NEON.
82b152dbe - Add signbit function
8f8e36458 - Remove recently added sparse assert in SparseMapBase.
01a31b81b - Remove unused parameter name.
c5b896c5a - Allow empty matrices to be resized.
886aad136 - Disable patan for double on PPC.
ab407b2b6 - Fix handmade_aligned_malloc offset computation.
adb30efb2 - Add assert for invalid outerIndexPtr array in SparseMapBase.
c27d1abe4 - Fix pragma check for disabling fastmath.
a22637137 - Change handmade_aligned_malloc/realloc/free to store a 1 byte offset instead of absolute address
bf48d4633 - Explicitly state that indices must be sorted.
PiperOrigin-RevId: 500278312
Change-Id: I5ba2533555adb8e254879b45d370bb8fccb74c8f
diff --git a/Eigen/CholmodSupport b/Eigen/CholmodSupport
index bed8924..1037bd5 100644
--- a/Eigen/CholmodSupport
+++ b/Eigen/CholmodSupport
@@ -22,7 +22,7 @@
* This module provides an interface to the Cholmod library which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
* It provides the two following main factorization classes:
* - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization.
- * - class CholmodDecomposiiton: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial).
+ * - class CholmodDecomposition: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial).
*
* For the sake of completeness, this module also propose the two following classes:
* - class CholmodSimplicialLLT
diff --git a/Eigen/Core b/Eigen/Core
index 48c2121..623d735 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -177,10 +177,6 @@
#include "src/Core/arch/Default/TypeCasting.h"
#include "src/Core/arch/Default/GenericPacketMathFunctionsFwd.h"
-#ifndef EIGEN_GPU_COMPILE_PHASE
- #include <csignal>
-#endif
-
#if defined EIGEN_VECTORIZE_AVX512
#if defined EIGEN_VECTORIZE_AVX512FP16
#include "src/Core/arch/AVX512/PacketMathFP16.h"
diff --git a/Eigen/SVD b/Eigen/SVD
index 3451794..8241c73 100644
--- a/Eigen/SVD
+++ b/Eigen/SVD
@@ -36,14 +36,17 @@
#include "src/SVD/SVDBase.h"
#include "src/SVD/JacobiSVD.h"
#include "src/SVD/BDCSVD.h"
-#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
+#ifdef EIGEN_USE_LAPACKE
#ifdef EIGEN_USE_MKL
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h"
#endif
+#ifndef EIGEN_USE_LAPACKE_STRICT
#include "src/SVD/JacobiSVD_LAPACKE.h"
#endif
+#include "src/SVD/BDCSVD_LAPACKE.h"
+#endif
#include "src/Core/util/ReenableStupidWarnings.h"
diff --git a/Eigen/SparseLU b/Eigen/SparseLU
index 37c4a5c..047cf0d 100644
--- a/Eigen/SparseLU
+++ b/Eigen/SparseLU
@@ -25,8 +25,6 @@
#include "src/Core/util/DisableStupidWarnings.h"
-#include "src/SparseLU/SparseLU_gemm_kernel.h"
-
#include "src/SparseLU/SparseLU_Structs.h"
#include "src/SparseLU/SparseLU_SupernodalMatrix.h"
#include "src/SparseLU/SparseLUImpl.h"
diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h
index 7be8971..d7a5e7a 100644
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -193,8 +193,9 @@
*
* \sa Array(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args)
*/
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Array(const std::initializer_list<std::initializer_list<Scalar>>& list) : Base(list) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array(
+ const std::initializer_list<std::initializer_list<Scalar>>& list)
+ : Base(list) {}
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename T>
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index a62f54d..b33c052 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -306,6 +306,20 @@
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar>(low,high,Derived::SizeAtCompileTime));
}
+template <typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessEqualSpacedReturnType
+DenseBase<Derived>::EqualSpaced(Index size, const Scalar& low, const Scalar& step) {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return DenseBase<Derived>::NullaryExpr(size, internal::equalspaced_op<Scalar>(low, step));
+}
+
+template <typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessEqualSpacedReturnType
+DenseBase<Derived>::EqualSpaced(const Scalar& low, const Scalar& step) {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::equalspaced_op<Scalar>(low, step));
+}
+
/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
template<typename Derived>
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApproxToConstant
@@ -455,6 +469,19 @@
return setLinSpaced(size(), low, high);
}
+template <typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setEqualSpaced(Index newSize, const Scalar& low,
+ const Scalar& step) {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return derived() = Derived::NullaryExpr(newSize, internal::equalspaced_op<Scalar>(low, step));
+}
+template <typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setEqualSpaced(const Scalar& low,
+ const Scalar& step) {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return setEqualSpaced(size(), low, step);
+}
+
// zero:
/** \returns an expression of a zero matrix.
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index 6e17779..bcfd0f6 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -258,6 +258,8 @@
EIGEN_DEPRECATED typedef CwiseNullaryOp<internal::linspaced_op<Scalar>,PlainObject> SequentialLinSpacedReturnType;
/** \internal Represents a vector with linearly spaced coefficients that allows random access. */
typedef CwiseNullaryOp<internal::linspaced_op<Scalar>,PlainObject> RandomAccessLinSpacedReturnType;
+ /** \internal Represents a vector with equally spaced coefficients that allows random access. */
+ typedef CwiseNullaryOp<internal::equalspaced_op<Scalar>, PlainObject> RandomAccessEqualSpacedReturnType;
/** \internal the return type of MatrixBase::eigenvalues() */
typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
@@ -336,6 +338,11 @@
EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
LinSpaced(const Scalar& low, const Scalar& high);
+ EIGEN_DEVICE_FUNC static const RandomAccessEqualSpacedReturnType
+ EqualSpaced(Index size, const Scalar& low, const Scalar& step);
+ EIGEN_DEVICE_FUNC static const RandomAccessEqualSpacedReturnType
+ EqualSpaced(const Scalar& low, const Scalar& step);
+
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
@@ -357,6 +364,8 @@
EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value);
EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high);
+ EIGEN_DEVICE_FUNC Derived& setEqualSpaced(Index size, const Scalar& low, const Scalar& step);
+ EIGEN_DEVICE_FUNC Derived& setEqualSpaced(const Scalar& low, const Scalar& step);
EIGEN_DEVICE_FUNC Derived& setZero();
EIGEN_DEVICE_FUNC Derived& setOnes();
EIGEN_DEVICE_FUNC Derived& setRandom();
@@ -661,8 +670,7 @@
protected:
EIGEN_DEFAULT_COPY_CONSTRUCTOR(DenseBase)
/** Default constructor. Do nothing. */
- EIGEN_DEVICE_FUNC DenseBase()
- {
+ EIGEN_DEVICE_FUNC constexpr DenseBase() {
/* Just checks for self-consistency of the flags.
* Only do it when debugging Eigen, as this borders on paranoia and could slow compilation down
*/
diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h
index 5e2763e..cf588bd 100644
--- a/Eigen/src/Core/DenseStorage.h
+++ b/Eigen/src/Core/DenseStorage.h
@@ -26,14 +26,12 @@
struct constructor_without_unaligned_array_assert {};
-template<typename T, int Size>
-EIGEN_DEVICE_FUNC
-void check_static_allocation_size()
-{
- // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit
- #if EIGEN_STACK_ALLOCATION_LIMIT
+template <typename T, int Size>
+EIGEN_DEVICE_FUNC constexpr void check_static_allocation_size() {
+// if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit
+#if EIGEN_STACK_ALLOCATION_LIMIT
EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
- #endif
+#endif
}
/** \internal
@@ -47,16 +45,10 @@
{
T array[Size];
- EIGEN_DEVICE_FUNC
- plain_array()
- {
- check_static_allocation_size<T,Size>();
- }
+ EIGEN_DEVICE_FUNC constexpr plain_array() { check_static_allocation_size<T, Size>(); }
- EIGEN_DEVICE_FUNC
- plain_array(constructor_without_unaligned_array_assert)
- {
- check_static_allocation_size<T,Size>();
+ EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) {
+ check_static_allocation_size<T, Size>();
}
};
@@ -87,17 +79,13 @@
{
EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];
- EIGEN_DEVICE_FUNC
- plain_array()
- {
+ EIGEN_DEVICE_FUNC constexpr plain_array() {
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);
check_static_allocation_size<T,Size>();
}
- EIGEN_DEVICE_FUNC
- plain_array(constructor_without_unaligned_array_assert)
- {
- check_static_allocation_size<T,Size>();
+ EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) {
+ check_static_allocation_size<T, Size>();
}
};
@@ -106,17 +94,13 @@
{
EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];
- EIGEN_DEVICE_FUNC
- plain_array()
- {
+ EIGEN_DEVICE_FUNC constexpr plain_array() {
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);
check_static_allocation_size<T,Size>();
}
- EIGEN_DEVICE_FUNC
- plain_array(constructor_without_unaligned_array_assert)
- {
- check_static_allocation_size<T,Size>();
+ EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) {
+ check_static_allocation_size<T, Size>();
}
};
@@ -125,17 +109,13 @@
{
EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];
- EIGEN_DEVICE_FUNC
- plain_array()
- {
+ EIGEN_DEVICE_FUNC constexpr plain_array() {
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);
check_static_allocation_size<T,Size>();
}
- EIGEN_DEVICE_FUNC
- plain_array(constructor_without_unaligned_array_assert)
- {
- check_static_allocation_size<T,Size>();
+ EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) {
+ check_static_allocation_size<T, Size>();
}
};
@@ -144,17 +124,13 @@
{
EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];
- EIGEN_DEVICE_FUNC
- plain_array()
- {
+ EIGEN_DEVICE_FUNC constexpr plain_array() {
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);
check_static_allocation_size<T,Size>();
}
- EIGEN_DEVICE_FUNC
- plain_array(constructor_without_unaligned_array_assert)
- {
- check_static_allocation_size<T,Size>();
+ EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) {
+ check_static_allocation_size<T, Size>();
}
};
@@ -162,8 +138,8 @@
struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
{
T array[1];
- EIGEN_DEVICE_FUNC plain_array() {}
- EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
+ EIGEN_DEVICE_FUNC constexpr plain_array() {}
+ EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) {}
};
struct plain_array_helper {
@@ -211,26 +187,25 @@
{
internal::plain_array<T,Size,Options_> m_data;
public:
- EIGEN_DEVICE_FUNC DenseStorage() {
+ constexpr EIGEN_DEVICE_FUNC DenseStorage() {
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
}
- EIGEN_DEVICE_FUNC
- explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+ EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()) {}
#if defined(EIGEN_DENSE_STORAGE_CTOR_PLUGIN)
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC constexpr
DenseStorage(const DenseStorage& other) : m_data(other.m_data) {
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
}
#else
- EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) = default;
+ EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage&) = default;
#endif
- EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) = default;
- EIGEN_DEVICE_FUNC DenseStorage(DenseStorage&&) = default;
- EIGEN_DEVICE_FUNC DenseStorage& operator=(DenseStorage&&) = default;
- EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) {
+ EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(const DenseStorage&) = default;
+ EIGEN_DEVICE_FUNC constexpr DenseStorage(DenseStorage&&) = default;
+ EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(DenseStorage&&) = default;
+ EIGEN_DEVICE_FUNC constexpr DenseStorage(Index size, Index rows, Index cols) {
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
- eigen_internal_assert(size==rows*cols && rows==Rows_ && cols==Cols_);
+ eigen_internal_assert(size == rows * cols && rows == Rows_ && cols == Cols_);
EIGEN_UNUSED_VARIABLE(size);
EIGEN_UNUSED_VARIABLE(rows);
EIGEN_UNUSED_VARIABLE(cols);
@@ -238,57 +213,148 @@
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
numext::swap(m_data, other.m_data);
}
- EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return Rows_;}
- EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) EIGEN_NOEXCEPT {return Cols_;}
- EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
- EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
- EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
- EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+ EIGEN_DEVICE_FUNC static constexpr Index rows(void) EIGEN_NOEXCEPT { return Rows_; }
+ EIGEN_DEVICE_FUNC static constexpr Index cols(void) EIGEN_NOEXCEPT { return Cols_; }
+ EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index, Index) {}
+ EIGEN_DEVICE_FUNC constexpr void resize(Index, Index, Index) {}
+ EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; }
+ EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; }
};
// null matrix
-template<typename T, int Rows_, int Cols_, int Options_> class DenseStorage<T, 0, Rows_, Cols_, Options_>
+template<typename T, int Rows_, int Cols_, int Options_>
+class DenseStorage<T, 0, Rows_, Cols_, Options_>
{
public:
- EIGEN_DEVICE_FUNC DenseStorage() {}
- EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {}
- EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {}
- EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }
- EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {}
- EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {}
- EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return Rows_;}
- EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) EIGEN_NOEXCEPT {return Cols_;}
- EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
- EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
- EIGEN_DEVICE_FUNC const T *data() const { return 0; }
- EIGEN_DEVICE_FUNC T *data() { return 0; }
+ static_assert(Rows_ * Cols_ == 0, "The fixed number of rows times columns must equal the storage size.");
+ EIGEN_DEVICE_FUNC constexpr DenseStorage() {}
+ EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) {}
+ EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage&) {}
+ EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(const DenseStorage&) { return *this; }
+ EIGEN_DEVICE_FUNC constexpr DenseStorage(Index,Index,Index) {}
+ EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage& ) {}
+ EIGEN_DEVICE_FUNC static constexpr Index rows(void) EIGEN_NOEXCEPT {return Rows_;}
+ EIGEN_DEVICE_FUNC static constexpr Index cols(void) EIGEN_NOEXCEPT {return Cols_;}
+ EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index,Index,Index) {}
+ EIGEN_DEVICE_FUNC constexpr void resize(Index,Index,Index) {}
+ EIGEN_DEVICE_FUNC constexpr const T *data() const { return 0; }
+ EIGEN_DEVICE_FUNC constexpr T *data() { return 0; }
};
// more specializations for null matrices; these are necessary to resolve ambiguities
-template<typename T, int Options_> class DenseStorage<T, 0, Dynamic, Dynamic, Options_>
-: public DenseStorage<T, 0, 0, 0, Options_> { };
+template<typename T, int Options_>
+class DenseStorage<T, 0, Dynamic, Dynamic, Options_> {
+ Index m_rows;
+ Index m_cols;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
+ EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : DenseStorage() {}
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_rows(other.m_rows), m_cols(other.m_cols) {}
+ EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) {
+ m_rows = other.m_rows;
+ m_cols = other.m_cols;
+ return *this;
+ }
+ EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {
+ eigen_assert(m_rows * m_cols == 0 && "The number of rows times columns must equal the storage size.");
+ }
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
+ numext::swap(m_rows,other.m_rows);
+ numext::swap(m_cols,other.m_cols);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT {return m_rows;}
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT {return m_cols;}
+ EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) {
+ m_rows = rows;
+ m_cols = cols;
+ eigen_assert(m_rows * m_cols == 0 && "The number of rows times columns must equal the storage size.");
+ }
+ EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) {
+ m_rows = rows;
+ m_cols = cols;
+ eigen_assert(m_rows * m_cols == 0 && "The number of rows times columns must equal the storage size.");
+ }
+ EIGEN_DEVICE_FUNC const T *data() const { return nullptr; }
+ EIGEN_DEVICE_FUNC T *data() { return nullptr; }
+};
-template<typename T, int Rows_, int Options_> class DenseStorage<T, 0, Rows_, Dynamic, Options_>
-: public DenseStorage<T, 0, 0, 0, Options_> { };
+template<typename T, int Rows_, int Options_>
+class DenseStorage<T, 0, Rows_, Dynamic, Options_> {
+ Index m_cols;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
+ EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : DenseStorage() {}
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_cols(other.m_cols) {}
+ EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) {
+ m_cols = other.m_cols;
+ return *this;
+ }
+ EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {
+ eigen_assert(Rows_ * m_cols == 0 && "The number of rows times columns must equal the storage size.");
+ }
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
+ numext::swap(m_cols, other.m_cols);
+ }
+ EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return Rows_;}
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols(void) const EIGEN_NOEXCEPT {return m_cols;}
+ EIGEN_DEVICE_FUNC void conservativeResize(Index, Index, Index cols) {
+ m_cols = cols;
+ eigen_assert(Rows_ * m_cols == 0 && "The number of rows times columns must equal the storage size.");
+ }
+ EIGEN_DEVICE_FUNC void resize(Index, Index, Index cols) {
+ m_cols = cols;
+ eigen_assert(Rows_ * m_cols == 0 && "The number of rows times columns must equal the storage size.");
+ }
+ EIGEN_DEVICE_FUNC const T *data() const { return nullptr; }
+ EIGEN_DEVICE_FUNC T *data() { return nullptr; }
+};
-template<typename T, int Cols_, int Options_> class DenseStorage<T, 0, Dynamic, Cols_, Options_>
-: public DenseStorage<T, 0, 0, 0, Options_> { };
+template<typename T, int Cols_, int Options_>
+class DenseStorage<T, 0, Dynamic, Cols_, Options_> {
+ Index m_rows;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
+ EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : DenseStorage() {}
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_rows(other.m_rows) {}
+ EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) {
+ m_rows = other.m_rows;
+ return *this;
+ }
+ EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {
+ eigen_assert(m_rows * Cols_ == 0 && "The number of rows times columns must equal the storage size.");
+ }
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
+ numext::swap(m_rows, other.m_rows);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows(void) const EIGEN_NOEXCEPT {return m_rows;}
+ EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) EIGEN_NOEXCEPT {return Cols_;}
+ EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) {
+ m_rows = rows;
+ eigen_assert(m_rows * Cols_ == 0 && "The number of rows times columns must equal the storage size.");
+ }
+ EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) {
+ m_rows = rows;
+ eigen_assert(m_rows * Cols_ == 0 && "The number of rows times columns must equal the storage size.");
+ }
+ EIGEN_DEVICE_FUNC const T *data() const { return nullptr; }
+ EIGEN_DEVICE_FUNC T *data() { return nullptr; }
+};
// dynamic-size matrix with fixed-size storage
-template<typename T, int Size, int Options_> class DenseStorage<T, Size, Dynamic, Dynamic, Options_>
+template<typename T, int Size, int Options_>
+class DenseStorage<T, Size, Dynamic, Dynamic, Options_>
{
internal::plain_array<T,Size,Options_> m_data;
Index m_rows;
Index m_cols;
public:
- EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
- EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
- : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
- EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
- : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows), m_cols(other.m_cols)
- {
- internal::plain_array_helper::copy(other.m_data, m_rows * m_cols, m_data);
- }
+ EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(), m_rows(0), m_cols(0) {}
+ EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert)
+ : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
+ EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage& other)
+ : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows), m_cols(other.m_cols) {
+ internal::plain_array_helper::copy(other.m_data, m_rows * m_cols, m_data);
+ }
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other)
@@ -299,36 +365,42 @@
}
return *this;
}
- EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
+ EIGEN_DEVICE_FUNC constexpr DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
{
internal::plain_array_helper::swap(m_data, m_rows * m_cols, other.m_data, other.m_rows * other.m_cols);
numext::swap(m_rows,other.m_rows);
numext::swap(m_cols,other.m_cols);
}
- EIGEN_DEVICE_FUNC Index rows() const {return m_rows;}
- EIGEN_DEVICE_FUNC Index cols() const {return m_cols;}
- EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
- EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
- EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
- EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+ EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows; }
+ EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols; }
+ EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index rows, Index cols) {
+ m_rows = rows;
+ m_cols = cols;
+ }
+ EIGEN_DEVICE_FUNC constexpr void resize(Index, Index rows, Index cols) {
+ m_rows = rows;
+ m_cols = cols;
+ }
+ EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; }
+ EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; }
};
// dynamic-size matrix with fixed-size storage and fixed width
-template<typename T, int Size, int Cols_, int Options_> class DenseStorage<T, Size, Dynamic, Cols_, Options_>
+template<typename T, int Size, int Cols_, int Options_>
+class DenseStorage<T, Size, Dynamic, Cols_, Options_>
{
internal::plain_array<T,Size,Options_> m_data;
Index m_rows;
public:
- EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
- EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
- : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
- EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
- : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows)
- {
+ EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_rows(0) {}
+ EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert)
+ : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
+ EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage& other)
+ : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows) {
internal::plain_array_helper::copy(other.m_data, m_rows * Cols_, m_data);
- }
-
+ }
+
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other)
@@ -338,34 +410,34 @@
}
return *this;
}
- EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {}
+ EIGEN_DEVICE_FUNC constexpr DenseStorage(Index, Index rows, Index) : m_rows(rows) {}
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
{
internal::plain_array_helper::swap(m_data, m_rows * Cols_, other.m_data, other.m_rows * Cols_);
numext::swap(m_rows, other.m_rows);
}
- EIGEN_DEVICE_FUNC Index rows(void) const EIGEN_NOEXCEPT {return m_rows;}
- EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols(void) const EIGEN_NOEXCEPT {return Cols_;}
- EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
- EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; }
- EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
- EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+ EIGEN_DEVICE_FUNC constexpr Index rows(void) const EIGEN_NOEXCEPT { return m_rows; }
+ EIGEN_DEVICE_FUNC constexpr Index cols(void) const EIGEN_NOEXCEPT { return Cols_; }
+ EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
+ EIGEN_DEVICE_FUNC constexpr void resize(Index, Index rows, Index) { m_rows = rows; }
+ EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; }
+ EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; }
};
// dynamic-size matrix with fixed-size storage and fixed height
-template<typename T, int Size, int Rows_, int Options_> class DenseStorage<T, Size, Rows_, Dynamic, Options_>
+template<typename T, int Size, int Rows_, int Options_>
+class DenseStorage<T, Size, Rows_, Dynamic, Options_>
{
internal::plain_array<T,Size,Options_> m_data;
Index m_cols;
public:
- EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
- EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
- : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
- EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
- : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(other.m_cols)
- {
+ EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_cols(0) {}
+ EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert)
+ : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
+ EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage& other)
+ : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(other.m_cols) {
internal::plain_array_helper::copy(other.m_data, Rows_ * m_cols, m_data);
- }
+ }
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other)
@@ -380,30 +452,32 @@
internal::plain_array_helper::swap(m_data, Rows_ * m_cols, other.m_data, Rows_ * other.m_cols);
numext::swap(m_cols, other.m_cols);
}
- EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows(void) const EIGEN_NOEXCEPT {return Rows_;}
- EIGEN_DEVICE_FUNC Index cols(void) const EIGEN_NOEXCEPT {return m_cols;}
- EIGEN_DEVICE_FUNC void conservativeResize(Index, Index, Index cols) { m_cols = cols; }
- EIGEN_DEVICE_FUNC void resize(Index, Index, Index cols) { m_cols = cols; }
- EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
- EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+ EIGEN_DEVICE_FUNC constexpr Index rows(void) const EIGEN_NOEXCEPT { return Rows_; }
+ EIGEN_DEVICE_FUNC constexpr Index cols(void) const EIGEN_NOEXCEPT { return m_cols; }
+ EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index, Index cols) { m_cols = cols; }
+ EIGEN_DEVICE_FUNC constexpr void resize(Index, Index, Index cols) { m_cols = cols; }
+ EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; }
+ EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; }
};
// purely dynamic matrix.
-template<typename T, int Options_> class DenseStorage<T, Dynamic, Dynamic, Dynamic, Options_>
+template<typename T, int Options_>
+class DenseStorage<T, Dynamic, Dynamic, Dynamic, Options_>
{
T *m_data;
Index m_rows;
Index m_cols;
public:
- EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
- EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+ EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
+ EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(0), m_rows(0), m_cols(0) {}
- EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)
- : m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)
- {
+ EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)
+ : m_data(internal::conditional_aligned_new_auto<T, (Options_ & DontAlign) == 0>(size)),
+ m_rows(rows),
+ m_cols(cols) {
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0);
- }
+ }
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
: m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(other.m_rows*other.m_cols))
, m_rows(other.m_rows)
@@ -473,19 +547,19 @@
};
// matrix with dynamic width and fixed height (so that matrix has dynamic size).
-template<typename T, int Rows_, int Options_> class DenseStorage<T, Dynamic, Rows_, Dynamic, Options_>
-{
+template<typename T, int Rows_, int Options_>
+class DenseStorage<T, Dynamic, Rows_, Dynamic, Options_> {
T *m_data;
Index m_cols;
public:
- EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}
- explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
- EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size)), m_cols(cols)
- {
+ EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(0), m_cols(0) {}
+ explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
+ EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)
+ : m_data(internal::conditional_aligned_new_auto<T, (Options_ & DontAlign) == 0>(size)), m_cols(cols) {
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
eigen_internal_assert(size==rows*cols && rows==Rows_ && cols >=0);
EIGEN_UNUSED_VARIABLE(rows);
- }
+ }
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
: m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(Rows_*other.m_cols))
, m_cols(other.m_cols)
@@ -522,7 +596,7 @@
numext::swap(m_data,other.m_data);
numext::swap(m_cols,other.m_cols);
}
- EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT {return Rows_;}
+ EIGEN_DEVICE_FUNC static constexpr Index rows(void) EIGEN_NOEXCEPT { return Rows_; }
EIGEN_DEVICE_FUNC Index cols(void) const EIGEN_NOEXCEPT {return m_cols;}
EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols)
{
@@ -547,19 +621,20 @@
};
// matrix with dynamic height and fixed width (so that matrix has dynamic size).
-template<typename T, int Cols_, int Options_> class DenseStorage<T, Dynamic, Dynamic, Cols_, Options_>
+template<typename T, int Cols_, int Options_>
+class DenseStorage<T, Dynamic, Dynamic, Cols_, Options_>
{
T *m_data;
Index m_rows;
public:
- EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}
- explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
- EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size)), m_rows(rows)
- {
+ EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(0), m_rows(0) {}
+ explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
+ EIGEN_DEVICE_FUNC constexpr DenseStorage(Index size, Index rows, Index cols)
+ : m_data(internal::conditional_aligned_new_auto<T, (Options_ & DontAlign) == 0>(size)), m_rows(rows) {
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
eigen_internal_assert(size==rows*cols && rows>=0 && cols == Cols_);
EIGEN_UNUSED_VARIABLE(cols);
- }
+ }
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
: m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(other.m_rows*Cols_))
, m_rows(other.m_rows)
@@ -597,7 +672,7 @@
numext::swap(m_rows,other.m_rows);
}
EIGEN_DEVICE_FUNC Index rows(void) const EIGEN_NOEXCEPT {return m_rows;}
- EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) {return Cols_;}
+ EIGEN_DEVICE_FUNC static constexpr Index cols(void) { return Cols_; }
void conservativeResize(Index size, Index rows, Index)
{
m_data = internal::conditional_aligned_realloc_new_auto<T,(Options_&DontAlign)==0>(m_data, size, m_rows*Cols_);
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index b67c4ed..af773dd 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -563,13 +563,13 @@
parg(const Packet& a) { using numext::arg; return arg(a); }
-/** \internal \returns \a a logically shifted by N bits to the right */
+/** \internal \returns \a a arithmetically shifted by N bits to the right */
template<int N> EIGEN_DEVICE_FUNC inline int
parithmetic_shift_right(const int& a) { return a >> N; }
template<int N> EIGEN_DEVICE_FUNC inline long int
parithmetic_shift_right(const long int& a) { return a >> N; }
-/** \internal \returns \a a arithmetically shifted by N bits to the right */
+/** \internal \returns \a a logically shifted by N bits to the right */
template<int N> EIGEN_DEVICE_FUNC inline int
plogical_shift_right(const int& a) { return static_cast<int>(static_cast<unsigned int>(a) >> N); }
template<int N> EIGEN_DEVICE_FUNC inline long int
@@ -1191,6 +1191,34 @@
return preciprocal<Packet>(psqrt(a));
}
+template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
+ bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
+ struct psignbit_impl;
+template <typename Packet, bool IsInteger>
+struct psignbit_impl<Packet, true, IsInteger> {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return numext::signbit(a); }
+};
+template <typename Packet>
+struct psignbit_impl<Packet, false, false> {
+ // generic implementation if not specialized in PacketMath.h
+ // slower than arithmetic shift
+ typedef typename unpacket_traits<Packet>::type Scalar;
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Packet run(const Packet& a) {
+ const Packet cst_pos_one = pset1<Packet>(Scalar(1));
+ const Packet cst_neg_one = pset1<Packet>(Scalar(-1));
+ return pcmp_eq(por(pand(a, cst_neg_one), cst_pos_one), cst_neg_one);
+ }
+};
+template <typename Packet>
+struct psignbit_impl<Packet, false, true> {
+ // generic implementation for integer packets
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return pcmp_lt(a, pzero(a)); }
+};
+/** \internal \returns the sign bit of \a a as a bitmask*/
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr Packet
+psignbit(const Packet& a) { return psignbit_impl<Packet>::run(a); }
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 0eee333..b194353 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -1531,6 +1531,37 @@
}
#endif
+template <typename Scalar, bool IsInteger = NumTraits<Scalar>::IsInteger, bool IsSigned = NumTraits<Scalar>::IsSigned>
+struct signbit_impl;
+template <typename Scalar>
+struct signbit_impl<Scalar, false, true> {
+ static constexpr size_t Size = sizeof(Scalar);
+ static constexpr size_t Shift = (CHAR_BIT * Size) - 1;
+ using intSize_t = typename get_integer_by_size<Size>::signed_type;
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Scalar run(const Scalar& x) {
+ intSize_t a = bit_cast<intSize_t, Scalar>(x);
+ a = a >> Shift;
+ Scalar result = bit_cast<Scalar, intSize_t>(a);
+ return result;
+ }
+};
+template <typename Scalar>
+struct signbit_impl<Scalar, true, true> {
+ static constexpr size_t Size = sizeof(Scalar);
+ static constexpr size_t Shift = (CHAR_BIT * Size) - 1;
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Scalar run(const Scalar& x) { return x >> Shift; }
+};
+template <typename Scalar>
+struct signbit_impl<Scalar, true, false> {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Scalar run(const Scalar& ) {
+ return Scalar(0);
+ }
+};
+template <typename Scalar>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Scalar signbit(const Scalar& x) {
+ return signbit_impl<Scalar>::run(x);
+}
+
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T exp(const T &x) {
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 23acd8a..c7747f1 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -312,8 +312,9 @@
*
* \sa Matrix(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args)
*/
- EIGEN_DEVICE_FUNC
- explicit EIGEN_STRONG_INLINE Matrix(const std::initializer_list<std::initializer_list<Scalar>>& list) : Base(list) {}
+ EIGEN_DEVICE_FUNC explicit constexpr EIGEN_STRONG_INLINE Matrix(
+ const std::initializer_list<std::initializer_list<Scalar>>& list)
+ : Base(list) {}
#ifndef EIGEN_PARSED_BY_DOXYGEN
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index d87c6b3..ea2178f 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -387,20 +387,9 @@
/////////// Geometry module ///////////
- #ifndef EIGEN_PARSED_BY_DOXYGEN
- /// \internal helper struct to form the return type of the cross product
- template<typename OtherDerived> struct cross_product_return_type {
- typedef typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
- typedef Matrix<Scalar,MatrixBase::RowsAtCompileTime,MatrixBase::ColsAtCompileTime> type;
- };
- #endif // EIGEN_PARSED_BY_DOXYGEN
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
-#ifndef EIGEN_PARSED_BY_DOXYGEN
- inline typename cross_product_return_type<OtherDerived>::type
-#else
- inline PlainObject
-#endif
+ inline typename internal::cross_impl<Derived, OtherDerived>::return_type
cross(const MatrixBase<OtherDerived>& other) const;
template<typename OtherDerived>
diff --git a/Eigen/src/Core/NestByValue.h b/Eigen/src/Core/NestByValue.h
index 5f1dc84..311cb5a 100644
--- a/Eigen/src/Core/NestByValue.h
+++ b/Eigen/src/Core/NestByValue.h
@@ -43,6 +43,8 @@
public:
typedef typename internal::dense_xpr_base<NestByValue>::type Base;
+ static constexpr bool HasDirectAccess = internal::has_direct_access<ExpressionType>::ret;
+
EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue)
EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {}
@@ -54,6 +56,18 @@
EIGEN_DEVICE_FUNC const ExpressionType& nestedExpression() const { return m_expression; }
+ EIGEN_DEVICE_FUNC typename std::enable_if<HasDirectAccess, const Scalar*>::type data() const {
+ return m_expression.data();
+ }
+
+ EIGEN_DEVICE_FUNC typename std::enable_if<HasDirectAccess, Index>::type innerStride() const {
+ return m_expression.innerStride();
+ }
+
+ EIGEN_DEVICE_FUNC typename std::enable_if<HasDirectAccess, Index>::type outerStride() const {
+ return m_expression.outerStride();
+ }
+
protected:
const ExpressionType m_expression;
};
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h
index 4f1f992..53362ef 100644
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -95,7 +95,7 @@
// Load src into registers first. This allows the memcpy to be elided by CUDA.
const Src staged = src;
EIGEN_USING_STD(memcpy)
- memcpy(&tgt, &staged, sizeof(Tgt));
+ memcpy(static_cast<void*>(&tgt),static_cast<const void*>(&staged), sizeof(Tgt));
return tgt;
}
} // namespace numext
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 222eaf5..60a75b1 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -29,18 +29,13 @@
namespace internal {
template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
- template<typename Index>
- EIGEN_DEVICE_FUNC
- static EIGEN_ALWAYS_INLINE void run(Index, Index)
- {
- }
+ template <typename Index>
+ EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index, Index) {}
};
template<> struct check_rows_cols_for_overflow<Dynamic> {
- template<typename Index>
- EIGEN_DEVICE_FUNC
- static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols)
- {
+ template <typename Index>
+ EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index rows, Index cols) {
// http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
// we assume Index is signed
Index max_index = (std::size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed
@@ -160,12 +155,10 @@
* provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
*
* See DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const for details. */
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const
- {
- if(Flags & RowMajorBit)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeff(Index rowId, Index colId) const {
+ if (Flags & RowMajorBit)
return m_storage.data()[colId + rowId * m_storage.cols()];
- else // column-major
+ else // column-major
return m_storage.data()[rowId + colId * m_storage.rows()];
}
@@ -183,12 +176,10 @@
* provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
*
* See DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index,Index) const for details. */
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId)
- {
- if(Flags & RowMajorBit)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index rowId, Index colId) {
+ if (Flags & RowMajorBit)
return m_storage.data()[colId + rowId * m_storage.cols()];
- else // column-major
+ else // column-major
return m_storage.data()[rowId + colId * m_storage.rows()];
}
@@ -196,28 +187,20 @@
* provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
*
* See DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index) const for details. */
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
- {
- return m_storage.data()[index];
- }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index index) { return m_storage.data()[index]; }
/** This is the const version of coeffRef(Index,Index) which is thus synonym of coeff(Index,Index).
* It is provided for convenience. */
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const
- {
- if(Flags & RowMajorBit)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeffRef(Index rowId, Index colId) const {
+ if (Flags & RowMajorBit)
return m_storage.data()[colId + rowId * m_storage.cols()];
- else // column-major
+ else // column-major
return m_storage.data()[rowId + colId * m_storage.rows()];
}
/** This is the const version of coeffRef(Index) which is thus synonym of coeff(Index).
* It is provided for convenience. */
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const
- {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeffRef(Index index) const {
return m_storage.data()[index];
}
@@ -279,9 +262,7 @@
*
* \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t)
*/
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE void resize(Index rows, Index cols)
- {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index rows, Index cols) {
eigen_assert(internal::check_implication(RowsAtCompileTime!=Dynamic, rows==RowsAtCompileTime)
&& internal::check_implication(ColsAtCompileTime!=Dynamic, cols==ColsAtCompileTime)
&& internal::check_implication(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic, rows<=MaxRowsAtCompileTime)
@@ -309,12 +290,13 @@
*
* \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t)
*/
- EIGEN_DEVICE_FUNC
- inline void resize(Index size)
- {
- EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
- eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime==Dynamic || size<=MaxSizeAtCompileTime)) || SizeAtCompileTime == size) && size>=0);
- #ifdef EIGEN_INITIALIZE_COEFFS
+ EIGEN_DEVICE_FUNC inline constexpr void resize(Index size) {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
+ eigen_assert(
+ ((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime == Dynamic || size <= MaxSizeAtCompileTime)) ||
+ SizeAtCompileTime == size) &&
+ size >= 0);
+#ifdef EIGEN_INITIALIZE_COEFFS
bool size_changed = size != this->size();
#endif
if(RowsAtCompileTime == 1)
@@ -334,11 +316,7 @@
*
* \sa resize(Index,Index)
*/
- EIGEN_DEVICE_FUNC
- inline void resize(NoChange_t, Index cols)
- {
- resize(rows(), cols);
- }
+ EIGEN_DEVICE_FUNC inline constexpr void resize(NoChange_t, Index cols) { resize(rows(), cols); }
/** Resizes the matrix, changing only the number of rows. For the parameter of type NoChange_t, just pass the special value \c NoChange
* as in the example below.
@@ -348,11 +326,7 @@
*
* \sa resize(Index,Index)
*/
- EIGEN_DEVICE_FUNC
- inline void resize(Index rows, NoChange_t)
- {
- resize(rows, cols());
- }
+ EIGEN_DEVICE_FUNC inline constexpr void resize(Index rows, NoChange_t) { resize(rows, cols()); }
/** Resizes \c *this to have the same dimensions as \a other.
* Takes care of doing all the checking that's needed.
@@ -552,10 +526,9 @@
/** \brief Constructs a Matrix or Array and initializes it by elements given by an initializer list of initializer
* lists
*/
- EIGEN_DEVICE_FUNC
- explicit EIGEN_STRONG_INLINE PlainObjectBase(const std::initializer_list<std::initializer_list<Scalar>>& list)
- : m_storage()
- {
+ EIGEN_DEVICE_FUNC explicit constexpr EIGEN_STRONG_INLINE PlainObjectBase(
+ const std::initializer_list<std::initializer_list<Scalar>>& list)
+ : m_storage() {
size_t list_size = 0;
if (list.begin() != list.end()) {
list_size = list.begin()->size();
diff --git a/Eigen/src/Core/Reshaped.h b/Eigen/src/Core/Reshaped.h
index c90e61f..81355ac 100644
--- a/Eigen/src/Core/Reshaped.h
+++ b/Eigen/src/Core/Reshaped.h
@@ -251,7 +251,7 @@
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
inline Index outerStride() const
{
- return ((Flags&RowMajorBit)==RowMajorBit) ? this->cols() : this->rows();
+ return (((Flags&RowMajorBit)==RowMajorBit) ? this->cols() : this->rows()) * m_xpr.innerStride();
}
protected:
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index ecbb73c..0fe830a 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -229,10 +229,7 @@
Vectorizable = 1,
AlignedOnScalar = 1,
HasCmp = 1,
- size=4,
-
- // requires AVX512
- HasShift = 0,
+ size=4
};
};
#endif
@@ -360,6 +357,35 @@
EIGEN_STRONG_INLINE Packet4l plogical_shift_left(Packet4l a) {
return _mm256_slli_epi64(a, N);
}
+#ifdef EIGEN_VECTORIZE_AVX512FP16
+template <int N>
+EIGEN_STRONG_INLINE Packet4l parithmetic_shift_right(Packet4l a) { return _mm256_srai_epi64(a, N); }
+#else
+template <int N>
+EIGEN_STRONG_INLINE std::enable_if_t< (N == 0), Packet4l> parithmetic_shift_right(Packet4l a) {
+ return a;
+}
+template <int N>
+EIGEN_STRONG_INLINE std::enable_if_t< (N > 0) && (N < 32), Packet4l> parithmetic_shift_right(Packet4l a) {
+ __m256i hi_word = _mm256_srai_epi32(a, N);
+ __m256i lo_word = _mm256_srli_epi64(a, N);
+ return _mm256_blend_epi32(hi_word, lo_word, 0b01010101);
+}
+template <int N>
+EIGEN_STRONG_INLINE std::enable_if_t< (N >= 32) && (N < 63), Packet4l> parithmetic_shift_right(Packet4l a) {
+ __m256i hi_word = _mm256_srai_epi32(a, 31);
+ __m256i lo_word = _mm256_shuffle_epi32(_mm256_srai_epi32(a, N - 32), (shuffle_mask<1, 1, 3, 3>::mask));
+ return _mm256_blend_epi32(hi_word, lo_word, 0b01010101);
+}
+template <int N>
+EIGEN_STRONG_INLINE std::enable_if_t< (N == 63), Packet4l> parithmetic_shift_right(Packet4l a) {
+ return _mm256_shuffle_epi32(_mm256_srai_epi32(a, 31), (shuffle_mask<1, 1, 3, 3>::mask));
+}
+template <int N>
+EIGEN_STRONG_INLINE std::enable_if_t< (N < 0) || (N > 63), Packet4l> parithmetic_shift_right(Packet4l a) {
+ return parithmetic_shift_right<int(N&63)>(a);
+}
+#endif
template <>
EIGEN_STRONG_INLINE Packet4l pload<Packet4l>(const int64_t* from) {
EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(reinterpret_cast<const __m256i*>(from));
@@ -518,11 +544,13 @@
template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)
{
- return _mm256_sub_ps(_mm256_set1_ps(0.0),a);
+ const Packet8f mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000));
+ return _mm256_xor_ps(a, mask);
}
template<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a)
{
- return _mm256_sub_pd(_mm256_set1_pd(0.0),a);
+ const Packet4d mask = _mm256_castsi256_pd(_mm256_set1_epi64x(0x8000000000000000ULL));
+ return _mm256_xor_pd(a, mask);
}
template<> EIGEN_STRONG_INLINE Packet8i pnegate(const Packet8i& a)
{
@@ -1103,6 +1131,13 @@
#endif
}
+template<> EIGEN_STRONG_INLINE Packet8h psignbit(const Packet8h& a) { return _mm_srai_epi16(a, 15); }
+template<> EIGEN_STRONG_INLINE Packet8bf psignbit(const Packet8bf& a) { return _mm_srai_epi16(a, 15); }
+template<> EIGEN_STRONG_INLINE Packet8f psignbit(const Packet8f& a) { return _mm256_castsi256_ps(parithmetic_shift_right<31>((Packet8i)_mm256_castps_si256(a))); }
+#ifdef EIGEN_VECTORIZE_AVX2
+template<> EIGEN_STRONG_INLINE Packet4d psignbit(const Packet4d& a) { return _mm256_castsi256_pd(parithmetic_shift_right<63>((Packet4l)_mm256_castpd_si256(a))); }
+#endif
+
template<> EIGEN_STRONG_INLINE Packet8f pfrexp<Packet8f>(const Packet8f& a, Packet8f& exponent) {
return pfrexp_generic(a,exponent);
}
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 5f37740..159ae3e 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -346,11 +346,13 @@
template <>
EIGEN_STRONG_INLINE Packet16f pnegate(const Packet16f& a) {
- return _mm512_sub_ps(_mm512_set1_ps(0.0), a);
+ const __m512i mask = _mm512_set1_epi32(0x80000000);
+ return _mm512_castsi512_ps(_mm512_xor_epi32(_mm512_castps_si512(a), mask));
}
template <>
EIGEN_STRONG_INLINE Packet8d pnegate(const Packet8d& a) {
- return _mm512_sub_pd(_mm512_set1_pd(0.0), a);
+ const __m512i mask = _mm512_set1_epi64(0x8000000000000000ULL);
+ return _mm512_castsi512_pd(_mm512_xor_epi64(_mm512_castpd_si512(a), mask));
}
template <>
EIGEN_STRONG_INLINE Packet16i pnegate(const Packet16i& a) {
@@ -1127,6 +1129,11 @@
return _mm512_abs_epi32(a);
}
+template<> EIGEN_STRONG_INLINE Packet16h psignbit(const Packet16h& a) { return _mm256_srai_epi16(a, 15); }
+template<> EIGEN_STRONG_INLINE Packet16bf psignbit(const Packet16bf& a) { return _mm256_srai_epi16(a, 15); }
+template<> EIGEN_STRONG_INLINE Packet16f psignbit(const Packet16f& a) { return _mm512_castsi512_ps(_mm512_srai_epi32(_mm512_castps_si512(a), 31)); }
+template<> EIGEN_STRONG_INLINE Packet8d psignbit(const Packet8d& a) { return _mm512_castsi512_pd(_mm512_srai_epi64(_mm512_castpd_si512(a), 63)); }
+
template<>
EIGEN_STRONG_INLINE Packet16f pfrexp<Packet16f>(const Packet16f& a, Packet16f& exponent){
return pfrexp_generic(a, exponent);
diff --git a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
index 324d050..f2e3e84 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
@@ -196,6 +196,13 @@
return _mm512_abs_ph(a);
}
+// psignbit
+
+template <>
+EIGEN_STRONG_INLINE Packet32h psignbit<Packet32h>(const Packet32h& a) {
+ return _mm512_castsi512_ph(_mm512_srai_epi16(_mm512_castph_si512(a), 15));
+}
+
// pmin
template <>
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProduct.h b/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
index 3b3b558..2429c81 100644
--- a/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
+++ b/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
@@ -91,6 +91,20 @@
};
};
+template<>
+struct quad_traits<bfloat16>
+{
+ typedef Packet8bf vectortype;
+ typedef PacketBlock<vectortype,4> type;
+ typedef vectortype rhstype;
+ enum
+ {
+ vectorsize = packet_traits<bfloat16>::size,
+ size = 8,
+ rows = 4
+ };
+};
+
// MatrixProduct decomposes real/imaginary vectors into a real vector and an imaginary vector, this turned out
// to be faster than Eigen's usual approach of having real/imaginary pairs on a single vector. This constants then
// are responsible to extract from convert between Eigen's and MatrixProduct approach.
@@ -2781,6 +2795,31 @@
#endif
gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
}
+
+#if defined(__MMA__)
+template<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
+struct gebp_kernel<bfloat16, bfloat16, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
+{
+ typedef typename quad_traits<bfloat16>::vectortype Packet;
+ typedef typename quad_traits<bfloat16>::rhstype RhsPacket;
+
+ void operator()(const DataMapper& res, const bfloat16* blockA, const bfloat16* blockB,
+ Index rows, Index depth, Index cols, bfloat16 alpha,
+ Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0);
+};
+
+template<typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
+void gebp_kernel<bfloat16, bfloat16, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
+ ::operator()(const DataMapper& res, const bfloat16* blockA, const bfloat16* blockB,
+ Index rows, Index depth, Index cols, bfloat16 alpha,
+ Index strideA, Index strideB, Index offsetA, Index offsetB)
+ {
+ const Index accRows = quad_traits<bfloat16>::rows;
+ const Index accCols = quad_traits<bfloat16>::size;
+
+ Eigen::internal::gemmMMAbfloat16<Index, Packet, RhsPacket, DataMapper, accRows, accCols>(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
+ }
+#endif
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h b/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h
index aa1cbf8..e4013a7 100644
--- a/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h
+++ b/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h
@@ -28,6 +28,8 @@
#include "../../InternalHeaderCheck.h"
+#include "MatrixProductMMAbfloat16.h"
+
namespace Eigen {
namespace internal {
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h b/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h
new file mode 100644
index 0000000..b3e063d
--- /dev/null
+++ b/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h
@@ -0,0 +1,385 @@
+ #ifndef EIGEN_MATRIX_PRODUCT_MMA_BFLOAT16_ALTIVEC_H
+ #define EIGEN_MATRIX_PRODUCT_MMA_BFLOAT16_ALTIVEC_H
+
+namespace Eigen {
+
+namespace internal {
+
+EIGEN_STRONG_INLINE void pgerMMAbfloat16(__vector_quad* acc, const Packet8bf& a, const Packet8bf& b, int maskX, int maskY)
+{
+ switch(maskX){
+ case 15:
+ switch(maskY){
+ case 0b1111:
+ __builtin_mma_xvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val));
+ break;
+ case 0b0011:
+ __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b1111, 0b11, 0b11);
+ break;
+ case 0b0001:
+ __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b1111, 0b1, 0b11);
+ break;
+ case 0b0111:
+ __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b1111, 0b111, 0b11);
+ break;
+ }
+ break;
+ case 3:
+ switch(maskY){
+ case 0b1111:
+ __builtin_mma_xvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val));
+ break;
+ case 0b0011:
+ __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b11, 0b11, 0b11);
+ break;
+ case 0b0001:
+ __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b11, 0b1, 0b11);
+ break;
+ case 0b0111:
+ __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b11, 0b111, 0b11);
+ break;
+ }
+ break;
+ case 1:
+ switch(maskY){
+ case 0b1111:
+ __builtin_mma_xvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val));
+ break;
+ case 0b0011:
+ __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b1, 0b11, 0b11);
+ break;
+ case 0b0001:
+ __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b1, 0b1, 0b11);
+ break;
+ case 0b0111:
+ __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b1, 0b111, 0b11);
+ break;
+ }
+ break;
+ case 0b0111:
+ switch(maskY){
+ case 0b1111:
+ __builtin_mma_xvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val));
+ break;
+ case 0b0011:
+ __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b111, 0b11, 0b11);
+ break;
+ case 0b0001:
+ __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b111, 0b1, 0b11);
+ break;
+ case 0b0111:
+ __builtin_mma_pmxvbf16ger2pp(acc, reinterpret_cast<Packet16uc>(a.m_val), reinterpret_cast<Packet16uc>(b.m_val), 0b111, 0b111, 0b11);
+ break;
+ }
+ break;
+ }
+}
+
+EIGEN_STRONG_INLINE void scaleAndStore(float* result, float* acc, Packet4f pAlpha)
+{
+ Packet4f result_block = ploadu<Packet4f>(result);
+ Packet4f packet_pmadd = pmadd(pload<Packet4f>(acc), pAlpha, result_block);
+ pstoreu(result, packet_pmadd);
+}
+
+template<int num_packets, bool zero>
+EIGEN_STRONG_INLINE Packet8bf loadLhsBfloat16(const bfloat16* indexA)
+{
+ Packet8bf lhs1 = ploadu<Packet8bf>(indexA);
+ Packet8bf lhs2;
+ const int packet_size = 8; //We fit 8 bfloat16 elements in a 128-bit register
+ if(zero){
+ lhs2 = pset1<Packet8bf>(Eigen::bfloat16(0));
+ }
+ else lhs2 = ploadu<Packet8bf>(indexA + num_packets*packet_size);
+ return vec_mergeh(lhs1.m_val, lhs2.m_val);
+}
+
+template<bool zero>
+EIGEN_STRONG_INLINE Packet8bf loadLhsBfloat16ExtraRows(const bfloat16* indexA, Index strideA, Index row, int extra_rows)
+{
+ EIGEN_ALIGN16 bfloat16 lhs_array[8] = {Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0)};
+ int count = 0;
+ const bfloat16* idxA = indexA + row*strideA;
+ for(int row_count = 0; row_count < extra_rows; row_count++){
+ lhs_array[count++] = *idxA;
+ if(!zero) lhs_array[count] = *(idxA+1);
+ count++;
+ idxA += strideA;
+ }
+ return pload<Packet8bf>(lhs_array);
+}
+
+template<bool zero>
+EIGEN_STRONG_INLINE Packet8bf loadRhsBfloat16(const bfloat16* baseB, Index strideB, int i, int k)
+{
+ const bfloat16* indexB = baseB + strideB*4*i + (k*4);
+ Packet8bf rhs1 = ploadu<Packet8bf>(indexB);
+ if(zero){
+ Packet8bf rhs2 = pset1<Packet8bf>(Eigen::bfloat16(0));
+ return vec_mergeh(rhs1.m_val, rhs2.m_val);
+ }
+ //r = vec_perm (a, b, c)
+ //Let v be the concatenation of a and b.
+ //Each byte of r is selected by using the least-significant 5 bits of the corresponding byte of c as an index into v
+ //We need these elements from rhs: 0, 4, 1, 5, 2, 6, 3, 7
+ Packet16uc c = {0x0u, 0x1u, 0x8u, 0x9u, 0x2u, 0x3u, 0xAu, 0xB, 0x4, 0x5, 0xCu, 0xDu, 0x6u, 0x7u, 0xEu, 0xFu};
+ return vec_perm(rhs1.m_val, rhs1.m_val, c);
+}
+
+template<bool zero>
+EIGEN_STRONG_INLINE Packet8bf loadRhsBfloat16ExtraCols(const bfloat16* blockB, Index strideB, Index offsetB, Index col, int i, int k, int extra_cols)
+{
+ EIGEN_ALIGN16 bfloat16 rhs_vector[8] = {Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0), Eigen::bfloat16(0)};
+ const bfloat16* indexB = blockB + ((col+4*i)*strideB)+k+offsetB;
+ for(int c = 0; c < extra_cols; c++){
+ rhs_vector[2*c] = *indexB;
+ if(!zero) rhs_vector[2*c+1] = *(indexB+1);
+ indexB += strideB;
+ }
+ return pload<Packet8bf>(rhs_vector);
+}
+
+template<int num_acc, int num_packets, bool zero, bool rhs_extra_cols, bool lhs_extra_rows>
+EIGEN_STRONG_INLINE void KLoop
+(
+ const bfloat16* indexA,
+ const bfloat16* indexB,
+ __vector_quad (&quad_acc)[num_acc],
+ Index strideA,
+ Index strideB,
+ Index offsetB,
+ Index k,
+ Index row,
+ Index col,
+ int extra_rows,
+ int extra_cols,
+ int mask_rows = 0xF,
+ int mask_cols = 0xF
+)
+{
+ Packet8bf lhs;
+ Packet8bf rhs[num_acc];
+ if(lhs_extra_rows) lhs = loadLhsBfloat16ExtraRows<zero>(indexA+k, strideA, row, extra_rows);
+ else lhs = loadLhsBfloat16<num_packets, zero>(indexA + k*num_packets*8); //a packet of bfloat16 has 8 elements
+ for(int i = 0; i < num_acc; i++){
+ if(!rhs_extra_cols)
+ rhs[i] = loadRhsBfloat16<zero>(indexB, strideB, i, k);
+ else{
+ rhs[i] = loadRhsBfloat16ExtraCols<zero>(indexB, strideB, offsetB, col, i, k, extra_cols);
+ }
+ pgerMMAbfloat16(&(quad_acc[i]), rhs[i], lhs, mask_cols, mask_rows);
+ }
+}
+
+template<const int num_acc, const int standard_block_size, const int num_packets, bool rhsExtraCols = false, bool lhsExtraRows = false>
+void colLoopBody(Index* p_col, Index row, Index depth, Index cols, Index rows, int offset_row, int block_index, Packet4f pAlpha, const bfloat16* indexA, Index strideA, const bfloat16* blockB, Index strideB, Index offsetB, float* result, int extra_cols = 0, int extra_rows = 0, int mask_cols = 0xF, int mask_rows = 0xF)
+{
+ int col = *p_col;
+ int count;
+ int max, step, bound;
+ const bfloat16* indexB;
+
+ if(num_acc == 1) bound = 0;
+ else bound = 1;
+
+ if(rhsExtraCols){
+ count = 0;
+ max = 1;
+ step = 1;
+ indexB = blockB;
+ }
+ else{
+ count = col;
+ step = num_acc * 4; //each accumulator has 4 elements
+ max = cols/step;
+ indexB = blockB + 4*offsetB + strideB*col;
+ }
+
+ while(count/step + bound < max){
+ Index k = 0;
+ EIGEN_ALIGN32 float acc[num_acc][4][4];
+ __vector_quad quad_acc[num_acc];
+
+ for(int i = 0; i < num_acc; i++)
+ __builtin_mma_xxsetaccz(&(quad_acc[i]));
+
+ if(depth%2 != 0){
+ KLoop<num_acc, num_packets, true, rhsExtraCols, lhsExtraRows>(indexA, indexB, quad_acc, strideA, strideB, offsetB, k, row, col, extra_rows, extra_cols, mask_rows, mask_cols);
+ k = 1;
+ }
+ for(; k/2 < depth/2; k += 2){
+ KLoop<num_acc, num_packets, false, rhsExtraCols, lhsExtraRows>(indexA, indexB, quad_acc, strideA, strideB, offsetB, k, row, col, extra_rows, extra_cols, mask_rows, mask_cols);
+ }
+ for(int i = 0; i < num_acc; i++){
+ __builtin_mma_disassemble_acc((void*)acc[i], &(quad_acc[i]));
+ if(lhsExtraRows){
+ for(int x = 0; x < extra_cols; x++){
+ for(int y = 0; y < extra_rows; y++){
+ result[((col+i*4)+x)*rows + row + y] += acc[i][x][y]*(pAlpha[0]);
+ }
+ }
+ }
+ else{
+ if(rhsExtraCols){
+ for(int x = 0; x < cols-col; x++){
+ scaleAndStore(result + ((col+i*4)+x)*rows + row + offset_row,acc[i][x], pAlpha);
+ }
+ }
+ else{
+ for(int x = 0; x < 4; x++){
+ scaleAndStore(result + ((col+i*4)+x)*rows + (block_index*16) + offset_row,acc[i][x], pAlpha);
+ }
+ }
+ }
+ }
+ count += step;
+ if(!rhsExtraCols) {
+ indexB += strideB*step;
+ col += step;
+ }
+ }
+ *p_col = col;
+}
+
+template<typename Index, typename Packet, typename RhsPacket, typename DataMapper, const Index accRows, const Index accCols>
+void gemmMMAbfloat16(const DataMapper& res, const bfloat16* blockA, const bfloat16* blockB, Index rows, Index depth, Index cols, bfloat16 alpha, Index strideA, Index strideB, Index offsetA, Index offsetB)
+{
+
+ if(rows == 0 || cols == 0 || depth == 0) return;
+ const Packet4f pAlpha = pset1<Packet4f>(Eigen::bfloat16_impl::bfloat16_to_float(alpha));
+ ei_declare_aligned_stack_constructed_variable(float, result, cols*rows, 0);
+
+ for(int j = 0; j < cols; j++){
+ for(int i = 0; i < rows; i++){
+ result[j*rows + i] = res(i,j);
+ }
+ }
+
+ Index row = 0;
+ Index col = 0;
+
+ if( strideA == -1 ) strideA = depth;
+ if( strideB == -1 ) strideB = depth;
+ //Packing is done in blocks.
+ //There are 3 possible block sizes:
+ //Blocks of 8 columns with 16 elements (8x16) as col major
+ //Blocks of 8 columns with 8 elements (8x8) as col major. This happens when 16 > rows >= 8
+ //Blocks of 8 columns with <8 elements as row major. This happens when there are fewer than 8 remaining rows
+
+ //Loop for LHS standard block (8x16)
+ int standard_block_size = 16;
+ const int standard_blocks_quantity = rows/standard_block_size; //Number of standard blocks
+ int bigSuffix = (2*8) * (strideA-offsetA-depth);
+ const bfloat16* indexA = blockA;
+ int block_index;
+ for(block_index = 0; block_index < standard_blocks_quantity; block_index++){
+ indexA += 2*8*offsetA;
+ for(int offset_row = 0; offset_row < standard_block_size; offset_row += 4){ //This block size has 16 rows maximum
+ col = 0;
+ colLoopBody<5, 16, 2>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
+ colLoopBody<4, 16, 2>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
+ colLoopBody<3, 16, 2>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
+ colLoopBody<2, 16, 2>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
+ colLoopBody<1, 16, 2>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
+ if(cols > col){
+ int extra_cols= cols-col;
+ int shift = (4-extra_cols>= 0) ? 4-extra_cols: 0;
+ int mask_cols= 0xF >> shift;
+ //Remember: It doesn't make sense to use multiple acc for extra_cols as we are unrolling the col loop
+ colLoopBody<1, 16, 2, true>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result, extra_cols, 4, mask_cols, 0xF);
+ }
+ }
+ row += 16;
+ indexA += bigSuffix + 2*8*depth;
+ }
+ //LHS (8x8) block
+ if(rows - standard_blocks_quantity*16 >= 8){
+ indexA += 1*8*offsetA + 2*8*offsetA;
+ for(int offset_row = 0; offset_row < 8; offset_row += 4){
+ col = 0;
+ colLoopBody<5, 8, 1>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
+ colLoopBody<4, 8, 1>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
+ colLoopBody<3, 8, 1>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
+ colLoopBody<2, 8, 1>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
+ colLoopBody<1, 8, 1>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result);
+ }
+ if(cols > col){
+ int extra_cols= cols-col;
+ int shift = (4-extra_cols>= 0) ? 4-extra_cols: 0;
+ int mask_cols= 0xF >> shift;
+
+ for(int offset_row = 0; offset_row < 8; offset_row += 4){
+ colLoopBody<1, 8, 1, true>(&col, row, depth, cols, rows, offset_row, block_index, pAlpha, indexA+offset_row, strideA, blockB, strideB, offsetB, result, extra_cols, 4, mask_cols, 0xF);
+ }
+ } //end extra cols
+ row += 8;
+ }
+ //extra rows
+ while(row < rows){
+ int extra_rows = rows-row;
+ int shift = (4-extra_rows >= 0) ? 4-extra_rows : 0;
+ int mask_rows = 0xF >> shift;
+ int extra_rows_or_four = (extra_rows <= 4) ? extra_rows : 4;
+
+ //This index is the beginning of remaining block.
+ //This last block for LHS is organized as RowMajor
+ col = 0;
+ colLoopBody<5, 8, 1, false, true>(&col, row, depth, cols, rows, 0, block_index, pAlpha, blockA, strideA, blockB, strideB, offsetB, result, 4, extra_rows_or_four, 0xF, mask_rows);
+ colLoopBody<4, 8, 1, false, true>(&col, row, depth, cols, rows, 0, block_index, pAlpha, blockA, strideA, blockB, strideB, offsetB, result, 4, extra_rows_or_four, 0xF, mask_rows);
+ colLoopBody<3, 8, 1, false, true>(&col, row, depth, cols, rows, 0, block_index, pAlpha, blockA, strideA, blockB, strideB, offsetB, result, 4, extra_rows_or_four, 0xF, mask_rows);
+ colLoopBody<2, 8, 1, false, true>(&col, row, depth, cols, rows, 0, block_index, pAlpha, blockA, strideA, blockB, strideB, offsetB, result, 4, extra_rows_or_four, 0xF, mask_rows);
+ colLoopBody<1, 8, 1, false, true>(&col, row, depth, cols, rows, 0, block_index, pAlpha, blockA, strideA, blockB, strideB, offsetB, result, 4, extra_rows_or_four, 0xF, mask_rows);
+ if(cols > col){
+ int extra_cols= cols-col;
+ int shift = (4-extra_cols>= 0) ? 4-extra_cols: 0;
+ int mask_cols= 0xF >> shift;
+ int extra_cols_or_four = (extra_cols <= 4) ? extra_cols : 4;
+
+ colLoopBody<1, 8, 1, true, true>(&col, row, depth, cols, rows, 0, block_index, pAlpha, blockA, strideA, blockB, strideB, offsetB, result, extra_cols_or_four, extra_rows_or_four, mask_cols, mask_rows);
+ }
+ row += extra_rows_or_four;
+ }
+
+ //Convert back to bfloat16
+ for(col = 0; col/4 < cols/4; col += 4){
+ int row;
+ for(row = 0; row/8 < rows/8; row += 8){
+ //get and save block
+ PacketBlock<Packet8bf,4> block;
+ for(int j = 0; j < 4; j++){
+ Packet4f temp_even, temp_odd;
+ EIGEN_ALIGN32 float even[4], odd[4];
+ for(int i = 0; i < 4; i++){
+ even[i] = result[(col + j)*rows + row + i*2];
+ odd[i] = result[(col + j)*rows + row + i*2+1];
+ }
+ temp_even = pload<Packet4f>(even);
+ temp_odd = pload<Packet4f>(odd);
+ block.packet[j] = F32ToBf16(temp_even, temp_odd);
+ }
+
+ res.template storePacketBlock<Packet8bf,4>(row, col, block);
+ }
+ //extra rows
+ while(row < rows){
+ for(int col_off = 0; col_off < 4; col_off++){
+ res(row, col+col_off) = Eigen::bfloat16(result[(col+col_off)*rows+row]);
+ }
+ row++;
+ }
+
+ }
+ //extra cols
+ while(col < cols){
+ for(int r = 0; r < rows; r++){
+ res(r, col) = Eigen::bfloat16(result[col*rows + r]);
+ }
+ col++;
+ }
+}
+
+
+}
+}
+#endif //EIGEN_MATRIX_PRODUCT_MMA_BFLOAT16_ALTIVEC_H
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 37398de..b0f8529 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -1460,11 +1460,6 @@
if (i < n2) {
*reinterpret_cast<uint8_t *>(to2 + i) = *reinterpret_cast<uint8_t *>(store2 + i);
}
-
- LOAD_STORE_UNROLL_16
- for (Index i = 0; i < n; i++) {
- to[i] = from[i];
- }
#endif
}
@@ -1575,6 +1570,9 @@
return pand<Packet8us>(p8us_abs_mask, a);
}
+template<> EIGEN_STRONG_INLINE Packet8bf psignbit(const Packet8bf& a) { return vec_sra(a.m_val, vec_splat_u16(15)); }
+template<> EIGEN_STRONG_INLINE Packet4f psignbit(const Packet4f& a) { return (Packet4f)vec_sra((Packet4i)a, vec_splats((unsigned int)(31))); }
+
template<int N> EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i& a)
{ return vec_sra(a,reinterpret_cast<Packet4ui>(pset1<Packet4i>(N))); }
template<int N> EIGEN_STRONG_INLINE Packet4i plogical_shift_right(const Packet4i& a)
@@ -2708,7 +2706,7 @@
HasAbs = 1,
HasSin = 0,
HasCos = 0,
- HasATan = 1,
+ HasATan = 0,
HasLog = 0,
HasExp = 1,
HasSqrt = 1,
@@ -2928,7 +2926,7 @@
return vec_sld(a, a, 8);
}
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); }
-
+template<> EIGEN_STRONG_INLINE Packet2d psignbit(const Packet2d& a) { return (Packet2d)vec_sra((Packet2l)a, vec_splats((unsigned long long)(63))); }
// VSX support varies between different compilers and even different
// versions of the same compiler. For gcc version >= 4.9.3, we can use
// vec_cts to efficiently convert Packet2d to Packet2l. Otherwise, use
diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index 008dd7a..3b1c6e7 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -66,6 +66,7 @@
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
+ HasSqrt = 1,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
@@ -406,6 +407,7 @@
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
+ HasSqrt = 1,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
diff --git a/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h b/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h
index e49e394..5dc5ef8 100644
--- a/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h
@@ -2,7 +2,7 @@
namespace Eigen {
namespace internal {
-
+
#if EIGEN_ARCH_ARM && EIGEN_COMP_CLANG
// Clang seems to excessively spill registers in the GEBP kernel on 32-bit arm.
@@ -183,7 +183,11 @@
}
};
-#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+// The register at operand 3 of fmla for data type half must be v0~v15, the compiler may not
+// allocate a required register for the '%2' of inline asm 'fmla %0.8h, %1.8h, %2.h[id]',
+// so inline assembly can't be used here to avoid the bug that vfmaq_lane_f16 is implemented
+// through a costly dup in gcc compiler.
+#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC && EIGEN_COMP_CLANG
template<>
struct gebp_traits <half,half,false,false,Architecture::NEON>
@@ -212,9 +216,11 @@
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const
{}
- EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
+ EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar*, RhsPacket&) const
{
- loadRhs(b,dest);
+ // If LHS is a Packet8h, we cannot correctly mimic a ploadquad of the RHS
+ // using a single scalar value.
+ eigen_assert(false && "Cannot loadRhsQuad for a scalar RHS.");
}
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/, const FixedInt<0>&) const
@@ -240,19 +246,10 @@
template<int LaneID>
EIGEN_STRONG_INLINE void madd_helper(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c) const
{
- #if EIGEN_COMP_GNUC_STRICT
- // 1. vfmaq_lane_f16 is implemented through a costly dup
- // 2. workaround the gcc register split problem on arm64-neon
- if(LaneID==0) asm("fmla %0.8h, %1.8h, %2.h[0]\n" : "+w" (c) : "w" (a), "w" (b) : );
- else if(LaneID==1) asm("fmla %0.8h, %1.8h, %2.h[1]\n" : "+w" (c) : "w" (a), "w" (b) : );
- else if(LaneID==2) asm("fmla %0.8h, %1.8h, %2.h[2]\n" : "+w" (c) : "w" (a), "w" (b) : );
- else if(LaneID==3) asm("fmla %0.8h, %1.8h, %2.h[3]\n" : "+w" (c) : "w" (a), "w" (b) : );
- #else
c = vfmaq_lane_f16(c, a, b, LaneID);
- #endif
}
};
-#endif // EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+#endif // EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC && EIGEN_COMP_CLANG
#endif // EIGEN_ARCH_ARM64
} // namespace internal
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 5cbf4ac..069bd4c 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -176,6 +176,7 @@
size = 4,
HasHalfPacket = 1,
+ HasCmp = 1,
HasAdd = 1,
HasSub = 1,
HasShift = 1,
@@ -188,7 +189,7 @@
HasMin = 1,
HasMax = 1,
HasConj = 1,
- HasSetLinear = 0,
+ HasSetLinear = 1,
HasBlend = 0,
HasDiv = 1,
@@ -237,7 +238,7 @@
HasMin = 1,
HasMax = 1,
HasConj = 1,
- HasSetLinear = 0,
+ HasSetLinear = 1,
HasBlend = 0
};
};
@@ -267,7 +268,7 @@
HasMin = 1,
HasMax = 1,
HasConj = 1,
- HasSetLinear = 0,
+ HasSetLinear = 1,
HasBlend = 0,
HasSqrt = 1
@@ -286,6 +287,7 @@
size = 8,
HasHalfPacket = 1,
+ HasCmp = 1,
HasAdd = 1,
HasSub = 1,
HasShift = 1,
@@ -298,7 +300,7 @@
HasMin = 1,
HasMax = 1,
HasConj = 1,
- HasSetLinear = 0,
+ HasSetLinear = 1,
HasBlend = 0
};
};
@@ -315,19 +317,20 @@
size = 8,
HasHalfPacket = 1,
+ HasCmp = 1,
HasAdd = 1,
HasSub = 1,
HasShift = 1,
HasMul = 1,
HasNegate = 0,
- HasAbs = 0,
+ HasAbs = 1,
HasAbsDiff = 1,
HasArg = 0,
HasAbs2 = 1,
HasMin = 1,
HasMax = 1,
HasConj = 1,
- HasSetLinear = 0,
+ HasSetLinear = 1,
HasBlend = 0,
HasSqrt = 1
};
@@ -345,6 +348,7 @@
size = 4,
HasHalfPacket = 1,
+ HasCmp = 1,
HasAdd = 1,
HasSub = 1,
HasShift = 1,
@@ -357,7 +361,7 @@
HasMin = 1,
HasMax = 1,
HasConj = 1,
- HasSetLinear = 0,
+ HasSetLinear = 1,
HasBlend = 0
};
};
@@ -374,19 +378,20 @@
size = 4,
HasHalfPacket = 1,
+ HasCmp = 1,
HasAdd = 1,
HasSub = 1,
HasShift = 1,
HasMul = 1,
HasNegate = 0,
- HasAbs = 0,
+ HasAbs = 1,
HasArg = 0,
HasAbs2 = 1,
HasAbsDiff = 1,
HasMin = 1,
HasMax = 1,
HasConj = 1,
- HasSetLinear = 0,
+ HasSetLinear = 1,
HasBlend = 0,
HasSqrt = 1
@@ -418,7 +423,7 @@
HasMin = 1,
HasMax = 1,
HasConj = 1,
- HasSetLinear = 0,
+ HasSetLinear = 1,
HasBlend = 0
};
};
@@ -441,14 +446,14 @@
HasShift = 1,
HasMul = 1,
HasNegate = 0,
- HasAbs = 0,
+ HasAbs = 1,
HasArg = 0,
HasAbs2 = 1,
HasAbsDiff = 1,
HasMin = 1,
HasMax = 1,
HasConj = 1,
- HasSetLinear = 0,
+ HasSetLinear = 1,
HasBlend = 0
};
};
@@ -2372,6 +2377,15 @@
}
template<> EIGEN_STRONG_INLINE Packet2ul pabs(const Packet2ul& a) { return a; }
+template <>
+EIGEN_STRONG_INLINE Packet2f psignbit(const Packet2f& a) {
+ return vreinterpret_f32_s32(vshr_n_s32(vreinterpret_s32_f32(a), 31));
+}
+template <>
+EIGEN_STRONG_INLINE Packet4f psignbit(const Packet4f& a) {
+ return vreinterpretq_f32_s32(vshrq_n_s32(vreinterpretq_s32_f32(a), 31));
+}
+
template<> EIGEN_STRONG_INLINE Packet2f pfrexp<Packet2f>(const Packet2f& a, Packet2f& exponent)
{ return pfrexp_generic(a,exponent); }
template<> EIGEN_STRONG_INLINE Packet4f pfrexp<Packet4f>(const Packet4f& a, Packet4f& exponent)
@@ -3397,7 +3411,7 @@
HasMin = 1,
HasMax = 1,
HasConj = 1,
- HasSetLinear = 0,
+ HasSetLinear = 1,
HasBlend = 0,
HasDiv = 1,
HasFloor = 1,
@@ -3754,7 +3768,7 @@
HasMin = 1,
HasMax = 1,
HasConj = 1,
- HasSetLinear = 0,
+ HasSetLinear = 1,
HasBlend = 0,
HasDiv = 1,
@@ -3904,6 +3918,11 @@
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); }
+template <>
+EIGEN_STRONG_INLINE Packet2d psignbit(const Packet2d& a) {
+ return vreinterpretq_f64_s64(vshrq_n_s64(vreinterpretq_s64_f64(a), 63));
+}
+
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
{ return vaddvq_f64(a); }
@@ -3993,7 +4012,7 @@
HasMin = 1,
HasMax = 1,
HasConj = 1,
- HasSetLinear = 0,
+ HasSetLinear = 1,
HasBlend = 0,
HasInsert = 1,
HasReduxp = 1,
@@ -4465,12 +4484,22 @@
return vabsq_f16(a);
}
+template<>
+EIGEN_STRONG_INLINE Packet8hf psignbit(const Packet8hf& a) {
+ return vreinterpretq_f16_s16(vshrq_n_s16(vreinterpretq_s16_f16(a), 15));
+}
+
template <>
EIGEN_STRONG_INLINE Packet4hf pabs<Packet4hf>(const Packet4hf& a) {
return vabs_f16(a);
}
template <>
+EIGEN_STRONG_INLINE Packet4hf psignbit(const Packet4hf& a) {
+ return vreinterpret_f16_s16( vshr_n_s16( vreinterpret_s16_f16(a), 15));
+}
+
+template <>
EIGEN_STRONG_INLINE Eigen::half predux<Packet8hf>(const Packet8hf& a) {
float16x4_t a_lo, a_hi, sum;
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 847ff07..a0ff359 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -649,6 +649,17 @@
#endif
}
+template<> EIGEN_STRONG_INLINE Packet4f psignbit(const Packet4f& a) { return _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(a), 31)); }
+template<> EIGEN_STRONG_INLINE Packet2d psignbit(const Packet2d& a)
+{
+ Packet4f tmp = psignbit<Packet4f>(_mm_castpd_ps(a));
+#ifdef EIGEN_VECTORIZE_AVX
+ return _mm_castps_pd(_mm_permute_ps(tmp, (shuffle_mask<1, 1, 3, 3>::mask)));
+#else
+ return _mm_castps_pd(_mm_shuffle_ps(tmp, tmp, (shuffle_mask<1, 1, 3, 3>::mask)));
+#endif // EIGEN_VECTORIZE_AVX
+}
+
#ifdef EIGEN_VECTORIZE_SSE4_1
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
{
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index eaa4352..c8bb4e7 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -388,7 +388,11 @@
struct maybe_raise_div_by_zero<Packet, true> {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Packet x) {
if (EIGEN_PREDICT_FALSE(predux_any(pcmp_eq(x, pzero(x))))) {
- std::raise(SIGFPE);
+ // Use volatile variables to force a division by zero, which will
+ // result in the default platform behaviour (usually SIGFPE).
+ volatile typename unpacket_traits<Packet>::type zero = 0;
+ volatile typename unpacket_traits<Packet>::type val = 1;
+ val = val / zero;
}
}
};
diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h
index e099d4a..4943d87 100644
--- a/Eigen/src/Core/functors/NullaryFunctors.h
+++ b/Eigen/src/Core/functors/NullaryFunctors.h
@@ -145,6 +145,39 @@
const linspaced_op_impl<Scalar,NumTraits<Scalar>::IsInteger> impl;
};
+template <typename Scalar>
+struct equalspaced_op {
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+
+ EIGEN_DEVICE_FUNC equalspaced_op(const Scalar& start, const Scalar& step) : m_start(start), m_step(step) {}
+ template <typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(IndexType i) const {
+ return m_start + m_step * static_cast<Scalar>(i);
+ }
+ template <typename Packet, typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(IndexType i) const {
+ const Packet cst_start = pset1<Packet>(m_start);
+ const Packet cst_step = pset1<Packet>(m_step);
+ const Packet cst_lin0 = plset<Packet>(Scalar(0));
+ const Packet cst_offset = pmadd(cst_lin0, cst_step, cst_start);
+
+ Packet i_packet = pset1<Packet>(static_cast<Scalar>(i));
+ return pmadd(i_packet, cst_step, cst_offset);
+ }
+ const Scalar m_start;
+ const Scalar m_step;
+};
+
+template <typename Scalar>
+struct functor_traits<equalspaced_op<Scalar> > {
+ enum {
+ Cost = NumTraits<Scalar>::AddCost + NumTraits<Scalar>::MulCost,
+ PacketAccess =
+ packet_traits<Scalar>::HasSetLinear && packet_traits<Scalar>::HasMul && packet_traits<Scalar>::HasAdd,
+ IsRepeatable = true
+ };
+};
+
// Linear access is automatically determined from the operator() prototypes available for the given functor.
// If it exposes an operator()(i,j), then we assume the i and j coefficients are required independently
// and linear access is not possible. In all other cases, linear access is enabled.
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 179d41c..4a6cef5 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -57,8 +57,13 @@
const std::ptrdiff_t defaultL3CacheSize = EIGEN_SET_DEFAULT_L3_CACHE_SIZE(2*1024*1024);
#elif EIGEN_ARCH_PPC
const std::ptrdiff_t defaultL1CacheSize = EIGEN_SET_DEFAULT_L1_CACHE_SIZE(64*1024);
+#ifdef _ARCH_PWR10
+const std::ptrdiff_t defaultL2CacheSize = EIGEN_SET_DEFAULT_L2_CACHE_SIZE(2*1024*1024);
+const std::ptrdiff_t defaultL3CacheSize = EIGEN_SET_DEFAULT_L3_CACHE_SIZE(8*1024*1024);
+#else
const std::ptrdiff_t defaultL2CacheSize = EIGEN_SET_DEFAULT_L2_CACHE_SIZE(512*1024);
const std::ptrdiff_t defaultL3CacheSize = EIGEN_SET_DEFAULT_L3_CACHE_SIZE(4*1024*1024);
+#endif
#else
const std::ptrdiff_t defaultL1CacheSize = EIGEN_SET_DEFAULT_L1_CACHE_SIZE(16*1024);
const std::ptrdiff_t defaultL2CacheSize = EIGEN_SET_DEFAULT_L2_CACHE_SIZE(512*1024);
@@ -746,6 +751,9 @@
template<typename Packet> struct unpacket_traits<DoublePacket<Packet> > {
typedef DoublePacket<typename unpacket_traits<Packet>::half> half;
+ enum{
+ size = 2 * unpacket_traits<Packet>::size
+ };
};
// template<typename Packet>
// DoublePacket<Packet> pmadd(const DoublePacket<Packet> &a, const DoublePacket<Packet> &b)
@@ -2485,7 +2493,13 @@
// nr (which is currently 4) for the return type.
const int SResPacketHalfSize = unpacket_traits<typename unpacket_traits<SResPacket>::half>::size;
const int SResPacketQuarterSize = unpacket_traits<typename unpacket_traits<typename unpacket_traits<SResPacket>::half>::half>::size;
- if ((SwappedTraits::LhsProgress % 4) == 0 &&
+ // The following code assumes we can load SRhsPacket in such a way that
+ // it multiplies blocks of 4 elements in SLhsPacket. This is not the
+ // case for some customized kernels (i.e. NEON fp16). If the assumption
+ // fails, drop down to the scalar path.
+ constexpr bool kCanLoadSRhsQuad = (unpacket_traits<SLhsPacket>::size < 4) || (unpacket_traits<SRhsPacket>::size % (unpacket_traits<SLhsPacket>::size / 4)) == 0;
+ if (kCanLoadSRhsQuad &&
+ (SwappedTraits::LhsProgress % 4) == 0 &&
(SwappedTraits::LhsProgress<=16) &&
(SwappedTraits::LhsProgress!=8 || SResPacketHalfSize==nr) &&
(SwappedTraits::LhsProgress!=16 || SResPacketQuarterSize==nr))
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index 3f00077..3f7638e 100644
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -379,11 +379,12 @@
storePacketBlock_helper<SubPacket, Scalar_, n, idx-1> spbh;
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
spbh.store(sup, i,j,block);
- for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)
- {
- Scalar_ *v = &sup->operator()(i+l, j+idx);
- *v = block.packet[idx][l];
- }
+ sup->template storePacket<SubPacket>(i, j+idx, block.packet[idx]);
+ //for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)
+ //{
+ // Scalar_ *v = &sup->operator()(i+l, j+idx);
+ // *v = *reinterpret_cast<Scalar_ *>(&block.packet[idx][l]);
+ //}
}
};
@@ -393,12 +394,7 @@
storePacketBlock_helper<SubPacket, std::complex<float>, n, idx-1> spbh;
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
spbh.store(sup,i,j,block);
- for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)
- {
- std::complex<float> *v = &sup->operator()(i+l, j+idx);
- v->real(block.packet[idx].v[2*l+0]);
- v->imag(block.packet[idx].v[2*l+1]);
- }
+ sup->template storePacket<SubPacket>(i, j+idx, block.packet[idx]);
}
};
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index 503d651..8f87c4a 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -270,8 +270,10 @@
template<typename Scalar> class JacobiRotation;
// Geometry module:
+namespace internal {
+template<typename Derived, typename OtherDerived, int Size = MatrixBase<Derived>::SizeAtCompileTime> struct cross_impl;
+}
template<typename Derived, int Dim_> class RotationBase;
-template<typename Lhs, typename Rhs> class Cross;
template<typename Derived> class QuaternionBase;
template<typename Scalar> class Rotation2D;
template<typename Scalar> class AngleAxis;
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index d8a67a4..4cd81a0 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -654,11 +654,11 @@
// The macro EIGEN_COMP_CXXVER defines the c++ version expected by the compiler.
// For instance, if compiling with gcc and -std=c++17, then EIGEN_COMP_CXXVER
// is defined to 17.
-#if EIGEN_CPLUSPLUS > 201703L
+#if EIGEN_CPLUSPLUS >= 202002L
#define EIGEN_COMP_CXXVER 20
-#elif EIGEN_CPLUSPLUS > 201402L
+#elif EIGEN_CPLUSPLUS >= 201703L
#define EIGEN_COMP_CXXVER 17
-#elif EIGEN_CPLUSPLUS > 201103L
+#elif EIGEN_CPLUSPLUS >= 201402L
#define EIGEN_COMP_CXXVER 14
#elif EIGEN_CPLUSPLUS >= 201103L
#define EIGEN_COMP_CXXVER 11
@@ -717,13 +717,16 @@
// and it could be that XCode 9 works just fine.
// NOTE: the MSVC version is based on https://en.cppreference.com/w/cpp/compiler_support
// and not tested.
+// NOTE: Intel C++ Compiler Classic (icc) Version 19.0 and later supports dynamic allocation
+// for over-aligned data, but not in a manner that is compatible with Eigen.
+// See https://gitlab.com/libeigen/eigen/-/issues/2575
#ifndef EIGEN_HAS_CXX17_OVERALIGN
#if EIGEN_MAX_CPP_VER>=17 && EIGEN_COMP_CXXVER>=17 && ( \
(EIGEN_COMP_MSVC >= 1912) \
|| (EIGEN_GNUC_AT_LEAST(7,0)) \
|| ((!defined(__apple_build_version__)) && (EIGEN_COMP_CLANG>=500)) \
|| (( defined(__apple_build_version__)) && (__apple_build_version__>=10000000)) \
- )
+ ) && !EIGEN_COMP_ICC
#define EIGEN_HAS_CXX17_OVERALIGN 1
#else
#define EIGEN_HAS_CXX17_OVERALIGN 0
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 385f4f4..e4a8793 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -96,19 +96,17 @@
/* ----- Hand made implementations of aligned malloc/free and realloc ----- */
-/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
- * Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
+/** \internal Like malloc, but the returned pointer is guaranteed to be aligned to `alignment`.
+ * Fast, but wastes `alignment` additional bytes of memory. Does not throw any exception.
*/
EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
{
- eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*) and a power of 2");
-
- EIGEN_USING_STD(malloc)
- void *original = malloc(size+alignment);
-
+ eigen_assert(alignment >= sizeof(void*) && alignment <= 128 && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*), less than or equal to 128, and a power of 2");
+ void* original = std::malloc(size + alignment);
if (original == 0) return 0;
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment);
- *(reinterpret_cast<void**>(aligned) - 1) = original;
+ uint8_t offset = static_cast<uint8_t>(alignment - (reinterpret_cast<std::size_t>(original) & (alignment - 1)));
+ void* aligned = static_cast<void*>(static_cast<uint8_t*>(original) + offset);
+ *(static_cast<uint8_t*>(aligned) - 1) = offset;
return aligned;
}
@@ -116,8 +114,9 @@
EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr)
{
if (ptr) {
- EIGEN_USING_STD(free)
- free(*(reinterpret_cast<void**>(ptr) - 1));
+ uint8_t offset = static_cast<uint8_t>(*(static_cast<uint8_t*>(ptr) - 1));
+ void* original = static_cast<void*>(static_cast<uint8_t*>(ptr) - offset);
+ std::free(original);
}
}
@@ -126,19 +125,22 @@
* Since we know that our handmade version is based on std::malloc
* we can use std::realloc to implement efficient reallocation.
*/
-inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
+EIGEN_DEVICE_FUNC inline void* handmade_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
{
- if (ptr == 0) return handmade_aligned_malloc(size);
- void *original = *(reinterpret_cast<void**>(ptr) - 1);
- std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
- original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES);
+ if (ptr == 0) return handmade_aligned_malloc(new_size, alignment);
+ uint8_t old_offset = *(static_cast<uint8_t*>(ptr) - 1);
+ void* old_original = static_cast<uint8_t*>(ptr) - old_offset;
+ void* original = std::realloc(old_original, new_size + alignment);
if (original == 0) return 0;
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
- void *previous_aligned = static_cast<char *>(original)+previous_offset;
- if(aligned!=previous_aligned)
- std::memmove(aligned, previous_aligned, size);
-
- *(reinterpret_cast<void**>(aligned) - 1) = original;
+ if (original == old_original) return ptr;
+ uint8_t offset = static_cast<uint8_t>(alignment - (reinterpret_cast<std::size_t>(original) & (alignment - 1)));
+ void* aligned = static_cast<void*>(static_cast<uint8_t*>(original) + offset);
+ if (offset != old_offset) {
+ const void* src = static_cast<const void*>(static_cast<uint8_t*>(original) + old_offset);
+ std::size_t count = (std::min)(new_size, old_size);
+ std::memmove(aligned, src, count);
+ }
+ *(static_cast<uint8_t*>(aligned) - 1) = offset;
return aligned;
}
@@ -214,13 +216,12 @@
* \brief Reallocates an aligned block of memory.
* \throws std::bad_alloc on allocation failure
*/
-inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)
+EIGEN_DEVICE_FUNC inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)
{
if (ptr == 0) return aligned_malloc(new_size);
- EIGEN_UNUSED_VARIABLE(old_size)
-
void *result;
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
+ EIGEN_UNUSED_VARIABLE(old_size)
result = std::realloc(ptr,new_size);
#else
result = handmade_aligned_realloc(ptr,new_size,old_size);
@@ -273,12 +274,12 @@
free(ptr);
}
-template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
+template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
{
return aligned_realloc(ptr, new_size, old_size);
}
-template<> inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t)
+template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t)
{
return std::realloc(ptr, new_size);
}
@@ -471,7 +472,7 @@
return result;
}
-template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size)
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size)
{
if (NumTraits<T>::RequireInitialization) {
return conditional_aligned_realloc_new<T, Align>(pts, new_size, old_size);
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 32152ac..6c6fb71 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -43,6 +43,32 @@
typedef std::int32_t int32_t;
typedef std::uint64_t uint64_t;
typedef std::int64_t int64_t;
+
+template <size_t Size>
+struct get_integer_by_size {
+ typedef void signed_type;
+ typedef void unsigned_type;
+};
+template <>
+struct get_integer_by_size<1> {
+ typedef int8_t signed_type;
+ typedef uint8_t unsigned_type;
+};
+template <>
+struct get_integer_by_size<2> {
+ typedef int16_t signed_type;
+ typedef uint16_t unsigned_type;
+};
+template <>
+struct get_integer_by_size<4> {
+ typedef int32_t signed_type;
+ typedef uint32_t unsigned_type;
+};
+template <>
+struct get_integer_by_size<8> {
+ typedef int64_t signed_type;
+ typedef uint64_t unsigned_type;
+};
}
}
diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h
index dc6a762..fbf020d 100644
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -15,39 +15,83 @@
namespace Eigen {
+namespace internal {
+
+// Vector3 version (default)
+template<typename Derived, typename OtherDerived, int Size>
+struct cross_impl
+{
+ typedef typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
+ typedef Matrix<Scalar,MatrixBase<Derived>::RowsAtCompileTime,MatrixBase<Derived>::ColsAtCompileTime> return_type;
+
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ return_type run(const MatrixBase<Derived>& first, const MatrixBase<OtherDerived>& second)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,3)
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,3)
+
+ // Note that there is no need for an expression here since the compiler
+ // optimizes such a small temporary very well (even within a complex expression)
+ typename internal::nested_eval<Derived,2>::type lhs(first.derived());
+ typename internal::nested_eval<OtherDerived,2>::type rhs(second.derived());
+ return return_type(
+ numext::conj(lhs.coeff(1) * rhs.coeff(2) - lhs.coeff(2) * rhs.coeff(1)),
+ numext::conj(lhs.coeff(2) * rhs.coeff(0) - lhs.coeff(0) * rhs.coeff(2)),
+ numext::conj(lhs.coeff(0) * rhs.coeff(1) - lhs.coeff(1) * rhs.coeff(0))
+ );
+ }
+};
+
+// Vector2 version
+template<typename Derived, typename OtherDerived>
+struct cross_impl<Derived, OtherDerived, 2>
+{
+ typedef typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
+ typedef Scalar return_type;
+
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ return_type run(const MatrixBase<Derived>& first, const MatrixBase<OtherDerived>& second)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,2);
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,2);
+ typename internal::nested_eval<Derived,2>::type lhs(first.derived());
+ typename internal::nested_eval<OtherDerived,2>::type rhs(second.derived());
+ return numext::conj(lhs.coeff(0) * rhs.coeff(1) - lhs.coeff(1) * rhs.coeff(0));
+ }
+};
+
+} // end namespace internal
+
/** \geometry_module \ingroup Geometry_Module
*
- * \returns the cross product of \c *this and \a other
+ * \returns the cross product of \c *this and \a other. This is either a scalar for size-2 vectors or a size-3 vector for size-3 vectors.
*
- * Here is a very good explanation of cross-product: http://xkcd.com/199/
+ * This method is implemented for two different cases: between vectors of fixed size 2 and between vectors of fixed size 3.
*
- * With complex numbers, the cross product is implemented as
- * \f$ (\mathbf{a}+i\mathbf{b}) \times (\mathbf{c}+i\mathbf{d}) = (\mathbf{a} \times \mathbf{c} - \mathbf{b} \times \mathbf{d}) - i(\mathbf{a} \times \mathbf{d} - \mathbf{b} \times \mathbf{c})\f$
+ * For vectors of size 3, the output is simply the traditional cross product.
+ *
+ * For vectors of size 2, the output is a scalar.
+ * Given vectors \f$ v = \begin{bmatrix} v_1 & v_2 \end{bmatrix} \f$ and \f$ w = \begin{bmatrix} w_1 & w_2 \end{bmatrix} \f$,
+ * the result is simply \f$ v\times w = \overline{v_1 w_2 - v_2 w_1} = \text{conj}\left|\begin{smallmatrix} v_1 & w_1 \\ v_2 & w_2 \end{smallmatrix}\right| \f$;
+ * or, to put it differently, it is the third coordinate of the cross product of \f$ \begin{bmatrix} v_1 & v_2 & v_3 \end{bmatrix} \f$ and \f$ \begin{bmatrix} w_1 & w_2 & w_3 \end{bmatrix} \f$.
+ * For real-valued inputs, the result can be interpreted as the signed area of a parallelogram spanned by the two vectors.
+ *
+ * \note With complex numbers, the cross product is implemented as
+ * \f$ (\mathbf{a}+i\mathbf{b}) \times (\mathbf{c}+i\mathbf{d}) = (\mathbf{a} \times \mathbf{c} - \mathbf{b} \times \mathbf{d}) - i(\mathbf{a} \times \mathbf{d} + \mathbf{b} \times \mathbf{c})\f$
*
* \sa MatrixBase::cross3()
*/
template<typename Derived>
template<typename OtherDerived>
-#ifndef EIGEN_PARSED_BY_DOXYGEN
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-typename MatrixBase<Derived>::template cross_product_return_type<OtherDerived>::type
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+typename internal::cross_impl<Derived, OtherDerived>::return_type
#else
-typename MatrixBase<Derived>::PlainObject
+inline std::conditional_t<SizeAtCompileTime==2, Scalar, PlainObject>
#endif
MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
{
- EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,3)
- EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,3)
-
- // Note that there is no need for an expression here since the compiler
- // optimize such a small temporary very well (even within a complex expression)
- typename internal::nested_eval<Derived,2>::type lhs(derived());
- typename internal::nested_eval<OtherDerived,2>::type rhs(other.derived());
- return typename cross_product_return_type<OtherDerived>::type(
- numext::conj(lhs.coeff(1) * rhs.coeff(2) - lhs.coeff(2) * rhs.coeff(1)),
- numext::conj(lhs.coeff(2) * rhs.coeff(0) - lhs.coeff(0) * rhs.coeff(2)),
- numext::conj(lhs.coeff(0) * rhs.coeff(1) - lhs.coeff(1) * rhs.coeff(0))
- );
+ return internal::cross_impl<Derived, OtherDerived>::run(*this, other);
}
namespace internal {
diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
index 079b22d..76195c7 100644
--- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
+++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
@@ -51,9 +51,9 @@
x.setZero();
return true;
}
- Scalar rho = 1;
- Scalar alpha = 1;
- Scalar w = 1;
+ Scalar rho (1);
+ Scalar alpha (1);
+ Scalar w (1);
VectorType v = VectorType::Zero(n), p = VectorType::Zero(n);
VectorType y(n), z(n);
diff --git a/Eigen/src/LU/arch/InverseSize4.h b/Eigen/src/LU/arch/InverseSize4.h
index 2733fbe..25f4601 100644
--- a/Eigen/src/LU/arch/InverseSize4.h
+++ b/Eigen/src/LU/arch/InverseSize4.h
@@ -37,7 +37,7 @@
#include "../InternalHeaderCheck.h"
-#if !EIGEN_COMP_LLVM
+#if EIGEN_COMP_GNUC_STRICT
// These routines requires bit manipulation of the sign, which is not compatible
// with fastmath.
#pragma GCC push_options
@@ -358,7 +358,7 @@
} // namespace internal
} // namespace Eigen
-#if !EIGEN_COMP_LLVM
+#if EIGEN_COMP_GNUC_STRICT
#pragma GCC pop_options
#endif
diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h
index 1b10c0e..a69fdca 100644
--- a/Eigen/src/SVD/BDCSVD.h
+++ b/Eigen/src/SVD/BDCSVD.h
@@ -236,7 +236,6 @@
}
private:
- void allocate(Index rows, Index cols, unsigned int computationOptions);
BDCSVD& compute_impl(const MatrixType& matrix, unsigned int computationOptions);
void divide(Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift);
void computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V);
@@ -254,6 +253,7 @@
void computeBaseCase(SVDType& svd, Index n, Index firstCol, Index firstRowW, Index firstColW, Index shift);
protected:
+ void allocate(Index rows, Index cols, unsigned int computationOptions);
MatrixXr m_naiveU, m_naiveV;
MatrixXr m_computed;
Index m_nRec;
diff --git a/Eigen/src/SVD/BDCSVD_LAPACKE.h b/Eigen/src/SVD/BDCSVD_LAPACKE.h
new file mode 100644
index 0000000..d4cc173
--- /dev/null
+++ b/Eigen/src/SVD/BDCSVD_LAPACKE.h
@@ -0,0 +1,163 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2022 Melven Roehrig-Zoellner <Melven.Roehrig-Zoellner@DLR.de>
+// Copyright (c) 2011, Intel Corporation. All rights reserved.
+//
+// This file is based on the JacobiSVD_LAPACKE.h originally from Intel -
+// see license notice below:
+/*
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ * Content : Eigen bindings to LAPACKe
+ * Singular Value Decomposition - SVD (divide and conquer variant)
+ ********************************************************************************
+*/
+#ifndef EIGEN_BDCSVD_LAPACKE_H
+#define EIGEN_BDCSVD_LAPACKE_H
+
+namespace Eigen {
+
+namespace internal {
+
+namespace lapacke_helpers {
+
+/** \internal Specialization for the data types supported by LAPACKe */
+
+// defining a derived class to allow access to protected members
+template <typename MatrixType_, int Options>
+class BDCSVD_LAPACKE : public BDCSVD<MatrixType_, Options> {
+ typedef BDCSVD<MatrixType_, Options> SVD;
+ typedef typename SVD::MatrixType MatrixType;
+ typedef typename SVD::Scalar Scalar;
+ typedef typename SVD::RealScalar RealScalar;
+
+public:
+ // construct this by moving from a parent object
+ BDCSVD_LAPACKE(SVD&& svd) : SVD(std::move(svd)) {}
+
+ void compute_impl_lapacke(const MatrixType& matrix, unsigned int computationOptions) {
+
+ SVD::allocate(matrix.rows(), matrix.cols(), computationOptions);
+
+ SVD::m_nonzeroSingularValues = SVD::m_diagSize;
+
+ // prepare arguments to ?gesdd
+ const lapack_int matrix_order = lapack_storage_of(matrix);
+ const char jobz = (SVD::m_computeFullU || SVD::m_computeFullV) ? 'A' : (SVD::m_computeThinU || SVD::m_computeThinV) ? 'S' : 'N';
+ const lapack_int u_cols = (jobz == 'A') ? to_lapack(SVD::m_rows) : (jobz == 'S') ? to_lapack(SVD::m_diagSize) : 1;
+ const lapack_int vt_rows = (jobz == 'A') ? to_lapack(SVD::m_cols) : (jobz == 'S') ? to_lapack(SVD::m_diagSize) : 1;
+ lapack_int ldu, ldvt;
+ Scalar *u, *vt, dummy;
+ MatrixType localU;
+ if (SVD::computeU() && !(SVD::m_computeThinU && SVD::m_computeFullV) ) {
+ ldu = to_lapack(SVD::m_matrixU.outerStride());
+ u = SVD::m_matrixU.data();
+ } else if (SVD::computeV()) {
+ localU.resize(SVD::m_rows, u_cols);
+ ldu = to_lapack(localU.outerStride());
+ u = localU.data();
+ } else { ldu=1; u=&dummy; }
+ MatrixType localV;
+ if (SVD::computeU() || SVD::computeV()) {
+ localV.resize(vt_rows, SVD::m_cols);
+ ldvt = to_lapack(localV.outerStride());
+ vt = localV.data();
+ } else { ldvt=1; vt=&dummy; }
+ MatrixType temp; temp = matrix;
+
+ // actual call to ?gesdd
+ lapack_int info = gesdd( matrix_order, jobz, to_lapack(SVD::m_rows), to_lapack(SVD::m_cols),
+ to_lapack(temp.data()), to_lapack(temp.outerStride()), (RealScalar*)SVD::m_singularValues.data(),
+ to_lapack(u), ldu, to_lapack(vt), ldvt);
+
+ // Check the result of the LAPACK call
+ if (info < 0 || !SVD::m_singularValues.allFinite()) {
+ // this includes info == -4 => NaN entry in A
+ SVD::m_info = InvalidInput;
+ } else if (info > 0 ) {
+ SVD::m_info = NoConvergence;
+ } else {
+ SVD::m_info = Success;
+ if (SVD::m_computeThinU && SVD::m_computeFullV) {
+ SVD::m_matrixU = localU.leftCols(SVD::m_matrixU.cols());
+ }
+ if (SVD::computeV()) {
+ SVD::m_matrixV = localV.adjoint().leftCols(SVD::m_matrixV.cols());
+ }
+ }
+ SVD::m_isInitialized = true;
+ }
+};
+
+template<typename MatrixType_, int Options>
+BDCSVD<MatrixType_, Options>& BDCSVD_wrapper(BDCSVD<MatrixType_, Options>& svd, const MatrixType_& matrix, int computationOptions)
+{
+ // we need to move to the wrapper type and back
+ BDCSVD_LAPACKE<MatrixType_, Options> tmpSvd(std::move(svd));
+ tmpSvd.compute_impl_lapacke(matrix, computationOptions);
+ svd = std::move(tmpSvd);
+ return svd;
+}
+
+} // end namespace lapacke_helpers
+
+} // end namespace internal
+
+#define EIGEN_LAPACKE_SDD(EIGTYPE, EIGCOLROW, OPTIONS) \
+template<> inline \
+BDCSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, OPTIONS>& \
+BDCSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, OPTIONS>::compute_impl(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>& matrix, unsigned int computationOptions) {\
+ return internal::lapacke_helpers::BDCSVD_wrapper(*this, matrix, computationOptions); \
+}
+
+#define EIGEN_LAPACK_SDD_OPTIONS(OPTIONS) \
+ EIGEN_LAPACKE_SDD(double, ColMajor, OPTIONS) \
+ EIGEN_LAPACKE_SDD(float, ColMajor, OPTIONS) \
+ EIGEN_LAPACKE_SDD(dcomplex, ColMajor, OPTIONS) \
+ EIGEN_LAPACKE_SDD(scomplex, ColMajor, OPTIONS) \
+\
+ EIGEN_LAPACKE_SDD(double, RowMajor, OPTIONS) \
+ EIGEN_LAPACKE_SDD(float, RowMajor, OPTIONS) \
+ EIGEN_LAPACKE_SDD(dcomplex, RowMajor, OPTIONS) \
+ EIGEN_LAPACKE_SDD(scomplex, RowMajor, OPTIONS)
+
+EIGEN_LAPACK_SDD_OPTIONS(0)
+EIGEN_LAPACK_SDD_OPTIONS(ComputeThinU)
+EIGEN_LAPACK_SDD_OPTIONS(ComputeThinV)
+EIGEN_LAPACK_SDD_OPTIONS(ComputeFullU)
+EIGEN_LAPACK_SDD_OPTIONS(ComputeFullV)
+EIGEN_LAPACK_SDD_OPTIONS(ComputeThinU | ComputeThinV)
+EIGEN_LAPACK_SDD_OPTIONS(ComputeFullU | ComputeFullV)
+EIGEN_LAPACK_SDD_OPTIONS(ComputeThinU | ComputeFullV)
+EIGEN_LAPACK_SDD_OPTIONS(ComputeFullU | ComputeThinV)
+
+#undef EIGEN_LAPACK_SDD_OPTIONS
+
+#undef EIGEN_LAPACKE_SDD
+
+} // end namespace Eigen
+
+#endif // EIGEN_BDCSVD_LAPACKE_H
diff --git a/Eigen/src/SVD/JacobiSVD_LAPACKE.h b/Eigen/src/SVD/JacobiSVD_LAPACKE.h
index c5717e9..93244cd 100644
--- a/Eigen/src/SVD/JacobiSVD_LAPACKE.h
+++ b/Eigen/src/SVD/JacobiSVD_LAPACKE.h
@@ -72,8 +72,16 @@
} else { ldvt=1; vt=&dummy; }\
Matrix<LAPACKE_RTYPE, Dynamic, Dynamic> superb; superb.resize(m_diagSize, 1); \
MatrixType m_temp; m_temp = matrix; \
- LAPACKE_##LAPACKE_PREFIX##gesvd( matrix_order, jobu, jobvt, internal::convert_index<lapack_int>(m_rows), internal::convert_index<lapack_int>(m_cols), (LAPACKE_TYPE*)m_temp.data(), lda, (LAPACKE_RTYPE*)m_singularValues.data(), u, ldu, vt, ldvt, superb.data()); \
- if (computeV()) m_matrixV = localV.adjoint(); \
+ lapack_int info = LAPACKE_##LAPACKE_PREFIX##gesvd( matrix_order, jobu, jobvt, internal::convert_index<lapack_int>(m_rows), internal::convert_index<lapack_int>(m_cols), (LAPACKE_TYPE*)m_temp.data(), lda, (LAPACKE_RTYPE*)m_singularValues.data(), u, ldu, vt, ldvt, superb.data()); \
+ /* Check the result of the LAPACK call */ \
+ if (info < 0 || !m_singularValues.allFinite()) { \
+ m_info = InvalidInput; \
+ } else if (info > 0 ) { \
+ m_info = NoConvergence; \
+ } else { \
+ m_info = Success; \
+ if (computeV()) m_matrixV = localV.adjoint(); \
+ } \
/* for(int i=0;i<m_diagSize;i++) if (m_singularValues.coeffRef(i) < precision) { m_nonzeroSingularValues--; m_singularValues.coeffRef(i)=RealScalar(0);}*/ \
m_isInitialized = true; \
return *this; \
diff --git a/Eigen/src/SparseCore/SparseCompressedBase.h b/Eigen/src/SparseCore/SparseCompressedBase.h
index ede5766..243cd16 100644
--- a/Eigen/src/SparseCore/SparseCompressedBase.h
+++ b/Eigen/src/SparseCore/SparseCompressedBase.h
@@ -22,6 +22,9 @@
struct traits<SparseCompressedBase<Derived> > : traits<Derived>
{};
+template <typename Derived, class Comp, bool IsVector>
+struct inner_sort_impl;
+
} // end namespace internal
/** \ingroup SparseCore_Module
@@ -126,6 +129,40 @@
*
* \sa valuePtr(), isCompressed() */
Map<Array<Scalar,Dynamic,1> > coeffs() { eigen_assert(isCompressed()); return Array<Scalar,Dynamic,1>::Map(valuePtr(),nonZeros()); }
+
+ /** sorts the inner vectors in the range [begin,end) with respect to `Comp`
+ * \sa innerIndicesAreSorted() */
+ template <class Comp = std::less<>>
+ inline void sortInnerIndices(Index begin, Index end) {
+ eigen_assert(begin >= 0 && end <= derived().outerSize() && end >= begin);
+ internal::inner_sort_impl<Derived, Comp, IsVectorAtCompileTime>::run(*this, begin, end);
+ }
+
+ /** \returns the index of the first inner vector in the range [begin,end) that is not sorted with respect to `Comp`, or `end` if the range is fully sorted
+ * \sa sortInnerIndices() */
+ template <class Comp = std::less<>>
+ inline Index innerIndicesAreSorted(Index begin, Index end) const {
+ eigen_assert(begin >= 0 && end <= derived().outerSize() && end >= begin);
+ return internal::inner_sort_impl<Derived, Comp, IsVectorAtCompileTime>::check(*this, begin, end);
+ }
+
+ /** sorts the inner vectors in the range [0,outerSize) with respect to `Comp`
+ * \sa innerIndicesAreSorted() */
+ template <class Comp = std::less<>>
+ inline void sortInnerIndices() {
+ Index begin = 0;
+ Index end = derived().outerSize();
+ internal::inner_sort_impl<Derived, Comp, IsVectorAtCompileTime>::run(*this, begin, end);
+ }
+
+ /** \returns the index of the first inner vector in the range [0,outerSize) that is not sorted with respect to `Comp`, or `outerSize` if the range is fully sorted
+ * \sa sortInnerIndices() */
+ template<class Comp = std::less<>>
+ inline Index innerIndicesAreSorted() const {
+ Index begin = 0;
+ Index end = derived().outerSize();
+ return internal::inner_sort_impl<Derived, Comp, IsVectorAtCompileTime>::check(*this, begin, end);
+ }
protected:
/** Default constructor. Do nothing. */
@@ -306,6 +343,138 @@
namespace internal {
+// modified from https://artificial-mind.net/blog/2020/11/28/std-sort-multiple-ranges
+template <typename Scalar, typename StorageIndex>
+class CompressedStorageIterator;
+
+// wrapper class analogous to std::pair<StorageIndex&, Scalar&>
+// used to define assignment, swap, and comparison operators for CompressedStorageIterator
+template <typename Scalar, typename StorageIndex>
+class StorageRef
+{
+public:
+ using value_type = std::pair<StorageIndex, Scalar>;
+
+ inline StorageRef& operator=(const StorageRef& other) {
+ *m_innerIndexIterator = *other.m_innerIndexIterator;
+ *m_valueIterator = *other.m_valueIterator;
+ return *this;
+ }
+ inline StorageRef& operator=(const value_type& other) {
+ std::tie(*m_innerIndexIterator, *m_valueIterator) = other;
+ return *this;
+ }
+ inline operator value_type() const { return std::make_pair(*m_innerIndexIterator, *m_valueIterator); }
+ inline friend void swap(const StorageRef& a, const StorageRef& b) {
+ std::iter_swap(a.m_innerIndexIterator, b.m_innerIndexIterator);
+ std::iter_swap(a.m_valueIterator, b.m_valueIterator);
+ }
+
+ inline static const StorageIndex& key(const StorageRef& a) { return *a.m_innerIndexIterator; }
+ inline static const StorageIndex& key(const value_type& a) { return a.first; }
+ #define REF_COMP_REF(OP) inline friend bool operator OP(const StorageRef& a, const StorageRef& b) { return key(a) OP key(b); };
+ #define REF_COMP_VAL(OP) inline friend bool operator OP(const StorageRef& a, const value_type& b) { return key(a) OP key(b); };
+ #define VAL_COMP_REF(OP) inline friend bool operator OP(const value_type& a, const StorageRef& b) { return key(a) OP key(b); };
+ #define MAKE_COMPS(OP) REF_COMP_REF(OP) REF_COMP_VAL(OP) VAL_COMP_REF(OP)
+ MAKE_COMPS(<) MAKE_COMPS(>) MAKE_COMPS(<=) MAKE_COMPS(>=) MAKE_COMPS(==) MAKE_COMPS(!=)
+
+protected:
+ StorageIndex* m_innerIndexIterator;
+ Scalar* m_valueIterator;
+private:
+ StorageRef() = delete;
+ // these constructors are called only by the CompressedStorageIterator constructors, for convenience
+ StorageRef(StorageIndex* innerIndexIterator, Scalar* valueIterator) : m_innerIndexIterator(innerIndexIterator), m_valueIterator(valueIterator) {}
+ StorageRef(const StorageRef& other) : m_innerIndexIterator(other.m_innerIndexIterator), m_valueIterator(other.m_valueIterator) {}
+
+ friend class CompressedStorageIterator<Scalar, StorageIndex>;
+};
+
+// STL-compatible iterator class that operates on inner indices and values
+template<typename Scalar, typename StorageIndex>
+class CompressedStorageIterator
+{
+public:
+ using iterator_category = std::random_access_iterator_tag;
+ using reference = StorageRef<Scalar, StorageIndex>;
+ using difference_type = Index;
+ using value_type = typename reference::value_type;
+ using pointer = value_type*;
+
+ CompressedStorageIterator() = delete;
+ CompressedStorageIterator(difference_type index, StorageIndex* innerIndexPtr, Scalar* valuePtr) : m_index(index), m_data(innerIndexPtr, valuePtr) {}
+ CompressedStorageIterator(difference_type index, reference data) : m_index(index), m_data(data) {}
+ CompressedStorageIterator(const CompressedStorageIterator& other) : m_index(other.m_index), m_data(other.m_data) {}
+ inline CompressedStorageIterator& operator=(const CompressedStorageIterator& other) {
+ m_index = other.m_index;
+ m_data = other.m_data;
+ return *this;
+ }
+
+ inline CompressedStorageIterator operator+(difference_type offset) const { return CompressedStorageIterator(m_index + offset, m_data); }
+ inline CompressedStorageIterator operator-(difference_type offset) const { return CompressedStorageIterator(m_index - offset, m_data); }
+ inline difference_type operator-(const CompressedStorageIterator& other) const { return m_index - other.m_index; }
+ inline CompressedStorageIterator& operator++() { ++m_index; return *this; }
+ inline CompressedStorageIterator& operator--() { --m_index; return *this; }
+ inline CompressedStorageIterator& operator+=(difference_type offset) { m_index += offset; return *this; }
+ inline CompressedStorageIterator& operator-=(difference_type offset) { m_index -= offset; return *this; }
+ inline reference operator*() const { return reference(m_data.m_innerIndexIterator + m_index, m_data.m_valueIterator + m_index); }
+
+ #define MAKE_COMP(OP) inline bool operator OP(const CompressedStorageIterator& other) const { return m_index OP other.m_index; }
+ MAKE_COMP(<) MAKE_COMP(>) MAKE_COMP(>=) MAKE_COMP(<=) MAKE_COMP(!=) MAKE_COMP(==)
+
+protected:
+ difference_type m_index;
+ reference m_data;
+};
+
+template <typename Derived, class Comp, bool IsVector>
+struct inner_sort_impl {
+ typedef typename Derived::Scalar Scalar;
+ typedef typename Derived::StorageIndex StorageIndex;
+ static inline void run(SparseCompressedBase<Derived>& obj, Index begin, Index end) {
+ const bool is_compressed = obj.isCompressed();
+ for (Index outer = begin; outer < end; outer++) {
+ Index begin_offset = obj.outerIndexPtr()[outer];
+ Index end_offset = is_compressed ? obj.outerIndexPtr()[outer + 1] : (begin_offset + obj.innerNonZeroPtr()[outer]);
+ CompressedStorageIterator<Scalar, StorageIndex> begin_it(begin_offset, obj.innerIndexPtr(), obj.valuePtr());
+ CompressedStorageIterator<Scalar, StorageIndex> end_it(end_offset, obj.innerIndexPtr(), obj.valuePtr());
+ std::sort(begin_it, end_it, Comp());
+ }
+ }
+ static inline Index check(const SparseCompressedBase<Derived>& obj, Index begin, Index end) {
+ const bool is_compressed = obj.isCompressed();
+ for (Index outer = begin; outer < end; outer++) {
+ Index begin_offset = obj.outerIndexPtr()[outer];
+ Index end_offset = is_compressed ? obj.outerIndexPtr()[outer + 1] : (begin_offset + obj.innerNonZeroPtr()[outer]);
+ const StorageIndex* begin_it = obj.innerIndexPtr() + begin_offset;
+ const StorageIndex* end_it = obj.innerIndexPtr() + end_offset;
+ bool is_sorted = std::is_sorted(begin_it, end_it, Comp());
+ if (!is_sorted) return outer;
+ }
+ return end;
+ }
+};
+template <typename Derived, class Comp>
+struct inner_sort_impl<Derived, Comp, true> {
+ typedef typename Derived::Scalar Scalar;
+ typedef typename Derived::StorageIndex StorageIndex;
+ static inline void run(SparseCompressedBase<Derived>& obj, Index, Index) {
+ Index begin_offset = 0;
+ Index end_offset = obj.nonZeros();
+ CompressedStorageIterator<Scalar, StorageIndex> begin_it(begin_offset, obj.innerIndexPtr(), obj.valuePtr());
+ CompressedStorageIterator<Scalar, StorageIndex> end_it(end_offset, obj.innerIndexPtr(), obj.valuePtr());
+ std::sort(begin_it, end_it, Comp());
+ }
+ static inline Index check(const SparseCompressedBase<Derived>& obj, Index, Index) {
+ Index begin_offset = 0;
+ Index end_offset = obj.nonZeros();
+ const StorageIndex* begin_it = obj.innerIndexPtr() + begin_offset;
+ const StorageIndex* end_it = obj.innerIndexPtr() + end_offset;
+ return std::is_sorted(begin_it, end_it, Comp()) ? 1 : 0;
+ }
+};
+
template<typename Derived>
struct evaluator<SparseCompressedBase<Derived> >
: evaluator_base<Derived>
diff --git a/Eigen/src/SparseCore/SparseMap.h b/Eigen/src/SparseCore/SparseMap.h
index a74baec..0ee3813 100644
--- a/Eigen/src/SparseCore/SparseMap.h
+++ b/Eigen/src/SparseCore/SparseMap.h
@@ -239,6 +239,7 @@
/** Constructs a read-write Map to a sparse matrix of size \a rows x \a cols, containing \a nnz non-zero coefficients,
* stored as a sparse format as defined by the pointers \a outerIndexPtr, \a innerIndexPtr, and \a valuePtr.
* If the optional parameter \a innerNonZerosPtr is the null pointer, then a standard compressed format is assumed.
+ * The inner indices must be sorted in increasing order within each outer vector.
*
* This constructor is available only if \c SparseMatrixType is non-const.
*
diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h
index fdac443..6806812 100644
--- a/Eigen/src/SparseCore/SparseMatrix.h
+++ b/Eigen/src/SparseCore/SparseMatrix.h
@@ -694,6 +694,11 @@
Base::operator=(other);
}
+ inline SparseMatrix(SparseMatrix&& other) : Base(), m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0)
+ {
+ *this = other.derived().markAsRValue();
+ }
+
/** Copy constructor (it performs a deep copy) */
inline SparseMatrix(const SparseMatrix& other)
: Base(), m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0)
@@ -742,6 +747,7 @@
internal::conditional_aligned_delete_auto<StorageIndex, true>(m_innerNonZeros, m_outerSize);
m_innerNonZeros = 0;
}
+
inline SparseMatrix& operator=(const SparseMatrix& other)
{
if (other.isRValue())
@@ -767,6 +773,10 @@
return *this;
}
+ inline SparseMatrix& operator=(SparseMatrix&& other) {
+ return *this = other.derived().markAsRValue();
+ }
+
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename OtherDerived>
inline SparseMatrix& operator=(const EigenBase<OtherDerived>& other)
@@ -1496,6 +1506,126 @@
}
+// Specialization for SparseMatrix.
+// Serializes [rows, cols, isCompressed, outerSize, innerBufferSize,
+// innerNonZeros, outerIndices, innerIndices, values].
+template <typename Scalar, int Options, typename StorageIndex>
+class Serializer<SparseMatrix<Scalar, Options, StorageIndex>, void> {
+ public:
+ typedef SparseMatrix<Scalar, Options, StorageIndex> SparseMat;
+
+ struct Header {
+ typename SparseMat::Index rows;
+ typename SparseMat::Index cols;
+ bool compressed;
+ Index outer_size;
+ Index inner_buffer_size;
+ };
+
+ EIGEN_DEVICE_FUNC size_t size(const SparseMat& value) const {
+ // innerNonZeros.
+ std::size_t num_storage_indices = value.isCompressed() ? 0 : value.outerSize();
+ // Outer indices.
+ num_storage_indices += value.outerSize() + 1;
+ // Inner indices.
+ const StorageIndex inner_buffer_size = value.outerIndexPtr()[value.outerSize()];
+ num_storage_indices += inner_buffer_size;
+ // Values.
+ std::size_t num_values = inner_buffer_size;
+ return sizeof(Header) + sizeof(Scalar) * num_values +
+ sizeof(StorageIndex) * num_storage_indices;
+ }
+
+ EIGEN_DEVICE_FUNC uint8_t* serialize(uint8_t* dest, uint8_t* end,
+ const SparseMat& value) {
+ if (EIGEN_PREDICT_FALSE(dest == nullptr)) return nullptr;
+ if (EIGEN_PREDICT_FALSE(dest + size(value) > end)) return nullptr;
+
+ const size_t header_bytes = sizeof(Header);
+ Header header = {value.rows(), value.cols(), value.isCompressed(),
+ value.outerSize(), value.outerIndexPtr()[value.outerSize()]};
+ EIGEN_USING_STD(memcpy)
+ memcpy(dest, &header, header_bytes);
+ dest += header_bytes;
+
+ // innerNonZeros.
+ if (!header.compressed) {
+ std::size_t data_bytes = sizeof(StorageIndex) * header.outer_size;
+ memcpy(dest, value.innerNonZeroPtr(), data_bytes);
+ dest += data_bytes;
+ }
+
+ // Outer indices.
+ std::size_t data_bytes = sizeof(StorageIndex) * (header.outer_size + 1);
+ memcpy(dest, value.outerIndexPtr(), data_bytes);
+ dest += data_bytes;
+
+ // Inner indices.
+ data_bytes = sizeof(StorageIndex) * header.inner_buffer_size;
+ memcpy(dest, value.innerIndexPtr(), data_bytes);
+ dest += data_bytes;
+
+ // Values.
+ data_bytes = sizeof(Scalar) * header.inner_buffer_size;
+ memcpy(dest, value.valuePtr(), data_bytes);
+ dest += data_bytes;
+
+ return dest;
+ }
+
+ EIGEN_DEVICE_FUNC const uint8_t* deserialize(const uint8_t* src,
+ const uint8_t* end,
+ SparseMat& value) const {
+ if (EIGEN_PREDICT_FALSE(src == nullptr)) return nullptr;
+ if (EIGEN_PREDICT_FALSE(src + sizeof(Header) > end)) return nullptr;
+
+ const size_t header_bytes = sizeof(Header);
+ Header header;
+ EIGEN_USING_STD(memcpy)
+ memcpy(&header, src, header_bytes);
+ src += header_bytes;
+
+ value.setZero();
+ value.resize(header.rows, header.cols);
+ if (header.compressed) {
+ value.makeCompressed();
+ } else {
+ value.uncompress();
+ }
+
+ // Adjust value ptr size.
+ value.data().resize(header.inner_buffer_size);
+
+ // Initialize compressed state and inner non-zeros.
+ if (!header.compressed) {
+ // Inner non-zero counts.
+ std::size_t data_bytes = sizeof(StorageIndex) * header.outer_size;
+ if (EIGEN_PREDICT_FALSE(src + data_bytes > end)) return nullptr;
+ memcpy(value.innerNonZeroPtr(), src, data_bytes);
+ src += data_bytes;
+ }
+
+ // Outer indices.
+ std::size_t data_bytes = sizeof(StorageIndex) * (header.outer_size + 1);
+ if (EIGEN_PREDICT_FALSE(src + data_bytes > end)) return nullptr;
+ memcpy(value.outerIndexPtr(), src, data_bytes);
+ src += data_bytes;
+
+ // Inner indices.
+ data_bytes = sizeof(StorageIndex) * header.inner_buffer_size;
+ if (EIGEN_PREDICT_FALSE(src + data_bytes > end)) return nullptr;
+ memcpy(value.innerIndexPtr(), src, data_bytes);
+ src += data_bytes;
+
+ // Values.
+ data_bytes = sizeof(Scalar) * header.inner_buffer_size;
+ if (EIGEN_PREDICT_FALSE(src + data_bytes > end)) return nullptr;
+ memcpy(value.valuePtr(), src, data_bytes);
+ src += data_bytes;
+ return src;
+ }
+};
+
} // end namespace Eigen
#endif // EIGEN_SPARSEMATRIX_H
diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h
index ebbadb5..3b4d7b0 100644
--- a/Eigen/src/SparseCore/SparseVector.h
+++ b/Eigen/src/SparseCore/SparseVector.h
@@ -495,6 +495,78 @@
}
+// Specialization for SparseVector.
+// Serializes [size, numNonZeros, innerIndices, values].
+template <typename Scalar, int Options, typename StorageIndex>
+class Serializer<SparseVector<Scalar, Options, StorageIndex>, void> {
+ public:
+ typedef SparseVector<Scalar, Options, StorageIndex> SparseMat;
+
+ struct Header {
+ typename SparseMat::Index size;
+ Index num_non_zeros;
+ };
+
+ EIGEN_DEVICE_FUNC size_t size(const SparseMat& value) const {
+ return sizeof(Header) +
+ (sizeof(Scalar) + sizeof(StorageIndex)) * value.nonZeros();
+ }
+
+ EIGEN_DEVICE_FUNC uint8_t* serialize(uint8_t* dest, uint8_t* end,
+ const SparseMat& value) {
+ if (EIGEN_PREDICT_FALSE(dest == nullptr)) return nullptr;
+ if (EIGEN_PREDICT_FALSE(dest + size(value) > end)) return nullptr;
+
+ const size_t header_bytes = sizeof(Header);
+ Header header = {value.innerSize(), value.nonZeros()};
+ EIGEN_USING_STD(memcpy)
+ memcpy(dest, &header, header_bytes);
+ dest += header_bytes;
+
+ // Inner indices.
+ std::size_t data_bytes = sizeof(StorageIndex) * header.num_non_zeros;
+ memcpy(dest, value.innerIndexPtr(), data_bytes);
+ dest += data_bytes;
+
+ // Values.
+ data_bytes = sizeof(Scalar) * header.num_non_zeros;
+ memcpy(dest, value.valuePtr(), data_bytes);
+ dest += data_bytes;
+
+ return dest;
+ }
+
+ EIGEN_DEVICE_FUNC const uint8_t* deserialize(const uint8_t* src,
+ const uint8_t* end,
+ SparseMat& value) const {
+ if (EIGEN_PREDICT_FALSE(src == nullptr)) return nullptr;
+ if (EIGEN_PREDICT_FALSE(src + sizeof(Header) > end)) return nullptr;
+
+ const size_t header_bytes = sizeof(Header);
+ Header header;
+ EIGEN_USING_STD(memcpy)
+ memcpy(&header, src, header_bytes);
+ src += header_bytes;
+
+ value.setZero();
+ value.resize(header.size);
+ value.resizeNonZeros(header.num_non_zeros);
+
+ // Inner indices.
+ std::size_t data_bytes = sizeof(StorageIndex) * header.num_non_zeros;
+ if (EIGEN_PREDICT_FALSE(src + data_bytes > end)) return nullptr;
+ memcpy(value.innerIndexPtr(), src, data_bytes);
+ src += data_bytes;
+
+ // Values.
+ data_bytes = sizeof(Scalar) * header.num_non_zeros;
+ if (EIGEN_PREDICT_FALSE(src + data_bytes > end)) return nullptr;
+ memcpy(value.valuePtr(), src, data_bytes);
+ src += data_bytes;
+ return src;
+ }
+};
+
} // end namespace Eigen
#endif // EIGEN_SPARSEVECTOR_H
diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h
index 9fd7e25..f70aab1 100644
--- a/Eigen/src/SparseLU/SparseLU.h
+++ b/Eigen/src/SparseLU/SparseLU.h
@@ -37,9 +37,10 @@
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
};
- SparseLUTransposeView() : m_sparseLU(NULL) {}
- SparseLUTransposeView(const SparseLUTransposeView& view) {
+ SparseLUTransposeView() : APIBase(), m_sparseLU(NULL) {}
+ SparseLUTransposeView(const SparseLUTransposeView& view) : APIBase() {
this->m_sparseLU = view.m_sparseLU;
+ this->m_isInitialized = view.m_isInitialized;
}
void setIsInitialized(const bool isInitialized) {this->m_isInitialized = isInitialized;}
void setSparseLU(SparseLUType* sparseLU) {m_sparseLU = sparseLU;}
@@ -860,7 +861,6 @@
template<typename Dest> void solveInPlace(MatrixBase<Dest> &X) const
{
Index nrhs = X.cols();
- Index n = X.rows();
// Backward solve with U
for (Index k = m_mapL.nsuper(); k >= 0; k--)
{
@@ -880,7 +880,7 @@
{
// FIXME: the following lines should use Block expressions and not Map!
Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(m_mapL.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
- Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X.coeffRef(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );
+ typename Dest::RowsBlockXpr U = X.derived().middleRows(fsupc, nsupc);
U = A.template triangularView<Upper>().solve(U);
}
@@ -903,7 +903,6 @@
{
using numext::conj;
Index nrhs = X.cols();
- Index n = X.rows();
// Forward solve with U
for (Index k = 0; k <= m_mapL.nsuper(); k++)
{
@@ -934,7 +933,7 @@
else
{
Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(m_mapL.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
- Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );
+ typename Dest::RowsBlockXpr U = X.derived().middleRows(fsupc, nsupc);
if(Conjugate)
U = A.adjoint().template triangularView<Lower>().solve(U);
else
diff --git a/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h b/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h
index 0d59a38..adfc63a 100644
--- a/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h
+++ b/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h
@@ -276,9 +276,8 @@
// Triangular solve
Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(lda) );
- Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );
- U = A.template triangularView<UnitLower>().solve(U);
-
+ typename Dest::RowsBlockXpr U = X.derived().middleRows(fsupc, nsupc);
+ U = A.template triangularView<UnitLower>().solve(U);
// Matrix-vector product
new (&A) Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) );
work.topRows(nrow).noalias() = A * U;
@@ -351,7 +350,7 @@
// Matrix-vector product with transposed submatrix
Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) );
- Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );
+ typename Dest::RowsBlockXpr U = X.derived().middleRows(fsupc, nsupc);
if(Conjugate)
U = U - A.adjoint() * work.topRows(nrow);
else
diff --git a/Eigen/src/SparseLU/SparseLU_gemm_kernel.h b/Eigen/src/SparseLU/SparseLU_gemm_kernel.h
deleted file mode 100644
index 034d379..0000000
--- a/Eigen/src/SparseLU/SparseLU_gemm_kernel.h
+++ /dev/null
@@ -1,282 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_SPARSELU_GEMM_KERNEL_H
-#define EIGEN_SPARSELU_GEMM_KERNEL_H
-
-#include "./InternalHeaderCheck.h"
-
-namespace Eigen {
-
-namespace internal {
-
-
-/** \internal
- * A general matrix-matrix product kernel optimized for the SparseLU factorization.
- * - A, B, and C must be column major
- * - lda and ldc must be multiples of the respective packet size
- * - C must have the same alignment as A
- */
-template<typename Scalar>
-EIGEN_DONT_INLINE
-void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const Scalar* B, Index ldb, Scalar* C, Index ldc)
-{
- using namespace Eigen::internal;
-
- typedef typename packet_traits<Scalar>::type Packet;
- enum {
- NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
- PacketSize = packet_traits<Scalar>::size,
- PM = 8, // peeling in M
- RN = 2, // register blocking
- RK = NumberOfRegisters>=16 ? 4 : 2, // register blocking
- BM = 4096/sizeof(Scalar), // number of rows of A-C per chunk
- SM = PM*PacketSize // step along M
- };
- Index d_end = (d/RK)*RK; // number of columns of A (rows of B) suitable for full register blocking
- Index n_end = (n/RN)*RN; // number of columns of B-C suitable for processing RN columns at once
- Index i0 = internal::first_default_aligned(A,m);
-
- eigen_internal_assert(((lda%PacketSize)==0) && ((ldc%PacketSize)==0) && (i0==internal::first_default_aligned(C,m)));
-
- // handle the non aligned rows of A and C without any optimization:
- for(Index i=0; i<i0; ++i)
- {
- for(Index j=0; j<n; ++j)
- {
- Scalar c = C[i+j*ldc];
- for(Index k=0; k<d; ++k)
- c += B[k+j*ldb] * A[i+k*lda];
- C[i+j*ldc] = c;
- }
- }
- // process the remaining rows per chunk of BM rows
- for(Index ib=i0; ib<m; ib+=BM)
- {
- Index actual_b = std::min<Index>(BM, m-ib); // actual number of rows
- Index actual_b_end1 = (actual_b/SM)*SM; // actual number of rows suitable for peeling
- Index actual_b_end2 = (actual_b/PacketSize)*PacketSize; // actual number of rows suitable for vectorization
-
- // Let's process two columns of B-C at once
- for(Index j=0; j<n_end; j+=RN)
- {
- const Scalar* Bc0 = B+(j+0)*ldb;
- const Scalar* Bc1 = B+(j+1)*ldb;
-
- for(Index k=0; k<d_end; k+=RK)
- {
-
- // load and expand a RN x RK block of B
- Packet b00, b10, b20, b30, b01, b11, b21, b31;
- { b00 = pset1<Packet>(Bc0[0]); }
- { b10 = pset1<Packet>(Bc0[1]); }
- if(RK==4) { b20 = pset1<Packet>(Bc0[2]); }
- if(RK==4) { b30 = pset1<Packet>(Bc0[3]); }
- { b01 = pset1<Packet>(Bc1[0]); }
- { b11 = pset1<Packet>(Bc1[1]); }
- if(RK==4) { b21 = pset1<Packet>(Bc1[2]); }
- if(RK==4) { b31 = pset1<Packet>(Bc1[3]); }
-
- Packet a0, a1, a2, a3, c0, c1, t0, t1;
-
- const Scalar* A0 = A+ib+(k+0)*lda;
- const Scalar* A1 = A+ib+(k+1)*lda;
- const Scalar* A2 = A+ib+(k+2)*lda;
- const Scalar* A3 = A+ib+(k+3)*lda;
-
- Scalar* C0 = C+ib+(j+0)*ldc;
- Scalar* C1 = C+ib+(j+1)*ldc;
-
- a0 = pload<Packet>(A0);
- a1 = pload<Packet>(A1);
- if(RK==4)
- {
- a2 = pload<Packet>(A2);
- a3 = pload<Packet>(A3);
- }
- else
- {
- // workaround "may be used uninitialized in this function" warning
- a2 = a3 = a0;
- }
-
-#define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);}
-#define WORK(I) \
- c0 = pload<Packet>(C0+i+(I)*PacketSize); \
- c1 = pload<Packet>(C1+i+(I)*PacketSize); \
- KMADD(c0, a0, b00, t0) \
- KMADD(c1, a0, b01, t1) \
- a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
- KMADD(c0, a1, b10, t0) \
- KMADD(c1, a1, b11, t1) \
- a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
- if(RK==4){ KMADD(c0, a2, b20, t0) }\
- if(RK==4){ KMADD(c1, a2, b21, t1) }\
- if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
- if(RK==4){ KMADD(c0, a3, b30, t0) }\
- if(RK==4){ KMADD(c1, a3, b31, t1) }\
- if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
- pstore(C0+i+(I)*PacketSize, c0); \
- pstore(C1+i+(I)*PacketSize, c1)
-
- // process rows of A' - C' with aggressive vectorization and peeling
- for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
- {
- EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL1");
- prefetch((A0+i+(5)*PacketSize));
- prefetch((A1+i+(5)*PacketSize));
- if(RK==4) prefetch((A2+i+(5)*PacketSize));
- if(RK==4) prefetch((A3+i+(5)*PacketSize));
-
- WORK(0);
- WORK(1);
- WORK(2);
- WORK(3);
- WORK(4);
- WORK(5);
- WORK(6);
- WORK(7);
- }
- // process the remaining rows with vectorization only
- for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
- {
- WORK(0);
- }
-#undef WORK
- // process the remaining rows without vectorization
- for(Index i=actual_b_end2; i<actual_b; ++i)
- {
- if(RK==4)
- {
- C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3];
- C1[i] += A0[i]*Bc1[0]+A1[i]*Bc1[1]+A2[i]*Bc1[2]+A3[i]*Bc1[3];
- }
- else
- {
- C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1];
- C1[i] += A0[i]*Bc1[0]+A1[i]*Bc1[1];
- }
- }
-
- Bc0 += RK;
- Bc1 += RK;
- } // peeled loop on k
- } // peeled loop on the columns j
- // process the last column (we now perform a matrix-vector product)
- if((n-n_end)>0)
- {
- const Scalar* Bc0 = B+(n-1)*ldb;
-
- for(Index k=0; k<d_end; k+=RK)
- {
-
- // load and expand a 1 x RK block of B
- Packet b00, b10, b20, b30;
- b00 = pset1<Packet>(Bc0[0]);
- b10 = pset1<Packet>(Bc0[1]);
- if(RK==4) b20 = pset1<Packet>(Bc0[2]);
- if(RK==4) b30 = pset1<Packet>(Bc0[3]);
-
- Packet a0, a1, a2, a3, c0, t0/*, t1*/;
-
- const Scalar* A0 = A+ib+(k+0)*lda;
- const Scalar* A1 = A+ib+(k+1)*lda;
- const Scalar* A2 = A+ib+(k+2)*lda;
- const Scalar* A3 = A+ib+(k+3)*lda;
-
- Scalar* C0 = C+ib+(n_end)*ldc;
-
- a0 = pload<Packet>(A0);
- a1 = pload<Packet>(A1);
- if(RK==4)
- {
- a2 = pload<Packet>(A2);
- a3 = pload<Packet>(A3);
- }
- else
- {
- // workaround "may be used uninitialized in this function" warning
- a2 = a3 = a0;
- }
-
-#define WORK(I) \
- c0 = pload<Packet>(C0+i+(I)*PacketSize); \
- KMADD(c0, a0, b00, t0) \
- a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
- KMADD(c0, a1, b10, t0) \
- a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
- if(RK==4){ KMADD(c0, a2, b20, t0) }\
- if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
- if(RK==4){ KMADD(c0, a3, b30, t0) }\
- if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
- pstore(C0+i+(I)*PacketSize, c0);
-
- // aggressive vectorization and peeling
- for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
- {
- EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL2");
- WORK(0);
- WORK(1);
- WORK(2);
- WORK(3);
- WORK(4);
- WORK(5);
- WORK(6);
- WORK(7);
- }
- // vectorization only
- for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
- {
- WORK(0);
- }
- // remaining scalars
- for(Index i=actual_b_end2; i<actual_b; ++i)
- {
- if(RK==4)
- C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3];
- else
- C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1];
- }
-
- Bc0 += RK;
-#undef WORK
- }
- }
-
- // process the last columns of A, corresponding to the last rows of B
- Index rd = d-d_end;
- if(rd>0)
- {
- for(Index j=0; j<n; ++j)
- {
- enum {
- Alignment = PacketSize>1 ? Aligned : 0
- };
- typedef Map<Matrix<Scalar,Dynamic,1>, Alignment > MapVector;
- typedef Map<const Matrix<Scalar,Dynamic,1>, Alignment > ConstMapVector;
- if(rd==1) MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b);
-
- else if(rd==2) MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b)
- + B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b);
-
- else MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b)
- + B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b)
- + B[2+d_end+j*ldb] * ConstMapVector(A+(d_end+2)*lda+ib, actual_b);
- }
- }
-
- } // blocking on the rows of A and C
-}
-#undef KMADD
-
-} // namespace internal
-
-} // namespace Eigen
-
-#endif // EIGEN_SPARSELU_GEMM_KERNEL_H
diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
index 424f93c..831b938 100644
--- a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
+++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
@@ -71,8 +71,7 @@
Index aligned_with_B_offset = (PacketSize-internal::first_default_aligned(B.data(), PacketSize))%PacketSize;
Map<Matrix<Scalar,Dynamic,1>, 0, OuterStride<> > l(tempv.data()+segsize+aligned_offset+aligned_with_B_offset, nrow, OuterStride<>(ldl) );
- l.setZero();
- internal::sparselu_gemm<Scalar>(l.rows(), l.cols(), B.cols(), B.data(), B.outerStride(), u.data(), u.outerStride(), l.data(), l.outerStride());
+ l.noalias() = B * u;
// Scatter tempv[] into SPA dense[] as a temporary storage
isub = lptr + no_zeros;
diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h
index 8cd331a..fd1243d 100644
--- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h
+++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h
@@ -150,8 +150,7 @@
Index offset = (PacketSize-internal::first_default_aligned(B.data(), PacketSize)) % PacketSize;
MappedMatrixBlock L(tempv.data()+w*ldu+offset, nrow, u_cols, OuterStride<>(ldl));
- L.setZero();
- internal::sparselu_gemm<Scalar>(L.rows(), L.cols(), B.cols(), B.data(), B.outerStride(), U.data(), U.outerStride(), L.data(), L.outerStride());
+ L.noalias() = B * U;
// scatter U and L
u_col = 0;
diff --git a/Eigen/src/misc/lapacke_helpers.h b/Eigen/src/misc/lapacke_helpers.h
index e5d7ca0..b6ad6e8 100644
--- a/Eigen/src/misc/lapacke_helpers.h
+++ b/Eigen/src/misc/lapacke_helpers.h
@@ -150,6 +150,7 @@
EIGEN_MAKE_LAPACKE_WRAPPER(potrf)
EIGEN_MAKE_LAPACKE_WRAPPER(getrf)
EIGEN_MAKE_LAPACKE_WRAPPER(geqrf)
+EIGEN_MAKE_LAPACKE_WRAPPER(gesdd)
#undef EIGEN_MAKE_LAPACKE_WRAPPER
}
diff --git a/cmake/EigenSmokeTestList.cmake b/cmake/EigenSmokeTestList.cmake
index 498d529..db7d3ff 100644
--- a/cmake/EigenSmokeTestList.cmake
+++ b/cmake/EigenSmokeTestList.cmake
@@ -103,6 +103,7 @@
sizeof
sizeoverflow
smallvectors
+ sparse_basic_1
sparse_basic_3
sparse_block_1
sparse_extra_4
@@ -132,4 +133,4 @@
unalignedcount
vectorwiseop_1
visitor_1
- vectorization_logic_1)
\ No newline at end of file
+ vectorization_logic_1)
diff --git a/doc/QuickReference.dox b/doc/QuickReference.dox
index c5dfce4..e96b617 100644
--- a/doc/QuickReference.dox
+++ b/doc/QuickReference.dox
@@ -367,7 +367,8 @@
<tr class="alt"><td>
\link MatrixBase::cross() cross product \endlink \matrixworld</td><td>\code
#include <Eigen/Geometry>
-vec3 = vec1.cross(vec2);\endcode</td></tr>
+v3c = v3a.cross(v3b); // size-3 vectors
+scalar = v2a.cross(v2b); // size-2 vectors \endcode</td></tr>
</table>
<a href="#" class="top">top</a>
diff --git a/doc/TutorialMatrixArithmetic.dox b/doc/TutorialMatrixArithmetic.dox
index 5fc569a..53916c2 100644
--- a/doc/TutorialMatrixArithmetic.dox
+++ b/doc/TutorialMatrixArithmetic.dox
@@ -158,7 +158,7 @@
\verbinclude tut_arithmetic_dot_cross.out
</td></tr></table>
-Remember that cross product is only for vectors of size 3. Dot product is for vectors of any sizes.
+Cross product is defined in Eigen not only for vectors of size 3 but also for vectors of size 2; see \link MatrixBase::cross() the documentation\endlink for details. The dot product is defined for vectors of any size.
When using complex numbers, Eigen's dot product is conjugate-linear in the first variable and linear in the
second variable.
diff --git a/doc/UsingBlasLapackBackends.dox b/doc/UsingBlasLapackBackends.dox
index 4572820..c700d85 100644
--- a/doc/UsingBlasLapackBackends.dox
+++ b/doc/UsingBlasLapackBackends.dox
@@ -106,6 +106,12 @@
\endcode</td><td>\code
?gesvd
\endcode</td></tr>
+<tr class="alt"><td>Singular value decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT </td><td>\code
+BDCSVD<MatrixXd> svd;
+svd.compute(m1);
+\endcode</td><td>\code
+?gesdd
+\endcode</td></tr>
<tr><td>Eigen-value decompositions \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT </td><td>\code
EigenSolver<MatrixXd> es(m1);
ComplexEigenSolver<MatrixXcd> ces(m1);
diff --git a/doc/examples/tut_arithmetic_dot_cross.cpp b/doc/examples/tut_arithmetic_dot_cross.cpp
index 5b0fd1e..d95e03c 100644
--- a/doc/examples/tut_arithmetic_dot_cross.cpp
+++ b/doc/examples/tut_arithmetic_dot_cross.cpp
@@ -9,5 +9,10 @@
std::cout << "Dot product: " << v.dot(w) << std::endl;
double dp = v.adjoint()*w; // automatic conversion of the inner product to a scalar
std::cout << "Dot product via a matrix product: " << dp << std::endl;
+
std::cout << "Cross product:\n" << v.cross(w) << std::endl;
+ Eigen::Vector2d v2(1,2);
+ Eigen::Vector2d w2(0,1);
+  double cp = v2.cross(w2); // the 2D cross product, returning a scalar
+ std::cout << "Cross product for 2D vectors: " << cp << std::endl;
}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 8ffa002..223a9f1 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -185,6 +185,7 @@
ei_add_test(packetmath "-DEIGEN_FAST_MATH=1")
ei_add_test(vectorization_logic)
ei_add_test(basicstuff)
+ei_add_test(constexpr)
ei_add_test(constructor)
ei_add_test(linearstructure)
ei_add_test(integer_types)
diff --git a/test/adjoint.cpp b/test/adjoint.cpp
index 210a4d92..37d23b1 100644
--- a/test/adjoint.cpp
+++ b/test/adjoint.cpp
@@ -63,10 +63,10 @@
};
template<typename MatrixType, typename Scalar = typename MatrixType::Scalar>
-MatrixType RandomMatrix(int rows, int cols, Scalar min, Scalar max) {
+MatrixType RandomMatrix(Index rows, Index cols, Scalar min, Scalar max) {
MatrixType M = MatrixType(rows, cols);
- for (int i=0; i<rows; ++i) {
- for (int j=0; j<cols; ++j) {
+ for (Index i=0; i<rows; ++i) {
+ for (Index j=0; j<cols; ++j) {
M(i, j) = Eigen::internal::random<Scalar>(min, max);
}
}
diff --git a/test/array_cwise.cpp b/test/array_cwise.cpp
index a7e0ff4..af8a1ef 100644
--- a/test/array_cwise.cpp
+++ b/test/array_cwise.cpp
@@ -10,7 +10,18 @@
#include <vector>
#include "main.h"
-template <typename Scalar>
+template <typename Scalar, std::enable_if_t<NumTraits<Scalar>::IsInteger,int> = 0>
+std::vector<Scalar> special_values() {
+ const Scalar zero = Scalar(0);
+ const Scalar one = Scalar(1);
+ const Scalar two = Scalar(2);
+ const Scalar three = Scalar(3);
+ const Scalar min = (std::numeric_limits<Scalar>::min)();
+ const Scalar max = (std::numeric_limits<Scalar>::max)();
+ return { zero, min, one, two, three, max };
+}
+
+template <typename Scalar, std::enable_if_t<!NumTraits<Scalar>::IsInteger, int> = 0>
std::vector<Scalar> special_values() {
const Scalar zero = Scalar(0);
const Scalar eps = Eigen::NumTraits<Scalar>::epsilon();
@@ -25,30 +36,29 @@
const Scalar min = (std::numeric_limits<Scalar>::min)();
const Scalar max = (std::numeric_limits<Scalar>::max)();
const Scalar max_exp = (static_cast<Scalar>(int(Eigen::NumTraits<Scalar>::max_exponent())) * Scalar(EIGEN_LN2)) / eps;
-
- return {zero, denorm_min, min, eps, sqrt_half, one, sqrt2, two, three, max_exp, max, inf, nan};
+ return { zero, denorm_min, min, eps, sqrt_half, one, sqrt2, two, three, max_exp, max, inf, nan };
}
template<typename Scalar>
void special_value_pairs(Array<Scalar, Dynamic, Dynamic>& x,
Array<Scalar, Dynamic, Dynamic>& y) {
std::vector<Scalar> abs_vals = special_values<Scalar>();
- const int abs_cases = abs_vals.size();
- const int num_cases = 2*abs_cases * 2*abs_cases;
+ const Index abs_cases = (Index)abs_vals.size();
+ const Index num_cases = 2*abs_cases * 2*abs_cases;
// ensure both vectorized and non-vectorized paths taken
- const int num_repeats = 2 * internal::packet_traits<Scalar>::size + 1;
+ const Index num_repeats = 2 * (Index)internal::packet_traits<Scalar>::size + 1;
x.resize(num_repeats, num_cases);
y.resize(num_repeats, num_cases);
int count = 0;
- for (int i = 0; i < abs_cases; ++i) {
+ for (Index i = 0; i < abs_cases; ++i) {
const Scalar abs_x = abs_vals[i];
- for (int sign_x = 0; sign_x < 2; ++sign_x) {
+ for (Index sign_x = 0; sign_x < 2; ++sign_x) {
Scalar x_case = sign_x == 0 ? -abs_x : abs_x;
- for (int j = 0; j < abs_cases; ++j) {
+ for (Index j = 0; j < abs_cases; ++j) {
const Scalar abs_y = abs_vals[j];
- for (int sign_y = 0; sign_y < 2; ++sign_y) {
+ for (Index sign_y = 0; sign_y < 2; ++sign_y) {
Scalar y_case = sign_y == 0 ? -abs_y : abs_y;
- for (int repeat = 0; repeat < num_repeats; ++repeat) {
+ for (Index repeat = 0; repeat < num_repeats; ++repeat) {
x(repeat, count) = x_case;
y(repeat, count) = y_case;
}
@@ -68,8 +78,8 @@
Array<Scalar, Dynamic, Dynamic> actual = fun(x, y);
bool all_pass = true;
- for (int i = 0; i < x.rows(); ++i) {
- for (int j = 0; j < x.cols(); ++j) {
+ for (Index i = 0; i < x.rows(); ++i) {
+ for (Index j = 0; j < x.cols(); ++j) {
Scalar e = static_cast<Scalar>(ref(x(i,j), y(i,j)));
Scalar a = actual(i, j);
bool success = (a==e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) || ((numext::isnan)(a) && (numext::isnan)(e));
@@ -98,7 +108,7 @@
const Scalar tol = test_precision<Scalar>();
std::vector<Scalar> abs_vals = special_values<Scalar>();
- const int num_vals = abs_vals.size();
+ const Index num_vals = (Index)abs_vals.size();
Map<Array<Scalar, Dynamic, 1>> bases(abs_vals.data(), num_vals);
bool all_pass = true;
@@ -110,7 +120,7 @@
if (exponent_is_integer) {
Int_t exponent_as_int = static_cast<Int_t>(exponent);
Array<Scalar, Dynamic, 1> eigenPow = bases.pow(exponent_as_int);
- for (int j = 0; j < num_vals; j++) {
+ for (Index j = 0; j < num_vals; j++) {
Scalar e = static_cast<Scalar>(std::pow(bases(j), exponent));
Scalar a = eigenPow(j);
bool success = (a == e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) ||
@@ -123,7 +133,7 @@
} else {
// test floating point exponent code path
Array<Scalar, Dynamic, 1> eigenPow = bases.pow(exponent);
- for (int j = 0; j < num_vals; j++) {
+ for (Index j = 0; j < num_vals; j++) {
Scalar e = static_cast<Scalar>(std::pow(bases(j), exponent));
Scalar a = eigenPow(j);
bool success = (a == e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) ||
@@ -219,7 +229,7 @@
for (Exponent exponent = min_exponent; exponent < max_exponent; ++exponent) {
test_exponent<Base, Exponent>(exponent);
}
-};
+}
void mixed_pow_test() {
// The following cases will test promoting a smaller exponent type
@@ -260,6 +270,63 @@
unary_pow_test<long long, int>();
}
+namespace Eigen {
+namespace internal {
+template <typename Scalar>
+struct test_signbit_op {
+ Scalar constexpr operator()(const Scalar& a) const { return numext::signbit(a); }
+ template <typename Packet>
+ inline Packet packetOp(const Packet& a) const {
+ return psignbit(a);
+ }
+};
+template <typename Scalar>
+struct functor_traits<test_signbit_op<Scalar>> {
+ enum { Cost = 1, PacketAccess = true }; //todo: define HasSignbit flag
+};
+} // namespace internal
+} // namespace Eigen
+
+
+template <typename Scalar>
+void signbit_test() {
+ const size_t size = 100 * internal::packet_traits<Scalar>::size;
+ ArrayX<Scalar> x(size), y(size);
+ x.setRandom();
+ std::vector<Scalar> special_vals = special_values<Scalar>();
+ for (size_t i = 0; i < special_vals.size(); i++) {
+ x(2 * i + 0) = special_vals[i];
+ x(2 * i + 1) = -special_vals[i];
+ }
+ y = x.unaryExpr(internal::test_signbit_op<Scalar>());
+
+ bool all_pass = true;
+ for (size_t i = 0; i < size; i++) {
+ const Scalar ref_val = numext::signbit(x(i));
+ bool not_same = internal::predux_any(internal::bitwise_helper<Scalar>::bitwise_xor(ref_val, y(i)));
+ if (not_same) std::cout << "signbit(" << x(i) << ") != " << y(i) << "\n";
+ all_pass = all_pass && !not_same;
+ }
+
+ VERIFY(all_pass);
+}
+void signbit_tests() {
+ signbit_test<float>();
+ signbit_test<double>();
+ signbit_test<Eigen::half>();
+ signbit_test<Eigen::bfloat16>();
+
+ signbit_test<uint8_t>();
+ signbit_test<uint16_t>();
+ signbit_test<uint32_t>();
+ signbit_test<uint64_t>();
+
+ signbit_test<int8_t>();
+ signbit_test<int16_t>();
+ signbit_test<int32_t>();
+ signbit_test<int64_t>();
+}
+
template<typename ArrayType> void array(const ArrayType& m)
{
typedef typename ArrayType::Scalar Scalar;
@@ -855,6 +922,35 @@
VERIFY( (m2 == m1.unaryExpr(arithmetic_shift_right<9>())).all() );
}
+template <typename ArrayType>
+struct signed_shift_test_impl {
+ typedef typename ArrayType::Scalar Scalar;
+ static constexpr size_t Size = sizeof(Scalar);
+ static constexpr size_t MaxShift = (CHAR_BIT * Size) - 1;
+
+ template <size_t N = 0>
+ static inline std::enable_if_t<(N > MaxShift), void> run(const ArrayType& ) {}
+ template <size_t N = 0>
+ static inline std::enable_if_t<(N <= MaxShift), void> run(const ArrayType& m) {
+ const Index rows = m.rows();
+ const Index cols = m.cols();
+
+ ArrayType m1 = ArrayType::Random(rows, cols), m2(rows, cols);
+
+ m2 = m1.unaryExpr([](const Scalar& x) { return x >> N; });
+ VERIFY((m2 == m1.unaryExpr(internal::scalar_shift_right_op<Scalar, N>())).all());
+
+ m2 = m1.unaryExpr([](const Scalar& x) { return x << N; });
+ VERIFY((m2 == m1.unaryExpr( internal::scalar_shift_left_op<Scalar, N>())).all());
+
+ run<N + 1>(m);
+ }
+};
+template <typename ArrayType>
+void signed_shift_test(const ArrayType& m) {
+ signed_shift_test_impl<ArrayType>::run(m);
+}
+
EIGEN_DECLARE_TEST(array_cwise)
{
for(int i = 0; i < g_repeat; i++) {
@@ -867,6 +963,9 @@
CALL_SUBTEST_6( array(Array<Index,Dynamic,Dynamic>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
CALL_SUBTEST_6( array_integer(ArrayXXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
CALL_SUBTEST_6( array_integer(Array<Index,Dynamic,Dynamic>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
+ CALL_SUBTEST_7( signed_shift_test(ArrayXXi(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
+ CALL_SUBTEST_7( signed_shift_test(Array<Index, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
+
}
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST_1( comparisons(Array<float, 1, 1>()) );
@@ -897,6 +996,7 @@
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST_6( int_pow_test() );
CALL_SUBTEST_7( mixed_pow_test() );
+ CALL_SUBTEST_8( signbit_tests() );
}
VERIFY((internal::is_same< internal::global_math_functions_filtering_base<int>::type, int >::value));
diff --git a/test/constexpr.cpp b/test/constexpr.cpp
new file mode 100644
index 0000000..b8f0b09
--- /dev/null
+++ b/test/constexpr.cpp
@@ -0,0 +1,52 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2022 Alex Richardson <alexrichardson@google.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+EIGEN_DECLARE_TEST(constexpr) {
+ // Clang accepts (some of) this code when using C++14/C++17, but GCC does not like
+ // the fact that `T array[Size]` inside Eigen::internal::plain_array is not initialized
+ // until after the constructor returns:
+ // error: member ‘Eigen::internal::plain_array<int, 9, 0, 0>::array’ must be initialized by mem-initializer in
+ // ‘constexpr’ constructor
+#if EIGEN_COMP_CXXVER >= 20
+ constexpr Matrix3i mat({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}});
+ VERIFY_IS_EQUAL(mat.size(), 9);
+ VERIFY_IS_EQUAL(mat(0, 0), 1);
+ static_assert(mat.coeff(0,1) == 2);
+ constexpr Array33i arr({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}});
+ VERIFY_IS_EQUAL(arr(0, 0), 1);
+ VERIFY_IS_EQUAL(arr.size(), 9);
+ static_assert(arr.coeff(0,1) == 2);
+  // Also check dynamic size arrays/matrices with fixed-size storage (currently
+  // only works if all elements are initialized, since otherwise the compiler
+  // complains about uninitialized trailing elements).
+ constexpr Matrix<int, Eigen::Dynamic, Eigen::Dynamic, 0, 3, 3> dyn_mat({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}});
+ VERIFY_IS_EQUAL(dyn_mat.size(), 9);
+ VERIFY_IS_EQUAL(dyn_mat(0, 0), 1);
+ static_assert(dyn_mat.coeff(0,1) == 2);
+ constexpr Array<int, Eigen::Dynamic, Eigen::Dynamic, 0, 3, 3> dyn_arr({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}});
+ VERIFY_IS_EQUAL(dyn_arr(0, 0), 1);
+ VERIFY_IS_EQUAL(dyn_arr.size(), 9);
+ static_assert(dyn_arr.coeff(0,1) == 2);
+#endif // EIGEN_COMP_CXXVER >= 20
+}
+
+// Check that we can use the std::initializer_list constructor for constexpr variables.
+#if EIGEN_COMP_CXXVER >= 20
+// EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT() will fail constexpr evaluation unless
+// we have std::is_constant_evaluated().
+constexpr Matrix<int, 2, 2> global_mat({{1, 2}, {3, 4}});
+
+EIGEN_DECLARE_TEST(constexpr_global) {
+ VERIFY_IS_EQUAL(global_mat.size(), 4);
+ VERIFY_IS_EQUAL(global_mat(0, 0), 1);
+ static_assert(global_mat.coeff(0,0) == 1);
+}
+#endif // EIGEN_COMP_CXXVER >= 20
diff --git a/test/geo_orthomethods.cpp b/test/geo_orthomethods.cpp
index 5f7ddb9..64b3927 100644
--- a/test/geo_orthomethods.cpp
+++ b/test/geo_orthomethods.cpp
@@ -78,6 +78,36 @@
VERIFY_IS_APPROX(v2.cross(v1), rv1.cross(v1));
}
+template<typename Scalar> void orthomethods_2()
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ typedef Matrix<Scalar,2,1> Vector2;
+ typedef Matrix<Scalar,3,1> Vector3;
+
+ Vector3 v30 = Vector3::Random(),
+ v31 = Vector3::Random();
+ Vector2 v20 = v30.template head<2>();
+ Vector2 v21 = v31.template head<2>();
+
+ VERIFY_IS_MUCH_SMALLER_THAN(v20.cross(v20), Scalar(1));
+ VERIFY_IS_MUCH_SMALLER_THAN(v21.cross(v21), Scalar(1));
+ VERIFY_IS_APPROX(v20.cross(v21), v30.cross(v31).z());
+
+ Vector2 v20Rot90(numext::conj(-v20.y()), numext::conj(v20.x()));
+ VERIFY_IS_APPROX(v20.cross( v20Rot90), v20.squaredNorm());
+ VERIFY_IS_APPROX(v20.cross(-v20Rot90), -v20.squaredNorm());
+ Vector2 v21Rot90(numext::conj(-v21.y()), numext::conj(v21.x()));
+ VERIFY_IS_APPROX(v21.cross( v21Rot90), v21.squaredNorm());
+ VERIFY_IS_APPROX(v21.cross(-v21Rot90), -v21.squaredNorm());
+
+ // check mixed product
+ typedef Matrix<RealScalar, 2, 1> RealVector2;
+ RealVector2 rv21 = RealVector2::Random();
+ v21 = rv21.template cast<Scalar>();
+ VERIFY_IS_APPROX(v20.cross(v21), v20.cross(rv21));
+ VERIFY_IS_APPROX(v21.cross(v20), rv21.cross(v20));
+}
+
template<typename Scalar, int Size> void orthomethods(int size=Size)
{
typedef typename NumTraits<Scalar>::Real RealScalar;
@@ -119,6 +149,9 @@
EIGEN_DECLARE_TEST(geo_orthomethods)
{
for(int i = 0; i < g_repeat; i++) {
+ CALL_SUBTEST_1( orthomethods_2<float>() );
+ CALL_SUBTEST_2( orthomethods_2<double>() );
+ CALL_SUBTEST_4( orthomethods_2<std::complex<double> >() );
CALL_SUBTEST_1( orthomethods_3<float>() );
CALL_SUBTEST_2( orthomethods_3<double>() );
CALL_SUBTEST_4( orthomethods_3<std::complex<double> >() );
diff --git a/test/main.h b/test/main.h
index 884b6c3..a52da9e 100644
--- a/test/main.h
+++ b/test/main.h
@@ -670,6 +670,12 @@
return true;
}
+template <typename Derived1, typename Derived2>
+bool test_isCwiseApprox(const SparseMatrixBase<Derived1>& m1,
+ const SparseMatrixBase<Derived2>& m2, bool exact) {
+ return test_isCwiseApprox(m1.toDense(), m2.toDense(), exact);
+}
+
template<typename T, typename U>
bool test_is_equal(const T& actual, const U& expected, bool expect_equal)
{
diff --git a/test/nestbyvalue.cpp b/test/nestbyvalue.cpp
index 3a86bea..c25f0bf 100644
--- a/test/nestbyvalue.cpp
+++ b/test/nestbyvalue.cpp
@@ -33,5 +33,7 @@
MatrixXd b = x;
VERIFY_IS_EQUAL(nb_temporaries,6+1);
VERIFY_IS_APPROX(b, a.rowwise().reverse().eval() + (a+a).eval());
+ // Block expressions work with dense NestByValue.
+ VERIFY_IS_APPROX(b, a.nestByValue().rowwise().reverse().eval() + (a.nestByValue()+a.nestByValue()).eval());
}
}
diff --git a/test/nullary.cpp b/test/nullary.cpp
index 2c4d938..e524837 100644
--- a/test/nullary.cpp
+++ b/test/nullary.cpp
@@ -78,8 +78,9 @@
const Scalar step = ((size == 1) ? 1 : (high-low)/RealScalar(size-1));
// check whether the result yields what we expect it to do
- VectorType m(base);
+ VectorType m(base), o(base);
m.setLinSpaced(size,low,high);
+ o.setEqualSpaced(size, low, step);
if(!NumTraits<Scalar>::IsInteger)
{
@@ -87,6 +88,7 @@
for (int i=0; i<size; ++i)
n(i) = low+RealScalar(i)*step;
VERIFY_IS_APPROX(m,n);
+ VERIFY_IS_APPROX(n,o);
CALL_SUBTEST( check_extremity_accuracy(m, low, high) );
}
@@ -256,11 +258,12 @@
{
// Check possible overflow issue
int n = 60000;
- ArrayXi a1(n), a2(n);
- a1.setLinSpaced(n, 0, n-1);
- for(int i=0; i<n; ++i)
- a2(i) = i;
- VERIFY_IS_APPROX(a1,a2);
+ ArrayXi a1(n), a2(n), a_ref(n);
+ a1.setLinSpaced(n, 0, n - 1);
+ a2.setEqualSpaced(n, 0, 1);
+ for (int i = 0; i < n; ++i) a_ref(i) = i;
+ VERIFY_IS_APPROX(a1, a_ref);
+ VERIFY_IS_APPROX(a2, a_ref);
}
template<int>
diff --git a/test/numext.cpp b/test/numext.cpp
index ee879c9..e99eddc 100644
--- a/test/numext.cpp
+++ b/test/numext.cpp
@@ -239,6 +239,63 @@
check_rsqrt_impl<T>::run();
}
+
+template <typename T>
+struct check_signbit_impl {
+ static void run() {
+ T true_mask;
+ std::memset(static_cast<void*>(&true_mask), 0xff, sizeof(T));
+ T false_mask;
+ std::memset(static_cast<void*>(&false_mask), 0x00, sizeof(T));
+
+ std::vector<T> negative_values;
+ std::vector<T> non_negative_values;
+
+ if (NumTraits<T>::IsInteger) {
+ negative_values = {static_cast<T>(-1),
+ static_cast<T>(NumTraits<T>::lowest())};
+ non_negative_values = {static_cast<T>(0), static_cast<T>(1),
+ static_cast<T>(NumTraits<T>::highest())};
+ } else {
+ // has sign bit
+ const T neg_zero = static_cast<T>(-0.0);
+ const T neg_one = static_cast<T>(-1.0);
+ const T neg_inf = -std::numeric_limits<T>::infinity();
+ const T neg_nan = -std::numeric_limits<T>::quiet_NaN();
+ // does not have sign bit
+ const T pos_zero = static_cast<T>(0.0);
+ const T pos_one = static_cast<T>(1.0);
+ const T pos_inf = std::numeric_limits<T>::infinity();
+ const T pos_nan = std::numeric_limits<T>::quiet_NaN();
+ negative_values = {neg_zero, neg_one, neg_inf, neg_nan};
+ non_negative_values = {pos_zero, pos_one, pos_inf, pos_nan};
+ }
+
+
+ auto check_all = [](auto values, auto expected) {
+ bool all_pass = true;
+ for (T val : values) {
+ const T numext_val = numext::signbit(val);
+ bool not_same = internal::predux_any(
+ internal::bitwise_helper<T>::bitwise_xor(expected, numext_val));
+ all_pass = all_pass && !not_same;
+ if (not_same)
+ std::cout << "signbit(" << val << ") = " << numext_val
+ << " != " << expected << std::endl;
+ }
+ return all_pass;
+ };
+
+ bool all_pass = check_all(non_negative_values, false_mask);
+ all_pass = all_pass && check_all(negative_values, (NumTraits<T>::IsSigned ? true_mask : false_mask));
+ VERIFY(all_pass);
+ }
+};
+template <typename T>
+void check_signbit() {
+ check_signbit_impl<T>::run();
+}
+
EIGEN_DECLARE_TEST(numext) {
for(int k=0; k<g_repeat; ++k)
{
@@ -266,10 +323,25 @@
CALL_SUBTEST( check_sqrt<double>() );
CALL_SUBTEST( check_sqrt<std::complex<float> >() );
CALL_SUBTEST( check_sqrt<std::complex<double> >() );
-
+
CALL_SUBTEST( check_rsqrt<float>() );
CALL_SUBTEST( check_rsqrt<double>() );
CALL_SUBTEST( check_rsqrt<std::complex<float> >() );
CALL_SUBTEST( check_rsqrt<std::complex<double> >() );
+
+ CALL_SUBTEST( check_signbit<half>());
+ CALL_SUBTEST( check_signbit<bfloat16>());
+ CALL_SUBTEST( check_signbit<float>());
+ CALL_SUBTEST( check_signbit<double>());
+
+ CALL_SUBTEST( check_signbit<uint8_t>());
+ CALL_SUBTEST( check_signbit<uint16_t>());
+ CALL_SUBTEST( check_signbit<uint32_t>());
+ CALL_SUBTEST( check_signbit<uint64_t>());
+
+ CALL_SUBTEST( check_signbit<int8_t>());
+ CALL_SUBTEST( check_signbit<int16_t>());
+ CALL_SUBTEST( check_signbit<int32_t>());
+ CALL_SUBTEST( check_signbit<int64_t>());
}
}
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index d7c7c9c..f082985 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -790,9 +790,19 @@
const int PacketSize = internal::unpacket_traits<Packet>::size;
const int size = PacketSize * 4;
- EIGEN_ALIGN_MAX Scalar data1[PacketSize * 4];
- EIGEN_ALIGN_MAX Scalar data2[PacketSize * 4];
- EIGEN_ALIGN_MAX Scalar ref[PacketSize * 4];
+ EIGEN_ALIGN_MAX Scalar data1[PacketSize * 4] = {};
+ EIGEN_ALIGN_MAX Scalar data2[PacketSize * 4] = {};
+ EIGEN_ALIGN_MAX Scalar ref[PacketSize * 4] = {};
+
+ // Negate with -0.
+ if (PacketTraits::HasNegate) {
+ test::packet_helper<PacketTraits::HasNegate,Packet> h;
+ data1[0] = Scalar{-0};
+ h.store(data2, internal::pnegate(h.load(data1)));
+ typedef typename internal::make_unsigned<typename internal::make_integer<Scalar>::type>::type Bits;
+ Bits bits = numext::bit_cast<Bits>(data2[0]);
+ VERIFY_IS_EQUAL(bits, static_cast<Bits>(Bits(1)<<(sizeof(Scalar)*CHAR_BIT - 1)));
+ }
for (int i = 0; i < size; ++i) {
data1[i] = Scalar(internal::random<double>(0, 1) * std::pow(10., internal::random<double>(-6, 6)));
diff --git a/test/product.h b/test/product.h
index a4d8591..b0ce06d 100644
--- a/test/product.h
+++ b/test/product.h
@@ -17,6 +17,19 @@
* (std::max)(m1.cwiseAbs2().maxCoeff(), m2.cwiseAbs2().maxCoeff()));
}
+// Allow specifying tolerance for verifying error.
+template<typename Type1, typename Type2, typename Tol>
+inline bool verifyIsApprox(const Type1& a, const Type2& b, Tol tol)
+{
+ bool ret = a.isApprox(b, tol);
+ if(!ret)
+ {
+ std::cerr << "Difference too large wrt tolerance " << tol << ", relative error is: " << test_relative_error(a,b) << std::endl;
+ }
+ return ret;
+}
+
+
template <typename LhsType, typename RhsType>
std::enable_if_t<RhsType::SizeAtCompileTime==Dynamic,void>
check_mismatched_product(LhsType& lhs, const RhsType& rhs) {
@@ -34,6 +47,7 @@
Identity.h Product.h
*/
typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::RealScalar RealScalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> RowVectorType;
typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> ColVectorType;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> RowSquareMatrixType;
@@ -41,6 +55,11 @@
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime,
MatrixType::Flags&RowMajorBit?ColMajor:RowMajor> OtherMajorMatrixType;
+  // We want a tighter epsilon for not-approx tests. Otherwise, for certain
+ // low-precision types (e.g. bfloat16), the bound ends up being relatively large
+ // (e.g. 0.12), causing flaky tests.
+ RealScalar not_approx_epsilon = RealScalar(0.1) * NumTraits<RealScalar>::dummy_precision();
+
Index rows = m.rows();
Index cols = m.cols();
@@ -68,7 +87,11 @@
// begin testing Product.h: only associativity for now
// (we use Transpose.h but this doesn't count as a test for it)
- VERIFY_IS_APPROX((m1*m1.transpose())*m2, m1*(m1.transpose()*m2));
+ {
+ // Increase tolerance, since coefficients here can get relatively large.
+ RealScalar tol = RealScalar(2) * get_test_precision(m1);
+ VERIFY(verifyIsApprox((m1*m1.transpose())*m2, m1*(m1.transpose()*m2), tol));
+ }
m3 = m1;
m3 *= m1.transpose() * m2;
VERIFY_IS_APPROX(m3, m1 * (m1.transpose()*m2));
@@ -96,7 +119,7 @@
// (we use the more accurate default epsilon)
if (!NumTraits<Scalar>::IsInteger && (std::min)(rows,cols)>1)
{
- VERIFY(areNotApprox(m1.transpose()*m2,m2.transpose()*m1));
+ VERIFY(areNotApprox(m1.transpose()*m2,m2.transpose()*m1, not_approx_epsilon));
}
// test optimized operator+= path
@@ -105,7 +128,7 @@
VERIFY_IS_APPROX(res, square + m1 * m2.transpose());
if (!NumTraits<Scalar>::IsInteger && (std::min)(rows,cols)>1)
{
- VERIFY(areNotApprox(res,square + m2 * m1.transpose()));
+ VERIFY(areNotApprox(res,square + m2 * m1.transpose(), not_approx_epsilon));
}
vcres = vc2;
vcres.noalias() += m1.transpose() * v1;
@@ -117,7 +140,7 @@
VERIFY_IS_APPROX(res, square - (m1 * m2.transpose()));
if (!NumTraits<Scalar>::IsInteger && (std::min)(rows,cols)>1)
{
- VERIFY(areNotApprox(res,square - m2 * m1.transpose()));
+ VERIFY(areNotApprox(res,square - m2 * m1.transpose(), not_approx_epsilon));
}
vcres = vc2;
vcres.noalias() -= m1.transpose() * v1;
@@ -138,7 +161,7 @@
res.noalias() = square + m1 * m2.transpose();
VERIFY_IS_APPROX(res, square + m1 * m2.transpose());
res.noalias() += square + m1 * m2.transpose();
- VERIFY_IS_APPROX(res, 2*(square + m1 * m2.transpose()));
+ VERIFY_IS_APPROX(res, Scalar(2)*(square + m1 * m2.transpose()));
res.noalias() -= square + m1 * m2.transpose();
VERIFY_IS_APPROX(res, square + m1 * m2.transpose());
@@ -146,7 +169,7 @@
res.noalias() = square - m1 * m2.transpose();
VERIFY_IS_APPROX(res, square - m1 * m2.transpose());
res.noalias() += square - m1 * m2.transpose();
- VERIFY_IS_APPROX(res, 2*(square - m1 * m2.transpose()));
+ VERIFY_IS_APPROX(res, Scalar(2)*(square - m1 * m2.transpose()));
res.noalias() -= square - m1 * m2.transpose();
VERIFY_IS_APPROX(res, square - m1 * m2.transpose());
@@ -169,7 +192,7 @@
VERIFY_IS_APPROX(res2, square2 + m1.transpose() * m2);
if (!NumTraits<Scalar>::IsInteger && (std::min)(rows,cols)>1)
{
- VERIFY(areNotApprox(res2,square2 + m2.transpose() * m1));
+ VERIFY(areNotApprox(res2,square2 + m2.transpose() * m1, not_approx_epsilon));
}
VERIFY_IS_APPROX(res.col(r).noalias() = square.adjoint() * square.col(r), (square.adjoint() * square.col(r)).eval());
@@ -247,10 +270,12 @@
// regression for blas_trais
{
- VERIFY_IS_APPROX(square * (square*square).transpose(), square * square.transpose() * square.transpose());
- VERIFY_IS_APPROX(square * (-(square*square)), -square * square * square);
- VERIFY_IS_APPROX(square * (s1*(square*square)), s1 * square * square * square);
- VERIFY_IS_APPROX(square * (square*square).conjugate(), square * square.conjugate() * square.conjugate());
+ // Increase test tolerance, since coefficients can get relatively large.
+ RealScalar tol = RealScalar(2) * get_test_precision(square);
+ VERIFY(verifyIsApprox(square * (square*square).transpose(), square * square.transpose() * square.transpose(), tol));
+ VERIFY(verifyIsApprox(square * (-(square*square)), -square * square * square, tol));
+ VERIFY(verifyIsApprox(square * (s1*(square*square)), s1 * square * square * square, tol));
+ VERIFY(verifyIsApprox(square * (square*square).conjugate(), square * square.conjugate() * square.conjugate(), tol));
}
// destination with a non-default inner-stride
diff --git a/test/product_large.cpp b/test/product_large.cpp
index 3d0204b..8a9d7f8 100644
--- a/test/product_large.cpp
+++ b/test/product_large.cpp
@@ -117,6 +117,7 @@
CALL_SUBTEST_8( product(Matrix<double,Dynamic,Dynamic,RowMajor>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
CALL_SUBTEST_9( product(Matrix<std::complex<float>,Dynamic,Dynamic,RowMajor>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
CALL_SUBTEST_10( product(Matrix<std::complex<double>,Dynamic,Dynamic,RowMajor>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
+ CALL_SUBTEST_11( product(Matrix<bfloat16,Dynamic,Dynamic,RowMajor>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
}
CALL_SUBTEST_6( product_large_regressions<0>() );
diff --git a/test/product_small.cpp b/test/product_small.cpp
index c1d9943..a3a04dc 100644
--- a/test/product_small.cpp
+++ b/test/product_small.cpp
@@ -281,6 +281,25 @@
}
}
+template<typename T>
+void product_sweep(int max_m, int max_k, int max_n) {
+ using Matrix = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>;
+ for (int m = 1; m < max_m; ++m) {
+ for (int n = 1; n < max_n; ++n) {
+ Matrix C = Matrix::Zero(m, n);
+ Matrix Cref = Matrix::Zero(m, n);
+ for (int k = 1; k < max_k; ++k) {
+ Matrix A = Matrix::Random(m, k);
+ Matrix B = Matrix::Random(k, n);
+ C = A * B;
+ Cref.setZero();
+ ref_prod(Cref, A, B);
+ VERIFY_IS_APPROX(C, Cref);
+ }
+ }
+ }
+}
+
EIGEN_DECLARE_TEST(product_small)
{
for(int i = 0; i < g_repeat; i++) {
@@ -290,6 +309,7 @@
CALL_SUBTEST_3( product(Matrix3d()) );
CALL_SUBTEST_4( product(Matrix4d()) );
CALL_SUBTEST_5( product(Matrix4f()) );
+ CALL_SUBTEST_10( product(Matrix<bfloat16, 3, 2>()) );
CALL_SUBTEST_6( product1x1<0>() );
CALL_SUBTEST_11( test_lazy_l1<float>() );
@@ -316,6 +336,12 @@
CALL_SUBTEST_6( bug_1311<5>() );
CALL_SUBTEST_9( test_dynamic_bool() );
+
+ // Commonly specialized vectorized types.
+ CALL_SUBTEST_50( product_sweep<float>(10, 10, 10) );
+ CALL_SUBTEST_51( product_sweep<double>(10, 10, 10) );
+ CALL_SUBTEST_52( product_sweep<Eigen::half>(10, 10, 10) );
+ CALL_SUBTEST_53( product_sweep<Eigen::bfloat16>(10, 10, 10) );
}
CALL_SUBTEST_6( product_small_regressions<0>() );
diff --git a/test/product_syrk.cpp b/test/product_syrk.cpp
index 8becd37..16ccb3a 100644
--- a/test/product_syrk.cpp
+++ b/test/product_syrk.cpp
@@ -141,6 +141,7 @@
s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2);
CALL_SUBTEST_3( syrk(MatrixXcf(s, s)) );
CALL_SUBTEST_4( syrk(MatrixXcd(s, s)) );
+ CALL_SUBTEST_5( syrk(Matrix<bfloat16, Dynamic, Dynamic>(s, s)) );
TEST_SET_BUT_UNUSED_VARIABLE(s)
}
}
diff --git a/test/reshape.cpp b/test/reshape.cpp
index d248f01..ca7a73e 100644
--- a/test/reshape.cpp
+++ b/test/reshape.cpp
@@ -196,6 +196,24 @@
}
}
+template<typename BlockType>
+void reshape_block(const BlockType& M) {
+ auto dense = M.eval();
+ Index rows = M.size() / 2;
+ Index cols = M.size() / rows;
+ VERIFY_IS_EQUAL(dense.reshaped(rows, cols), M.reshaped(rows, cols));
+
+ for (Index i=0; i<rows; ++i) {
+ VERIFY_IS_EQUAL(dense.reshaped(rows, cols).row(i),
+ M.reshaped(rows, cols).row(i));
+ }
+
+ for (Index j = 0; j<cols; ++j) {
+ VERIFY_IS_EQUAL(dense.reshaped(rows, cols).col(j),
+ M.reshaped(rows, cols).col(j));
+ }
+}
+
EIGEN_DECLARE_TEST(reshape)
{
typedef Matrix<int,Dynamic,Dynamic,RowMajor> RowMatrixXi;
@@ -216,4 +234,5 @@
CALL_SUBTEST(reshape4x4(rmx));
CALL_SUBTEST(reshape4x4(rm4));
+ CALL_SUBTEST(reshape_block(rm4.col(1)));
}
diff --git a/test/serializer.cpp b/test/serializer.cpp
index 76b9083..d3e7ba5 100644
--- a/test/serializer.cpp
+++ b/test/serializer.cpp
@@ -9,8 +9,71 @@
#include "main.h"
-#include <vector>
#include <Eigen/Core>
+#include <Eigen/SparseCore>
+#include <vector>
+
+template <typename T>
+struct RandomImpl {
+ static auto Create(Eigen::Index rows, Eigen::Index cols) {
+ return T::Random(rows, cols);
+ }
+};
+
+template <typename Scalar, int Options, typename DenseIndex>
+struct RandomImpl<Eigen::SparseMatrix<Scalar, Options, DenseIndex>> {
+ using T = Eigen::SparseMatrix<Scalar, Options, DenseIndex>;
+
+ static auto Create(Eigen::Index rows, Eigen::Index cols) {
+ Eigen::SparseMatrix<Scalar, Options, DenseIndex> M(rows, cols);
+ M.setZero();
+ double density = 0.1;
+
+ // Reserve some space along each inner dim.
+ int nnz = static_cast<int>(std::ceil(density * 1.5 * M.innerSize()));
+ M.reserve(Eigen::VectorXi::Constant(M.outerSize(), nnz));
+
+ for (int j = 0; j < M.outerSize(); j++) {
+ for (int i = 0; i < M.innerSize(); i++) {
+ bool zero = (Eigen::internal::random<double>(0, 1) > density);
+ if (!zero) {
+ M.insertByOuterInner(j, i) = internal::random<Scalar>();
+ }
+ }
+ }
+
+ // 50-50 whether to compress or not.
+ if (Eigen::internal::random<double>(0, 1) >= 0.5) {
+ M.makeCompressed();
+ }
+
+ return M;
+ }
+};
+
+template <typename Scalar, int Options, typename DenseIndex>
+struct RandomImpl<Eigen::SparseVector<Scalar, Options, DenseIndex>> {
+ using T = Eigen::SparseVector<Scalar, Options, DenseIndex>;
+
+ static auto Create(Eigen::Index rows, Eigen::Index cols) {
+ Eigen::SparseVector<Scalar, Options, DenseIndex> M(rows, cols);
+ M.setZero();
+ double density = 0.1;
+
+ // Reserve some space along each inner dim.
+ int nnz = static_cast<int>(density * 1.5 * M.innerSize());
+ M.reserve(nnz);
+
+ for (int i = 0; i < M.innerSize(); i++) {
+ bool zero = (Eigen::internal::random<double>(0, 1) > density);
+ if (!zero) {
+ M.insert(i) = internal::random<Scalar>();
+ }
+ }
+
+ return M;
+ }
+};
struct MyPodType {
double x;
@@ -67,9 +130,9 @@
void test_eigen_type(const T& type) {
const Index rows = type.rows();
const Index cols = type.cols();
-
- const T initial = T::Random(rows, cols);
-
+
+ const T initial = RandomImpl<T>::Create(rows, cols);
+
// Serialize.
Eigen::Serializer<T> serializer;
size_t buffer_size = serializer.size(initial);
@@ -160,7 +223,9 @@
CALL_SUBTEST( test_eigen_type(Eigen::Vector3f()) );
CALL_SUBTEST( test_eigen_type(Eigen::Matrix4d()) );
CALL_SUBTEST( test_eigen_type(Eigen::MatrixXd(15, 17)) );
-
+ CALL_SUBTEST(test_eigen_type(Eigen::SparseMatrix<float>(13, 12)));
+ CALL_SUBTEST(test_eigen_type(Eigen::SparseVector<float>(17)));
+
CALL_SUBTEST( test_dense_types( Eigen::Array33f(),
Eigen::ArrayXd(10),
Eigen::MatrixXd(15, 17)) );
diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp
index 8694490..52ff7b7 100644
--- a/test/sparse_basic.cpp
+++ b/test/sparse_basic.cpp
@@ -28,8 +28,8 @@
const Index rows = ref.rows();
const Index cols = ref.cols();
- //const Index inner = ref.innerSize();
- //const Index outer = ref.outerSize();
+ const Index inner = ref.innerSize();
+ const Index outer = ref.outerSize();
typedef typename SparseMatrixType::Scalar Scalar;
typedef typename SparseMatrixType::RealScalar RealScalar;
@@ -91,8 +91,12 @@
for (Index k=0; k<nnz; ++k)
{
Index i = internal::random<Index>(0,rows-1);
- if (m1.coeff(i,j)==Scalar(0))
- m2.insert(i,j) = m1(i,j) = internal::random<Scalar>();
+ if (m1.coeff(i, j) == Scalar(0)) {
+ Scalar v = internal::random<Scalar>();
+ if (v == Scalar(0)) v = Scalar(1);
+ m1(i, j) = v;
+ m2.insert(i, j) = v;
+ }
}
}
@@ -116,13 +120,18 @@
{
Index i = internal::random<Index>(0,rows-1);
Index j = internal::random<Index>(0,cols-1);
- if ((m1.coeff(i,j)==Scalar(0)) && (internal::random<int>()%2))
- m2.insert(i,j) = m1(i,j) = internal::random<Scalar>();
+ if ((m1.coeff(i, j) == Scalar(0)) && (internal::random<int>() % 2)) {
+ Scalar v = internal::random<Scalar>();
+ if (v == Scalar(0)) v = Scalar(1);
+ m1(i, j) = v;
+ m2.insert(i, j) = v;
+ }
else
{
Scalar v = internal::random<Scalar>();
- m2.coeffRef(i,j) += v;
- m1(i,j) += v;
+ if (v == Scalar(0)) v = Scalar(1);
+ m1(i, j) = v;
+ m2.coeffRef(i, j) = v;
}
}
VERIFY_IS_APPROX(m2,m1);
@@ -140,8 +149,12 @@
{
Index i = internal::random<Index>(0,rows-1);
Index j = internal::random<Index>(0,cols-1);
- if (m1.coeff(i,j)==Scalar(0))
- m2.insert(i,j) = m1(i,j) = internal::random<Scalar>();
+ if (m1.coeff(i, j) == Scalar(0)) {
+ Scalar v = internal::random<Scalar>();
+ if (v == Scalar(0)) v = Scalar(1);
+ m1(i, j) = v;
+ m2.insert(i, j) = v;
+ }
if(mode==3)
m2.reserve(r);
}
@@ -150,6 +163,63 @@
VERIFY_IS_APPROX(m2,m1);
}
+ // test sort
+ if (inner > 1) {
+ bool StorageOrdersMatch = DenseMatrix::IsRowMajor == SparseMatrixType::IsRowMajor;
+ DenseMatrix m1(rows, cols);
+ m1.setZero();
+ SparseMatrixType m2(rows, cols);
+ // generate random inner indices with no repeats
+ Vector<Index, Dynamic> innerIndices(inner);
+ innerIndices.setLinSpaced(inner, 0, inner - 1);
+ for (Index j = 0; j < outer; j++) {
+ std::random_shuffle(innerIndices.begin(), innerIndices.end());
+ Index nzj = internal::random<Index>(2, inner / 2);
+ for (Index k = 0; k < nzj; k++) {
+ Index i = innerIndices[k];
+ Scalar val = internal::random<Scalar>();
+ m1.coeffRefByOuterInner(StorageOrdersMatch ? j : i, StorageOrdersMatch ? i : j) = val;
+ m2.insertByOuterInner(j, i) = val;
+ }
+ }
+
+ VERIFY_IS_APPROX(m2, m1);
+ // sort wrt greater
+ m2.template sortInnerIndices<std::greater<>>();
+ // verify that all inner vectors are not sorted wrt less
+ VERIFY_IS_EQUAL(m2.template innerIndicesAreSorted<std::less<>>(), 0);
+ // verify that all inner vectors are sorted wrt greater
+ VERIFY_IS_EQUAL(m2.template innerIndicesAreSorted<std::greater<>>(), m2.outerSize());
+ // verify that sort does not change evaluation
+ VERIFY_IS_APPROX(m2, m1);
+ // sort wrt less
+ m2.template sortInnerIndices<std::less<>>();
+ // verify that all inner vectors are sorted wrt less
+ VERIFY_IS_EQUAL(m2.template innerIndicesAreSorted<std::less<>>(), m2.outerSize());
+ // verify that all inner vectors are not sorted wrt greater
+ VERIFY_IS_EQUAL(m2.template innerIndicesAreSorted<std::greater<>>(), 0);
+ // verify that sort does not change evaluation
+ VERIFY_IS_APPROX(m2, m1);
+
+ m2.makeCompressed();
+ // sort wrt greater
+ m2.template sortInnerIndices<std::greater<>>();
+ // verify that all inner vectors are not sorted wrt less
+ VERIFY_IS_EQUAL(m2.template innerIndicesAreSorted<std::less<>>(), 0);
+ // verify that all inner vectors are sorted wrt greater
+ VERIFY_IS_EQUAL(m2.template innerIndicesAreSorted<std::greater<>>(), m2.outerSize());
+ // verify that sort does not change evaluation
+ VERIFY_IS_APPROX(m2, m1);
+ // sort wrt less
+ m2.template sortInnerIndices<std::less<>>();
+ // verify that all inner vectors are sorted wrt less
+ VERIFY_IS_EQUAL(m2.template innerIndicesAreSorted<std::less<>>(), m2.outerSize());
+ // verify that all inner vectors are not sorted wrt greater
+ VERIFY_IS_EQUAL(m2.template innerIndicesAreSorted<std::greater<>>(), 0);
+ // verify that sort does not change evaluation
+ VERIFY_IS_APPROX(m2, m1);
+ }
+
// test basic computations
{
DenseMatrix refM1 = DenseMatrix::Zero(rows, cols);
@@ -729,9 +799,12 @@
CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double>(8, 8)) ));
CALL_SUBTEST_2(( sparse_basic(SparseMatrix<std::complex<double>, ColMajor>(r, c)) ));
CALL_SUBTEST_2(( sparse_basic(SparseMatrix<std::complex<double>, RowMajor>(r, c)) ));
- CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double>(r, c)) ));
- CALL_SUBTEST_5(( sparse_basic(SparseMatrix<double,ColMajor,long int>(r, c)) ));
- CALL_SUBTEST_5(( sparse_basic(SparseMatrix<double,RowMajor,long int>(r, c)) ));
+ CALL_SUBTEST_2(( sparse_basic(SparseMatrix<float, RowMajor>(r, c))));
+ CALL_SUBTEST_2(( sparse_basic(SparseMatrix<float, ColMajor>(r, c))));
+ CALL_SUBTEST_3(( sparse_basic(SparseMatrix<double, ColMajor>(r, c))));
+ CALL_SUBTEST_3(( sparse_basic(SparseMatrix<double, RowMajor>(r, c))));
+ CALL_SUBTEST_4(( sparse_basic(SparseMatrix<double, ColMajor,long int>(r, c)) ));
+ CALL_SUBTEST_4(( sparse_basic(SparseMatrix<double, RowMajor,long int>(r, c)) ));
r = Eigen::internal::random<int>(1,100);
c = Eigen::internal::random<int>(1,100);
@@ -739,14 +812,14 @@
r = c; // check square matrices in 25% of tries
}
- CALL_SUBTEST_6(( sparse_basic(SparseMatrix<double,ColMajor,short int>(short(r), short(c))) ));
- CALL_SUBTEST_6(( sparse_basic(SparseMatrix<double,RowMajor,short int>(short(r), short(c))) ));
+ CALL_SUBTEST_5(( sparse_basic(SparseMatrix<double,ColMajor,short int>(short(r), short(c))) ));
+ CALL_SUBTEST_5(( sparse_basic(SparseMatrix<double,RowMajor,short int>(short(r), short(c))) ));
}
// Regression test for bug 900: (manually insert higher values here, if you have enough RAM):
- CALL_SUBTEST_3((big_sparse_triplet<SparseMatrix<float, RowMajor, int> >(10000, 10000, 0.125)));
- CALL_SUBTEST_4((big_sparse_triplet<SparseMatrix<double, ColMajor, long int> >(10000, 10000, 0.125)));
+ CALL_SUBTEST_5(( big_sparse_triplet<SparseMatrix<float, RowMajor, int>>(10000, 10000, 0.125)));
+ CALL_SUBTEST_5(( big_sparse_triplet<SparseMatrix<double, ColMajor, long int>>(10000, 10000, 0.125)));
- CALL_SUBTEST_7( bug1105<0>() );
+ CALL_SUBTEST_5(bug1105<0>());
}
#endif
diff --git a/test/sparse_solver.h b/test/sparse_solver.h
index a9b18b8..01846e2 100644
--- a/test/sparse_solver.h
+++ b/test/sparse_solver.h
@@ -99,6 +99,13 @@
VERIFY(solver.info() == Success && "solving failed when using Map");
VERIFY(oldb.isApprox(bm) && "sparse solver testing: the rhs should not be modified!");
VERIFY(xm.isApprox(refX,test_precision<Scalar>()));
+
+ // Test with a Map and non-unit stride.
+ Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> out(2*xm.rows(), 2*xm.cols());
+ out.setZero();
+ Eigen::Map<DenseRhs, 0, Stride<Eigen::Dynamic, 2>> outm(out.data(), xm.rows(), xm.cols(), Stride<Eigen::Dynamic, 2>(2 * xm.rows(), 2));
+ outm = solver.solve(bm);
+ VERIFY(outm.isApprox(refX,test_precision<Scalar>()));
}
// if not too large, do some extra check:
diff --git a/test/sparse_vector.cpp b/test/sparse_vector.cpp
index e1e74f3..88cea09 100644
--- a/test/sparse_vector.cpp
+++ b/test/sparse_vector.cpp
@@ -15,6 +15,7 @@
double densityVec = (std::max)(8./(rows), 0.1);
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
typedef Matrix<Scalar,Dynamic,1> DenseVector;
+ typedef Matrix<DenseIndex,Dynamic,1> DenseIndexVector;
typedef SparseVector<Scalar,0,StorageIndex> SparseVectorType;
typedef SparseMatrix<Scalar,0,StorageIndex> SparseMatrixType;
Scalar eps = 1e-6;
@@ -143,6 +144,33 @@
}
}
+ // test sort
+ if(rows > 1)
+ {
+ SparseVectorType vec1(rows);
+ DenseVector refVec1 = DenseVector::Zero(rows);
+ DenseIndexVector innerIndices(rows);
+ innerIndices.setLinSpaced(0, rows - 1);
+ std::random_shuffle(innerIndices.begin(), innerIndices.end());
+ Index nz = internal::random<Index>(2, rows / 2);
+ for (Index k = 0; k < nz; k++)
+ {
+ Index i = innerIndices[k];
+ Scalar val = internal::random<Scalar>();
+ refVec1.coeffRef(i) = val;
+ vec1.insert(i) = val;
+ }
+
+ vec1.template sortInnerIndices<std::greater<>>();
+ VERIFY_IS_APPROX(vec1, refVec1);
+ VERIFY_IS_EQUAL(vec1.template innerIndicesAreSorted<std::greater<>>(), 1);
+ VERIFY_IS_EQUAL(vec1.template innerIndicesAreSorted<std::less<>>(), 0);
+ vec1.template sortInnerIndices<std::less<>>();
+ VERIFY_IS_APPROX(vec1, refVec1);
+ VERIFY_IS_EQUAL(vec1.template innerIndicesAreSorted<std::greater<>>(), 0);
+ VERIFY_IS_EQUAL(vec1.template innerIndicesAreSorted<std::less<>>(), 1);
+ }
+
}
void test_pruning() {
using SparseVectorType = SparseVector<double, 0, int>;
diff --git a/test/visitor.cpp b/test/visitor.cpp
index 3b118f9..bc34917 100644
--- a/test/visitor.cpp
+++ b/test/visitor.cpp
@@ -207,8 +207,9 @@
// Unrolled - ColMajor.
{
- Eigen::Matrix4f X = Eigen::Matrix4f::Random();
- TrackedVisitor<double, false> visitor;
+ using MatrixType = Matrix<float, 4, 4, ColMajor>;
+ MatrixType X = MatrixType::Random(4, 4);
+ TrackedVisitor<MatrixType::Scalar, false> visitor;
X.visit(visitor);
Index count = 0;
for (Index j=0; j<X.cols(); ++j) {
@@ -221,10 +222,10 @@
}
// Unrolled - RowMajor.
- using Matrix4fRowMajor = Eigen::Matrix<float, 4, 4, Eigen::RowMajor>;
{
- Matrix4fRowMajor X = Matrix4fRowMajor::Random();
- TrackedVisitor<double, false> visitor;
+ using MatrixType = Matrix<float, 4, 4, RowMajor>;
+ MatrixType X = MatrixType::Random(4, 4);
+ TrackedVisitor<MatrixType::Scalar, false> visitor;
X.visit(visitor);
Index count = 0;
for (Index i=0; i<X.rows(); ++i) {
@@ -238,8 +239,9 @@
// Not unrolled - ColMajor
{
- Eigen::MatrixXf X = Eigen::MatrixXf::Random(4, 4);
- TrackedVisitor<double, false> visitor;
+ using MatrixType = Matrix<float, Dynamic, Dynamic, ColMajor>;
+ MatrixType X = MatrixType::Random(4, 4);
+ TrackedVisitor<MatrixType::Scalar, false> visitor;
X.visit(visitor);
Index count = 0;
for (Index j=0; j<X.cols(); ++j) {
@@ -252,10 +254,10 @@
}
// Not unrolled - RowMajor.
- using MatrixXfRowMajor = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
{
- MatrixXfRowMajor X = MatrixXfRowMajor::Random(4, 4);
- TrackedVisitor<double, false> visitor;
+ using MatrixType = Matrix<float, Dynamic, Dynamic, RowMajor>;
+ MatrixType X = MatrixType::Random(4, 4);
+ TrackedVisitor<MatrixType::Scalar, false> visitor;
X.visit(visitor);
Index count = 0;
for (Index i=0; i<X.rows(); ++i) {
@@ -269,10 +271,11 @@
// Vectorized - ColMajor
{
+ using MatrixType = Matrix<float, Dynamic, Dynamic, ColMajor>;
// Ensure rows/cols is larger than packet size.
- constexpr int PacketSize = Eigen::internal::packet_traits<float>::size;
- Eigen::MatrixXf X = Eigen::MatrixXf::Random(4 * PacketSize, 4 * PacketSize);
- TrackedVisitor<double, true> visitor;
+ constexpr int PacketSize = Eigen::internal::packet_traits<MatrixType::Scalar>::size;
+ MatrixType X = MatrixType::Random(4 * PacketSize, 4 * PacketSize);
+ TrackedVisitor<MatrixType::Scalar, true> visitor;
X.visit(visitor);
Index previ = -1;
Index prevj = 0;
@@ -287,17 +290,18 @@
previ = i;
prevj = j;
}
- if (Eigen::internal::packet_traits<float>::Vectorizable) {
+ if (Eigen::internal::packet_traits<MatrixType::Scalar>::Vectorizable) {
VERIFY(visitor.vectorized);
}
}
// Vectorized - RowMajor.
{
+ using MatrixType = Matrix<float, Dynamic, Dynamic, RowMajor>;
// Ensure rows/cols is larger than packet size.
- constexpr int PacketSize = Eigen::internal::packet_traits<float>::size;
- MatrixXfRowMajor X = MatrixXfRowMajor::Random(4 * PacketSize, 4 * PacketSize);
- TrackedVisitor<double, true> visitor;
+ constexpr int PacketSize = Eigen::internal::packet_traits<MatrixType::Scalar>::size;
+ MatrixType X = MatrixType::Random(4 * PacketSize, 4 * PacketSize);
+ TrackedVisitor<MatrixType::Scalar, true> visitor;
X.visit(visitor);
Index previ = 0;
Index prevj = -1;
@@ -312,7 +316,7 @@
previ = i;
prevj = j;
}
- if (Eigen::internal::packet_traits<float>::Vectorizable) {
+ if (Eigen::internal::packet_traits<MatrixType::Scalar>::Vectorizable) {
VERIFY(visitor.vectorized);
}
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
index d9a3d32..7864a2a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
@@ -95,6 +95,10 @@
return firstLevelCacheSize();
#endif
}
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const {
+ // Nothing. Default device operations are synchronous.
+ }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
#if !defined(EIGEN_GPU_COMPILE_PHASE)
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index a3adf61..6accc66 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -147,6 +147,10 @@
// The l3 cache size is shared between all the cores.
return l3CacheSize() / num_threads_;
}
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const {
+ // Nothing. Threadpool device operations are synchronous.
+ }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
// Should return an enum that encodes the ISA supported by the CPU
diff --git a/unsupported/Eigen/src/IterativeSolvers/IDRS.h b/unsupported/Eigen/src/IterativeSolvers/IDRS.h
index 2c7d7b0..d8f7a32 100644
--- a/unsupported/Eigen/src/IterativeSolvers/IDRS.h
+++ b/unsupported/Eigen/src/IterativeSolvers/IDRS.h
@@ -66,7 +66,6 @@
const RealScalar tol = relres;
const Index maxit = iter;
- Index replacements = 0;
bool trueres = false;
FullPivLU<DenseMatrixType> lu_solver;
@@ -88,15 +87,14 @@
}
// from http://homepage.tudelft.nl/1w5b5/IDRS/manual.pdf
// A peak in the residual is considered dangerously high if‖ri‖/‖b‖> C(tol/epsilon).
- // With epsilon the
- // relative machine precision. The factor tol/epsilon corresponds to the size of a
- // finite precision number that is so large that the absolute round-off error in
- // this number, when propagated through the process, makes it impossible to
- // achieve the required accuracy.The factor C accounts for the accumulation of
- // round-off errors. This parameter has beenset to 10−3.
- // mp is epsilon/C
- // 10^3 * eps is very conservative, so normally no residual replacements will take place.
- // It only happens if things go very wrong. Too many restarts may ruin the convergence.
+ // With epsilon the relative machine precision. The factor tol/epsilon corresponds
+ // to the size of a finite precision number that is so large that the absolute
+ // round-off error in this number, when propagated through the process, makes it
+ // impossible to achieve the required accuracy. The factor C accounts for the
+ // accumulation of round-off errors. This parameter has been set to 10^{-3}.
+ // mp is epsilon/C 10^3 * eps is very conservative, so normally no residual
+ // replacements will take place. It only happens if things go very wrong. Too many
+ // restarts may ruin the convergence.
const RealScalar mp = RealScalar(1e3) * NumTraits<Scalar>::epsilon();
// Compute initial residual
@@ -224,7 +222,6 @@
if (trueres && normr < normb) {
r = b - A * x;
trueres = false;
- replacements++;
}
// Smoothing: