Update Eigen to: https://gitlab.com/libeigen/eigen/-/commit/3d9051ea84a5089b277c88dac456b3b1576bfa7f
BEGIN_PUBLIC
Update Eigen to: https://gitlab.com/libeigen/eigen/-/commit/3d9051ea84a5089b277c88dac456b3b1576bfa7f
Additional minor changes account for an API change in `TensorDeviceGpu`: the static
variables are replaced by a safer singleton class (a call-site sketch is included below).
END_PUBLIC
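For context, here is a minimal sketch of what the call-site change looks like with the new
`GpuDeviceProperties` singleton (names are taken from the `TensorDeviceGpu.h` hunk below; the
caller code itself is hypothetical):

    // Previously, device properties lived in file-static globals that callers
    // had to populate explicitly:
    //   initializeDeviceProp();
    //   const gpuDeviceProp_t& props = m_deviceProperties[device];
    //
    // With the singleton, initialization happens lazily inside the accessor and
    // callers simply query the per-device entry:
    int device = 0;
    const gpuDeviceProp_t& props = Eigen::GetGpuDeviceProperties(device);
    const int max_threads = props.maxThreadsPerBlock;  // use as before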
For TFRT, the OSS Eigen version had not been updated in over a year. This update corrects
that, and it required a small change in how `__shfl_down_sync` is handled for `Eigen::half`,
for which an overload is now provided.
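The `__shfl_down_sync` change lives in the TFRT integration rather than in this diff. As a
rough illustration only, such an overload can be shaped along the following lines; routing
through the float representation (instead of the raw half bits) and the exact signature are
assumptions, not the actual TFRT code:

    #include <Eigen/Core>  // Eigen::half

    // Hypothetical sketch: an Eigen::half overload for the warp shuffle.
    // Shuffling the float representation avoids depending on Eigen::half's
    // internal bit layout; half -> float -> half round-trips exactly.
    __device__ inline Eigen::half __shfl_down_sync(unsigned mask, Eigen::half value,
                                                   unsigned delta,
                                                   int width = warpSize) {
      return Eigen::half(
          __shfl_down_sync(mask, static_cast<float>(value), delta, width));
    }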
PiperOrigin-RevId: 374792818
diff --git a/Eigen/LU b/Eigen/LU
index 0fb184b..1236ceb 100644
--- a/Eigen/LU
+++ b/Eigen/LU
@@ -38,9 +38,7 @@
#include "src/LU/Determinant.h"
#include "src/LU/InverseImpl.h"
-// Use the SSE optimized version whenever possible. At the moment the
-// SSE version doesn't compile when AVX is enabled
-#if (defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX) || defined EIGEN_VECTORIZE_NEON
+#if defined EIGEN_VECTORIZE_SSE || defined EIGEN_VECTORIZE_NEON
#include "src/LU/arch/InverseSize4.h"
#endif
diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h
index f6e1d0a..9acca6c 100644
--- a/Eigen/src/Core/DenseStorage.h
+++ b/Eigen/src/Core/DenseStorage.h
@@ -163,6 +163,30 @@
EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
};
+struct plain_array_helper {
+ template<typename T, int Size, int MatrixOrArrayOptions, int Alignment>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ static void copy(const plain_array<T, Size, MatrixOrArrayOptions, Alignment>& src, const Eigen::Index size,
+ plain_array<T, Size, MatrixOrArrayOptions, Alignment>& dst) {
+ smart_copy(src.array, src.array + size, dst.array);
+ }
+
+ template<typename T, int Size, int MatrixOrArrayOptions, int Alignment>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ static void swap(plain_array<T, Size, MatrixOrArrayOptions, Alignment>& a, const Eigen::Index a_size,
+ plain_array<T, Size, MatrixOrArrayOptions, Alignment>& b, const Eigen::Index b_size) {
+ if (a_size < b_size) {
+ std::swap_ranges(b.array, b.array + a_size, a.array);
+ smart_move(b.array + a_size, b.array + b_size, a.array + a_size);
+ } else if (a_size > b_size) {
+ std::swap_ranges(a.array, a.array + b_size, b.array);
+ smart_move(a.array + b_size, a.array + a_size, b.array + b_size);
+ } else {
+ std::swap_ranges(a.array, a.array + a_size, b.array);
+ }
+ }
+};
+
} // end namespace internal
/** \internal
@@ -268,21 +292,25 @@
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
- EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
+ : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows), m_cols(other.m_cols)
+ {
+ internal::plain_array_helper::copy(other.m_data, m_rows * m_cols, m_data);
+ }
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other)
{
- m_data = other.m_data;
m_rows = other.m_rows;
m_cols = other.m_cols;
+ internal::plain_array_helper::copy(other.m_data, m_rows * m_cols, m_data);
}
return *this;
}
EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
{
- numext::swap(m_data,other.m_data);
+ internal::plain_array_helper::swap(m_data, m_rows * m_cols, other.m_data, other.m_rows * other.m_cols);
numext::swap(m_rows,other.m_rows);
numext::swap(m_cols,other.m_cols);
}
@@ -303,21 +331,26 @@
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
- EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
+ : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows)
+ {
+ internal::plain_array_helper::copy(other.m_data, m_rows * _Cols, m_data);
+ }
+
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other)
{
- m_data = other.m_data;
m_rows = other.m_rows;
+ internal::plain_array_helper::copy(other.m_data, m_rows * _Cols, m_data);
}
return *this;
}
EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {}
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
- {
- numext::swap(m_data,other.m_data);
- numext::swap(m_rows,other.m_rows);
+ {
+ internal::plain_array_helper::swap(m_data, m_rows * _Cols, other.m_data, other.m_rows * _Cols);
+ numext::swap(m_rows, other.m_rows);
}
EIGEN_DEVICE_FUNC Index rows(void) const EIGEN_NOEXCEPT {return m_rows;}
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols(void) const EIGEN_NOEXCEPT {return _Cols;}
@@ -336,20 +369,24 @@
EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
: m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
- EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
+ : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(other.m_cols)
+ {
+ internal::plain_array_helper::copy(other.m_data, _Rows * m_cols, m_data);
+ }
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
{
if (this != &other)
{
- m_data = other.m_data;
m_cols = other.m_cols;
+ internal::plain_array_helper::copy(other.m_data, _Rows * m_cols, m_data);
}
return *this;
}
EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {}
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) {
- numext::swap(m_data,other.m_data);
- numext::swap(m_cols,other.m_cols);
+ internal::plain_array_helper::swap(m_data, _Rows * m_cols, other.m_data, _Rows * other.m_cols);
+ numext::swap(m_cols, other.m_cols);
}
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows(void) const EIGEN_NOEXCEPT {return _Rows;}
EIGEN_DEVICE_FUNC Index cols(void) const EIGEN_NOEXCEPT {return m_cols;}
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 2920121..7f82090 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -260,19 +260,8 @@
}
};
-template<typename Scalar> struct conj_impl : conj_default_impl<Scalar> {};
-
-#if defined(EIGEN_GPU_COMPILE_PHASE)
-template<typename T>
-struct conj_impl<std::complex<T> >
-{
- EIGEN_DEVICE_FUNC
- static inline std::complex<T> run(const std::complex<T>& x)
- {
- return std::complex<T>(x.real(), -x.imag());
- }
-};
-#endif
+template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct conj_impl : conj_default_impl<Scalar, IsComplex> {};
template<typename Scalar>
struct conj_retval
@@ -592,8 +581,9 @@
template<typename Scalar>
struct arg_default_impl<Scalar, true> {
+ typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_DEVICE_FUNC
- static inline Scalar run(const Scalar& x)
+ static inline RealScalar run(const Scalar& x)
{
#if defined(EIGEN_HIP_DEVICE_COMPILE)
// HIP does not seem to have a native device side implementation for the math routine "arg"
@@ -601,7 +591,7 @@
#else
EIGEN_USING_STD(arg);
#endif
- return static_cast<Scalar>(arg(x));
+ return static_cast<RealScalar>(arg(x));
}
};
@@ -612,7 +602,7 @@
EIGEN_DEVICE_FUNC
static inline RealScalar run(const Scalar& x)
{
- return (x < Scalar(0)) ? Scalar(EIGEN_PI) : Scalar(0);
+ return (x < Scalar(0)) ? RealScalar(EIGEN_PI) : RealScalar(0);
}
};
#else
@@ -623,7 +613,7 @@
EIGEN_DEVICE_FUNC
static inline RealScalar run(const Scalar& x)
{
- return (x < Scalar(0)) ? Scalar(EIGEN_PI) : Scalar(0);
+ return (x < RealScalar(0)) ? RealScalar(EIGEN_PI) : RealScalar(0);
}
};
@@ -2022,6 +2012,18 @@
}
};
+#if defined(EIGEN_GPU_COMPILE_PHASE)
+template<typename T>
+struct conj_impl<std::complex<T>, true>
+{
+ EIGEN_DEVICE_FUNC
+ static inline std::complex<T> run(const std::complex<T>& x)
+ {
+ return std::complex<T>(numext::real(x), -numext::imag(x));
+ }
+};
+#endif
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/StlIterators.h b/Eigen/src/Core/StlIterators.h
index a2bc0de..09041db 100644
--- a/Eigen/src/Core/StlIterators.h
+++ b/Eigen/src/Core/StlIterators.h
@@ -7,6 +7,9 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_STLITERATORS_H
+#define EIGEN_STLITERATORS_H
+
namespace Eigen {
namespace internal {
@@ -30,10 +33,10 @@
typedef Index difference_type;
typedef std::random_access_iterator_tag iterator_category;
- indexed_based_stl_iterator_base() : mp_xpr(0), m_index(0) {}
- indexed_based_stl_iterator_base(XprType& xpr, Index index) : mp_xpr(&xpr), m_index(index) {}
+ indexed_based_stl_iterator_base() EIGEN_NO_THROW : mp_xpr(0), m_index(0) {}
+ indexed_based_stl_iterator_base(XprType& xpr, Index index) EIGEN_NO_THROW : mp_xpr(&xpr), m_index(index) {}
- indexed_based_stl_iterator_base(const non_const_iterator& other)
+ indexed_based_stl_iterator_base(const non_const_iterator& other) EIGEN_NO_THROW
: mp_xpr(other.mp_xpr), m_index(other.m_index)
{}
@@ -190,17 +193,17 @@
typedef typename internal::conditional<bool(is_lvalue), value_type&, const value_type&>::type reference;
- pointer_based_stl_iterator() : m_ptr(0) {}
- pointer_based_stl_iterator(XprType& xpr, Index index) : m_incr(xpr.innerStride())
+ pointer_based_stl_iterator() EIGEN_NO_THROW : m_ptr(0) {}
+ pointer_based_stl_iterator(XprType& xpr, Index index) EIGEN_NO_THROW : m_incr(xpr.innerStride())
{
m_ptr = xpr.data() + index * m_incr.value();
}
- pointer_based_stl_iterator(const non_const_iterator& other)
+ pointer_based_stl_iterator(const non_const_iterator& other) EIGEN_NO_THROW
: m_ptr(other.m_ptr), m_incr(other.m_incr)
{}
- pointer_based_stl_iterator& operator=(const non_const_iterator& other)
+ pointer_based_stl_iterator& operator=(const non_const_iterator& other) EIGEN_NO_THROW
{
m_ptr = other.m_ptr;
m_incr.setValue(other.m_incr);
@@ -456,3 +459,5 @@
}
} // namespace Eigen
+
+#endif // EIGEN_STLITERATORS_H
diff --git a/Eigen/src/Core/arch/AVX512/MathFunctions.h b/Eigen/src/Core/arch/AVX512/MathFunctions.h
index 41929cb..6fd726d 100644
--- a/Eigen/src/Core/arch/AVX512/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX512/MathFunctions.h
@@ -119,74 +119,11 @@
return pmax(pmul(y, _mm512_castsi512_ps(emm0)), _x);
}
-/*template <>
+template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
pexp<Packet8d>(const Packet8d& _x) {
- Packet8d x = _x;
-
- _EIGEN_DECLARE_CONST_Packet8d(1, 1.0);
- _EIGEN_DECLARE_CONST_Packet8d(2, 2.0);
-
- _EIGEN_DECLARE_CONST_Packet8d(exp_hi, 709.437);
- _EIGEN_DECLARE_CONST_Packet8d(exp_lo, -709.436139303);
-
- _EIGEN_DECLARE_CONST_Packet8d(cephes_LOG2EF, 1.4426950408889634073599);
-
- _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p0, 1.26177193074810590878e-4);
- _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p1, 3.02994407707441961300e-2);
- _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p2, 9.99999999999999999910e-1);
-
- _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q0, 3.00198505138664455042e-6);
- _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q1, 2.52448340349684104192e-3);
- _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q2, 2.27265548208155028766e-1);
- _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q3, 2.00000000000000000009e0);
-
- _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_C1, 0.693145751953125);
- _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_C2, 1.42860682030941723212e-6);
-
- // clamp x
- x = pmax(pmin(x, p8d_exp_hi), p8d_exp_lo);
-
- // Express exp(x) as exp(g + n*log(2)).
- const Packet8d n =
- _mm512_mul_round_pd(p8d_cephes_LOG2EF, x, _MM_FROUND_TO_NEAREST_INT);
-
- // Get the remainder modulo log(2), i.e. the "g" described above. Subtract
- // n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last
- // digits right.
- const Packet8d nC1 = pmul(n, p8d_cephes_exp_C1);
- const Packet8d nC2 = pmul(n, p8d_cephes_exp_C2);
- x = psub(x, nC1);
- x = psub(x, nC2);
-
- const Packet8d x2 = pmul(x, x);
-
- // Evaluate the numerator polynomial of the rational interpolant.
- Packet8d px = p8d_cephes_exp_p0;
- px = pmadd(px, x2, p8d_cephes_exp_p1);
- px = pmadd(px, x2, p8d_cephes_exp_p2);
- px = pmul(px, x);
-
- // Evaluate the denominator polynomial of the rational interpolant.
- Packet8d qx = p8d_cephes_exp_q0;
- qx = pmadd(qx, x2, p8d_cephes_exp_q1);
- qx = pmadd(qx, x2, p8d_cephes_exp_q2);
- qx = pmadd(qx, x2, p8d_cephes_exp_q3);
-
- // I don't really get this bit, copied from the SSE2 routines, so...
- // TODO(gonnet): Figure out what is going on here, perhaps find a better
- // rational interpolant?
- x = _mm512_div_pd(px, psub(qx, px));
- x = pmadd(p8d_2, x, p8d_1);
-
- // Build e=2^n.
- const Packet8d e = _mm512_castsi512_pd(_mm512_slli_epi64(
- _mm512_add_epi64(_mm512_cvtpd_epi64(n), _mm512_set1_epi64(1023)), 52));
-
- // Construct the result 2^n * exp(g) = e * x. The max is used to catch
- // non-finite values in the input.
- return pmax(pmul(x, e), _x);
- }*/
+ return pexp_double(_x);
+}
F16_PACKET_FUNCTION(Packet16f, Packet16h, pexp)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexp)
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index f874137..59bbef0 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -140,6 +140,7 @@
HasHalfPacket = 1,
#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)
HasLog = 1,
+ HasExp = 1,
HasSqrt = EIGEN_FAST_MATH,
HasRsqrt = EIGEN_FAST_MATH,
#endif
@@ -486,7 +487,7 @@
}
template<> EIGEN_STRONG_INLINE Packet16f pcmp_lt_or_nan(const Packet16f& a, const Packet16f& b) {
- __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_NGT_UQ);
+ __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_NGE_UQ);
return _mm512_castsi512_ps(
_mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu));
}
@@ -517,7 +518,7 @@
}
template <>
EIGEN_STRONG_INLINE Packet8d pcmp_lt_or_nan(const Packet8d& a, const Packet8d& b) {
- __mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_NGT_UQ);
+ __mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_NGE_UQ);
return _mm512_castsi512_pd(
_mm512_mask_set1_epi64(_mm512_set1_epi64(0), mask, 0xffffffffffffffffu));
}
@@ -929,7 +930,8 @@
Packet8i b = parithmetic_shift_right<2>(e); // floor(e/4)
// 2^b
- Packet8i hi = _mm256_shuffle_epi32(padd(b, bias), _MM_SHUFFLE(3, 1, 2, 0));
+ const Packet8i permute_idx = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7);
+ Packet8i hi = _mm256_permutevar8x32_epi32(padd(b, bias), permute_idx);
Packet8i lo = _mm256_slli_epi64(hi, 52);
hi = _mm256_slli_epi64(_mm256_srli_epi64(hi, 32), 52);
Packet8d c = _mm512_castsi512_pd(_mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1));
@@ -937,7 +939,7 @@
// 2^(e - 3b)
b = psub(psub(psub(e, b), b), b); // e - 3b
- hi = _mm256_shuffle_epi32(padd(b, bias), _MM_SHUFFLE(3, 1, 2, 0));
+ hi = _mm256_permutevar8x32_epi32(padd(b, bias), permute_idx);
lo = _mm256_slli_epi64(hi, 52);
hi = _mm256_slli_epi64(_mm256_srli_epi64(hi, 32), 52);
c = _mm512_castsi512_pd(_mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1));
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProduct.h b/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
index e3ba061..dbdb81e 100644
--- a/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
+++ b/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
@@ -493,21 +493,21 @@
cblock.packet[1] = lhs.template loadPacket<PacketC>(i, j + 2);
}
} else {
- const std::complex<Scalar> *lhs0, *lhs1;
+ std::complex<Scalar> lhs0, lhs1;
if (UseLhs) {
- lhs0 = &lhs(j + 0, i);
- lhs1 = &lhs(j + 1, i);
- cblock.packet[0] = pload2(lhs0, lhs1);
- lhs0 = &lhs(j + 2, i);
- lhs1 = &lhs(j + 3, i);
- cblock.packet[1] = pload2(lhs0, lhs1);
+ lhs0 = lhs(j + 0, i);
+ lhs1 = lhs(j + 1, i);
+ cblock.packet[0] = pload2(&lhs0, &lhs1);
+ lhs0 = lhs(j + 2, i);
+ lhs1 = lhs(j + 3, i);
+ cblock.packet[1] = pload2(&lhs0, &lhs1);
} else {
- lhs0 = &lhs(i, j + 0);
- lhs1 = &lhs(i, j + 1);
- cblock.packet[0] = pload2(lhs0, lhs1);
- lhs0 = &lhs(i, j + 2);
- lhs1 = &lhs(i, j + 3);
- cblock.packet[1] = pload2(lhs0, lhs1);
+ lhs0 = lhs(i, j + 0);
+ lhs1 = lhs(i, j + 1);
+ cblock.packet[0] = pload2(&lhs0, &lhs1);
+ lhs0 = lhs(i, j + 2);
+ lhs1 = lhs(i, j + 3);
+ cblock.packet[1] = pload2(&lhs0, &lhs1);
}
}
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h b/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h
index 8edf79c..08855bd 100644
--- a/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h
+++ b/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h
@@ -99,11 +99,9 @@
}
template<typename LhsPacket, typename RhsPacket, bool NegativeAccumulate>
-EIGEN_STRONG_INLINE void pgerMMA(__vector_quad* acc, const __vector_pair& a, const Packet4f& b)
+EIGEN_STRONG_INLINE void pgerMMA(__vector_quad*, const __vector_pair&, const Packet4f&)
{
- EIGEN_UNUSED_VARIABLE(acc); // Just for compilation
- EIGEN_UNUSED_VARIABLE(a);
- EIGEN_UNUSED_VARIABLE(b);
+ // Just for compilation
}
template<typename Scalar, typename Packet, typename RhsPacket, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
@@ -150,11 +148,9 @@
}
template<>
-EIGEN_STRONG_INLINE void ploadRhsMMA(const float* rhs, __vector_pair& rhsV)
+EIGEN_STRONG_INLINE void ploadRhsMMA(const float*, __vector_pair&)
{
// Just for compilation
- EIGEN_UNUSED_VARIABLE(rhs);
- EIGEN_UNUSED_VARIABLE(rhsV);
}
// PEEL_MMA loop factor.
diff --git a/Eigen/src/Core/arch/CUDA/Complex.h b/Eigen/src/Core/arch/CUDA/Complex.h
index b1618e5..deb4c86 100644
--- a/Eigen/src/Core/arch/CUDA/Complex.h
+++ b/Eigen/src/Core/arch/CUDA/Complex.h
@@ -67,27 +67,26 @@
const T a_imag = numext::imag(a);
const T b_real = numext::real(b);
const T b_imag = numext::imag(b);
- const T norm = T(1) / (b_real * b_real + b_imag * b_imag);
- return std::complex<T>((a_real * b_real + a_imag * b_imag) * norm,
- (a_imag * b_real - a_real * b_imag) * norm);
+ const T norm = (b_real * b_real + b_imag * b_imag);
+ return std::complex<T>((a_real * b_real + a_imag * b_imag) / norm,
+ (a_imag * b_real - a_real * b_imag) / norm);
}
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
std::complex<T> complex_divide_stable(const std::complex<T>& a, const std::complex<T>& b) {
+ const T a_real = numext::real(a);
+ const T a_imag = numext::imag(a);
const T b_real = numext::real(b);
const T b_imag = numext::imag(b);
- // Guard against over/under-flow.
- const T scale = T(1) / (numext::abs(b_real) + numext::abs(b_imag));
- const T a_real_scaled = numext::real(a) * scale;
- const T a_imag_scaled = numext::imag(a) * scale;
- const T b_real_scaled = b_real * scale;
- const T b_imag_scaled = b_imag * scale;
-
- const T b_norm2_scaled = b_real_scaled * b_real_scaled + b_imag_scaled * b_imag_scaled;
- return std::complex<T>(
- (a_real_scaled * b_real_scaled + a_imag_scaled * b_imag_scaled) / b_norm2_scaled,
- (a_imag_scaled * b_real_scaled - a_real_scaled * b_imag_scaled) / b_norm2_scaled);
+ // Smith's complex division (https://arxiv.org/pdf/1210.4539.pdf),
+ // guards against over/under-flow.
+ const bool scale_imag = numext::abs(b_imag) <= numext::abs(b_real);
+ const T rscale = scale_imag ? T(1) : b_real / b_imag;
+ const T iscale = scale_imag ? b_imag / b_real : T(1);
+ const T denominator = b_real * rscale + b_imag * iscale;
+ return std::complex<T>((a_real * rscale + a_imag * iscale) / denominator,
+ (a_imag * rscale - a_real * iscale) / denominator);
}
template<typename T>
diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index 1aa361b..a889ab1 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -124,13 +124,6 @@
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{ return Packet2cf(psub<Packet4f>(a.v, b.v)); }
-template<> EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b);
-template<> EIGEN_STRONG_INLINE Packet2cf paddsub<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
-{
- Packet4f mask = {-0.0f, -0.0f, 0.0f, 0.0f};
- return Packet2cf(padd(a.v, pxor(mask, b.v)));
-}
-
template<> EIGEN_STRONG_INLINE Packet1cf pnegate(const Packet1cf& a) { return Packet1cf(pnegate<Packet2f>(a.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 9cf4e07..2b48570 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -3701,6 +3701,11 @@
return F32MaskToBf16Mask(pcmp_lt<Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));
}
+template<> EIGEN_STRONG_INLINE Packet4bf pcmp_lt_or_nan<Packet4bf>(const Packet4bf& a, const Packet4bf& b)
+{
+ return F32MaskToBf16Mask(pcmp_lt_or_nan<Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));
+}
+
template<> EIGEN_STRONG_INLINE Packet4bf pcmp_le<Packet4bf>(const Packet4bf& a, const Packet4bf& b)
{
return F32MaskToBf16Mask(pcmp_le<Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index f6f1b8c..1cab374 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -19,7 +19,7 @@
{
EIGEN_STRONG_INLINE Packet2cf() {}
EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
- __m128 v;
+ Packet4f v;
};
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
@@ -66,12 +66,6 @@
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
-template<> EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b);
-template<> EIGEN_STRONG_INLINE Packet2cf paddsub<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
-{
- const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x0,0x0));
- return Packet2cf(padd(a.v, pxor(mask, b.v)));
-}
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
{
@@ -113,19 +107,13 @@
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
Packet2cf res;
-#if EIGEN_GNUC_AT_MOST(4,2)
- // Workaround annoying "may be used uninitialized in this function" warning with gcc 4.2
- res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
-#elif EIGEN_GNUC_AT_LEAST(4,6)
- // Suppress annoying "may be used uninitialized in this function" warning with gcc >= 4.6
- #pragma GCC diagnostic push
- #pragma GCC diagnostic ignored "-Wuninitialized"
- res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
- #pragma GCC diagnostic pop
+#ifdef EIGEN_VECTORIZE_SSE3
+ res.v = _mm_castpd_ps(_mm_loaddup_pd(reinterpret_cast<double const*>(&from)));
#else
- res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
+ res.v = _mm_castpd_ps(_mm_load_sd(reinterpret_cast<double const*>(&from)));
+ res.v = _mm_movelh_ps(res.v, res.v);
#endif
- return Packet2cf(_mm_movelh_ps(res.v,res.v));
+ return res;
}
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
@@ -252,7 +240,7 @@
{
EIGEN_STRONG_INLINE Packet1cd() {}
EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}
- __m128d v;
+ Packet2d v;
};
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 273dffb..efd7199 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -16,8 +16,8 @@
//------------------------------------------------------------------------------------------
#define EIGEN_WORLD_VERSION 3
-#define EIGEN_MAJOR_VERSION 4
-#define EIGEN_MINOR_VERSION 99
+#define EIGEN_MAJOR_VERSION 3
+#define EIGEN_MINOR_VERSION 90
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@@ -688,8 +688,7 @@
// Does the compiler support result_of?
// result_of was deprecated in c++17 and removed in c++ 20
#ifndef EIGEN_HAS_STD_RESULT_OF
-#if EIGEN_MAX_CPP_VER >= 11 && \
- (defined(__cplusplus) && __cplusplus >= 201103L && __cplusplus < 201703L)
+#if EIGEN_HAS_CXX11 && EIGEN_COMP_CXXVER < 17
#define EIGEN_HAS_STD_RESULT_OF 1
#else
#define EIGEN_HAS_STD_RESULT_OF 0
@@ -708,8 +707,7 @@
#endif // EIGEN_HAS_STD_HASH
#ifndef EIGEN_HAS_STD_INVOKE_RESULT
-#if EIGEN_MAX_CPP_VER >= 17 && \
- (defined(__cplusplus) && __cplusplus >= 201703L)
+#if EIGEN_MAX_CPP_VER >= 17 && EIGEN_COMP_CXXVER >= 17
#define EIGEN_HAS_STD_INVOKE_RESULT 1
#else
#define EIGEN_HAS_STD_INVOKE_RESULT 0
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 7cbe8a6..875318c 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -566,6 +566,17 @@
}
};
+#if EIGEN_HAS_RVALUE_REFERENCES
+template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
+{
+ return std::move(start, end, target);
+}
+#else
+template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
+{
+ return std::copy(start, end, target);
+}
+#endif
/*****************************************************************************
*** Implementation of runtime stack allocation (falling back to malloc) ***
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 9c77717..2c63a95 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -136,15 +136,14 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
operator T() const { return T(Value); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- void setValue(T) const {}
+ void setValue(T v) const { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
};
template<typename T> class variable_if_dynamic<T, Dynamic>
{
T m_value;
- EIGEN_DEVICE_FUNC variable_if_dynamic() { eigen_assert(false); }
public:
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value) : m_value(value) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value = 0) EIGEN_NO_THROW : m_value(value) {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator T() const { return m_value; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; }
diff --git a/Eigen/src/LU/arch/InverseSize4.h b/Eigen/src/LU/arch/InverseSize4.h
index 5a8d0c1..ee5548a 100644
--- a/Eigen/src/LU/arch/InverseSize4.h
+++ b/Eigen/src/LU/arch/InverseSize4.h
@@ -54,10 +54,12 @@
{
ActualMatrixType matrix(mat);
- Packet4f _L1 = matrix.template packet<MatrixAlignment>(0);
- Packet4f _L2 = matrix.template packet<MatrixAlignment>(4);
- Packet4f _L3 = matrix.template packet<MatrixAlignment>(8);
- Packet4f _L4 = matrix.template packet<MatrixAlignment>(12);
+ const float* data = matrix.data();
+ const Index stride = matrix.innerStride();
+ Packet4f _L1 = ploadt<Packet4f,MatrixAlignment>(data);
+ Packet4f _L2 = ploadt<Packet4f,MatrixAlignment>(data + stride*4);
+ Packet4f _L3 = ploadt<Packet4f,MatrixAlignment>(data + stride*8);
+ Packet4f _L4 = ploadt<Packet4f,MatrixAlignment>(data + stride*12);
// Four 2x2 sub-matrices of the input matrix
// input = [[A, B],
@@ -189,25 +191,26 @@
Packet2d A1, A2, B1, B2, C1, C2, D1, D2;
+ const double* data = matrix.data();
+ const Index stride = matrix.innerStride();
if (StorageOrdersMatch)
{
- A1 = matrix.template packet<MatrixAlignment>(0);
- B1 = matrix.template packet<MatrixAlignment>(2);
- A2 = matrix.template packet<MatrixAlignment>(4);
- B2 = matrix.template packet<MatrixAlignment>(6);
- C1 = matrix.template packet<MatrixAlignment>(8);
- D1 = matrix.template packet<MatrixAlignment>(10);
- C2 = matrix.template packet<MatrixAlignment>(12);
- D2 = matrix.template packet<MatrixAlignment>(14);
+ A1 = ploadt<Packet2d,MatrixAlignment>(data + stride*0);
+ B1 = ploadt<Packet2d,MatrixAlignment>(data + stride*2);
+ A2 = ploadt<Packet2d,MatrixAlignment>(data + stride*4);
+ B2 = ploadt<Packet2d,MatrixAlignment>(data + stride*6);
+ C1 = ploadt<Packet2d,MatrixAlignment>(data + stride*8);
+ D1 = ploadt<Packet2d,MatrixAlignment>(data + stride*10);
+ C2 = ploadt<Packet2d,MatrixAlignment>(data + stride*12);
+ D2 = ploadt<Packet2d,MatrixAlignment>(data + stride*14);
}
else
{
Packet2d temp;
- A1 = matrix.template packet<MatrixAlignment>(0);
- C1 = matrix.template packet<MatrixAlignment>(2);
- A2 = matrix.template packet<MatrixAlignment>(4);
- C2 = matrix.template packet<MatrixAlignment>(6);
-
+ A1 = ploadt<Packet2d,MatrixAlignment>(data + stride*0);
+ C1 = ploadt<Packet2d,MatrixAlignment>(data + stride*2);
+ A2 = ploadt<Packet2d,MatrixAlignment>(data + stride*4);
+ C2 = ploadt<Packet2d,MatrixAlignment>(data + stride*6);
temp = A1;
A1 = vec2d_unpacklo(A1, A2);
A2 = vec2d_unpackhi(temp, A2);
@@ -216,10 +219,10 @@
C1 = vec2d_unpacklo(C1, C2);
C2 = vec2d_unpackhi(temp, C2);
- B1 = matrix.template packet<MatrixAlignment>(8);
- D1 = matrix.template packet<MatrixAlignment>(10);
- B2 = matrix.template packet<MatrixAlignment>(12);
- D2 = matrix.template packet<MatrixAlignment>(14);
+ B1 = ploadt<Packet2d,MatrixAlignment>(data + stride*8);
+ D1 = ploadt<Packet2d,MatrixAlignment>(data + stride*10);
+ B2 = ploadt<Packet2d,MatrixAlignment>(data + stride*12);
+ D2 = ploadt<Packet2d,MatrixAlignment>(data + stride*14);
temp = B1;
B1 = vec2d_unpacklo(B1, B2);
diff --git a/bench/basicbenchmark.h b/bench/basicbenchmark.h
index 3fdc357..8059375 100644
--- a/bench/basicbenchmark.h
+++ b/bench/basicbenchmark.h
@@ -16,13 +16,13 @@
{
asm("#begin_bench_loop LazyEval");
if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
- m = (I + 0.00005 * (m + m.lazy() * m)).eval();
+ m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
}
else if (Mode==OmpEval)
{
asm("#begin_bench_loop OmpEval");
if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
- m = (I + 0.00005 * (m + m.lazy() * m)).evalOMP();
+ m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
}
else
{
diff --git a/bench/sparse_randomsetter.cpp b/bench/sparse_randomsetter.cpp
index 19a76e3..c433742 100644
--- a/bench/sparse_randomsetter.cpp
+++ b/bench/sparse_randomsetter.cpp
@@ -1,6 +1,7 @@
#define NOGMM
#define NOMTL
+#define EIGEN_GOOGLEHASH_SUPPORT 1
#include <map>
#include <ext/hash_map>
diff --git a/test/SafeScalar.h b/test/SafeScalar.h
new file mode 100644
index 0000000..c5cb757
--- /dev/null
+++ b/test/SafeScalar.h
@@ -0,0 +1,30 @@
+
+// A Scalar that asserts for uninitialized access.
+template<typename T>
+class SafeScalar {
+ public:
+ SafeScalar() : initialized_(false) {}
+ SafeScalar(const SafeScalar& other) {
+ *this = other;
+ }
+ SafeScalar& operator=(const SafeScalar& other) {
+ val_ = T(other);
+ initialized_ = true;
+ return *this;
+ }
+
+ SafeScalar(T val) : val_(val), initialized_(true) {}
+ SafeScalar& operator=(T val) {
+ val_ = val;
+ initialized_ = true;
+ }
+
+ operator T() const {
+ VERIFY(initialized_ && "Uninitialized access.");
+ return val_;
+ }
+
+ private:
+ T val_;
+ bool initialized_;
+};
diff --git a/test/dense_storage.cpp b/test/dense_storage.cpp
index 7fa2585..36ccbb0 100644
--- a/test/dense_storage.cpp
+++ b/test/dense_storage.cpp
@@ -8,17 +8,16 @@
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
+#include "AnnoyingScalar.h"
+#include "SafeScalar.h"
#include <Eigen/Core>
-template <typename T, int Rows, int Cols>
-void dense_storage_copy()
+template <typename T, int Size, int Rows, int Cols>
+void dense_storage_copy(int rows, int cols)
{
- static const int Size = ((Rows==Dynamic || Cols==Dynamic) ? Dynamic : Rows*Cols);
- typedef DenseStorage<T,Size, Rows,Cols, 0> DenseStorageType;
+ typedef DenseStorage<T, Size, Rows, Cols, 0> DenseStorageType;
- const int rows = (Rows==Dynamic) ? 4 : Rows;
- const int cols = (Cols==Dynamic) ? 3 : Cols;
const int size = rows*cols;
DenseStorageType reference(size, rows, cols);
T* raw_reference = reference.data();
@@ -31,14 +30,11 @@
VERIFY_IS_EQUAL(raw_reference[i], raw_copied_reference[i]);
}
-template <typename T, int Rows, int Cols>
-void dense_storage_assignment()
+template <typename T, int Size, int Rows, int Cols>
+void dense_storage_assignment(int rows, int cols)
{
- static const int Size = ((Rows==Dynamic || Cols==Dynamic) ? Dynamic : Rows*Cols);
- typedef DenseStorage<T,Size, Rows,Cols, 0> DenseStorageType;
+ typedef DenseStorage<T, Size, Rows, Cols, 0> DenseStorageType;
- const int rows = (Rows==Dynamic) ? 4 : Rows;
- const int cols = (Cols==Dynamic) ? 3 : Cols;
const int size = rows*cols;
DenseStorageType reference(size, rows, cols);
T* raw_reference = reference.data();
@@ -52,6 +48,34 @@
VERIFY_IS_EQUAL(raw_reference[i], raw_copied_reference[i]);
}
+template <typename T, int Size, int Rows, int Cols>
+void dense_storage_swap(int rows0, int cols0, int rows1, int cols1)
+{
+ typedef DenseStorage<T, Size, Rows, Cols, 0> DenseStorageType;
+
+ const int size0 = rows0*cols0;
+ DenseStorageType a(size0, rows0, cols0);
+ for (int i=0; i<size0; ++i) {
+ a.data()[i] = static_cast<T>(i);
+ }
+
+ const int size1 = rows1*cols1;
+ DenseStorageType b(size1, rows1, cols1);
+ for (int i=0; i<size1; ++i) {
+ b.data()[i] = static_cast<T>(-i);
+ }
+
+ a.swap(b);
+
+ for (int i=0; i<size0; ++i) {
+ VERIFY_IS_EQUAL(b.data()[i], static_cast<T>(i));
+ }
+
+ for (int i=0; i<size1; ++i) {
+ VERIFY_IS_EQUAL(a.data()[i], static_cast<T>(-i));
+ }
+}
+
template<typename T, int Size, std::size_t Alignment>
void dense_storage_alignment()
{
@@ -78,30 +102,78 @@
#endif
}
+template<typename T>
+void dense_storage_tests() {
+ // Dynamic Storage.
+ dense_storage_copy<T,Dynamic,Dynamic,Dynamic>(4, 3);
+ dense_storage_copy<T,Dynamic,Dynamic,3>(4, 3);
+ dense_storage_copy<T,Dynamic,4,Dynamic>(4, 3);
+ // Fixed Storage.
+ dense_storage_copy<T,12,4,3>(4, 3);
+ dense_storage_copy<T,12,Dynamic,Dynamic>(4, 3);
+ dense_storage_copy<T,12,4,Dynamic>(4, 3);
+ dense_storage_copy<T,12,Dynamic,3>(4, 3);
+ // Fixed Storage with Uninitialized Elements.
+ dense_storage_copy<T,18,Dynamic,Dynamic>(4, 3);
+ dense_storage_copy<T,18,4,Dynamic>(4, 3);
+ dense_storage_copy<T,18,Dynamic,3>(4, 3);
+
+ // Dynamic Storage.
+ dense_storage_assignment<T,Dynamic,Dynamic,Dynamic>(4, 3);
+ dense_storage_assignment<T,Dynamic,Dynamic,3>(4, 3);
+ dense_storage_assignment<T,Dynamic,4,Dynamic>(4, 3);
+ // Fixed Storage.
+ dense_storage_assignment<T,12,4,3>(4, 3);
+ dense_storage_assignment<T,12,Dynamic,Dynamic>(4, 3);
+ dense_storage_assignment<T,12,4,Dynamic>(4, 3);
+ dense_storage_assignment<T,12,Dynamic,3>(4, 3);
+ // Fixed Storage with Uninitialized Elements.
+ dense_storage_assignment<T,18,Dynamic,Dynamic>(4, 3);
+ dense_storage_assignment<T,18,4,Dynamic>(4, 3);
+ dense_storage_assignment<T,18,Dynamic,3>(4, 3);
+
+ // Dynamic Storage.
+ dense_storage_swap<T,Dynamic,Dynamic,Dynamic>(4, 3, 4, 3);
+ dense_storage_swap<T,Dynamic,Dynamic,Dynamic>(4, 3, 2, 1);
+ dense_storage_swap<T,Dynamic,Dynamic,Dynamic>(2, 1, 4, 3);
+ dense_storage_swap<T,Dynamic,Dynamic,3>(4, 3, 4, 3);
+ dense_storage_swap<T,Dynamic,Dynamic,3>(4, 3, 2, 3);
+ dense_storage_swap<T,Dynamic,Dynamic,3>(2, 3, 4, 3);
+ dense_storage_swap<T,Dynamic,4,Dynamic>(4, 3, 4, 3);
+ dense_storage_swap<T,Dynamic,4,Dynamic>(4, 3, 4, 1);
+ dense_storage_swap<T,Dynamic,4,Dynamic>(4, 1, 4, 3);
+ // Fixed Storage.
+ dense_storage_swap<T,12,4,3>(4, 3, 4, 3);
+ dense_storage_swap<T,12,Dynamic,Dynamic>(4, 3, 4, 3);
+ dense_storage_swap<T,12,Dynamic,Dynamic>(4, 3, 2, 1);
+ dense_storage_swap<T,12,Dynamic,Dynamic>(2, 1, 4, 3);
+ dense_storage_swap<T,12,4,Dynamic>(4, 3, 4, 3);
+ dense_storage_swap<T,12,4,Dynamic>(4, 3, 4, 1);
+ dense_storage_swap<T,12,4,Dynamic>(4, 1, 4, 3);
+ dense_storage_swap<T,12,Dynamic,3>(4, 3, 4, 3);
+ dense_storage_swap<T,12,Dynamic,3>(4, 3, 2, 3);
+ dense_storage_swap<T,12,Dynamic,3>(2, 3, 4, 3);
+ // Fixed Storage with Uninitialized Elements.
+ dense_storage_swap<T,18,Dynamic,Dynamic>(4, 3, 4, 3);
+ dense_storage_swap<T,18,Dynamic,Dynamic>(4, 3, 2, 1);
+ dense_storage_swap<T,18,Dynamic,Dynamic>(2, 1, 4, 3);
+ dense_storage_swap<T,18,4,Dynamic>(4, 3, 4, 3);
+ dense_storage_swap<T,18,4,Dynamic>(4, 3, 4, 1);
+ dense_storage_swap<T,18,4,Dynamic>(4, 1, 4, 3);
+ dense_storage_swap<T,18,Dynamic,3>(4, 3, 4, 3);
+ dense_storage_swap<T,18,Dynamic,3>(4, 3, 2, 3);
+ dense_storage_swap<T,18,Dynamic,3>(2, 3, 4, 3);
+
+ dense_storage_alignment<T,16,8>();
+ dense_storage_alignment<T,16,16>();
+ dense_storage_alignment<T,16,32>();
+ dense_storage_alignment<T,16,64>();
+}
+
EIGEN_DECLARE_TEST(dense_storage)
{
- dense_storage_copy<int,Dynamic,Dynamic>();
- dense_storage_copy<int,Dynamic,3>();
- dense_storage_copy<int,4,Dynamic>();
- dense_storage_copy<int,4,3>();
-
- dense_storage_copy<float,Dynamic,Dynamic>();
- dense_storage_copy<float,Dynamic,3>();
- dense_storage_copy<float,4,Dynamic>();
- dense_storage_copy<float,4,3>();
-
- dense_storage_assignment<int,Dynamic,Dynamic>();
- dense_storage_assignment<int,Dynamic,3>();
- dense_storage_assignment<int,4,Dynamic>();
- dense_storage_assignment<int,4,3>();
-
- dense_storage_assignment<float,Dynamic,Dynamic>();
- dense_storage_assignment<float,Dynamic,3>();
- dense_storage_assignment<float,4,Dynamic>();
- dense_storage_assignment<float,4,3>();
-
- dense_storage_alignment<float,16,8>();
- dense_storage_alignment<float,16,16>();
- dense_storage_alignment<float,16,32>();
- dense_storage_alignment<float,16,64>();
+ dense_storage_tests<int>();
+ dense_storage_tests<float>();
+ dense_storage_tests<SafeScalar<float> >();
+ dense_storage_tests<AnnoyingScalar>();
}
diff --git a/test/eigensolver_selfadjoint.cpp b/test/eigensolver_selfadjoint.cpp
index 65b80c3..0fb2f4d 100644
--- a/test/eigensolver_selfadjoint.cpp
+++ b/test/eigensolver_selfadjoint.cpp
@@ -234,15 +234,21 @@
{
int s = 0;
for(int i = 0; i < g_repeat; i++) {
+
// trivial test for 1x1 matrices:
CALL_SUBTEST_1( selfadjointeigensolver(Matrix<float, 1, 1>()));
CALL_SUBTEST_1( selfadjointeigensolver(Matrix<double, 1, 1>()));
+ CALL_SUBTEST_1( selfadjointeigensolver(Matrix<std::complex<double>, 1, 1>()));
+
// very important to test 3x3 and 2x2 matrices since we provide special paths for them
CALL_SUBTEST_12( selfadjointeigensolver(Matrix2f()) );
CALL_SUBTEST_12( selfadjointeigensolver(Matrix2d()) );
+ CALL_SUBTEST_12( selfadjointeigensolver(Matrix2cd()) );
CALL_SUBTEST_13( selfadjointeigensolver(Matrix3f()) );
CALL_SUBTEST_13( selfadjointeigensolver(Matrix3d()) );
+ CALL_SUBTEST_13( selfadjointeigensolver(Matrix3cd()) );
CALL_SUBTEST_2( selfadjointeigensolver(Matrix4d()) );
+ CALL_SUBTEST_2( selfadjointeigensolver(Matrix4cd()) );
s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/4);
CALL_SUBTEST_3( selfadjointeigensolver(MatrixXf(s,s)) );
@@ -254,6 +260,8 @@
// some trivial but implementation-wise tricky cases
CALL_SUBTEST_4( selfadjointeigensolver(MatrixXd(1,1)) );
CALL_SUBTEST_4( selfadjointeigensolver(MatrixXd(2,2)) );
+ CALL_SUBTEST_5( selfadjointeigensolver(MatrixXcd(1,1)) );
+ CALL_SUBTEST_5( selfadjointeigensolver(MatrixXcd(2,2)) );
CALL_SUBTEST_6( selfadjointeigensolver(Matrix<double,1,1>()) );
CALL_SUBTEST_7( selfadjointeigensolver(Matrix<double,2,2>()) );
}
diff --git a/test/numext.cpp b/test/numext.cpp
index cf1ca17..8a2fde5 100644
--- a/test/numext.cpp
+++ b/test/numext.cpp
@@ -62,6 +62,20 @@
}
template<typename T>
+void check_arg() {
+ typedef typename NumTraits<T>::Real Real;
+ VERIFY_IS_EQUAL(numext::abs(T(0)), T(0));
+ VERIFY_IS_EQUAL(numext::abs(T(1)), T(1));
+
+ for(int k=0; k<100; ++k)
+ {
+ T x = internal::random<T>();
+ Real y = numext::arg(x);
+ VERIFY_IS_APPROX( y, std::arg(x) );
+ }
+}
+
+template<typename T>
struct check_sqrt_impl {
static void run() {
for (int i=0; i<1000; ++i) {
@@ -242,10 +256,12 @@
CALL_SUBTEST( check_abs<float>() );
CALL_SUBTEST( check_abs<double>() );
CALL_SUBTEST( check_abs<long double>() );
-
CALL_SUBTEST( check_abs<std::complex<float> >() );
CALL_SUBTEST( check_abs<std::complex<double> >() );
+ CALL_SUBTEST( check_arg<std::complex<float> >() );
+ CALL_SUBTEST( check_arg<std::complex<double> >() );
+
CALL_SUBTEST( check_sqrt<float>() );
CALL_SUBTEST( check_sqrt<double>() );
CALL_SUBTEST( check_sqrt<std::complex<float> >() );
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index 67d329a..0bb511d 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -279,10 +279,75 @@
CHECK_CWISE2_IF(true, internal::pcmp_eq, internal::pcmp_eq);
}
+template <typename Scalar, typename Packet>
+void packetmath_boolean_mask_ops_real() {
+ const int PacketSize = internal::unpacket_traits<Packet>::size;
+ const int size = 2 * PacketSize;
+ EIGEN_ALIGN_MAX Scalar data1[size];
+ EIGEN_ALIGN_MAX Scalar data2[size];
+ EIGEN_ALIGN_MAX Scalar ref[size];
+
+ for (int i = 0; i < PacketSize; ++i) {
+ data1[i] = internal::random<Scalar>();
+ data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
+ }
+
+ CHECK_CWISE2_IF(true, internal::pcmp_lt_or_nan, internal::pcmp_lt_or_nan);
+
+ //Test (-0) <=/< (0) for signed operations
+ for (int i = 0; i < PacketSize; ++i) {
+ data1[i] = Scalar(-0.0);
+ data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
+ }
+ CHECK_CWISE2_IF(true, internal::pcmp_lt_or_nan, internal::pcmp_lt_or_nan);
+
+ //Test NaN
+ for (int i = 0; i < PacketSize; ++i) {
+ data1[i] = NumTraits<Scalar>::quiet_NaN();
+ data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
+ }
+ CHECK_CWISE2_IF(true, internal::pcmp_lt_or_nan, internal::pcmp_lt_or_nan);
+}
+
+template <typename Scalar, typename Packet>
+void packetmath_boolean_mask_ops_notcomplex() {
+ const int PacketSize = internal::unpacket_traits<Packet>::size;
+ const int size = 2 * PacketSize;
+ EIGEN_ALIGN_MAX Scalar data1[size];
+ EIGEN_ALIGN_MAX Scalar data2[size];
+ EIGEN_ALIGN_MAX Scalar ref[size];
+
+ for (int i = 0; i < PacketSize; ++i) {
+ data1[i] = internal::random<Scalar>();
+ data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
+ }
+
+ CHECK_CWISE2_IF(true, internal::pcmp_le, internal::pcmp_le);
+ CHECK_CWISE2_IF(true, internal::pcmp_lt, internal::pcmp_lt);
+
+ //Test (-0) <=/< (0) for signed operations
+ for (int i = 0; i < PacketSize; ++i) {
+ data1[i] = Scalar(-0.0);
+ data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
+ }
+ CHECK_CWISE2_IF(true, internal::pcmp_le, internal::pcmp_le);
+ CHECK_CWISE2_IF(true, internal::pcmp_lt, internal::pcmp_lt);
+
+ //Test NaN
+ for (int i = 0; i < PacketSize; ++i) {
+ data1[i] = NumTraits<Scalar>::quiet_NaN();
+ data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
+ }
+ CHECK_CWISE2_IF(true, internal::pcmp_le, internal::pcmp_le);
+ CHECK_CWISE2_IF(true, internal::pcmp_lt, internal::pcmp_lt);
+}
+
// Packet16b representing bool does not support ptrue, pandnot or pcmp_eq, since the scalar path
// (for some compilers) compute the bitwise and with 0x1 of the results to keep the value in [0,1].
template<>
void packetmath_boolean_mask_ops<bool, internal::packet_traits<bool>::type>() {}
+template<>
+void packetmath_boolean_mask_ops_notcomplex<bool, internal::packet_traits<bool>::type>() {}
template <typename Scalar, typename Packet>
void packetmath_minus_zero_add() {
@@ -574,6 +639,8 @@
CHECK_CWISE1_EXACT_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil);
CHECK_CWISE1_EXACT_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor);
CHECK_CWISE1_EXACT_IF(PacketTraits::HasRint, numext::rint, internal::print);
+
+ packetmath_boolean_mask_ops_real<Scalar,Packet>();
// Rounding edge cases.
if (PacketTraits::HasRound || PacketTraits::HasCeil || PacketTraits::HasFloor || PacketTraits::HasRint) {
@@ -1020,6 +1087,8 @@
CHECK_CWISE2_IF(PacketTraits::HasMin, propagate_nan_min, (internal::pmin<PropagateNaN>));
CHECK_CWISE2_IF(PacketTraits::HasMax, propagate_nan_max, internal::pmax<PropagateNaN>);
}
+
+ packetmath_boolean_mask_ops_notcomplex<Scalar, Packet>();
}
template <typename Scalar, typename Packet, bool ConjLhs, bool ConjRhs>
diff --git a/test/rvalue_types.cpp b/test/rvalue_types.cpp
index c20a32f..2c9999c 100644
--- a/test/rvalue_types.cpp
+++ b/test/rvalue_types.cpp
@@ -13,41 +13,12 @@
#if EIGEN_HAS_CXX11
#include "MovableScalar.h"
#endif
+#include "SafeScalar.h"
#include <Eigen/Core>
using internal::UIntPtr;
-// A Scalar that asserts for uninitialized access.
-template<typename T>
-class SafeScalar {
- public:
- SafeScalar() : initialized_(false) {}
- SafeScalar(const SafeScalar& other) {
- *this = other;
- }
- SafeScalar& operator=(const SafeScalar& other) {
- val_ = T(other);
- initialized_ = true;
- return *this;
- }
-
- SafeScalar(T val) : val_(val), initialized_(true) {}
- SafeScalar& operator=(T val) {
- val_ = val;
- initialized_ = true;
- }
-
- operator T() const {
- VERIFY(initialized_ && "Uninitialized access.");
- return val_;
- }
-
- private:
- T val_;
- bool initialized_;
-};
-
#if EIGEN_HAS_RVALUE_REFERENCES
template <typename MatrixType>
void rvalue_copyassign(const MatrixType& m)
diff --git a/test/sparse.h b/test/sparse.h
index df471b4..6cd07fc 100644
--- a/test/sparse.h
+++ b/test/sparse.h
@@ -29,10 +29,6 @@
#endif
-#ifdef EIGEN_GOOGLEHASH_SUPPORT
- #include <google/sparse_hash_map>
-#endif
-
#include <Eigen/Cholesky>
#include <Eigen/LU>
#include <Eigen/Sparse>
diff --git a/test/stl_iterators.cpp b/test/stl_iterators.cpp
index 997f801..72bbf82 100644
--- a/test/stl_iterators.cpp
+++ b/test/stl_iterators.cpp
@@ -7,9 +7,9 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#include "main.h"
#include <iterator>
#include <numeric>
-#include "main.h"
template< class Iterator >
std::reverse_iterator<Iterator>
@@ -47,6 +47,18 @@
template<typename XprType>
bool is_generic_randaccess_stl_iterator(const internal::generic_randaccess_stl_iterator<XprType> &) { return true; }
+template<typename Iter>
+bool is_default_constructible_and_assignable(const Iter& it)
+{
+#if EIGEN_HAS_CXX11
+ VERIFY(std::is_default_constructible<Iter>::value);
+ VERIFY(std::is_nothrow_default_constructible<Iter>::value);
+#endif
+ Iter it2;
+ it2 = it;
+ return (it==it2);
+}
+
template<typename Xpr>
void check_begin_end_for_loop(Xpr xpr)
{
@@ -124,6 +136,22 @@
Index i, j;
+ // Verify that iterators are default constructible (See bug #1900)
+ {
+ VERIFY( is_default_constructible_and_assignable(v.begin()));
+ VERIFY( is_default_constructible_and_assignable(v.end()));
+ VERIFY( is_default_constructible_and_assignable(cv.begin()));
+ VERIFY( is_default_constructible_and_assignable(cv.end()));
+
+ VERIFY( is_default_constructible_and_assignable(A.row(0).begin()));
+ VERIFY( is_default_constructible_and_assignable(A.row(0).end()));
+ VERIFY( is_default_constructible_and_assignable(cA.row(0).begin()));
+ VERIFY( is_default_constructible_and_assignable(cA.row(0).end()));
+
+ VERIFY( is_default_constructible_and_assignable(B.row(0).begin()));
+ VERIFY( is_default_constructible_and_assignable(B.row(0).end()));
+ }
+
// Check we got a fast pointer-based iterator when expected
{
VERIFY( is_pointer_based_stl_iterator(v.begin()) );
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor
index d73c600..0938bb5 100644
--- a/unsupported/Eigen/CXX11/Tensor
+++ b/unsupported/Eigen/CXX11/Tensor
@@ -41,14 +41,6 @@
#include <random>
#include <thread>
-#ifdef _WIN32
-#include <windows.h>
-#elif defined(__APPLE__)
-#include <mach/mach_time.h>
-#else
-#include <time.h>
-#endif
-
#if defined(EIGEN_USE_THREADS) || defined(EIGEN_USE_SYCL)
#include "ThreadPool"
#endif
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h
index 9422dcd..ec2e3cb 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h
@@ -42,51 +42,84 @@
virtual unsigned int* semaphore() const = 0;
};
-static gpuDeviceProp_t* m_deviceProperties;
-static bool m_devicePropInitialized = false;
+class GpuDeviceProperties {
+ public:
+ GpuDeviceProperties() :
+ initialized_(false), first_(true), device_properties_(nullptr) {}
+
+ ~GpuDeviceProperties() {
+ if (device_properties_) {
+ delete[] device_properties_;
+ }
+ }
+
+ EIGEN_STRONG_INLINE const gpuDeviceProp_t& get(int device) const {
+ return device_properties_[device];
+ }
-static void initializeDeviceProp() {
- if (!m_devicePropInitialized) {
- // Attempts to ensure proper behavior in the case of multiple threads
- // calling this function simultaneously. This would be trivial to
- // implement if we could use std::mutex, but unfortunately mutex don't
- // compile with nvcc, so we resort to atomics and thread fences instead.
- // Note that if the caller uses a compiler that doesn't support c++11 we
- // can't ensure that the initialization is thread safe.
- static std::atomic<bool> first(true);
- if (first.exchange(false)) {
- // We're the first thread to reach this point.
- int num_devices;
- gpuError_t status = gpuGetDeviceCount(&num_devices);
- if (status != gpuSuccess) {
- std::cerr << "Failed to get the number of GPU devices: "
- << gpuGetErrorString(status)
- << std::endl;
- gpu_assert(status == gpuSuccess);
- }
- m_deviceProperties = new gpuDeviceProp_t[num_devices];
- for (int i = 0; i < num_devices; ++i) {
- status = gpuGetDeviceProperties(&m_deviceProperties[i], i);
+ EIGEN_STRONG_INLINE bool isInitialized() const {
+ return initialized_;
+ }
+
+ void initialize() {
+ if (!initialized_) {
+ // Attempts to ensure proper behavior in the case of multiple threads
+ // calling this function simultaneously. This would be trivial to
+ // implement if we could use std::mutex, but unfortunately mutex don't
+ // compile with nvcc, so we resort to atomics and thread fences instead.
+ // Note that if the caller uses a compiler that doesn't support c++11 we
+ // can't ensure that the initialization is thread safe.
+ if (first_.exchange(false)) {
+ // We're the first thread to reach this point.
+ int num_devices;
+ gpuError_t status = gpuGetDeviceCount(&num_devices);
if (status != gpuSuccess) {
- std::cerr << "Failed to initialize GPU device #"
- << i
- << ": "
+ std::cerr << "Failed to get the number of GPU devices: "
<< gpuGetErrorString(status)
<< std::endl;
gpu_assert(status == gpuSuccess);
}
- }
+ device_properties_ = new gpuDeviceProp_t[num_devices];
+ for (int i = 0; i < num_devices; ++i) {
+ status = gpuGetDeviceProperties(&device_properties_[i], i);
+ if (status != gpuSuccess) {
+ std::cerr << "Failed to initialize GPU device #"
+ << i
+ << ": "
+ << gpuGetErrorString(status)
+ << std::endl;
+ gpu_assert(status == gpuSuccess);
+ }
+ }
- std::atomic_thread_fence(std::memory_order_release);
- m_devicePropInitialized = true;
- } else {
- // Wait for the other thread to inititialize the properties.
- while (!m_devicePropInitialized) {
- std::atomic_thread_fence(std::memory_order_acquire);
- std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+ std::atomic_thread_fence(std::memory_order_release);
+ initialized_ = true;
+ } else {
+ // Wait for the other thread to inititialize the properties.
+ while (!initialized_) {
+ std::atomic_thread_fence(std::memory_order_acquire);
+ std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+ }
}
}
}
+
+ private:
+ volatile bool initialized_;
+ std::atomic<bool> first_;
+ gpuDeviceProp_t* device_properties_;
+};
+
+EIGEN_ALWAYS_INLINE const GpuDeviceProperties& GetGpuDeviceProperties() {
+ static GpuDeviceProperties* deviceProperties = new GpuDeviceProperties();
+ if (!deviceProperties->isInitialized()) {
+ deviceProperties->initialize();
+ }
+ return *deviceProperties;
+}
+
+EIGEN_ALWAYS_INLINE const gpuDeviceProp_t& GetGpuDeviceProperties(int device) {
+ return GetGpuDeviceProperties().get(device);
}
static const gpuStream_t default_stream = gpuStreamDefault;
@@ -96,12 +129,9 @@
// Use the default stream on the current device
GpuStreamDevice() : stream_(&default_stream), scratch_(NULL), semaphore_(NULL) {
gpuGetDevice(&device_);
- initializeDeviceProp();
}
// Use the default stream on the specified device
- GpuStreamDevice(int device) : stream_(&default_stream), device_(device), scratch_(NULL), semaphore_(NULL) {
- initializeDeviceProp();
- }
+ GpuStreamDevice(int device) : stream_(&default_stream), device_(device), scratch_(NULL), semaphore_(NULL) {}
// Use the specified stream. Note that it's the
// caller responsibility to ensure that the stream can run on
// the specified device. If no device is specified the code
@@ -118,7 +148,6 @@
gpu_assert(device < num_devices);
device_ = device;
}
- initializeDeviceProp();
}
virtual ~GpuStreamDevice() {
@@ -129,7 +158,7 @@
const gpuStream_t& stream() const { return *stream_; }
const gpuDeviceProp_t& deviceProperties() const {
- return m_deviceProperties[device_];
+ return GetGpuDeviceProperties(device_);
}
virtual void* allocate(size_t num_bytes) const {
gpuError_t err = gpuSetDevice(device_);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
index 132458a..f0f1e83 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
@@ -466,7 +466,7 @@
template <typename Dims1, typename Dims2, ptrdiff_t n>
struct sizes_match_below_dim<Dims1, Dims2, n, n> {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1& dims1, Dims2& dims2) {
- return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) &
+ return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) &&
sizes_match_below_dim<Dims1, Dims2, n-1, n-1>::run(dims1, dims2);
}
};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
index 13450e1..37c1d1c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
@@ -21,47 +21,11 @@
// We don't support 3d kernels since we currently only use 1 and
// 2d kernels.
gpu_assert(threadIdx.z == 0);
- return clock64() +
- blockIdx.x * blockDim.x + threadIdx.x +
- gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y);
-
-#elif defined _WIN32
- // Use the current time as a baseline.
- SYSTEMTIME st;
- GetSystemTime(&st);
- int time = st.wSecond + 1000 * st.wMilliseconds;
- // Mix in a random number to make sure that we get different seeds if
- // we try to generate seeds faster than the clock resolution.
- // We need 2 random values since the generator only generate 16 bits at
- // a time (https://msdn.microsoft.com/en-us/library/398ax69y.aspx)
- int rnd1 = ::rand();
- int rnd2 = ::rand();
- uint64_t rnd = (rnd1 | rnd2 << 16) ^ time;
- return rnd;
-
-#elif defined __APPLE__
- // Same approach as for win32, except that the random number generator
- // is better (// https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man3/random.3.html#//apple_ref/doc/man/3/random).
- uint64_t rnd = ::random() ^ mach_absolute_time();
- return rnd;
-
-#elif defined __native_client__
- // Same approach as for win32, except using clock_gettime
- timespec ts;
- clock_gettime(CLOCK_REALTIME, &ts);
- int rnd1 = ::rand();
- int rnd2 = ::rand();
- uint64_t rnd = (rnd1 | rnd2 << 16) ^ ts.tv_nsec;
- return rnd;
-
+ return blockIdx.x * blockDim.x + threadIdx.x
+ + gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y);
#else
- // Augment the current time with pseudo random number generation
- // to ensure that we get different seeds if we try to generate seeds
- // faster than the clock resolution.
- timespec ts;
- clock_gettime(CLOCK_REALTIME, &ts);
- uint64_t rnd = ::random() ^ ts.tv_nsec;
- return rnd;
+ // Rely on Eigen's random implementation.
+ return random<uint64_t>();
#endif
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
index 98c8250..a06c4a9 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
@@ -357,8 +357,8 @@
}
-template <typename Self, typename Reducer>
-struct ScanLauncher<Self, Reducer, GpuDevice, false> {
+template <typename Self, typename Reducer, bool Vectorize>
+struct ScanLauncher<Self, Reducer, GpuDevice, Vectorize> {
void operator()(const Self& self, typename Self::CoeffReturnType* data) {
Index total_size = internal::array_prod(self.dimensions());
Index num_blocks = (total_size / self.size() + 63) / 64;
diff --git a/unsupported/Eigen/IterativeSolvers b/unsupported/Eigen/IterativeSolvers
index 8930a74..38e09fd 100644
--- a/unsupported/Eigen/IterativeSolvers
+++ b/unsupported/Eigen/IterativeSolvers
@@ -14,6 +14,7 @@
#include "../../Eigen/Jacobi"
#include "../../Eigen/Householder"
+
/**
* \defgroup IterativeLinearSolvers_Module Iterative solvers module
* This module aims to provide various iterative linear and non linear solver algorithms.
@@ -23,11 +24,12 @@
* - an IDR(s) implementation
* - a DGMRES implementation
* - a MINRES implementation
+ *
* \code
* #include <unsupported/Eigen/IterativeSolvers>
* \endcode
*/
-//@{
+
#include "../../Eigen/src/Core/util/DisableStupidWarnings.h"
@@ -40,6 +42,5 @@
#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"
-//@}
#endif // EIGEN_ITERATIVE_SOLVERS_MODULE_H
diff --git a/unsupported/Eigen/SparseExtra b/unsupported/Eigen/SparseExtra
index 819cffa..ba5cbd6 100644
--- a/unsupported/Eigen/SparseExtra
+++ b/unsupported/Eigen/SparseExtra
@@ -24,6 +24,7 @@
#ifdef EIGEN_GOOGLEHASH_SUPPORT
#include <google/dense_hash_map>
+ #include <google/sparse_hash_map>
#endif
/**
diff --git a/unsupported/Eigen/src/SparseExtra/RandomSetter.h b/unsupported/Eigen/src/SparseExtra/RandomSetter.h
index 7542cf7..985702b 100644
--- a/unsupported/Eigen/src/SparseExtra/RandomSetter.h
+++ b/unsupported/Eigen/src/SparseExtra/RandomSetter.h
@@ -10,7 +10,13 @@
#ifndef EIGEN_RANDOMSETTER_H
#define EIGEN_RANDOMSETTER_H
-namespace Eigen {
+#if defined(EIGEN_GOOGLEHASH_SUPPORT)
+// Ensure the ::google namespace exists, required for checking existence of
+// ::google::dense_hash_map and ::google::sparse_hash_map.
+namespace google {}
+#endif
+
+namespace Eigen {
/** Represents a std::map
*
@@ -56,7 +62,26 @@
};
#endif // EIGEN_UNORDERED_MAP_SUPPORT
-#ifdef _DENSE_HASH_MAP_H_
+#if defined(EIGEN_GOOGLEHASH_SUPPORT)
+
+namespace google {
+
+// Namespace work-around, since sometimes dense_hash_map and sparse_hash_map
+// are in the global namespace, and other times they are under ::google.
+using namespace ::google;
+
+template<typename KeyType, typename Scalar>
+struct DenseHashMap {
+ typedef dense_hash_map<KeyType, Scalar> type;
+};
+
+template<typename KeyType, typename Scalar>
+struct SparseHashMap {
+ typedef sparse_hash_map<KeyType, Scalar> type;
+};
+
+} // namespace google
+
/** Represents a google::dense_hash_map
*
* \see RandomSetter
@@ -64,7 +89,7 @@
template<typename Scalar> struct GoogleDenseHashMapTraits
{
typedef int KeyType;
- typedef google::dense_hash_map<KeyType,Scalar> Type;
+ typedef typename google::DenseHashMap<KeyType,Scalar>::type Type;
enum {
IsSorted = 0
};
@@ -72,9 +97,7 @@
static void setInvalidKey(Type& map, const KeyType& k)
{ map.set_empty_key(k); }
};
-#endif
-#ifdef _SPARSE_HASH_MAP_H_
/** Represents a google::sparse_hash_map
*
* \see RandomSetter
@@ -82,7 +105,7 @@
template<typename Scalar> struct GoogleSparseHashMapTraits
{
typedef int KeyType;
- typedef google::sparse_hash_map<KeyType,Scalar> Type;
+ typedef typename google::SparseHashMap<KeyType,Scalar>::type Type;
enum {
IsSorted = 0
};
@@ -134,18 +157,17 @@
* GoogleSparseHashMapTraits, GnuHashMapTraits, and finally StdMapTraits.
*
* For performance and memory consumption reasons it is highly recommended to use one of
- * the Google's hash_map implementation. To enable the support for them, you have two options:
- * - \#include <google/dense_hash_map> yourself \b before Eigen/Sparse header
- * - define EIGEN_GOOGLEHASH_SUPPORT
- * In the later case the inclusion of <google/dense_hash_map> is made for you.
+ * Google's hash_map implementations. To enable the support for them, you must define
+ * EIGEN_GOOGLEHASH_SUPPORT. This will include both <google/dense_hash_map> and
+ * <google/sparse_hash_map> for you.
*
- * \see http://code.google.com/p/google-sparsehash/
+ * \see https://github.com/sparsehash/sparsehash
*/
template<typename SparseMatrixType,
template <typename T> class MapTraits =
-#if defined _DENSE_HASH_MAP_H_
+#if defined(EIGEN_GOOGLEHASH_SUPPORT)
GoogleDenseHashMapTraits
-#elif defined _HASH_MAP
+#elif defined(_HASH_MAP)
GnuHashMapTraits
#else
StdMapTraits
diff --git a/unsupported/test/cxx11_tensor_of_float16_gpu.cu b/unsupported/test/cxx11_tensor_of_float16_gpu.cu
index c55676c..062f76e 100644
--- a/unsupported/test/cxx11_tensor_of_float16_gpu.cu
+++ b/unsupported/test/cxx11_tensor_of_float16_gpu.cu
@@ -444,7 +444,7 @@
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half1(
d_res_half1, num_elem);
- Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Unaligned> gpu_res_half2(
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Unaligned> gpu_res_half2(
d_res_half2, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);
@@ -461,7 +461,7 @@
Tensor<float, 1> half_prec2(num_elem);
Tensor<float, 1> full_prec(num_elem);
gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res_half1, num_elem*sizeof(float));
- gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res_half1, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res_half2, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
gpu_device.synchronize();
diff --git a/unsupported/test/sparse_extra.cpp b/unsupported/test/sparse_extra.cpp
index bc681e3..cdfd10c 100644
--- a/unsupported/test/sparse_extra.cpp
+++ b/unsupported/test/sparse_extra.cpp
@@ -123,10 +123,8 @@
#ifdef EIGEN_UNORDERED_MAP_SUPPORT
VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, StdUnorderedMapTraits> >(m,refMat,nonzeroCoords) ));
#endif
- #ifdef _DENSE_HASH_MAP_H_
+ #ifdef EIGEN_GOOGLEHASH_SUPPORT
VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, GoogleDenseHashMapTraits> >(m,refMat,nonzeroCoords) ));
- #endif
- #ifdef _SPARSE_HASH_MAP_H_
VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, GoogleSparseHashMapTraits> >(m,refMat,nonzeroCoords) ));
#endif