Update Eigen to commit:b0f877f8e01e90a5b0f3a79d46ea234899f8b499
CHANGELOG
=========
b0f877f8e - Don't crash on empty tensor contraction.
15fbddaf9 - ASAN fixes for AVX512 GEMM/TRSM
178ef8c97 - qualify non-const symbolic indexed view with is_lvalue
df1049ddf - Small packet math cleanup.
9b48d1021 - Guard all malloc, realloc and free() functions with check_that_malloc_is_allowed()
c730290fa - Use the correct truncating intrinsic for double->int casting.
766db0202 - disable raw array indexed view access for 1d arrays
bfbc66e07 - refactor indexedviewmethods, enable non-const ref access with symbolic indices
1a5dfd7c0 - Fix incorrect casting in AVX512DQ path.
a08649994 - Optimize generic_rsqrt_newton_step
b8b8a2614 - Add more missing vectorized casts for int on x86, and remove redundant unit tests
PiperOrigin-RevId: 522181536
Change-Id: I0de49977abd65369d1646642599b61237dae9805
diff --git a/Eigen/src/Core/IndexedView.h b/Eigen/src/Core/IndexedView.h
index f967301..feab3a9 100644
--- a/Eigen/src/Core/IndexedView.h
+++ b/Eigen/src/Core/IndexedView.h
@@ -93,7 +93,6 @@
* - std::vector<int>
* - std::valarray<int>
* - std::array<int>
- * - Plain C arrays: int[N]
* - Eigen::ArrayXi
* - decltype(ArrayXi::LinSpaced(...))
* - Any view/expressions of the previous types
diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h
index 642e5d6..e5ae03d 100644
--- a/Eigen/src/Core/MathFunctionsImpl.h
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -77,26 +77,29 @@
template <typename Packet, int Steps>
struct generic_rsqrt_newton_step {
static_assert(Steps > 0, "Steps must be at least 1.");
-
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
+ using Scalar = typename unpacket_traits<Packet>::type;
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
run(const Packet& a, const Packet& approx_rsqrt) {
- using Scalar = typename unpacket_traits<Packet>::type;
- const Packet one_point_five = pset1<Packet>(Scalar(1.5));
- const Packet minus_half = pset1<Packet>(Scalar(-0.5));
+ const Packet cst_minus_one = pset1<Packet>(Scalar(-1));
+ const Packet cst_minus_half = pset1<Packet>(Scalar(-1)/Scalar(2));
// Refine the approximation using one Newton-Raphson step:
- // x_{n+1} = x_n * (1.5 + (-0.5 * x_n) * (a * x_n)).
// The approximation is expressed this way to avoid over/under-flows.
- Packet x_newton = pmul(approx_rsqrt, pmadd(pmul(minus_half, approx_rsqrt), pmul(a, approx_rsqrt), one_point_five));
- for (int step = 1; step < Steps; ++step) {
- x_newton = pmul(x_newton, pmadd(pmul(minus_half, x_newton), pmul(a, x_newton), one_point_five));
+ // x' = x - (x/2) * ( (a*x)*x - 1)
+
+ Packet x = approx_rsqrt;
+ for (int step = 0; step < Steps; ++step) {
+ Packet minushalfx = pmul(cst_minus_half, x);
+ Packet ax = pmul(a, x);
+ Packet ax2m1 = pmadd(ax, x, cst_minus_one);
+ x = pmadd(ax2m1, minushalfx, x);
}
-
- // If approx_rsqrt is 0 or +/-inf, we should return it as is. Note:
- // on intel, approx_rsqrt can be inf for small denormal values.
- const Packet return_approx = por(pcmp_eq(approx_rsqrt, pzero(a)),
- pcmp_eq(pabs(approx_rsqrt), pset1<Packet>(NumTraits<Scalar>::infinity())));
- return pselect(return_approx, approx_rsqrt, x_newton);
+
+ // If x is NaN, then either:
+ // 1) the input is NaN
+ // 2) zero and infinity were multiplied
+ // In either of these cases, return approx_rsqrt
+ return pselect(pisnan(x), approx_rsqrt, x);
}
};
@@ -108,7 +111,6 @@
}
};
-
/** \internal Fast sqrt using Newton-Raphson's method.
Preconditions:
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index 3abb5bd..cd90496 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -33,7 +33,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
- HasHalfPacket = 1,
HasAdd = 1,
HasSub = 1,
@@ -201,7 +200,6 @@
Vectorizable = 1,
AlignedOnScalar = 0,
size = 2,
- HasHalfPacket = 1,
HasAdd = 1,
HasSub = 1,
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index c5e1cc0..af4742b 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -66,7 +66,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 8,
- HasHalfPacket = 1,
HasCmp = 1,
HasDiv = 1,
@@ -102,7 +101,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size=4,
- HasHalfPacket = 1,
HasCmp = 1,
HasDiv = 1,
@@ -128,7 +126,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 8,
- HasHalfPacket = 0,
HasCmp = 1,
HasAdd = 1,
@@ -172,7 +169,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 8,
- HasHalfPacket = 0,
HasCmp = 1,
HasAdd = 1,
@@ -873,6 +869,9 @@
template<> EIGEN_STRONG_INLINE Packet8f pselect<Packet8f>(const Packet8f& mask, const Packet8f& a, const Packet8f& b)
{ return _mm256_blendv_ps(b,a,mask); }
+template<> EIGEN_STRONG_INLINE Packet8i pselect<Packet8i>(const Packet8i& mask, const Packet8i& a, const Packet8i& b)
+{ return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(b), _mm256_castsi256_ps(a), _mm256_castsi256_ps(mask))); }
+
template<> EIGEN_STRONG_INLINE Packet4d pselect<Packet4d>(const Packet4d& mask, const Packet4d& a, const Packet4d& b)
{ return _mm256_blendv_pd(b,a,mask); }
diff --git a/Eigen/src/Core/arch/AVX/TypeCasting.h b/Eigen/src/Core/arch/AVX/TypeCasting.h
index db19b56..386543e 100644
--- a/Eigen/src/Core/arch/AVX/TypeCasting.h
+++ b/Eigen/src/Core/arch/AVX/TypeCasting.h
@@ -64,7 +64,6 @@
};
#endif // EIGEN_VECTORIZE_AVX512
-
template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
return _mm256_cvttps_epi32(a);
}
@@ -77,6 +76,10 @@
return _mm256_set_m128(_mm256_cvtpd_ps(b), _mm256_cvtpd_ps(a));
}
+template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet4d, Packet8i>(const Packet4d& a, const Packet4d& b) {
+ return _mm256_set_m128i(_mm256_cvttpd_epi32(b), _mm256_cvttpd_epi32(a));
+}
+
template <>
EIGEN_STRONG_INLINE Packet16b pcast<Packet8f, Packet16b>(const Packet8f& a,
const Packet8f& b) {
diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h
index 6d8ee2b..0372e95 100644
--- a/Eigen/src/Core/arch/AVX512/Complex.h
+++ b/Eigen/src/Core/arch/AVX512/Complex.h
@@ -32,7 +32,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 8,
- HasHalfPacket = 1,
HasAdd = 1,
HasSub = 1,
@@ -185,7 +184,6 @@
Vectorizable = 1,
AlignedOnScalar = 0,
size = 4,
- HasHalfPacket = 1,
HasAdd = 1,
HasSub = 1,
diff --git a/Eigen/src/Core/arch/AVX512/GemmKernel.h b/Eigen/src/Core/arch/AVX512/GemmKernel.h
index cb7cfdf..616a058 100644
--- a/Eigen/src/Core/arch/AVX512/GemmKernel.h
+++ b/Eigen/src/Core/arch/AVX512/GemmKernel.h
@@ -641,7 +641,7 @@
}
}
- template <int uk, int max_b_unroll, int a_unroll, int b_unroll, bool ktail, bool fetch_x, bool c_fetch>
+ template <int uk, int max_b_unroll, int a_unroll, int b_unroll, bool ktail, bool fetch_x, bool c_fetch, bool no_a_preload = false>
EIGEN_ALWAYS_INLINE void innerkernel_1uk(const Scalar *&aa, const Scalar *const &ao, const Scalar *const &bo,
Scalar *&co2, int &fetchA_idx, int &fetchB_idx) {
const int um_vecs = div_up(a_unroll, nelems_in_cache_line);
@@ -655,8 +655,8 @@
if (max_b_unroll >= 8)
innerkernel_1pow<uk, 8, 0, um_vecs, b_unroll, ktail, fetch_x, c_fetch>(aa, ao, bo, co2, fetchA_idx, fetchB_idx);
- // Load A after pow-loop.
- load_a<0, um_vecs, uk, a_unroll, ktail>(ao);
+ // Load A after pow-loop. Skip this at the end to prevent running over the buffer
+ if (!no_a_preload) load_a<0, um_vecs, uk, a_unroll, ktail>(ao);
}
/* Inner kernel loop structure.
@@ -698,7 +698,7 @@
* bo += b_unroll * kfactor;
*/
- template <int a_unroll, int b_unroll, int k_factor, int max_b_unroll, int max_k_factor, bool c_fetch>
+ template <int a_unroll, int b_unroll, int k_factor, int max_b_unroll, int max_k_factor, bool c_fetch, bool no_a_preload = false>
EIGEN_ALWAYS_INLINE void innerkernel(const Scalar *&aa, const Scalar *&ao, const Scalar *&bo, Scalar *&co2) {
int fetchA_idx = 0;
int fetchB_idx = 0;
@@ -707,18 +707,19 @@
const bool ktail = k_factor == 1;
static_assert(k_factor <= 4 && k_factor > 0, "innerkernel maximum k_factor supported is 4");
+ static_assert(no_a_preload == false || (no_a_preload == true && k_factor == 1), "skipping a preload only allowed when k unroll is 1");
if (k_factor > 0)
- innerkernel_1uk<0, max_b_unroll, a_unroll, b_unroll, ktail, fetch_x, c_fetch>(aa, ao, bo, co2, fetchA_idx,
+ innerkernel_1uk<0, max_b_unroll, a_unroll, b_unroll, ktail, fetch_x, c_fetch, no_a_preload>(aa, ao, bo, co2, fetchA_idx,
fetchB_idx);
if (k_factor > 1)
- innerkernel_1uk<1, max_b_unroll, a_unroll, b_unroll, ktail, fetch_x, c_fetch>(aa, ao, bo, co2, fetchA_idx,
+ innerkernel_1uk<1, max_b_unroll, a_unroll, b_unroll, ktail, fetch_x, c_fetch, no_a_preload>(aa, ao, bo, co2, fetchA_idx,
fetchB_idx);
if (k_factor > 2)
- innerkernel_1uk<2, max_b_unroll, a_unroll, b_unroll, ktail, fetch_x, c_fetch>(aa, ao, bo, co2, fetchA_idx,
+ innerkernel_1uk<2, max_b_unroll, a_unroll, b_unroll, ktail, fetch_x, c_fetch, no_a_preload>(aa, ao, bo, co2, fetchA_idx,
fetchB_idx);
if (k_factor > 3)
- innerkernel_1uk<3, max_b_unroll, a_unroll, b_unroll, ktail, fetch_x, c_fetch>(aa, ao, bo, co2, fetchA_idx,
+ innerkernel_1uk<3, max_b_unroll, a_unroll, b_unroll, ktail, fetch_x, c_fetch, no_a_preload>(aa, ao, bo, co2, fetchA_idx,
fetchB_idx);
// Advance A/B pointers after uk-loop.
@@ -729,7 +730,7 @@
template <int a_unroll, int b_unroll, int max_b_unroll>
EIGEN_ALWAYS_INLINE void kloop(const Scalar *&aa, const Scalar *&ao, const Scalar *&bo, Scalar *&co1, Scalar *&co2) {
const int um_vecs = div_up(a_unroll, nelems_in_cache_line);
- if (!use_less_a_regs)
+ if (!use_less_a_regs && k > 1)
a_loads<0, 2, 0, um_vecs, a_unroll>(ao);
else
a_loads<0, 1, 0, um_vecs, a_unroll>(ao);
@@ -743,7 +744,13 @@
// Unrolling k-loop by a factor of 4.
const int max_k_factor = 4;
- Index loop_count = k / max_k_factor;
+ Index kRem = k % max_k_factor;
+ Index k_ = k - kRem;
+ if (k_ >= max_k_factor) {
+ k_ -= max_k_factor;
+ kRem += max_k_factor;
+ }
+ Index loop_count = k_ / max_k_factor;
if (loop_count > 0) {
#ifdef SECOND_FETCH
@@ -771,11 +778,14 @@
}
// k-loop remainder handling.
- loop_count = k % max_k_factor;
- while (loop_count > 0) {
+ loop_count = kRem;
+ while (loop_count > 1) {
innerkernel<a_unroll, b_unroll, 1, max_b_unroll, max_k_factor, 0>(aa, ao, bo, co2);
loop_count--;
}
+ if (loop_count > 0) {
+ innerkernel<a_unroll, b_unroll, 1, max_b_unroll, max_k_factor, 0, true>(aa, ao, bo, co2);
+ }
// Update C matrix.
c_update<max_b_unroll, a_unroll, b_unroll>(co1, co2);
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 543f424..129a68c 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -63,7 +63,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 16,
- HasHalfPacket = 1,
HasCmp = 1,
HasAdd = 1,
@@ -89,7 +88,7 @@
HasCos = EIGEN_FAST_MATH,
HasTanh = EIGEN_FAST_MATH,
HasErf = EIGEN_FAST_MATH,
- HasBlend = 0,
+ HasBlend = 0,
HasRound = 1,
HasFloor = 1,
HasCeil = 1,
@@ -106,13 +105,12 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 16,
- HasHalfPacket = 1,
HasAbs = 1,
- HasMin = 1,
- HasMax = 1,
- HasConj = 1,
- HasBlend = 0,
+ HasMin = 1,
+ HasMax = 1,
+ HasConj = 1,
+ HasBlend = 1,
HasSin = EIGEN_FAST_MATH,
HasCos = EIGEN_FAST_MATH,
HasACos = 1,
@@ -146,7 +144,7 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 8,
- HasHalfPacket = 1,
+ HasBlend = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasLog = 1,
@@ -168,6 +166,7 @@
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
+ HasBlend = 0,
HasCmp = 1,
HasDiv = 1,
size=16
@@ -455,12 +454,19 @@
EIGEN_DEVICE_FUNC inline Packet16f pselect(const Packet16f& mask,
const Packet16f& a,
const Packet16f& b) {
- __mmask16 mask16 = _mm512_cmp_epi32_mask(
- _mm512_castps_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ);
+ __mmask16 mask16 = _mm512_cmpeq_epi32_mask(_mm512_castps_si512(mask), _mm512_setzero_epi32());
return _mm512_mask_blend_ps(mask16, a, b);
}
template <>
+EIGEN_DEVICE_FUNC inline Packet16i pselect(const Packet16i& mask,
+ const Packet16i& a,
+ const Packet16i& b) {
+ __mmask16 mask16 = _mm512_cmpeq_epi32_mask(mask, _mm512_setzero_epi32());
+ return _mm512_mask_blend_epi32(mask16, a, b);
+}
+
+template <>
EIGEN_DEVICE_FUNC inline Packet8d pselect(const Packet8d& mask,
const Packet8d& a,
const Packet8d& b) {
@@ -544,6 +550,7 @@
template<int I_> EIGEN_STRONG_INLINE Packet8f extract256(Packet16f x) { return _mm512_extractf32x8_ps(x,I_); }
template<int I_> EIGEN_STRONG_INLINE Packet2d extract128(Packet8d x) { return _mm512_extractf64x2_pd(x,I_); }
EIGEN_STRONG_INLINE Packet16f cat256(Packet8f a, Packet8f b) { return _mm512_insertf32x8(_mm512_castps256_ps512(a),b,1); }
+EIGEN_STRONG_INLINE Packet16i cat256i(Packet8i a, Packet8i b) { return _mm512_inserti32x8(_mm512_castsi256_si512(a), b, 1); }
#else
// AVX512F does not define _mm512_extractf32x8_ps to extract _m256 from _m512
template<int I_> EIGEN_STRONG_INLINE Packet8f extract256(Packet16f x) {
@@ -559,6 +566,9 @@
return _mm512_castsi512_ps(_mm512_inserti64x4(_mm512_castsi256_si512(_mm256_castps_si256(a)),
_mm256_castps_si256(b),1));
}
+EIGEN_STRONG_INLINE Packet16i cat256i(Packet8i a, Packet8i b) {
+ return _mm512_inserti64x4(_mm512_castsi256_si512(a), b, 1);
+}
#endif
// Helper function for bit packing snippet of low precision comparison.
@@ -1843,11 +1853,16 @@
}
template <>
-EIGEN_STRONG_INLINE Packet16f pblend(const Selector<16>& /*ifPacket*/,
- const Packet16f& /*thenPacket*/,
- const Packet16f& /*elsePacket*/) {
- eigen_assert(false && "To be implemented");
- return Packet16f();
+EIGEN_STRONG_INLINE Packet16f pblend(const Selector<16>& ifPacket,
+ const Packet16f& thenPacket,
+ const Packet16f& elsePacket) {
+ __mmask16 m = (ifPacket.select[0]) | (ifPacket.select[1] << 1) | (ifPacket.select[2] << 2) |
+ (ifPacket.select[3] << 3) | (ifPacket.select[4] << 4) | (ifPacket.select[5] << 5) |
+ (ifPacket.select[6] << 6) | (ifPacket.select[7] << 7) | (ifPacket.select[8] << 8) |
+ (ifPacket.select[9] << 9) | (ifPacket.select[10] << 10) | (ifPacket.select[11] << 11) |
+ (ifPacket.select[12] << 12) | (ifPacket.select[13] << 13) | (ifPacket.select[14] << 14) |
+ (ifPacket.select[15] << 15);
+ return _mm512_mask_blend_ps(m, elsePacket, thenPacket);
}
template <>
EIGEN_STRONG_INLINE Packet8d pblend(const Selector<8>& ifPacket,
@@ -2291,7 +2306,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 16,
- HasHalfPacket = 1,
HasBlend = 0,
HasInsert = 1,
HasSin = EIGEN_FAST_MATH,
diff --git a/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc b/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc
index e137d6a..4c6116c 100644
--- a/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc
+++ b/Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc
@@ -299,7 +299,7 @@
* 1-D unroll
* for(startN = 0; startN < endN; startN++)
**/
- template <int64_t endN, int64_t counter, int64_t packetIndexOffset, bool remM>
+ template <int64_t endN, int64_t counter, int64_t packetIndexOffset, bool remM, int64_t remN_>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_loadB(
Scalar *B_arr, int64_t LDB, PacketBlock<vecHalf, EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS> &ymm,
int64_t remM_ = 0) {
@@ -310,12 +310,18 @@
ymm.packet[packetIndexOffset + startN] =
ploadu<vecHalf>((const Scalar *)&B_arr[startN * LDB], remMask<EIGEN_AVX_MAX_NUM_ROW>(remM_));
}
- else ymm.packet[packetIndexOffset + startN] = ploadu<vecHalf>((const Scalar *)&B_arr[startN * LDB]);
+ else {
+ EIGEN_IF_CONSTEXPR(remN_ == 0) {
+ ymm.packet[packetIndexOffset + startN] = ploadu<vecHalf>((const Scalar *)&B_arr[startN * LDB]);
+ }
+ else ymm.packet[packetIndexOffset + startN] =
+ ploadu<vecHalf>((const Scalar *)&B_arr[startN * LDB], remMask<EIGEN_AVX_MAX_NUM_ROW>(remN_));
+ }
- aux_loadB<endN, counter - 1, packetIndexOffset, remM>(B_arr, LDB, ymm, remM_);
+ aux_loadB<endN, counter - 1, packetIndexOffset, remM, remN_>(B_arr, LDB, ymm, remM_);
}
- template <int64_t endN, int64_t counter, int64_t packetIndexOffset, bool remM>
+ template <int64_t endN, int64_t counter, int64_t packetIndexOffset, bool remM, int64_t remN_>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter <= 0)> aux_loadB(
Scalar *B_arr, int64_t LDB, PacketBlock<vecHalf, EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS> &ymm,
int64_t remM_ = 0) {
@@ -363,17 +369,17 @@
* 1-D unroll
* for(startN = 0; startN < endN; startN += EIGEN_AVX_MAX_NUM_ROW)
**/
- template <int64_t endN, int64_t counter, bool toTemp, bool remM>
+ template <int64_t endN, int64_t counter, bool toTemp, bool remM, int64_t remN_>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_loadBBlock(
Scalar *B_arr, int64_t LDB, Scalar *B_temp, int64_t LDB_,
PacketBlock<vecHalf, EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS> &ymm, int64_t remM_ = 0) {
constexpr int64_t counterReverse = endN - counter;
constexpr int64_t startN = counterReverse;
- transB::template loadB<EIGEN_AVX_MAX_NUM_ROW, startN, false>(&B_temp[startN], LDB_, ymm);
- aux_loadBBlock<endN, counter - EIGEN_AVX_MAX_NUM_ROW, toTemp, remM>(B_arr, LDB, B_temp, LDB_, ymm, remM_);
+ transB::template loadB<EIGEN_AVX_MAX_NUM_ROW, startN, false, (toTemp ? 0 : remN_)>(&B_temp[startN], LDB_, ymm);
+ aux_loadBBlock<endN, counter - EIGEN_AVX_MAX_NUM_ROW, toTemp, remM, remN_>(B_arr, LDB, B_temp, LDB_, ymm, remM_);
}
- template <int64_t endN, int64_t counter, bool toTemp, bool remM>
+ template <int64_t endN, int64_t counter, bool toTemp, bool remM, int64_t remN_>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter <= 0)> aux_loadBBlock(
Scalar *B_arr, int64_t LDB, Scalar *B_temp, int64_t LDB_,
PacketBlock<vecHalf, EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS> &ymm, int64_t remM_ = 0) {
@@ -424,11 +430,11 @@
* Wrappers for aux_XXXX to hide counter parameter
********************************************************/
- template <int64_t endN, int64_t packetIndexOffset, bool remM>
+ template <int64_t endN, int64_t packetIndexOffset, bool remM, int64_t remN_>
static EIGEN_ALWAYS_INLINE void loadB(Scalar *B_arr, int64_t LDB,
PacketBlock<vecHalf, EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS> &ymm,
int64_t remM_ = 0) {
- aux_loadB<endN, endN, packetIndexOffset, remM>(B_arr, LDB, ymm, remM_);
+ aux_loadB<endN, endN, packetIndexOffset, remM, remN_>(B_arr, LDB, ymm, remM_);
}
template <int64_t endN, int64_t packetIndexOffset, bool remK, bool remM>
@@ -438,13 +444,13 @@
aux_storeB<endN, endN, packetIndexOffset, remK, remM>(B_arr, LDB, ymm, rem_);
}
- template <int64_t unrollN, bool toTemp, bool remM>
+ template <int64_t unrollN, bool toTemp, bool remM, int64_t remN_ = 0>
static EIGEN_ALWAYS_INLINE void loadBBlock(Scalar *B_arr, int64_t LDB, Scalar *B_temp, int64_t LDB_,
PacketBlock<vecHalf, EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS> &ymm,
int64_t remM_ = 0) {
- EIGEN_IF_CONSTEXPR(toTemp) { transB::template loadB<unrollN, 0, remM>(&B_arr[0], LDB, ymm, remM_); }
+ EIGEN_IF_CONSTEXPR(toTemp) { transB::template loadB<unrollN, 0, remM, 0>(&B_arr[0], LDB, ymm, remM_); }
else {
- aux_loadBBlock<unrollN, unrollN, toTemp, remM>(B_arr, LDB, B_temp, LDB_, ymm, remM_);
+ aux_loadBBlock<unrollN, unrollN, toTemp, remM, remN_>(B_arr, LDB, B_temp, LDB_, ymm, remM_);
}
}
@@ -550,13 +556,13 @@
}
else EIGEN_IF_CONSTEXPR(unrollN == 2) {
// load Lx2 B col major, transpose Lx2 row major
- transB::template loadBBlock<2, toTemp, remM>(B_arr, LDB, B_temp, LDB_, ymm, remM_);
+ transB::template loadBBlock<2, toTemp, remM, 2>(B_arr, LDB, B_temp, LDB_, ymm, remM_);
transB::template transposeLxL<0>(ymm);
transB::template storeBBlock<2, toTemp, remM, 2>(B_arr, LDB, B_temp, LDB_, ymm, remM_);
}
else EIGEN_IF_CONSTEXPR(unrollN == 1) {
// load Lx1 B col major, transpose Lx1 row major
- transB::template loadBBlock<1, toTemp, remM>(B_arr, LDB, B_temp, LDB_, ymm, remM_);
+ transB::template loadBBlock<1, toTemp, remM, 1>(B_arr, LDB, B_temp, LDB_, ymm, remM_);
transB::template transposeLxL<0>(ymm);
transB::template storeBBlock<1, toTemp, remM, 1>(B_arr, LDB, B_temp, LDB_, ymm, remM_);
}
diff --git a/Eigen/src/Core/arch/AVX512/TypeCasting.h b/Eigen/src/Core/arch/AVX512/TypeCasting.h
index 60f49a3..02e6335 100644
--- a/Eigen/src/Core/arch/AVX512/TypeCasting.h
+++ b/Eigen/src/Core/arch/AVX512/TypeCasting.h
@@ -55,6 +55,10 @@
return cat256(_mm512_cvtpd_ps(a), _mm512_cvtpd_ps(b));
}
+template<> EIGEN_STRONG_INLINE Packet16i pcast<Packet8d, Packet16i>(const Packet8d& a, const Packet8d& b) {
+ return cat256i(_mm512_cvttpd_epi32(a), _mm512_cvttpd_epi32(b));
+}
+
template<> EIGEN_STRONG_INLINE Packet16i preinterpret<Packet16i, Packet16f>(const Packet16f& a) {
return _mm512_castps_si512(a);
}
diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h
index e448bb6..69cc068 100644
--- a/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -91,7 +91,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
@@ -360,7 +359,6 @@
Vectorizable = 1,
AlignedOnScalar = 0,
size = 1,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index e443a63..d477ab7 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -157,7 +157,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
- HasHalfPacket = 1,
HasAdd = 1,
HasSub = 1,
@@ -206,7 +205,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 8,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
@@ -250,7 +248,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
@@ -274,7 +271,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 8,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
@@ -293,7 +289,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 8,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
@@ -312,7 +307,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 16,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
@@ -331,7 +325,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 16,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
@@ -2710,7 +2703,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size=2,
- HasHalfPacket = 1,
HasAdd = 1,
HasSub = 1,
diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h
index 17dd8fb..a04c563 100644
--- a/Eigen/src/Core/arch/GPU/PacketMath.h
+++ b/Eigen/src/Core/arch/GPU/PacketMath.h
@@ -46,7 +46,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size=4,
- HasHalfPacket = 0,
HasDiv = 1,
HasSin = 0,
@@ -82,7 +81,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size=2,
- HasHalfPacket = 0,
HasDiv = 1,
HasLog = 1,
@@ -534,7 +532,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size=8,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
HasMul = 1,
diff --git a/Eigen/src/Core/arch/MSA/Complex.h b/Eigen/src/Core/arch/MSA/Complex.h
index b11a9b4..83239c0 100644
--- a/Eigen/src/Core/arch/MSA/Complex.h
+++ b/Eigen/src/Core/arch/MSA/Complex.h
@@ -107,7 +107,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
@@ -422,7 +421,6 @@
Vectorizable = 1,
AlignedOnScalar = 0,
size = 1,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
diff --git a/Eigen/src/Core/arch/MSA/PacketMath.h b/Eigen/src/Core/arch/MSA/PacketMath.h
index f03dbed..4e6bcdf 100644
--- a/Eigen/src/Core/arch/MSA/PacketMath.h
+++ b/Eigen/src/Core/arch/MSA/PacketMath.h
@@ -80,7 +80,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
- HasHalfPacket = 0, // Packet2f intrinsics not implemented yet
// FIXME check the Has*
HasDiv = 1,
HasSin = EIGEN_FAST_MATH,
@@ -106,7 +105,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
- HasHalfPacket = 0, // Packet2i intrinsics not implemented yet
// FIXME check the Has*
HasDiv = 1,
HasBlend = 1
@@ -850,7 +848,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
- HasHalfPacket = 0,
// FIXME check the Has*
HasDiv = 1,
HasExp = 1,
diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index e436360..97f4116 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -59,7 +59,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
- HasHalfPacket = 1,
HasAdd = 1,
HasSub = 1,
@@ -400,7 +399,6 @@
Vectorizable = 1,
AlignedOnScalar = 0,
size = 1,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index fac0219..e52e3fb 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -187,7 +187,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
- HasHalfPacket = 1,
HasCmp = 1,
HasAdd = 1,
@@ -237,7 +236,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 16,
- HasHalfPacket = 1,
HasCmp = 1,
HasAdd = 1,
@@ -267,7 +265,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 16,
- HasHalfPacket = 1,
HasCmp = 1,
HasAdd = 1,
@@ -299,7 +296,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 8,
- HasHalfPacket = 1,
HasCmp = 1,
HasAdd = 1,
@@ -329,7 +325,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 8,
- HasHalfPacket = 1,
HasCmp = 1,
HasAdd = 1,
@@ -360,7 +355,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
- HasHalfPacket = 1,
HasCmp = 1,
HasAdd = 1,
@@ -390,7 +384,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
- HasHalfPacket = 1,
HasCmp = 1,
HasAdd = 1,
@@ -422,7 +415,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
- HasHalfPacket = 0,
HasCmp = 1,
HasAdd = 1,
@@ -452,7 +444,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
- HasHalfPacket = 0,
HasCmp = 1,
HasAdd = 1,
@@ -3410,7 +3401,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
- HasHalfPacket = 0,
HasCmp = 1,
HasAdd = 1,
@@ -3784,7 +3774,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
- HasHalfPacket = 0,
HasCmp = 1,
HasAdd = 1,
@@ -4027,7 +4016,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 8,
- HasHalfPacket = 1,
HasCmp = 1,
HasCast = 1,
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index 60308ce..366daa7 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -35,7 +35,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
@@ -185,7 +184,6 @@
Vectorizable = 1,
AlignedOnScalar = 0,
size = 1,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 7d608bb..499c16b 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -132,7 +132,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
- HasHalfPacket = 0,
HasCmp = 1,
HasDiv = 1,
@@ -171,7 +170,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size=2,
- HasHalfPacket = 0,
HasCmp = 1,
HasDiv = 1,
@@ -212,7 +210,6 @@
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
- HasHalfPacket = 0,
size=16,
HasAdd = 1,
@@ -1478,7 +1475,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
HasMul = 1,
diff --git a/Eigen/src/Core/arch/SSE/TypeCasting.h b/Eigen/src/Core/arch/SSE/TypeCasting.h
index 2ab0943..df5c72c 100644
--- a/Eigen/src/Core/arch/SSE/TypeCasting.h
+++ b/Eigen/src/Core/arch/SSE/TypeCasting.h
@@ -27,13 +27,14 @@
};
template <>
-struct type_casting_traits<float, int> {
+struct type_casting_traits<float, double> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 1,
- TgtCoeffRatio = 1
+ TgtCoeffRatio = 2
};
};
+#endif
template <>
struct type_casting_traits<int, float> {
@@ -45,14 +46,22 @@
};
template <>
-struct type_casting_traits<float, double> {
+struct type_casting_traits<float, int> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 1,
- TgtCoeffRatio = 2
+ TgtCoeffRatio = 1
};
};
-#endif
+
+template <>
+struct type_casting_traits<double, int> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 2,
+ TgtCoeffRatio = 1
+ };
+};
template <>
struct type_casting_traits<double, float> {
@@ -91,6 +100,12 @@
return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
}
+template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet2d, Packet4i>(const Packet2d& a, const Packet2d& b) {
+ return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm_cvttpd_epi32(a)),
+ _mm_castsi128_ps(_mm_cvttpd_epi32(b)),
+ (1 << 2) | (1 << 6)));
+}
+
template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
// Simply discard the second half of the input
return _mm_cvtps_pd(a);
diff --git a/Eigen/src/Core/arch/SVE/PacketMath.h b/Eigen/src/Core/arch/SVE/PacketMath.h
index 9c106b3..a2f292f 100644
--- a/Eigen/src/Core/arch/SVE/PacketMath.h
+++ b/Eigen/src/Core/arch/SVE/PacketMath.h
@@ -42,7 +42,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = sve_packet_size_selector<numext::int32_t, EIGEN_ARM64_SVE_VL>::size,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
@@ -377,7 +376,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = sve_packet_size_selector<float, EIGEN_ARM64_SVE_VL>::size,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h
index df5c8d4..7f22e5c 100644
--- a/Eigen/src/Core/arch/ZVector/Complex.h
+++ b/Eigen/src/Core/arch/ZVector/Complex.h
@@ -53,7 +53,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
@@ -78,7 +77,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 1,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h
index 26b6f0d..892e3a1 100644
--- a/Eigen/src/Core/arch/ZVector/PacketMath.h
+++ b/Eigen/src/Core/arch/ZVector/PacketMath.h
@@ -160,7 +160,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
@@ -178,7 +177,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
- HasHalfPacket = 0,
HasAdd = 1,
HasSub = 1,
@@ -211,7 +209,6 @@
Vectorizable = 1,
AlignedOnScalar = 1,
size=2,
- HasHalfPacket = 1,
HasAdd = 1,
HasSub = 1,
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 13d029c..f4217e2 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -84,6 +84,35 @@
namespace internal {
+/*****************************************************************************
+*** Implementation of portable aligned versions of malloc/free/realloc ***
+*****************************************************************************/
+
+#ifdef EIGEN_NO_MALLOC
+EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
+{
+ eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
+}
+#elif defined EIGEN_RUNTIME_NO_MALLOC
+EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
+{
+ EIGEN_MALLOC_CHECK_THREAD_LOCAL static bool value = true;
+ if (update == 1)
+ value = new_value;
+ return value;
+}
+EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
+EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
+EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
+{
+ eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
+}
+#else
+EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
+{}
+#endif
+
+
EIGEN_DEVICE_FUNC
inline void throw_std_bad_alloc()
{
@@ -121,7 +150,10 @@
EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
{
eigen_assert(alignment >= sizeof(void*) && alignment <= 128 && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*), less than or equal to 128, and a power of 2");
- void* original = std::malloc(size + alignment);
+
+ check_that_malloc_is_allowed();
+ EIGEN_USING_STD(malloc)
+ void* original = malloc(size + alignment);
if (original == 0) return 0;
uint8_t offset = static_cast<uint8_t>(alignment - (reinterpret_cast<std::size_t>(original) & (alignment - 1)));
void* aligned = static_cast<void*>(static_cast<uint8_t*>(original) + offset);
@@ -135,7 +167,10 @@
if (ptr) {
uint8_t offset = static_cast<uint8_t>(*(static_cast<uint8_t*>(ptr) - 1));
void* original = static_cast<void*>(static_cast<uint8_t*>(ptr) - offset);
- std::free(original);
+
+ check_that_malloc_is_allowed();
+ EIGEN_USING_STD(free)
+ free(original);
}
}
@@ -146,11 +181,14 @@
*/
EIGEN_DEVICE_FUNC inline void* handmade_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
{
- if (ptr == 0) return handmade_aligned_malloc(new_size, alignment);
+ if (ptr == nullptr) return handmade_aligned_malloc(new_size, alignment);
uint8_t old_offset = *(static_cast<uint8_t*>(ptr) - 1);
void* old_original = static_cast<uint8_t*>(ptr) - old_offset;
- void* original = std::realloc(old_original, new_size + alignment);
- if (original == 0) return 0;
+
+ check_that_malloc_is_allowed();
+ EIGEN_USING_STD(realloc)
+ void* original = realloc(old_original, new_size + alignment);
+ if (original == nullptr) return nullptr;
if (original == old_original) return ptr;
uint8_t offset = static_cast<uint8_t>(alignment - (reinterpret_cast<std::size_t>(original) & (alignment - 1)));
void* aligned = static_cast<void*>(static_cast<uint8_t*>(original) + offset);
@@ -163,44 +201,17 @@
return aligned;
}
-/*****************************************************************************
-*** Implementation of portable aligned versions of malloc/free/realloc ***
-*****************************************************************************/
-
-#ifdef EIGEN_NO_MALLOC
-EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
-{
- eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
-}
-#elif defined EIGEN_RUNTIME_NO_MALLOC
-EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
-{
- EIGEN_MALLOC_CHECK_THREAD_LOCAL static bool value = true;
- if (update == 1)
- value = new_value;
- return value;
-}
-EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
-EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
-EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
-{
- eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
-}
-#else
-EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
-{}
-#endif
-
/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements.
* On allocation error, the returned pointer is null, and std::bad_alloc is thrown.
*/
EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
{
- check_that_malloc_is_allowed();
-
+ if (size == 0) return nullptr;
+
void *result;
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
+ check_that_malloc_is_allowed();
EIGEN_USING_STD(malloc)
result = malloc(size);
@@ -222,6 +233,8 @@
{
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
+ if(ptr)
+ check_that_malloc_is_allowed();
EIGEN_USING_STD(free)
free(ptr);
@@ -237,11 +250,17 @@
*/
EIGEN_DEVICE_FUNC inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)
{
- if (ptr == 0) return aligned_malloc(new_size);
+ if (ptr == nullptr) return aligned_malloc(new_size);
+ if (old_size == new_size) return ptr;
+ if (new_size == 0) { aligned_free(ptr); return nullptr; }
+
void *result;
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
EIGEN_UNUSED_VARIABLE(old_size)
- result = std::realloc(ptr,new_size);
+
+ check_that_malloc_is_allowed();
+ EIGEN_USING_STD(realloc)
+ result = realloc(ptr,new_size);
#else
result = handmade_aligned_realloc(ptr,new_size,old_size);
#endif
@@ -249,11 +268,6 @@
if (!result && new_size)
throw_std_bad_alloc();
-#ifdef EIGEN_RUNTIME_NO_MALLOC
- if (result != ptr)
- check_that_malloc_is_allowed();
-#endif
-
return result;
}
@@ -271,8 +285,9 @@
template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size)
{
- check_that_malloc_is_allowed();
+ if (size == 0) return nullptr;
+ check_that_malloc_is_allowed();
EIGEN_USING_STD(malloc)
void *result = malloc(size);
@@ -289,6 +304,8 @@
template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
{
+ if(ptr)
+ check_that_malloc_is_allowed();
EIGEN_USING_STD(free)
free(ptr);
}
@@ -298,9 +315,15 @@
return aligned_realloc(ptr, new_size, old_size);
}
-template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t)
+template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t old_size)
{
- return std::realloc(ptr, new_size);
+ if (ptr == nullptr) return conditional_aligned_malloc<false>(new_size);
+ if (old_size == new_size) return ptr;
+ if (new_size == 0) { conditional_aligned_free<false>(ptr); return nullptr; }
+
+ check_that_malloc_is_allowed();
+ EIGEN_USING_STD(realloc)
+ return realloc(ptr, new_size);
}
/*****************************************************************************
@@ -424,7 +447,7 @@
template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size)
{
destruct_elements_of_array<T>(ptr, size);
- Eigen::internal::aligned_free(ptr);
+ aligned_free(ptr);
}
/** \internal Deletes objects constructed with conditional_aligned_new
diff --git a/Eigen/src/plugins/IndexedViewMethods.h b/Eigen/src/plugins/IndexedViewMethods.h
index 011fcbe..b796b39 100644
--- a/Eigen/src/plugins/IndexedViewMethods.h
+++ b/Eigen/src/plugins/IndexedViewMethods.h
@@ -9,200 +9,179 @@
#if !defined(EIGEN_PARSED_BY_DOXYGEN)
-// This file is automatically included twice to generate const and non-const versions
-
-#ifndef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS
-#define EIGEN_INDEXED_VIEW_METHOD_CONST const
-#define EIGEN_INDEXED_VIEW_METHOD_TYPE ConstIndexedViewType
-#else
-#define EIGEN_INDEXED_VIEW_METHOD_CONST
-#define EIGEN_INDEXED_VIEW_METHOD_TYPE IndexedViewType
-#endif
-
-#ifndef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS
protected:
-
// define some aliases to ease readability
-template<typename Indices>
-struct IvcRowType : public internal::IndexedViewCompatibleType<Indices,RowsAtCompileTime> {};
+template <typename Indices>
+using IvcRowType = typename internal::IndexedViewCompatibleType<Indices, RowsAtCompileTime>::type;
-template<typename Indices>
-struct IvcColType : public internal::IndexedViewCompatibleType<Indices,ColsAtCompileTime> {};
+template <typename Indices>
+using IvcColType = typename internal::IndexedViewCompatibleType<Indices, ColsAtCompileTime>::type;
-template<typename Indices>
-struct IvcType : public internal::IndexedViewCompatibleType<Indices,SizeAtCompileTime> {};
+template <typename Indices>
+using IvcType = typename internal::IndexedViewCompatibleType<Indices, SizeAtCompileTime>::type;
-typedef typename internal::IndexedViewCompatibleType<Index,1>::type IvcIndex;
+typedef typename internal::IndexedViewCompatibleType<Index, 1>::type IvcIndex;
-template<typename Indices>
-typename IvcRowType<Indices>::type
-ivcRow(const Indices& indices) const {
- return internal::makeIndexedViewCompatible(indices, internal::variable_if_dynamic<Index,RowsAtCompileTime>(derived().rows()),Specialized);
+template <typename Indices>
+IvcRowType<Indices> ivcRow(const Indices& indices) const {
+ return internal::makeIndexedViewCompatible(
+ indices, internal::variable_if_dynamic<Index, RowsAtCompileTime>(derived().rows()), Specialized);
}
-template<typename Indices>
-typename IvcColType<Indices>::type
-ivcCol(const Indices& indices) const {
- return internal::makeIndexedViewCompatible(indices, internal::variable_if_dynamic<Index,ColsAtCompileTime>(derived().cols()),Specialized);
+template <typename Indices>
+IvcColType<Indices> ivcCol(const Indices& indices) const {
+ return internal::makeIndexedViewCompatible(
+ indices, internal::variable_if_dynamic<Index, ColsAtCompileTime>(derived().cols()), Specialized);
}
-template<typename Indices>
-typename IvcColType<Indices>::type
-ivcSize(const Indices& indices) const {
- return internal::makeIndexedViewCompatible(indices, internal::variable_if_dynamic<Index,SizeAtCompileTime>(derived().size()),Specialized);
+template <typename Indices>
+IvcType<Indices> ivcSize(const Indices& indices) const {  // resolved against size(), so the result is IvcType, not IvcColType
+  return internal::makeIndexedViewCompatible(
+      indices, internal::variable_if_dynamic<Index, SizeAtCompileTime>(derived().size()), Specialized);
+}
public:
-#endif
+template <typename RowIndices, typename ColIndices>
+using IndexedViewType = IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>;
-template<typename RowIndices, typename ColIndices>
-struct EIGEN_INDEXED_VIEW_METHOD_TYPE {
- typedef IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,
- typename IvcRowType<RowIndices>::type,
- typename IvcColType<ColIndices>::type> type;
-};
+template <typename RowIndices, typename ColIndices>
+using ConstIndexedViewType = IndexedView<const Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>;
// This is the generic version
-template<typename RowIndices, typename ColIndices>
-std::enable_if_t<internal::valid_indexed_view_overload<RowIndices,ColIndices>::value
- && internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::ReturnAsIndexedView,
- typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>
-operator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST
-{
- return typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type
- (derived(), ivcRow(rowIndices), ivcCol(colIndices));
+template <typename RowIndices, typename ColIndices>
+std::enable_if_t<internal::valid_indexed_view_overload<RowIndices, ColIndices>::value &&
+ internal::traits<IndexedViewType<RowIndices, ColIndices>>::ReturnAsIndexedView,
+ IndexedViewType<RowIndices, ColIndices>>
+operator()(const RowIndices& rowIndices, const ColIndices& colIndices) {
+ return IndexedViewType<RowIndices, ColIndices>(derived(), ivcRow(rowIndices), ivcCol(colIndices));
+}
+
+template <typename RowIndices, typename ColIndices>
+std::enable_if_t<internal::valid_indexed_view_overload<RowIndices, ColIndices>::value &&
+ internal::traits<ConstIndexedViewType<RowIndices, ColIndices>>::ReturnAsIndexedView,
+ ConstIndexedViewType<RowIndices, ColIndices>>
+operator()(const RowIndices& rowIndices, const ColIndices& colIndices) const {
+ return ConstIndexedViewType<RowIndices, ColIndices>(derived(), ivcRow(rowIndices), ivcCol(colIndices));
}
// The following overload returns a Block<> object
-template<typename RowIndices, typename ColIndices>
-std::enable_if_t<internal::valid_indexed_view_overload<RowIndices,ColIndices>::value
- && internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::ReturnAsBlock,
- typename internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::BlockType>
-operator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST
-{
- typedef typename internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::BlockType BlockType;
- typename IvcRowType<RowIndices>::type actualRowIndices = ivcRow(rowIndices);
- typename IvcColType<ColIndices>::type actualColIndices = ivcCol(colIndices);
- return BlockType(derived(),
- internal::first(actualRowIndices),
- internal::first(actualColIndices),
- internal::index_list_size(actualRowIndices),
- internal::index_list_size(actualColIndices));
+template <typename RowIndices, typename ColIndices>
+std::enable_if_t<internal::valid_indexed_view_overload<RowIndices, ColIndices>::value &&
+ internal::traits<IndexedViewType<RowIndices, ColIndices>>::ReturnAsBlock,
+ typename internal::traits<IndexedViewType<RowIndices, ColIndices>>::BlockType>
+operator()(const RowIndices& rowIndices, const ColIndices& colIndices) {
+ typedef typename internal::traits<IndexedViewType<RowIndices, ColIndices>>::BlockType BlockType;
+ IvcRowType<RowIndices> actualRowIndices = ivcRow(rowIndices);
+ IvcColType<ColIndices> actualColIndices = ivcCol(colIndices);
+ return BlockType(derived(), internal::first(actualRowIndices), internal::first(actualColIndices),
+ internal::index_list_size(actualRowIndices), internal::index_list_size(actualColIndices));
+}
+
+template <typename RowIndices, typename ColIndices>
+std::enable_if_t<internal::valid_indexed_view_overload<RowIndices, ColIndices>::value &&
+ internal::traits<ConstIndexedViewType<RowIndices, ColIndices>>::ReturnAsBlock,
+ typename internal::traits<ConstIndexedViewType<RowIndices, ColIndices>>::BlockType>
+operator()(const RowIndices& rowIndices, const ColIndices& colIndices) const {
+ typedef typename internal::traits<ConstIndexedViewType<RowIndices, ColIndices>>::BlockType BlockType;
+ IvcRowType<RowIndices> actualRowIndices = ivcRow(rowIndices);
+ IvcColType<ColIndices> actualColIndices = ivcCol(colIndices);
+ return BlockType(derived(), internal::first(actualRowIndices), internal::first(actualColIndices),
+ internal::index_list_size(actualRowIndices), internal::index_list_size(actualColIndices));
}
// The following overload returns a Scalar
-template<typename RowIndices, typename ColIndices>
-std::enable_if_t<internal::valid_indexed_view_overload<RowIndices,ColIndices>::value
- && internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::ReturnAsScalar,
- CoeffReturnType >
-operator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST
-{
- return Base::operator()(internal::eval_expr_given_size(rowIndices,rows()),internal::eval_expr_given_size(colIndices,cols()));
+template <typename RowIndices, typename ColIndices>
+std::enable_if_t<internal::valid_indexed_view_overload<RowIndices, ColIndices>::value &&
+ internal::traits<IndexedViewType<RowIndices, ColIndices>>::ReturnAsScalar && internal::is_lvalue<Derived>::value,
+ Scalar&>
+operator()(const RowIndices& rowIndices, const ColIndices& colIndices) {
+ return Base::operator()(internal::eval_expr_given_size(rowIndices, rows()),
+ internal::eval_expr_given_size(colIndices, cols()));
}
-// The following three overloads are needed to handle raw Index[N] arrays.
-
-template<typename RowIndicesT, std::size_t RowIndicesN, typename ColIndices>
-IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const RowIndicesT (&)[RowIndicesN],typename IvcColType<ColIndices>::type>
-operator()(const RowIndicesT (&rowIndices)[RowIndicesN], const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST
-{
- return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const RowIndicesT (&)[RowIndicesN],typename IvcColType<ColIndices>::type>
- (derived(), rowIndices, ivcCol(colIndices));
+template <typename RowIndices, typename ColIndices>
+std::enable_if_t<internal::valid_indexed_view_overload<RowIndices, ColIndices>::value &&
+ internal::traits<ConstIndexedViewType<RowIndices, ColIndices>>::ReturnAsScalar,
+ CoeffReturnType>
+operator()(const RowIndices& rowIndices, const ColIndices& colIndices) const {
+ return Base::operator()(internal::eval_expr_given_size(rowIndices, rows()),
+ internal::eval_expr_given_size(colIndices, cols()));
}
-template<typename RowIndices, typename ColIndicesT, std::size_t ColIndicesN>
-IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,typename IvcRowType<RowIndices>::type, const ColIndicesT (&)[ColIndicesN]>
-operator()(const RowIndices& rowIndices, const ColIndicesT (&colIndices)[ColIndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST
-{
- return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,typename IvcRowType<RowIndices>::type,const ColIndicesT (&)[ColIndicesN]>
- (derived(), ivcRow(rowIndices), colIndices);
-}
-
-template<typename RowIndicesT, std::size_t RowIndicesN, typename ColIndicesT, std::size_t ColIndicesN>
-IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const RowIndicesT (&)[RowIndicesN], const ColIndicesT (&)[ColIndicesN]>
-operator()(const RowIndicesT (&rowIndices)[RowIndicesN], const ColIndicesT (&colIndices)[ColIndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST
-{
- return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const RowIndicesT (&)[RowIndicesN],const ColIndicesT (&)[ColIndicesN]>
- (derived(), rowIndices, colIndices);
-}
-
-
// Overloads for 1D vectors/arrays
-template<typename Indices>
-std::enable_if_t<
- IsRowMajor && (!(internal::get_compile_time_incr<typename IvcType<Indices>::type>::value==1 || internal::is_valid_index_type<Indices>::value)),
- IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,IvcIndex,typename IvcType<Indices>::type> >
-operator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST
-{
+template <typename Indices>
+std::enable_if_t<IsRowMajor && (!(internal::get_compile_time_incr<IvcType<Indices>>::value == 1 ||
+ internal::is_valid_index_type<Indices>::value)),
+ IndexedView<Derived, IvcIndex, IvcType<Indices>>>
+operator()(const Indices& indices) {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
- return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,IvcIndex,typename IvcType<Indices>::type>
- (derived(), IvcIndex(0), ivcCol(indices));
+ return IndexedView<Derived, IvcIndex, IvcType<Indices>>(derived(), IvcIndex(0), ivcCol(indices));
}
-template<typename Indices>
-std::enable_if_t<
- (!IsRowMajor) && (!(internal::get_compile_time_incr<typename IvcType<Indices>::type>::value==1 || internal::is_valid_index_type<Indices>::value)),
- IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,typename IvcType<Indices>::type,IvcIndex> >
-operator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST
-{
+template <typename Indices>
+std::enable_if_t<IsRowMajor && (!(internal::get_compile_time_incr<IvcType<Indices>>::value == 1 ||
+ internal::is_valid_index_type<Indices>::value)),
+ IndexedView<const Derived, IvcIndex, IvcType<Indices>>>
+operator()(const Indices& indices) const {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
- return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,typename IvcType<Indices>::type,IvcIndex>
- (derived(), ivcRow(indices), IvcIndex(0));
+ return IndexedView<const Derived, IvcIndex, IvcType<Indices>>(derived(), IvcIndex(0), ivcCol(indices));
}
-template<typename Indices>
-std::enable_if_t<
- (internal::get_compile_time_incr<typename IvcType<Indices>::type>::value==1) && (!internal::is_valid_index_type<Indices>::value) && (!symbolic::is_symbolic<Indices>::value),
- VectorBlock<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,internal::array_size<Indices>::value> >
-operator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST
-{
+template <typename Indices>
+std::enable_if_t<(!IsRowMajor) && (!(internal::get_compile_time_incr<IvcType<Indices>>::value == 1 ||
+ internal::is_valid_index_type<Indices>::value)),
+ IndexedView<Derived, IvcType<Indices>, IvcIndex>>
+operator()(const Indices& indices) {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
- typename IvcType<Indices>::type actualIndices = ivcSize(indices);
- return VectorBlock<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,internal::array_size<Indices>::value>
- (derived(), internal::first(actualIndices), internal::index_list_size(actualIndices));
+ return IndexedView<Derived, IvcType<Indices>, IvcIndex>(derived(), ivcRow(indices), IvcIndex(0));
}
-template<typename IndexType>
-std::enable_if_t<symbolic::is_symbolic<IndexType>::value, CoeffReturnType >
-operator()(const IndexType& id) EIGEN_INDEXED_VIEW_METHOD_CONST
-{
- return Base::operator()(internal::eval_expr_given_size(id,size()));
-}
-
-template<typename IndicesT, std::size_t IndicesN>
-std::enable_if_t<IsRowMajor,
- IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,IvcIndex,const IndicesT (&)[IndicesN]> >
-operator()(const IndicesT (&indices)[IndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST
-{
+template <typename Indices>
+std::enable_if_t<(!IsRowMajor) && (!(internal::get_compile_time_incr<IvcType<Indices>>::value == 1 ||
+ internal::is_valid_index_type<Indices>::value)),
+ IndexedView<const Derived, IvcType<Indices>, IvcIndex>>
+operator()(const Indices& indices) const {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
- return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,IvcIndex,const IndicesT (&)[IndicesN]>
- (derived(), IvcIndex(0), indices);
+ return IndexedView<const Derived, IvcType<Indices>, IvcIndex>(derived(), ivcRow(indices), IvcIndex(0));
}
-template<typename IndicesT, std::size_t IndicesN>
-std::enable_if_t<!IsRowMajor,
- IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const IndicesT (&)[IndicesN],IvcIndex> >
-operator()(const IndicesT (&indices)[IndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST
-{
+template <typename Indices>
+std::enable_if_t<(internal::get_compile_time_incr<IvcType<Indices>>::value == 1) &&
+ (!internal::is_valid_index_type<Indices>::value) && (!symbolic::is_symbolic<Indices>::value),
+ VectorBlock<Derived, internal::array_size<Indices>::value>>
+operator()(const Indices& indices) {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
- return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const IndicesT (&)[IndicesN],IvcIndex>
- (derived(), indices, IvcIndex(0));
+ IvcType<Indices> actualIndices = ivcSize(indices);
+ return VectorBlock<Derived, internal::array_size<Indices>::value>(derived(), internal::first(actualIndices),
+ internal::index_list_size(actualIndices));
}
-#undef EIGEN_INDEXED_VIEW_METHOD_CONST
-#undef EIGEN_INDEXED_VIEW_METHOD_TYPE
+template <typename Indices>
+std::enable_if_t<(internal::get_compile_time_incr<IvcType<Indices>>::value == 1) &&
+ (!internal::is_valid_index_type<Indices>::value) && (!symbolic::is_symbolic<Indices>::value),
+ VectorBlock<const Derived, internal::array_size<Indices>::value>>
+operator()(const Indices& indices) const {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ IvcType<Indices> actualIndices = ivcSize(indices);
+ return VectorBlock<const Derived, internal::array_size<Indices>::value>(derived(), internal::first(actualIndices),
+ internal::index_list_size(actualIndices));
+}
-#ifndef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS
-#define EIGEN_INDEXED_VIEW_METHOD_2ND_PASS
-#include "IndexedViewMethods.h"
-#undef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS
-#endif
+template <typename IndexType>
+std::enable_if_t<symbolic::is_symbolic<IndexType>::value && internal::is_lvalue<Derived>::value, Scalar&> operator()(const IndexType& id) {
+ return Base::operator()(internal::eval_expr_given_size(id, size()));
+}
+
+template <typename IndexType>
+std::enable_if_t<symbolic::is_symbolic<IndexType>::value, CoeffReturnType> operator()(const IndexType& id) const {
+ return Base::operator()(internal::eval_expr_given_size(id, size()));
+}
#else // EIGEN_PARSED_BY_DOXYGEN
diff --git a/test/indexed_view.cpp b/test/indexed_view.cpp
index d149960..84a4767 100644
--- a/test/indexed_view.cpp
+++ b/test/indexed_view.cpp
@@ -289,19 +289,11 @@
VERIFY( (A(all, std::array<int,4>{{1,3,2,4}})).ColsAtCompileTime == 4);
VERIFY_IS_APPROX( (A(std::array<int,3>{{1,3,5}}, std::array<int,4>{{9,6,3,0}})), A(seqN(1,3,2), seqN(9,4,-3)) );
+ VERIFY_IS_EQUAL(A(std::array<int, 3>{1, 3, 5}, std::array<int, 4>{3, 1, 6, 5}).RowsAtCompileTime, 3);
+ VERIFY_IS_EQUAL(A(std::array<int, 3>{1, 3, 5}, std::array<int, 4>{3, 1, 6, 5}).ColsAtCompileTime, 4);
- VERIFY_IS_APPROX( A({3, 1, 6, 5}, all), A(std::array<int,4>{{3, 1, 6, 5}}, all) );
- VERIFY_IS_APPROX( A(all,{3, 1, 6, 5}), A(all,std::array<int,4>{{3, 1, 6, 5}}) );
- VERIFY_IS_APPROX( A({1,3,5},{3, 1, 6, 5}), A(std::array<int,3>{{1,3,5}},std::array<int,4>{{3, 1, 6, 5}}) );
-
- VERIFY_IS_EQUAL( A({1,3,5},{3, 1, 6, 5}).RowsAtCompileTime, 3 );
- VERIFY_IS_EQUAL( A({1,3,5},{3, 1, 6, 5}).ColsAtCompileTime, 4 );
-
- VERIFY_IS_APPROX( a({3, 1, 6, 5}), a(std::array<int,4>{{3, 1, 6, 5}}) );
- VERIFY_IS_EQUAL( a({1,3,5}).SizeAtCompileTime, 3 );
-
- VERIFY_IS_APPROX( b({3, 1, 6, 5}), b(std::array<int,4>{{3, 1, 6, 5}}) );
- VERIFY_IS_EQUAL( b({1,3,5}).SizeAtCompileTime, 3 );
+ VERIFY_IS_EQUAL( a(std::array<int,3>{1,3,5}).SizeAtCompileTime, 3 );
+ VERIFY_IS_EQUAL( b(std::array<int,3>{1,3,5}).SizeAtCompileTime, 3 );
// check mat(i,j) with weird types for i and j
{
@@ -364,6 +356,9 @@
A(X,Y) = 1;
A(XX,Y) = 1;
A(X,YY) = 1;
+ // check symbolic indices
+ a(last) = 1;
+ A(last, last) = 1;
// Check compilation of varying integer types as index types:
Index i = n/2;
diff --git a/test/nomalloc.cpp b/test/nomalloc.cpp
index 689a4cc..4b7934f 100644
--- a/test/nomalloc.cpp
+++ b/test/nomalloc.cpp
@@ -225,4 +225,7 @@
CALL_SUBTEST_6(test_reference(Matrix<float,32,32>()));
CALL_SUBTEST_7(test_reference(R1));
CALL_SUBTEST_8(Ref<MatrixXd> R2 = M1.topRows<2>(); test_reference(R2));
+
+  // Re-enable heap operations before returning: free() is now guarded by the malloc check as well.
+ Eigen::internal::set_is_malloc_allowed(true);
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index 3a917a0..55369e1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -91,7 +91,6 @@
eigen_assert(rhs_block);
BlockSizes sz = ComputeLhsRhsBlockSizes(bm, bk, bn);
char* block_mem = static_cast<char*>(d.allocate(sz.lhs_size + sz.rhs_size));
- eigen_assert(block_mem);
*lhs_block = reinterpret_cast<LhsScalar*>(block_mem);
*rhs_block = reinterpret_cast<RhsScalar*>(block_mem + sz.lhs_size);
return block_mem;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
index 839ff69..5174d8d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
@@ -166,7 +166,6 @@
Vectorizable = 0,
size = 1,
AlignedOnScalar = 0,
- HasHalfPacket = 0
};
enum {
HasAdd = 0,
diff --git a/unsupported/test/cxx11_tensor_casts.cpp b/unsupported/test/cxx11_tensor_casts.cpp
index 7b67738..81d81ef 100644
--- a/unsupported/test/cxx11_tensor_casts.cpp
+++ b/unsupported/test/cxx11_tensor_casts.cpp
@@ -15,113 +15,23 @@
using Eigen::Tensor;
using Eigen::array;
-static void test_simple_cast()
-{
- Tensor<float, 2> ftensor(20,30);
- ftensor = ftensor.random() * 100.f;
- Tensor<char, 2> chartensor(20,30);
- chartensor.setRandom();
- Tensor<std::complex<float>, 2> cplextensor(20,30);
- cplextensor.setRandom();
-
- chartensor = ftensor.cast<char>();
- cplextensor = ftensor.cast<std::complex<float> >();
-
- for (int i = 0; i < 20; ++i) {
- for (int j = 0; j < 30; ++j) {
- VERIFY_IS_EQUAL(chartensor(i,j), static_cast<char>(ftensor(i,j)));
- VERIFY_IS_EQUAL(cplextensor(i,j), static_cast<std::complex<float> >(ftensor(i,j)));
- }
- }
-}
-
-
-static void test_vectorized_cast()
-{
- Tensor<int, 2> itensor(20,30);
- itensor = itensor.random() / 1000;
- Tensor<float, 2> ftensor(20,30);
- ftensor.setRandom();
- Tensor<double, 2> dtensor(20,30);
- dtensor.setRandom();
-
- ftensor = itensor.cast<float>();
- dtensor = itensor.cast<double>();
-
- for (int i = 0; i < 20; ++i) {
- for (int j = 0; j < 30; ++j) {
- VERIFY_IS_EQUAL(itensor(i,j), static_cast<int>(ftensor(i,j)));
- VERIFY_IS_EQUAL(dtensor(i,j), static_cast<double>(ftensor(i,j)));
- }
- }
-}
-
-
-static void test_float_to_int_cast()
-{
- Tensor<float, 2> ftensor(20,30);
- ftensor = ftensor.random() * 1000.0f;
- Tensor<double, 2> dtensor(20,30);
- dtensor = dtensor.random() * 1000.0;
-
- Tensor<int, 2> i1tensor = ftensor.cast<int>();
- Tensor<int, 2> i2tensor = dtensor.cast<int>();
-
- for (int i = 0; i < 20; ++i) {
- for (int j = 0; j < 30; ++j) {
- VERIFY_IS_EQUAL(i1tensor(i,j), static_cast<int>(ftensor(i,j)));
- VERIFY_IS_EQUAL(i2tensor(i,j), static_cast<int>(dtensor(i,j)));
- }
- }
-}
-
-
-static void test_big_to_small_type_cast()
-{
- Tensor<double, 2> dtensor(20, 30);
- dtensor.setRandom();
- Tensor<float, 2> ftensor(20, 30);
- ftensor = dtensor.cast<float>();
-
- for (int i = 0; i < 20; ++i) {
- for (int j = 0; j < 30; ++j) {
- VERIFY_IS_APPROX(dtensor(i,j), static_cast<double>(ftensor(i,j)));
- }
- }
-}
-
-
-static void test_small_to_big_type_cast()
-{
- Tensor<float, 2> ftensor(20, 30);
- ftensor.setRandom();
- Tensor<double, 2> dtensor(20, 30);
- dtensor = ftensor.cast<double>();
-
- for (int i = 0; i < 20; ++i) {
- for (int j = 0; j < 30; ++j) {
- VERIFY_IS_APPROX(dtensor(i,j), static_cast<double>(ftensor(i,j)));
- }
- }
-}
-
template <typename FromType, typename ToType>
static void test_type_cast() {
- Tensor<FromType, 2> ftensor(100, 200);
+ Tensor<FromType, 2> ftensor(101, 201);
// Generate random values for a valid cast.
- for (int i = 0; i < 100; ++i) {
- for (int j = 0; j < 200; ++j) {
+ for (int i = 0; i < 101; ++i) {
+ for (int j = 0; j < 201; ++j) {
ftensor(i, j) = internal::random_without_cast_overflow<FromType,ToType>::value();
}
}
- Tensor<ToType, 2> ttensor(100, 200);
+ Tensor<ToType, 2> ttensor(101, 201);
ttensor = ftensor.template cast<ToType>();
- for (int i = 0; i < 100; ++i) {
- for (int j = 0; j < 200; ++j) {
- const ToType ref = internal::cast<FromType,ToType>(ftensor(i, j));
- VERIFY_IS_APPROX(ttensor(i, j), ref);
+ for (int i = 0; i < 101; ++i) {
+ for (int j = 0; j < 201; ++j) {
+ const ToType ref = static_cast<ToType>(ftensor(i, j));
+ VERIFY_IS_EQUAL(ttensor(i, j), ref);
}
}
}
@@ -161,12 +71,6 @@
EIGEN_DECLARE_TEST(cxx11_tensor_casts)
{
- CALL_SUBTEST(test_simple_cast());
- CALL_SUBTEST(test_vectorized_cast());
- CALL_SUBTEST(test_float_to_int_cast());
- CALL_SUBTEST(test_big_to_small_type_cast());
- CALL_SUBTEST(test_small_to_big_type_cast());
-
CALL_SUBTEST(test_cast_runner<bool>::run());
CALL_SUBTEST(test_cast_runner<int8_t>::run());
CALL_SUBTEST(test_cast_runner<int16_t>::run());