Internal change
PiperOrigin-RevId: 286243430
Change-Id: Idd0c322b8ead5effc67df51503b604204cf049b9
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
index dc9af3a..35a8074 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -73,14 +73,65 @@
enum class TensorBlockShapeType { kUniformAllDims, kSkewedInnerDims };
struct TensorBlockResourceRequirements {
- TensorBlockShapeType shape_type;
- size_t size;
+ TensorBlockShapeType shape_type; // target block shape
+ size_t size; // target block size
+ TensorOpCost cost_per_coeff; // cost of computing a single block element
+
+ template <typename Scalar>
+ static TensorBlockResourceRequirements withShapeAndSize(
+ TensorBlockShapeType shape_type, size_t size_in_bytes,
+ TensorOpCost cost) {
+ const size_t size = numext::maxi(size_t(1), size_in_bytes / sizeof(Scalar));
+ return {shape_type, size, cost};
+ }
+
+ template <typename Scalar>
+ static TensorBlockResourceRequirements withShapeAndSize(
+ TensorBlockShapeType shape_type, size_t size_in_bytes) {
+ // This default cost per coefficient is valid for most materialized tensor
+ // block evaluation implementations, because they typically just read
+ // coefficients from the underlying tensor storage, and write to the tensor
+ // block buffer (scratch or destination memory, reads and writes have linear
+ // access pattern). We ignore the fixed cost of block evaluation, because in
+ // practice it should be negligible.
+ //
+ // Lazy block evaluation adds the cost of calling a functor for each
+ // coefficient.
+ //
+ // All non-trivial block evaluation implementations must provide their own
+ // cost approximation (e.g. shuffling inner dimension has a much higher cost
+ // because it reads memory randomly, although the total number of moved
+ // bytes is the same).
+ return withShapeAndSize<Scalar>(shape_type, size_in_bytes,
+ {/*bytes_loaded=*/sizeof(Scalar),
+ /*bytes_stored=*/sizeof(Scalar),
+ /*compute_cycles=*/0});
+ }
+
+ template <typename Scalar>
+ static TensorBlockResourceRequirements skewed(size_t size_in_bytes) {
+ return withShapeAndSize<Scalar>(TensorBlockShapeType::kSkewedInnerDims,
+ size_in_bytes);
+ }
+
+ template <typename Scalar>
+ static TensorBlockResourceRequirements uniform(size_t size_in_bytes) {
+ return withShapeAndSize<Scalar>(TensorBlockShapeType::kUniformAllDims,
+ size_in_bytes);
+ }
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE TensorBlockResourceRequirements
- merge(const TensorBlockResourceRequirements &lhs,
- const TensorBlockResourceRequirements &rhs) {
- return {merge(lhs.shape_type, rhs.shape_type), merge(rhs.size, lhs.size)};
+ merge(const TensorBlockResourceRequirements& lhs,
+ const TensorBlockResourceRequirements& rhs) {
+ return {merge(lhs.shape_type, rhs.shape_type), // shape_type
+ merge(lhs.size, rhs.size), // size
+ merge(lhs.cost_per_coeff, rhs.cost_per_coeff)}; // cost_per_coeff
+ }
+
+ TensorBlockResourceRequirements& addCostPerCoeff(TensorOpCost cost) {
+ cost_per_coeff += cost;
+ return *this;
}
// This is a resource requirement that should be returned from expressions
@@ -88,10 +139,10 @@
// expression with raw buffer access).
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE TensorBlockResourceRequirements any() {
- return {TensorBlockShapeType::kUniformAllDims, 1};
+ return {TensorBlockShapeType::kUniformAllDims, 1, {0, 0, 0}};
}
-private:
+ private:
using Requirements = TensorBlockResourceRequirements;
EIGEN_DEVICE_FUNC
@@ -100,13 +151,19 @@
}
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE TensorBlockShapeType merge(TensorBlockShapeType lhs,
- TensorBlockShapeType rhs) {
+ static EIGEN_STRONG_INLINE TensorBlockShapeType
+ merge(TensorBlockShapeType lhs, TensorBlockShapeType rhs) {
return (lhs == TensorBlockShapeType::kSkewedInnerDims ||
rhs == TensorBlockShapeType::kSkewedInnerDims)
? TensorBlockShapeType::kSkewedInnerDims
: TensorBlockShapeType::kUniformAllDims;
}
+
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE TensorOpCost merge(TensorOpCost lhs_cost,
+ TensorOpCost rhs_cost) {
+ return lhs_cost + rhs_cost;
+ }
};
// -------------------------------------------------------------------------- //
@@ -131,8 +188,10 @@
class DestinationBuffer {
public:
enum DestinationBufferKind : int {
- // The above explicit specification of "int" as the enum basetype is needed
- // to get around a HIPCC link error ("the field type is not amp-compatible")
+ // The above explicit specification of "int" as the enum basetype is
+ // needed
+ // to get around a HIPCC link error ("the field type is not
+ // amp-compatible")
// which is issued for class members with the enum type.
// TODO(rocm):
// remove the "int" basetype once HIPCC has been fixed to not error out
@@ -280,7 +339,7 @@
TensorBlockMapper() = default;
TensorBlockMapper(const DSizes<IndexType, NumDims>& dimensions,
- const TensorBlockResourceRequirements& requirements)
+ const TensorBlockResourceRequirements& requirements)
: m_tensor_dimensions(dimensions), m_requirements(requirements) {
// Compute block dimensions and the total number of blocks.
InitializeBlockDimensions();
@@ -299,8 +358,8 @@
return m_block_dimensions;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- BlockDescriptor blockDescriptor(IndexType block_index) const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockDescriptor
+ blockDescriptor(IndexType block_index) const {
static const bool isColMajor = Layout == static_cast<int>(ColMajor);
IndexType offset = 0;
@@ -416,7 +475,7 @@
eigen_assert(m_block_dimensions.TotalSize() >=
numext::mini<IndexType>(target_block_size,
- m_tensor_dimensions.TotalSize()));
+ m_tensor_dimensions.TotalSize()));
// Calculate block counts by dimension and total block count.
DSizes<IndexType, NumDims> block_count;
@@ -761,7 +820,6 @@
template <typename UnaryOp, typename ArgTensorBlock>
class TensorCwiseUnaryBlock {
-
static const bool NoArgBlockAccess =
internal::is_void<typename ArgTensorBlock::XprType>::value;
@@ -793,7 +851,6 @@
template <typename BinaryOp, typename LhsTensorBlock, typename RhsTensorBlock>
class TensorCwiseBinaryBlock {
-
static const bool NoArgBlockAccess =
internal::is_void<typename LhsTensorBlock::XprType>::value ||
internal::is_void<typename RhsTensorBlock::XprType>::value;
@@ -840,7 +897,6 @@
template <typename BlockFactory, typename ArgTensorBlock>
class TensorUnaryExprBlock {
-
typedef typename ArgTensorBlock::XprType ArgXprType;
static const bool NoArgBlockAccess = internal::is_void<ArgXprType>::value;
@@ -872,7 +928,6 @@
template <typename BlockFactory, typename Arg1TensorBlock,
typename Arg2TensorBlock, typename Arg3TensorBlock>
class TensorTernaryExprBlock {
-
typedef typename Arg1TensorBlock::XprType Arg1XprType;
typedef typename Arg2TensorBlock::XprType Arg2XprType;
typedef typename Arg3TensorBlock::XprType Arg3XprType;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index 620c874..3408f90 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -620,12 +620,10 @@
internal::TensorBlockResourceRequirements getResourceRequirements() const {
// TODO(wuke): Targeting L1 size is 30% faster than targeting L{-1} on large
// tensors. But this might need further tuning.
- const size_t target_block_size = numext::maxi<size_t>(
- 1, m_device.firstLevelCacheSize() / sizeof(Scalar));
-
+ const size_t target_size = m_device.firstLevelCacheSize();
return internal::TensorBlockResourceRequirements::merge(
- {internal::TensorBlockShapeType::kSkewedInnerDims, target_block_size},
- m_impl.getResourceRequirements());
+ m_impl.getResourceRequirements(),
+ internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size));
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
index f51a855..5b28e70 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
@@ -296,11 +296,9 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
internal::TensorBlockResourceRequirements getResourceRequirements() const {
- const size_t target_block_size =
- numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
-
+ const size_t target_size = m_device.lastLevelCacheSize();
return internal::TensorBlockResourceRequirements::merge(
- {internal::TensorBlockShapeType::kSkewedInnerDims, target_block_size},
+ internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
m_impl.getResourceRequirements());
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index 146cc32..d4532b7 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -521,7 +521,9 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
internal::TensorBlockResourceRequirements getResourceRequirements() const {
- return m_argImpl.getResourceRequirements();
+ static const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
+ return m_argImpl.getResourceRequirements().addCostPerCoeff(
+ {0, 0, functor_cost / PacketSize});
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
@@ -654,9 +656,11 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
internal::TensorBlockResourceRequirements getResourceRequirements() const {
+ static const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
return internal::TensorBlockResourceRequirements::merge(
- m_leftImpl.getResourceRequirements(),
- m_rightImpl.getResourceRequirements());
+ m_leftImpl.getResourceRequirements(),
+ m_rightImpl.getResourceRequirements())
+ .addCostPerCoeff({0, 0, functor_cost / PacketSize});
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
@@ -934,11 +938,16 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
internal::TensorBlockResourceRequirements getResourceRequirements() const {
+ auto then_req = m_thenImpl.getResourceRequirements();
+ auto else_req = m_elseImpl.getResourceRequirements();
+
+ auto merged_req =
+ internal::TensorBlockResourceRequirements::merge(then_req, else_req);
+ merged_req.cost_per_coeff =
+ then_req.cost_per_coeff.cwiseMax(else_req.cost_per_coeff);
+
return internal::TensorBlockResourceRequirements::merge(
- m_condImpl.getResourceRequirements(),
- internal::TensorBlockResourceRequirements::merge(
- m_thenImpl.getResourceRequirements(),
- m_elseImpl.getResourceRequirements()));
+ m_condImpl.getResourceRequirements(), merged_req);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index b90791d..93bab11 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -245,8 +245,8 @@
evaluator.getResourceRequirements();
// Update target block size based on cost model.
- TensorOpCost cost = evaluator.costPerCoeff(Vectorizable);
- double taskSize = TensorCostModel<ThreadPoolDevice>::taskSize(1, cost);
+ double taskSize = TensorCostModel<ThreadPoolDevice>::taskSize(
+ 1, requirements.cost_per_coeff);
requirements.size = static_cast<size_t>(1.0 / taskSize);
TensorBlockMapper block_mapper(
@@ -259,7 +259,8 @@
align *
divup<size_t>(block_size * sizeof(typename Evaluator::Scalar), align);
- return {block_mapper, cost * block_size, aligned_blocksize};
+ return {block_mapper, requirements.cost_per_coeff * block_size,
+ aligned_blocksize};
}
template <typename Evaluator, typename StorageIndex, bool Vectorizable>
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
index fb4b5e2..b1ff1d8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
@@ -166,10 +166,10 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
internal::TensorBlockResourceRequirements getResourceRequirements() const {
- const size_t target_block_size = numext::maxi<size_t>(
- 1, m_device.firstLevelCacheSize() / sizeof(Scalar));
- return {internal::TensorBlockShapeType::kSkewedInnerDims,
- target_block_size};
+ const size_t target_size = m_device.firstLevelCacheSize();
+ // TODO(ezhulenev): Generator should have a cost.
+ return internal::TensorBlockResourceRequirements::skewed<Scalar>(
+ target_size);
}
struct BlockIteratorState {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index 5c20366..879a67e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -634,10 +634,9 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
internal::TensorBlockResourceRequirements getResourceRequirements() const {
- const size_t target_block_size =
- numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
+ const size_t target_size = m_device.lastLevelCacheSize();
return internal::TensorBlockResourceRequirements::merge(
- {internal::TensorBlockShapeType::kSkewedInnerDims, target_block_size},
+ internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
m_impl.getResourceRequirements());
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
index 201bea6..e070d0b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
@@ -229,10 +229,9 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
internal::TensorBlockResourceRequirements getResourceRequirements() const {
- const size_t target_block_size =
- numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
+ const size_t target_size = m_device.lastLevelCacheSize();
return internal::TensorBlockResourceRequirements::merge(
- {internal::TensorBlockShapeType::kSkewedInnerDims, target_block_size},
+ internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
m_impl.getResourceRequirements());
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
index c4ac81d..2fc85c1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
@@ -246,10 +246,12 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
internal::TensorBlockResourceRequirements getResourceRequirements() const {
- const size_t target_block_size =
- numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
- return {internal::TensorBlockShapeType::kSkewedInnerDims,
- target_block_size};
+ const size_t target_size = m_device.lastLevelCacheSize();
+ // Block evaluation reads underlying memory in reverse order, and the default
+ // cost model does not properly capture this in bytes stored/loaded.
+ return internal::TensorBlockResourceRequirements::skewed<Scalar>(
+ target_size)
+ .addCostPerCoeff({0, 0, 24});
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
index 1a6891f..597ca64 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
@@ -249,14 +249,21 @@
static const int inner_dim =
Layout == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
- const size_t target_block_size = numext::maxi<size_t>(
- 1, m_device.firstLevelCacheSize() / sizeof(Scalar));
-
+ const size_t target_size = m_device.firstLevelCacheSize();
const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim;
- return {inner_dim_shuffled
- ? internal::TensorBlockShapeType::kUniformAllDims
- : internal::TensorBlockShapeType::kSkewedInnerDims,
- target_block_size};
+
+ // A shuffled inner dimension leads to random memory access, which is not
+ // captured by the default cost model bytes loaded/stored. We add this cost
+ // explicitly. The number of cycles was picked based on benchmarks.
+ // TODO(ezhulenev): This number was picked based on very questionable
+ // benchmarks; add benchmarks that are representative of real workloads.
+ using BlockRequirements = internal::TensorBlockResourceRequirements;
+ if (inner_dim_shuffled) {
+ return BlockRequirements::uniform<Scalar>(target_size)
+ .addCostPerCoeff({0, 0, NumDims * 28});
+ } else {
+ return BlockRequirements::skewed<Scalar>(target_size);
+ }
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
diff --git a/unsupported/test/cxx11_tensor_block_access.cpp b/unsupported/test/cxx11_tensor_block_access.cpp
index 33dc253..5fb12e0 100644
--- a/unsupported/test/cxx11_tensor_block_access.cpp
+++ b/unsupported/test/cxx11_tensor_block_access.cpp
@@ -21,6 +21,7 @@
using Eigen::ColMajor;
using Eigen::internal::TensorBlockShapeType;
+static TensorOpCost zeroCost() { return {0, 0, 0}; }
template<typename T>
static const T& choose(int layout, const T& col, const T& row) {
@@ -73,7 +74,7 @@
// Test uniform blocks.
TensorBlockMapper uniform_block_mapper(
- tensor_dims, {TensorBlockShapeType::kUniformAllDims, 100});
+ tensor_dims, {TensorBlockShapeType::kUniformAllDims, 100, zeroCost()});
VERIFY_IS_EQUAL(uniform_block_mapper.blockCount(), 100);
VERIFY_IS_EQUAL(uniform_block_mapper.blockTotalSize(), 100);
@@ -85,7 +86,7 @@
// Test skewed to inner dims blocks.
TensorBlockMapper skewed_block_mapper(
- tensor_dims, {TensorBlockShapeType::kSkewedInnerDims, 100});
+ tensor_dims, {TensorBlockShapeType::kSkewedInnerDims, 100, zeroCost()});
VERIFY_IS_EQUAL(skewed_block_mapper.blockCount(), 100);
VERIFY_IS_EQUAL(skewed_block_mapper.blockTotalSize(), 100);
@@ -130,7 +131,8 @@
std::set<Index> coeff_set;
// Try different combinations of block types and sizes.
- TensorBlockMapper block_mapper(dims, {RandomShape(), RandomTargetSize(dims)});
+ TensorBlockMapper block_mapper(
+ dims, {RandomShape(), RandomTargetSize(dims), zeroCost()});
for (int i = 0; i < block_mapper.blockCount(); ++i) {
auto block = block_mapper.blockDescriptor(i);
@@ -233,9 +235,8 @@
// Test shape 'UniformAllDims' with uniform 'max_coeff count'.
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 5 * 5 * 5 * 5 * 5;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+ max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
for (int i = 0; i < 5; ++i) {
VERIFY_IS_EQUAL(5, block.dimensions()[i]);
@@ -248,9 +249,8 @@
if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 7 * 5 * 5 * 5 * 5;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+ max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[0]);
for (int i = 1; i < 5; ++i) {
@@ -260,9 +260,8 @@
} else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 5 * 5 * 5 * 5 * 6;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+ max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(6, block.dimensions()[4]);
for (int i = 3; i >= 0; --i) {
@@ -276,9 +275,8 @@
if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 11 * 5 * 5 * 5 * 5;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+ max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(11, block.dimensions()[0]);
for (int i = 1; i < 5; ++i) {
@@ -288,9 +286,8 @@
} else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 5 * 5 * 5 * 5 * 7;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+ max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
for (int i = 3; i >= 0; --i) {
@@ -304,9 +301,8 @@
if (Layout == ColMajor) {
DSizes<Index, 5> dims(7, 5, 6, 17, 7);
const Index max_coeff_count = 7 * 5 * 6 * 7 * 5;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+ max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[0]);
VERIFY_IS_EQUAL(5, block.dimensions()[1]);
@@ -317,9 +313,8 @@
} else {
DSizes<Index, 5> dims(7, 5, 6, 9, 7);
const Index max_coeff_count = 5 * 5 * 5 * 6 * 7;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+ max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
VERIFY_IS_EQUAL(6, block.dimensions()[3]);
@@ -333,9 +328,8 @@
if (Layout == ColMajor) {
DSizes<Index, 5> dims(7, 5, 6, 17, 7);
const Index max_coeff_count = 7 * 5 * 6 * 17 * 7;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+ max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[0]);
VERIFY_IS_EQUAL(5, block.dimensions()[1]);
@@ -346,9 +340,8 @@
} else {
DSizes<Index, 5> dims(7, 5, 6, 9, 7);
const Index max_coeff_count = 7 * 5 * 6 * 9 * 7;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+ max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
VERIFY_IS_EQUAL(9, block.dimensions()[3]);
@@ -369,9 +362,9 @@
if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 10 * 1 * 1 * 1 * 1;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(
+ dims,
+ {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(10, block.dimensions()[0]);
for (int i = 1; i < 5; ++i) {
@@ -381,9 +374,9 @@
} else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 1 * 1 * 1 * 1 * 6;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(
+ dims,
+ {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(6, block.dimensions()[4]);
for (int i = 3; i >= 0; --i) {
@@ -396,9 +389,9 @@
if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 11 * 1 * 1 * 1 * 1;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(
+ dims,
+ {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(11, block.dimensions()[0]);
for (int i = 1; i < 5; ++i) {
@@ -408,9 +401,9 @@
} else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 1 * 1 * 1 * 1 * 7;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(
+ dims,
+ {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
for (int i = 3; i >= 0; --i) {
@@ -424,9 +417,9 @@
if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 11 * 3 * 1 * 1 * 1;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(
+ dims,
+ {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(11, block.dimensions()[0]);
VERIFY_IS_EQUAL(3, block.dimensions()[1]);
@@ -437,9 +430,9 @@
} else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 1 * 1 * 1 * 15 * 7;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(
+ dims,
+ {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
VERIFY_IS_EQUAL(15, block.dimensions()[3]);
@@ -454,9 +447,9 @@
if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 11 * 5 * 5 * 1 * 1;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(
+ dims,
+ {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(11, block.dimensions()[0]);
VERIFY_IS_EQUAL(5, block.dimensions()[1]);
@@ -468,9 +461,9 @@
} else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 1 * 1 * 5 * 17 * 7;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(
+ dims,
+ {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
VERIFY_IS_EQUAL(17, block.dimensions()[3]);
@@ -485,9 +478,9 @@
if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 11 * 5 * 6 * 17 * 7;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(
+ dims,
+ {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(11, block.dimensions()[0]);
VERIFY_IS_EQUAL(5, block.dimensions()[1]);
@@ -498,9 +491,9 @@
} else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 11 * 5 * 6 * 17 * 7;
- TensorBlockMapper
- block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
- max_coeff_count});
+ TensorBlockMapper block_mapper(
+ dims,
+ {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
VERIFY_IS_EQUAL(17, block.dimensions()[3]);
@@ -524,7 +517,8 @@
DSizes<Index, 1> dims(0);
for (size_t max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
- TensorBlockMapper block_mapper(dims, {block_shape, max_coeff_count});
+ TensorBlockMapper block_mapper(
+ dims, {block_shape, max_coeff_count, zeroCost()});
VERIFY_IS_EQUAL(block_mapper.blockCount(), 0);
VERIFY(block_mapper.blockTotalSize() >= 1);
}
@@ -537,7 +531,8 @@
for (int dim2 = 0; dim2 < 3; ++dim2) {
DSizes<Index, 2> dims(dim1, dim2);
for (size_t max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
- TensorBlockMapper block_mapper(dims, {block_shape, max_coeff_count});
+ TensorBlockMapper block_mapper(
+ dims, {block_shape, max_coeff_count, zeroCost()});
if (dim1 * dim2 == 0) {
VERIFY_IS_EQUAL(block_mapper.blockCount(), 0);
}
diff --git a/unsupported/test/cxx11_tensor_block_eval.cpp b/unsupported/test/cxx11_tensor_block_eval.cpp
index 4a785dc..81f0c90 100644
--- a/unsupported/test/cxx11_tensor_block_eval.cpp
+++ b/unsupported/test/cxx11_tensor_block_eval.cpp
@@ -64,7 +64,8 @@
using BlockMapper = internal::TensorBlockMapper<NumDims, Layout, Index>;
BlockMapper block_mapper(dims,
{internal::TensorBlockShapeType::kSkewedInnerDims,
- internal::random<size_t>(1, dims.TotalSize())});
+ internal::random<size_t>(1, dims.TotalSize()),
+ {0, 0, 0}});
Index total_blocks = block_mapper.blockCount();
Index block_index = internal::random<Index>(0, total_blocks - 1);
diff --git a/unsupported/test/cxx11_tensor_block_io.cpp b/unsupported/test/cxx11_tensor_block_io.cpp
index 2558443..b8600ea 100644
--- a/unsupported/test/cxx11_tensor_block_io.cpp
+++ b/unsupported/test/cxx11_tensor_block_io.cpp
@@ -75,8 +75,8 @@
// Construct a tensor block mapper.
using TensorBlockMapper =
internal::TensorBlockMapper<NumDims, Layout, Index>;
- TensorBlockMapper block_mapper(dims, {RandomBlockShape(),
- RandomTargetBlockSize(dims)});
+ TensorBlockMapper block_mapper(
+ dims, {RandomBlockShape(), RandomTargetBlockSize(dims), {0, 0, 0}});
// We will copy data from input to output through this buffer.
Tensor<T, NumDims, Layout> block(block_mapper.blockDimensions());
@@ -146,8 +146,10 @@
// NOTE: Tensor block mapper works with shuffled dimensions.
using TensorBlockMapper =
internal::TensorBlockMapper<NumDims, Layout, Index>;
- TensorBlockMapper block_mapper(output_tensor_dims, {RandomBlockShape(),
- RandomTargetBlockSize(output_tensor_dims)});
+ TensorBlockMapper block_mapper(output_tensor_dims,
+ {RandomBlockShape(),
+ RandomTargetBlockSize(output_tensor_dims),
+ {0, 0, 0}});
// We will copy data from input to output through this buffer.
Tensor<T, NumDims, Layout> block(block_mapper.blockDimensions());