blob: 2554ce372d4b714fd59f572065edb6031616dd3d [file] [log] [blame]
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Ke Yang <yangke@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_CXX11_NEURAL_NETWORKS_BACKWARD_SPATIAL_CONVOLUTIONS_H
#define EIGEN_CXX11_NEURAL_NETWORKS_BACKWARD_SPATIAL_CONVOLUTIONS_H
namespace Eigen {
/** SpatialConvolutionBackwardInput
* \ingroup CXX11_NeuralNetworks_Module
*
* \brief Computes the backprop for the input of a 2D convolution.
*
* The output_backward parameter is expected to be a tensor with a rank of 3 or more (output depth, i.e. filters, height, width, and optionally others)
* The kernel parameter is expected to be a 4D tensor (filters, channels, kernel_height, kernel_width)
* The output_backward and the kernel must both be in col-major layout. The result will also be in col-major layout.
*
* If row_in_stride, col_in_stride > 1, then applies convolution with holes (aka atrous convolution), sampling every row_in_stride, col_in_stride input pixels.
*
* The result can be assigned to a tensor of rank equal to the rank of the output_backward. The dimensions of the result will be channels (the input depth), height, width (and others if applicable).
*
* It is possible to swap the order of the width and height dimensions provided that the same order is used in the input, the kernel, and the output.
*
*/
#ifdef EIGEN_HAS_INDEX_LIST
// With compile-time index lists the reversal flags are encoded in the type:
// reverse the last two dims of a col-major kernel (filters, channels, rows,
// cols) or the first two dims of its row-major mirror.
typedef IndexList<type2index<0>, type2index<0>, type2index<1>, type2index<1> >
ReverseColMajor;
typedef IndexList<type2index<1>, type2index<1>, type2index<0>, type2index<0> >
ReverseRowMajor;
#else
// Fallback: runtime bool arrays; the flags are filled in inside
// SpatialConvolutionBackwardInput (see the #ifndef EIGEN_HAS_INDEX_LIST
// section there).
typedef array<bool, 4> ReverseColMajor;
typedef array<bool, 4> ReverseRowMajor;
#endif
// Builds the lazy expression
//   reshape(contract(eval(reshape(shuffle(reverse(kernel)))),
//                    reshape(image_patches(output_backward))))
// The conditional picks between the col-major form (kernel.contract(patches))
// and the row-major form (patches.contract(kernel)); the two branches of the
// return type below mirror the two branches of the choose(...) at the end.
template <typename OutputBackward, typename Kernel>
EIGEN_ALWAYS_INLINE static const typename internal::conditional<
internal::traits<OutputBackward>::Layout == ColMajor,
TensorReshapingOp<
const DSizes<typename internal::traits<OutputBackward>::Index,
internal::traits<OutputBackward>::NumDimensions>,
const TensorContractionOp<
const array<
IndexPair<typename internal::traits<OutputBackward>::Index>, 1>,
const Eigen::TensorForcedEvalOp<const TensorReshapingOp<
const DSizes<typename internal::traits<OutputBackward>::Index,
2>,
const TensorShufflingOp<
const array<
typename internal::traits<OutputBackward>::Index, 4>,
const TensorReverseOp<const ReverseColMajor,
const Kernel> > > >,
const TensorReshapingOp<
const DSizes<typename internal::traits<OutputBackward>::Index,
2>,
const TensorImagePatchOp<Dynamic, Dynamic,
const OutputBackward> > > >,
TensorReshapingOp<
const DSizes<typename internal::traits<OutputBackward>::Index,
internal::traits<OutputBackward>::NumDimensions>,
const TensorContractionOp<
const array<
IndexPair<typename internal::traits<OutputBackward>::Index>, 1>,
const TensorReshapingOp<
const DSizes<typename internal::traits<OutputBackward>::Index,
2>,
const TensorImagePatchOp<Dynamic, Dynamic,
const OutputBackward> >,
const Eigen::TensorForcedEvalOp<const TensorReshapingOp<
const DSizes<typename internal::traits<OutputBackward>::Index,
2>,
const TensorShufflingOp<
const array<
typename internal::traits<OutputBackward>::Index, 4>,
const TensorReverseOp<const ReverseRowMajor,
const Kernel> > > > > > >::type
SpatialConvolutionBackwardInput(
const Kernel& kernel, const OutputBackward& output_backward,
typename internal::traits<OutputBackward>::Index inputRows,
typename internal::traits<OutputBackward>::Index inputCols,
const DenseIndex row_stride = 1, const DenseIndex col_stride = 1,
const DenseIndex row_in_stride = 1, const DenseIndex col_in_stride = 1) {
typedef typename internal::traits<OutputBackward>::Index TensorIndex;
typedef typename internal::traits<OutputBackward>::Scalar OutScalar;
// TensorRef wrappers give access to dimensions() without forcing an
// evaluation of the (possibly lazy) kernel / output_backward expressions.
TensorRef<Tensor<typename internal::traits<Kernel>::Scalar, internal::traits<Kernel>::NumDimensions, internal::traits<Kernel>::Layout, TensorIndex> > kern(kernel);
TensorRef<Tensor<OutScalar, internal::traits<OutputBackward>::NumDimensions, internal::traits<OutputBackward>::Layout, TensorIndex> > out(output_backward);
// Both operands must use the same storage order.
EIGEN_STATIC_ASSERT(internal::traits<Kernel>::Layout == internal::traits<OutputBackward>::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE);
static const bool isColMajor = (internal::traits<OutputBackward>::Layout == ColMajor);
static const int NumDims = internal::traits<OutputBackward>::NumDimensions;
// Number of filters to apply. This is the same as the output depth of the result
const TensorIndex kernelFilters = isColMajor ? kern.dimensions()[0] : kern.dimensions()[3];
// Number of channels. This is the same as the input depth.
const TensorIndex kernelChannels = isColMajor ? kern.dimensions()[1] : kern.dimensions()[2];
const TensorIndex kernelRows = isColMajor ? kern.dimensions()[2] : kern.dimensions()[1];
const TensorIndex kernelCols = isColMajor ? kern.dimensions()[3] : kern.dimensions()[0];
// This is the effective kernel size, taking into account the (*_in_stride - 1) zero-values
// inserted between consecutive kernel elements in atrous convolution
const TensorIndex kernelRowsEff = kernelRows + (kernelRows - 1) * (row_in_stride - 1);
const TensorIndex kernelColsEff = kernelCols + (kernelCols - 1) * (col_in_stride - 1);
const TensorIndex outputRows = isColMajor ? output_backward.dimension(1) : output_backward.dimension(NumDims - 2);
const TensorIndex outputCols = isColMajor ? output_backward.dimension(2) : output_backward.dimension(NumDims - 3);
// Computing the forward padding
// Reconstruct the (SAME-style) padding the forward pass would have used:
// half of the total overlap, clamped at zero.
const TensorIndex forward_pad_top = numext::maxi<Index>(0, ((outputRows - 1) * row_stride + kernelRowsEff - inputRows) / 2);
const TensorIndex forward_pad_left = numext::maxi<Index>(0, ((outputCols - 1) * col_stride + kernelColsEff - inputCols) / 2);
// Input backprop is a convolution of the (zero-)padded output_backward with
// the spatially reversed kernel; the padding below is chosen so the patch
// extraction yields exactly inputRows x inputCols positions.
const TensorIndex padding_top = kernelRowsEff - 1 - forward_pad_top;
const TensorIndex padding_left = kernelColsEff - 1 - forward_pad_left;
const TensorIndex padding_bottom = inputRows - (outputRows - 1) * row_stride - 2 - padding_top + kernelRowsEff;
const TensorIndex padding_right = inputCols - (outputCols - 1) * col_stride - 2 - padding_left + kernelColsEff;
eigen_assert(padding_top >= 0);
eigen_assert(padding_left >= 0);
eigen_assert(padding_bottom >= 0);
eigen_assert(padding_right >= 0);
// The kernel has dimensions filters X channels X patch_rows X patch_cols
// We need to reverse the kernel along dimensions corresponding to rows and
// cols.
// TODO(yangke): we can make things slightly faster by collapsing the dimensions
// where we don't reverse. Try that once we have a faster compiler.
typedef typename internal::conditional<isColMajor, ReverseColMajor,
ReverseRowMajor>::type Reverse;
Reverse kernel_reverse;
#ifndef EIGEN_HAS_INDEX_LIST
// Runtime fallback: flip only the spatial dims (rows, cols); with
// EIGEN_HAS_INDEX_LIST the same flags are baked into the Reverse type.
if (isColMajor) {
kernel_reverse[0] = false;
kernel_reverse[1] = false;
kernel_reverse[2] = true;
kernel_reverse[3] = true;
} else {
kernel_reverse[0] = true;
kernel_reverse[1] = true;
kernel_reverse[2] = false;
kernel_reverse[3] = false;
}
#endif
// Reorder the dimensions to filters X patch_rows X patch_cols X channels
array<TensorIndex, 4> kernel_shuffle;
if (isColMajor) {
kernel_shuffle[0] = 0;
kernel_shuffle[1] = 2;
kernel_shuffle[2] = 3;
kernel_shuffle[3] = 1;
} else {
kernel_shuffle[0] = 2;
kernel_shuffle[1] = 0;
kernel_shuffle[2] = 1;
kernel_shuffle[3] = 3;
}
// Collapse the dims
// Fuse (filters, patch_rows, patch_cols) into the contraction dimension and
// keep channels as the free dimension of the kernel operand.
DSizes<TensorIndex, 2> kernel_dims;
if (isColMajor) {
kernel_dims[0] = kernelFilters * kernelRows * kernelCols;
kernel_dims[1] = kernelChannels;
} else {
kernel_dims[1] = kernelFilters * kernelRows * kernelCols;
kernel_dims[0] = kernelChannels;
}
// The output_backward has dimensions out_depth X out_rows X out_cols X OTHERS
// When we extract the image patches from output_backward, it will have dimensions
// out_depth X (patch_rows * patch_cols) X (input_rows * input_cols * OTHERS)
// Here the patch dims are collapsed with out_depth into pre_contract_dims[0]
// (col-major) so they line up with kernel_dims above.
DSizes<TensorIndex, 2> pre_contract_dims;
if (isColMajor) {
pre_contract_dims[0] = kernelFilters * kernelRows * kernelCols;
pre_contract_dims[1] = inputRows * inputCols;
for (int i = 3; i < NumDims; ++i) {
pre_contract_dims[1] *= out.dimension(i);
}
} else {
pre_contract_dims[1] = kernelFilters * kernelRows * kernelCols;
pre_contract_dims[0] = inputRows * inputCols;
for (int i = 0; i < NumDims - 3; ++i) {
pre_contract_dims[0] *= out.dimension(i);
}
}
// We will contract along the fused dimension that contains the kernelFilters,
// the kernelRows and the kernelCols.
array<IndexPair<TensorIndex>, 1> contract_dims;
if (isColMajor) {
// col-major: kernel.contract(output.patches)
contract_dims[0] = IndexPair<TensorIndex>(0, 0);
} else {
// row-major: output.patches.contract(kernel)
contract_dims[0] = IndexPair<TensorIndex>(1, 1);
}
// Post contraction, the dimensions of the input_backprop is
// channels X input_rows X input_cols X OTHERS
DSizes<TensorIndex, NumDims> post_contract_dims;
if (isColMajor) {
post_contract_dims[0] = kernelChannels;
post_contract_dims[1] = inputRows;
post_contract_dims[2] = inputCols;
for (int i = 3; i < NumDims; ++i) {
post_contract_dims[i] = out.dimension(i);
}
} else {
post_contract_dims[NumDims - 1] = kernelChannels;
post_contract_dims[NumDims - 2] = inputRows;
post_contract_dims[NumDims - 3] = inputCols;
for (int i = 0; i < NumDims - 3; ++i) {
post_contract_dims[i] = out.dimension(i);
}
}
// choose() statically selects one of the two expressions; .eval() forces the
// reshaped kernel into a buffer so the contraction reads it contiguously.
// Patch extraction uses stride 1/1 and in-strides row_stride/col_stride:
// for strided convolutions the backprop "un-strides" by treating the
// forward stride as a dilation of the patch grid.
return choose(
Cond<internal::traits<OutputBackward>::Layout == ColMajor>(),
kernel.reverse(kernel_reverse)
.shuffle(kernel_shuffle)
.reshape(kernel_dims)
.eval()
.contract(output_backward
.extract_image_patches(
kernelRows, kernelCols, 1, 1,
row_in_stride, col_in_stride,
row_stride, col_stride,
padding_top, padding_bottom,
padding_left, padding_right, OutScalar(0))
.reshape(pre_contract_dims),
contract_dims)
.reshape(post_contract_dims),
output_backward
.extract_image_patches(kernelRows, kernelCols, 1, 1,
row_in_stride, col_in_stride,
row_stride, col_stride,
padding_top, padding_bottom,
padding_left, padding_right,
OutScalar(0))
.reshape(pre_contract_dims)
.contract(kernel.reverse(kernel_reverse)
.shuffle(kernel_shuffle)
.reshape(kernel_dims)
.eval(),
contract_dims)
.reshape(post_contract_dims));
}
/** SpatialConvolutionBackwardKernel
* \ingroup CXX11_NeuralNetworks_Module
*
* \brief Computes the backprop for the filter of a 2D convolution.
*
* The output_backward parameter is expected to be a tensor with a rank of 3 or more (output depth, i.e. filters, height, width, and optionally others)
* The kernel parameter is expected to be a 4D tensor (filters, channels, kernel_height, kernel_width)
* The output_backward and the kernel must both be in col-major layout. The result will also be in col-major layout.
*
* If row_in_stride, col_in_stride > 1, then applies convolution with holes (aka atrous convolution), sampling every row_in_stride, col_in_stride input pixels.
*
* The result can be assigned to a 4D tensor. The dimensions of the result will be filters, channels, kernel_height, kernel_width (in the same layout convention as the kernel parameter).
*
* It is possible to swap the order of the width and height dimensions provided that the same order is used in the input, the kernel, and the output.
*
*/
// Builds the lazy expression
//   reshape(contract(reshape(output_backward),
//                    shuffle(reshape(image_patches(input)))))
// The conditional picks between the col-major form (output.contract(patches))
// and the row-major form (patches.contract(output)); the two branches of the
// return type below mirror the two branches of the choose(...) at the end.
template <typename OutputBackward, typename Input>
EIGEN_ALWAYS_INLINE static const typename internal::conditional<
internal::traits<OutputBackward>::Layout == ColMajor,
TensorReshapingOp<
const DSizes<typename internal::traits<Input>::Index, 4>,
const TensorContractionOp<
const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
const TensorReshapingOp<
const DSizes<typename internal::traits<Input>::Index, 2>,
const OutputBackward>,
const TensorShufflingOp<
const array<typename internal::traits<OutputBackward>::Index, 2>,
const TensorReshapingOp<
const DSizes<typename internal::traits<Input>::Index, 2>,
const TensorImagePatchOp<Dynamic, Dynamic, const Input>
>
>
>
>,
TensorReshapingOp<
const DSizes<typename internal::traits<Input>::Index, 4>,
const TensorContractionOp<
const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
const TensorShufflingOp<
const array<typename internal::traits<OutputBackward>::Index, 2>,
const TensorReshapingOp<
const DSizes<typename internal::traits<Input>::Index, 2>,
const TensorImagePatchOp<Dynamic, Dynamic, const Input>
>
>,
const TensorReshapingOp<
const DSizes<typename internal::traits<Input>::Index, 2>,
const OutputBackward>
>
>
>::type
SpatialConvolutionBackwardKernel(const Input& input, const OutputBackward& output_backward, typename internal::traits<Input>::Index kernelRows, typename internal::traits<Input>::Index kernelCols, const DenseIndex row_stride = 1, const DenseIndex col_stride = 1, const DenseIndex row_in_stride = 1, const DenseIndex col_in_stride = 1) {
typedef typename internal::traits<Input>::Index TensorIndex;
typedef typename internal::traits<OutputBackward>::Scalar OutScalar;
// TensorRef wrappers give access to dimensions() without forcing an
// evaluation of the (possibly lazy) input / output_backward expressions.
TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input);
TensorRef<Tensor<OutScalar, internal::traits<OutputBackward>::NumDimensions, internal::traits<OutputBackward>::Layout, TensorIndex> > out(output_backward);
// Both operands must use the same storage order.
EIGEN_STATIC_ASSERT(internal::traits<Input>::Layout == internal::traits<OutputBackward>::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE);
// stride and in_stride cannot both be larger than 1
eigen_assert(!(row_stride > 1 && row_in_stride > 1) && !(col_stride > 1 && col_in_stride > 1));
static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
static const int NumDims = internal::traits<Input>::NumDimensions;
// input and output_backward must have the same rank (they differ only in
// depth and spatial extent).
EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == internal::traits<OutputBackward>::NumDimensions, YOU_MADE_A_PROGRAMMING_MISTAKE);
const TensorIndex inputRows = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2);
const TensorIndex inputCols = isColMajor ? in.dimension(2) : in.dimension(NumDims - 3);
const TensorIndex outputRows = isColMajor ? output_backward.dimension(1) : output_backward.dimension(NumDims - 2);
const TensorIndex outputCols = isColMajor ? output_backward.dimension(2) : output_backward.dimension(NumDims - 3);
// Number of filters to apply. This is the same as the output depth of the result
const TensorIndex kernelFilters = isColMajor ? out.dimensions()[0] : out.dimensions()[NumDims - 1];
// Number of channels. This is the same as the input depth.
const TensorIndex kernelChannels = isColMajor ? in.dimensions()[0] : in.dimensions()[NumDims - 1];
// This is the effective kernel size, taking into account the (*_in_stride - 1) zero-values
// inserted between consecutive kernel elements in atrous convolution
const TensorIndex kernelRowsEff = kernelRows + (kernelRows - 1) * (row_in_stride - 1);
const TensorIndex kernelColsEff = kernelCols + (kernelCols - 1) * (col_in_stride - 1);
// Computing the forward padding
// Reconstruct the total (SAME-style) forward padding and split it with the
// extra pixel going to the bottom/right, mirroring the forward pass.
const TensorIndex padRows = numext::maxi<Index>(
0, (outputRows - 1) * row_stride + kernelRowsEff - inputRows);
const TensorIndex padCols = numext::maxi<Index>(
0, (outputCols - 1) * col_stride + kernelColsEff - inputCols);
const TensorIndex padding_top = padRows / 2;
const TensorIndex padding_bottom = padRows - padding_top;
const TensorIndex padding_left = padCols / 2;
const TensorIndex padding_right = padCols - padding_left;
// Reshaped out
// Collapse output_backward to 2D: filters X (out_rows * out_cols * OTHERS).
DSizes<TensorIndex, 2> output_dims;
if (isColMajor) {
output_dims[0] = kernelFilters;
output_dims[1] = outputRows * outputCols;
for (int i = 3; i < NumDims; ++i) {
output_dims[1] *= out.dimension(i);
}
} else {
output_dims[1] = kernelFilters;
output_dims[0] = outputCols * outputRows;
for (int i = 0; i < NumDims - 3; ++i) {
output_dims[0] *= out.dimension(i);
}
}
// Reshaped extract_image_patches(in)
// Collapse the input patches to 2D:
// (channels * kernel_rows * kernel_cols) X (out_rows * out_cols * OTHERS);
// the second dim must match output_dims (asserted below).
DSizes<TensorIndex, 2> pre_contract_dims;
if (isColMajor) {
pre_contract_dims[0] = kernelChannels * kernelRows * kernelCols;
pre_contract_dims[1] = outputRows * outputCols;
for (int i = 3; i < NumDims; ++i) {
pre_contract_dims[1] *= in.dimension(i);
}
eigen_assert(output_dims[1] == pre_contract_dims[1]);
} else {
pre_contract_dims[1] = kernelCols * kernelRows * kernelChannels;
pre_contract_dims[0] = outputRows * outputCols;
for (int i = 0; i < NumDims - 3; ++i) {
pre_contract_dims[0] *= in.dimension(i);
}
eigen_assert(output_dims[0] == pre_contract_dims[0]);
}
// Transpose the 2D patch matrix so the spatial/batch dimension lines up
// with the contraction pair (1, 0) below.
array<TensorIndex, 2> shuffle_dims;
shuffle_dims[0] = 1;
shuffle_dims[1] = 0;
// Contract output dim 1 against (shuffled) patch dim 0, summing over all
// output positions and batch entries.
array<IndexPair<TensorIndex>, 1> contract_dims;
contract_dims[0] = IndexPair<TensorIndex>(1, 0);
// After the contraction, the kernel will have the desired shape
// out_depth X in_shape X kernel_rows X kernel_cols
DSizes<TensorIndex, 4> kernel_dims;
if (isColMajor) {
kernel_dims[0] = kernelFilters;
kernel_dims[1] = kernelChannels;
kernel_dims[2] = kernelRows;
kernel_dims[3] = kernelCols;
} else {
kernel_dims[3] = kernelFilters;
kernel_dims[2] = kernelChannels;
kernel_dims[1] = kernelRows;
kernel_dims[0] = kernelCols;
}
// choose() statically selects one of the two expressions. Patch extraction
// swaps the roles of stride and in_stride relative to the forward pass
// (valid because the assert above forbids both being > 1 on the same axis).
return choose(
Cond<internal::traits<Input>::Layout == ColMajor>(),
output_backward.reshape(output_dims)
.contract(
input.extract_image_patches(
kernelRows, kernelCols, row_stride, col_stride,
row_in_stride, col_in_stride, 1, 1, padding_top, padding_bottom,
padding_left, padding_right, OutScalar(0))
.reshape(pre_contract_dims)
.shuffle(shuffle_dims),
contract_dims)
.reshape(kernel_dims),
input.extract_image_patches(
kernelRows, kernelCols, row_stride, col_stride,
row_in_stride, col_in_stride, 1, 1, padding_top, padding_bottom,
padding_left, padding_right, OutScalar(0))
.reshape(pre_contract_dims)
.shuffle(shuffle_dims)
.contract(
output_backward.reshape(output_dims),
contract_dims)
.reshape(kernel_dims));
}
} // end namespace Eigen
#endif // EIGEN_CXX11_NEURAL_NETWORKS_BACKWARD_SPATIAL_CONVOLUTIONS_H