* Add support for batch sizes other than 1 in Conv2D and DepthwiseConv2D in the interpreter backend.
* Fix a bug in the ShapeRange class that caused undefined behavior when constructed from a temporary.
Signed-off-by: Sergei Barannikov <s.barannikov@samsung.com>
class ShapeRange {
public:
- explicit ShapeRange(const Shape &shape) : _shape(const_cast<Shape&>(shape))
- {}
+ explicit ShapeRange(const Shape& shape) : _shape(shape) {}
+
+ explicit ShapeRange(Shape&& shape) : _shape(std::move(shape)) {}
ShapeIter begin() {
return ShapeIter(_shape, 0);
}
private:
- Shape& _shape;
+ Shape _shape;
};
} // namespace mir
*/
#include "Conv2D.h"
-#include "common.h"
#include "core/modelIR/ShapeRange.h"
-#include <cmath>
-namespace nnc
-{
+namespace nnc {
using namespace mir;
-using namespace mir::ops;
-
-Index reduce(const Index &idx)
-{
- Index res = idx;
- res.resize(idx.rank() - 1);
- return res;
-}
// Mostly compatible with tensorflow implementation
// Assuming input is in NHWC format with batch omitted( [in_height, in_width, in_channels] )
// Kernel is in [filter_height, filter_width, in_channels, out_channels]
// Refer to https://www.tensorflow.org/api_docs/python/tf/nn/conv2d for info
-std::vector<TensorVariant> Conv2D::operator()()
-{
- auto res = allocate_tensor(_op.getOutputShape(0));
- Tensor<float> resAccesor(res);
- Shape strides{_op.getStrides().dim(0), _op.getStrides().dim(1), 1};
- Index pads{_op.getPaddingBefore().at(0), _op.getPaddingBefore().at(1), 0};
-
- Shape outShape = resAccesor.getShape();
- // Assume batch size == 1 and strip it off.
- assert(outShape.dim(0) == 1);
- outShape = {outShape.dim(1), outShape.dim(2), outShape.dim(3)};
-
- outShape.dim(2) = 1;
- ShapeRange outRange(outShape);
-
- Shape inShape = _input.getShape();
- // Assume batch size == 1 and strip it off.
- assert(inShape.dim(0) == 1);
- inShape = {inShape.dim(1), inShape.dim(2), inShape.dim(3)};
-
- ShapeRange inRange(inShape);
+std::vector<TensorVariant> Conv2D::operator()() {
+ const Shape& in_shape = _op.getInputShape(0);
+ const Shape& kernel_shape = _op.getInputShape(1);
+ const Shape& out_shape = _op.getOutputShape(0);
+ const Shape& strides = _op.getStrides();
+ const std::vector<int32_t>& pads = _op.getPaddingBefore();
+
+ assert(in_shape.rank() == 4);
+ assert(kernel_shape.rank() == 4);
+ assert(kernel_shape.dim(2) == in_shape.dim(3));
+ assert(kernel_shape.dim(3) == out_shape.dim(3));
+ assert(strides.rank() == 2);
+ assert(pads.size() == 2);
+
+ int32_t num_kernels = kernel_shape.dim(3);
- Shape kShape = _kernel.getShape();
- int32_t numKernels = kShape.dim(3);
- kShape.dim(3) = 1;
- ShapeRange kernelRange(kShape);
-
- Index inputIdx;
- inputIdx.resize(inShape.rank());
-
- for (auto &outIdx : outRange)
- {
- // Take into account stripped off batch dimension.
- Index tmp_out_index{0, outIdx.at(0), outIdx.at(1), outIdx.at(2)};
-
- for (auto& kernelIdx : kernelRange)
- {
- translate(inputIdx, outIdx, kernelIdx, strides, pads);
- if (inRange.contains(inputIdx))
- {
- auto kernelRegion = _kernel.getRegion(kernelIdx);
- assert( kernelRegion.size() == numKernels );
-
- auto outRegion = resAccesor.getRegion(tmp_out_index);
- assert( outRegion.size() == numKernels );
-
- // Take into account stripped off batch dimension.
- Index tmp_in_index{0, inputIdx.at(0), inputIdx.at(1), inputIdx.at(2)};
- auto in = _input.at(tmp_in_index);
-
- for (int32_t kernelIndex = 0; kernelIndex < numKernels; ++kernelIndex)
- {
- outRegion.base()[kernelIndex] += in * kernelRegion.base()[kernelIndex];
+ auto res = allocate_tensor(_op.getOutputShape(0));
+ Tensor<float> res_accessor(res);
+
+ ShapeRange in_range(in_shape);
+ ShapeRange out_range(Shape{out_shape.dim(0), out_shape.dim(1), out_shape.dim(2), 1});
+ ShapeRange kernel_range(Shape{kernel_shape.dim(0), kernel_shape.dim(1), kernel_shape.dim(2), 1});
+
+ Index in_index;
+ in_index.resize(4);
+
+ for (const auto& out_index : out_range) {
+ auto out_region = res_accessor.getRegion(out_index);
+ assert(out_region.size() == num_kernels);
+ for (const auto& kernel_index : kernel_range) {
+ in_index.at(0) = out_index.at(0);
+ for (int i = 0; i < 2; ++i)
+ in_index.at(1 + i) = out_index.at(1 + i) * strides.dim(i) + kernel_index.at(i) - pads[i];
+ in_index.at(3) = kernel_index.at(2);
+
+ if (in_range.contains(in_index)) {
+ auto kernel_region = _kernel.getRegion(kernel_index);
+ assert(kernel_region.size() == num_kernels);
+ float in_val = _input.at(in_index);
+ for (int32_t kernel_i = 0; kernel_i < num_kernels; ++kernel_i) {
+ out_region.base()[kernel_i] += in_val * kernel_region.base()[kernel_i];
}
}
}
Conv2D::Conv2D(const TensorVariant& input,
const TensorVariant& kernel,
- const Conv2DOp& op)
+ const ops::Conv2DOp& op)
: _input(input), _kernel(kernel), _op(op) {
- assert(_op.getInputShape(0).rank() == 4);
- assert(_input.getShape().rank() == 4);
- assert(_kernel.getShape().rank() == 4);
}
} // namespace nnc
*/
#include "DepthwiseConv2D.h"
-#include "common.h"
#include "core/modelIR/ShapeRange.h"
-namespace nnc
-{
+namespace nnc {
using namespace mir;
-using namespace mir::ops;
-std::vector<TensorVariant> DepthwiseConv2D::operator()()
-{
- TensorVariant res = allocate_tensor(_op.getOutputShape(0));
- Tensor<float> resAccessor(res);
-
- Shape strides({_op.getStrides().dim(0), _op.getStrides().dim(1), 1});
- Index pads({_op.getPaddingBefore().at(0), _op.getPaddingBefore().at(1), 0});
-
- Shape outShape = res.getShape();
- // Assume batch size == 1 and strip it off.
- assert(outShape.dim(0) == 1);
- outShape = {outShape.dim(1), outShape.dim(2), outShape.dim(3)};
-
- outShape.dim(2) = 1;
- ShapeRange outRange(outShape);
-
- Shape inShape = _input.getShape();
- // Assume batch size == 1 and strip it off.
- assert(inShape.dim(0) == 1);
- inShape = {inShape.dim(1), inShape.dim(2), inShape.dim(3)};
-
- ShapeRange inRange(inShape);
-
- Index inIdx;
- inIdx.resize(outShape.rank());
-
- auto kernelShape = _kernel.getShape();
- int32_t channelMultiplierDim = kernelShape.rank() - 1;
- int channelMultiplier = kernelShape.dim(channelMultiplierDim);
-
- for (auto &outIdx : outRange)
- {
- // Take into account stripped off batch dimension.
- Index tmp_out_index{0, outIdx.at(0), outIdx.at(1), outIdx.at(2)};
-
- for (auto &kIdx : ShapeRange(kernelShape))
- {
- translate(inIdx, outIdx, kIdx, strides, pads);
-
- if (inRange.contains(inIdx))
- {
- // Take into account stripped off batch dimension.
- Index tmp_in_index{0, inIdx.at(0), inIdx.at(1), inIdx.at(2)};
- auto in = _input.at(tmp_in_index);
- auto b = _kernel.at(kIdx);
- Index outIdxK = tmp_out_index;
- outIdxK.at(3) = kIdx.at(2) * channelMultiplier + kIdx.at(channelMultiplierDim);
- resAccessor.at(outIdxK) += in * b;
+std::vector<TensorVariant> DepthwiseConv2D::operator()() {
+ const Shape& in_shape = _op.getInputShape(0);
+ const Shape& kernel_shape = _op.getInputShape(1);
+ const Shape& out_shape = _op.getOutputShape(0);
+ const Shape& strides = _op.getStrides();
+ const std::vector<int32_t>& pads = _op.getPaddingBefore();
+
+ assert(in_shape.rank() == 4);
+ assert(kernel_shape.rank() == 4);
+ assert(kernel_shape.dim(2) == in_shape.dim(3));
+ assert(in_shape.dim(3) * kernel_shape.dim(3) == out_shape.dim(3));
+ assert(strides.rank() == 2);
+ assert(pads.size() == 2);
+
+ int32_t channel_multiplier = kernel_shape.dim(3);
+
+ TensorVariant res = allocate_tensor(out_shape);
+ Tensor<float> res_accessor(res);
+
+ ShapeRange in_range(in_shape);
+ ShapeRange kernel_range(kernel_shape);
+ ShapeRange out_range(Shape{out_shape.dim(0), out_shape.dim(1), out_shape.dim(2), 1});
+
+ Index in_index;
+ in_index.resize(4);
+
+ for (const auto& out_index : out_range) {
+ Index out_index_k = out_index;
+ for (const auto& kernel_index : kernel_range) {
+ in_index.at(0) = out_index.at(0);
+ for (int i = 0; i < 2; ++i)
+ in_index.at(1 + i) = out_index.at(1 + i) * strides.dim(i) + kernel_index.at(i) - pads[i];
+ in_index.at(3) = kernel_index.at(2);
+
+ if (in_range.contains(in_index)) {
+ out_index_k.at(3) = kernel_index.at(2) * channel_multiplier + kernel_index.at(3);
+ res_accessor.at(out_index_k) += _input.at(in_index) * _kernel.at(kernel_index);
}
}
}
DepthwiseConv2D::DepthwiseConv2D(const TensorVariant& input,
const TensorVariant& kernel,
- const DepthwiseConv2DOp& op)
+ const ops::DepthwiseConv2DOp& op)
: _input(input), _kernel(kernel), _op(op) {
- assert(_op.getInputShape(0).rank() == 4);
- assert(_input.getShape().rank() == 4);
- assert(_kernel.getShape().rank() == 4);
- assert(_kernel.getShape().dim(2) == _input.getShape().dim(3));
}
} // namespace nnc
for (iT output_c = 1; output_c <= 3; ++output_c)
for (iT stride_h = 1; stride_h <= 3; ++stride_h)
for (iT stride_w = 1; stride_w <= 3; ++stride_w) {
- vector<int> input_shape_data{1, 5, 7, static_cast<int>(input_c)}; // NHWC
+ vector<int> input_shape_data{3, 5, 7, static_cast<int>(input_c)}; // NHWC
vector<int> kernel_shape_data{kernel_h, kernel_w, input_c, output_c}; // HWCN
mir::Shape strides{stride_h, stride_w};
vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
for (iT stride_w = 1; stride_w <= 3; ++stride_w)
for (iT stride_h = 1; stride_h <= 3; ++stride_h)
for (iT multiplier = 1; multiplier <= 2; ++multiplier) {
- vector<int> input_shape_data{1, 7, 6, static_cast<int>(channels)}; // NHWC
+ vector<int> input_shape_data{3, 7, 6, static_cast<int>(channels)}; // NHWC
vector<int> kernel_shape_data{kernel_h, kernel_w, channels, multiplier}; // HWCN
mir::Shape strides{stride_h, stride_w};
vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
for (iT channels = 1; channels <= 2; ++channels)
for (iT stride_h = 1; stride_h <= 3; ++stride_h)
for (iT stride_w = 1; stride_w <= 3; ++stride_w) {
- vector<int> shape_data{1, 5, 7, static_cast<int>(channels)};
+ vector<int> shape_data{3, 5, 7, static_cast<int>(channels)};
mir::Shape window_shape{windowH, windowW};
mir::Shape strides{stride_h, stride_w};
Tensor input_atensor;