From 4593a7296b99ff7ca6ece3a22828aff0bc7800b3 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov/AI Tools Lab/SRR/Engineer/Samsung Electronics
Date: Tue, 22 Jan 2019 16:02:14 +0300
Subject: [PATCH] [nnc] Support for non-1 batch in Conv2D and DepthwiseConv2D
 operations (#2902)

* Add support for non-1 batch size in Conv2D and DepthwiseConv2D in the
  interpreter backend.
* Fix a bug in the ShapeRange class that caused undefined behavior when it
  was constructed from a temporary.

Signed-off-by: Sergei Barannikov
---
 contrib/nnc/include/core/modelIR/ShapeRange.h      |   7 +-
 contrib/nnc/passes/interpreter/ops/Conv2D.cpp      | 110 ++++++++-------
 .../nnc/passes/interpreter/ops/DepthwiseConv2D.cpp |  96 ++++++++----
 .../nnc/unittests/soft_backend/CPPOperations.cpp   |   6 +-
 4 files changed, 88 insertions(+), 131 deletions(-)

diff --git a/contrib/nnc/include/core/modelIR/ShapeRange.h b/contrib/nnc/include/core/modelIR/ShapeRange.h
index fb7f05e..12bf83b 100644
--- a/contrib/nnc/include/core/modelIR/ShapeRange.h
+++ b/contrib/nnc/include/core/modelIR/ShapeRange.h
@@ -75,8 +75,9 @@ class ShapeIter :
 class ShapeRange {
 public:
-  explicit ShapeRange(const Shape &shape) : _shape(const_cast<Shape &>(shape))
-  {}
+  explicit ShapeRange(const Shape& shape) : _shape(shape) {}
+
+  explicit ShapeRange(Shape&& shape) : _shape(std::move(shape)) {}
 
   ShapeIter begin() {
     return ShapeIter(_shape, 0);
@@ -96,7 +97,7 @@ class ShapeRange {
   }
 
 private:
-  Shape& _shape;
+  Shape _shape;
 };
 
 } // namespace mir

diff --git a/contrib/nnc/passes/interpreter/ops/Conv2D.cpp b/contrib/nnc/passes/interpreter/ops/Conv2D.cpp
index 4ee14a2..176587c 100644
--- a/contrib/nnc/passes/interpreter/ops/Conv2D.cpp
+++ b/contrib/nnc/passes/interpreter/ops/Conv2D.cpp
@@ -15,80 +15,57 @@
  */
 
 #include "Conv2D.h"
-#include "common.h"
 #include "core/modelIR/ShapeRange.h"
-#include
 
-namespace nnc
-{
+namespace nnc {
 
 using namespace mir;
-using namespace mir::ops;
-
-Index reduce(const Index &idx)
-{
-  Index res = idx;
-  res.resize(idx.rank() - 1);
-  return res;
-}
 
 // Mostly compatible with tensorflow implementation
 // Assuming input is in NHWC format with batch omitted( [in_height, in_width, in_channels] )
 // Kernel is in [filter_height, filter_width, in_channels, out_channels]
 // Refer to https://www.tensorflow.org/api_docs/python/tf/nn/conv2d for info
-std::vector<TensorVariant> Conv2D::operator()()
-{
-  auto res = allocate_tensor(_op.getOutputShape(0));
-  Tensor<float> resAccesor(res);
-  Shape strides{_op.getStrides().dim(0), _op.getStrides().dim(1), 1};
-  Index pads{_op.getPaddingBefore().at(0), _op.getPaddingBefore().at(1), 0};
-
-  Shape outShape = resAccesor.getShape();
-  // Assume batch size == 1 and strip it off.
-  assert(outShape.dim(0) == 1);
-  outShape = {outShape.dim(1), outShape.dim(2), outShape.dim(3)};
-
-  outShape.dim(2) = 1;
-  ShapeRange outRange(outShape);
-
-  Shape inShape = _input.getShape();
-  // Assume batch size == 1 and strip it off.
-  assert(inShape.dim(0) == 1);
-  inShape = {inShape.dim(1), inShape.dim(2), inShape.dim(3)};
-
-  ShapeRange inRange(inShape);
+std::vector<TensorVariant> Conv2D::operator()() {
+  const Shape& in_shape = _op.getInputShape(0);
+  const Shape& kernel_shape = _op.getInputShape(1);
+  const Shape& out_shape = _op.getOutputShape(0);
+  const Shape& strides = _op.getStrides();
+  const std::vector<int32_t>& pads = _op.getPaddingBefore();
+
+  assert(in_shape.rank() == 4);
+  assert(kernel_shape.rank() == 4);
+  assert(kernel_shape.dim(2) == in_shape.dim(3));
+  assert(kernel_shape.dim(3) == out_shape.dim(3));
+  assert(strides.rank() == 2);
+  assert(pads.size() == 2);
+
+  int32_t num_kernels = kernel_shape.dim(3);
 
-  Shape kShape = _kernel.getShape();
-  int32_t numKernels = kShape.dim(3);
-  kShape.dim(3) = 1;
-  ShapeRange kernelRange(kShape);
-
-  Index inputIdx;
-  inputIdx.resize(inShape.rank());
-
-  for (auto &outIdx : outRange)
-  {
-    // Take into account stripped off batch dimension.
-    Index tmp_out_index{0, outIdx.at(0), outIdx.at(1), outIdx.at(2)};
-
-    for (auto& kernelIdx : kernelRange)
-    {
-      translate(inputIdx, outIdx, kernelIdx, strides, pads);
-      if (inRange.contains(inputIdx))
-      {
-        auto kernelRegion = _kernel.getRegion(kernelIdx);
-        assert( kernelRegion.size() == numKernels );
-
-        auto outRegion = resAccesor.getRegion(tmp_out_index);
-        assert( outRegion.size() == numKernels );
-
-        // Take into account stripped off batch dimension.
-        Index tmp_in_index{0, inputIdx.at(0), inputIdx.at(1), inputIdx.at(2)};
-        auto in = _input.at(tmp_in_index);
-
-        for (int32_t kernelIndex = 0; kernelIndex < numKernels; ++kernelIndex)
-        {
-          outRegion.base()[kernelIndex] += in * kernelRegion.base()[kernelIndex];
+  auto res = allocate_tensor(_op.getOutputShape(0));
+  Tensor<float> res_accessor(res);
+
+  ShapeRange in_range(in_shape);
+  ShapeRange out_range(Shape{out_shape.dim(0), out_shape.dim(1), out_shape.dim(2), 1});
+  ShapeRange kernel_range(Shape{kernel_shape.dim(0), kernel_shape.dim(1), kernel_shape.dim(2), 1});
+
+  Index in_index;
+  in_index.resize(4);
+
+  for (const auto& out_index : out_range) {
+    auto out_region = res_accessor.getRegion(out_index);
+    assert(out_region.size() == num_kernels);
+    for (const auto& kernel_index : kernel_range) {
+      in_index.at(0) = out_index.at(0);
+      for (int i = 0; i < 2; ++i)
+        in_index.at(1 + i) = out_index.at(1 + i) * strides.dim(i) + kernel_index.at(i) - pads[i];
+      in_index.at(3) = kernel_index.at(2);
+
+      if (in_range.contains(in_index)) {
+        auto kernel_region = _kernel.getRegion(kernel_index);
+        assert(kernel_region.size() == num_kernels);
+        float in_val = _input.at(in_index);
+        for (int32_t kernel_i = 0; kernel_i < num_kernels; ++kernel_i) {
+          out_region.base()[kernel_i] += in_val * kernel_region.base()[kernel_i];
         }
       }
     }
@@ -99,11 +76,8 @@ std::vector<TensorVariant> Conv2D::operator()()
 
 Conv2D::Conv2D(const TensorVariant& input,
                const TensorVariant& kernel,
-               const Conv2DOp& op)
+               const ops::Conv2DOp& op)
     : _input(input), _kernel(kernel), _op(op) {
-  assert(_op.getInputShape(0).rank() == 4);
-  assert(_input.getShape().rank() == 4);
-  assert(_kernel.getShape().rank() == 4);
 }
 
 } // namespace nnc

diff --git a/contrib/nnc/passes/interpreter/ops/DepthwiseConv2D.cpp b/contrib/nnc/passes/interpreter/ops/DepthwiseConv2D.cpp
index dfb4308..8582db2 100644
--- a/contrib/nnc/passes/interpreter/ops/DepthwiseConv2D.cpp
+++ b/contrib/nnc/passes/interpreter/ops/DepthwiseConv2D.cpp
@@ -15,63 +15,49 @@
  */
 
 #include "DepthwiseConv2D.h"
-#include "common.h"
 #include "core/modelIR/ShapeRange.h"
 
-namespace nnc
-{
+namespace nnc {
 
 using namespace mir;
-using namespace mir::ops;
 
-std::vector<TensorVariant> DepthwiseConv2D::operator()()
-{
-  TensorVariant res = allocate_tensor(_op.getOutputShape(0));
-  Tensor<float> resAccessor(res);
-
-  Shape strides({_op.getStrides().dim(0), _op.getStrides().dim(1), 1});
-  Index pads({_op.getPaddingBefore().at(0), _op.getPaddingBefore().at(1), 0});
-
-  Shape outShape = res.getShape();
-  // Assume batch size == 1 and strip it off.
-  assert(outShape.dim(0) == 1);
-  outShape = {outShape.dim(1), outShape.dim(2), outShape.dim(3)};
-
-  outShape.dim(2) = 1;
-  ShapeRange outRange(outShape);
-
-  Shape inShape = _input.getShape();
-  // Assume batch size == 1 and strip it off.
-  assert(inShape.dim(0) == 1);
-  inShape = {inShape.dim(1), inShape.dim(2), inShape.dim(3)};
-
-  ShapeRange inRange(inShape);
-
-  Index inIdx;
-  inIdx.resize(outShape.rank());
-
-  auto kernelShape = _kernel.getShape();
-  int32_t channelMultiplierDim = kernelShape.rank() - 1;
-  int channelMultiplier = kernelShape.dim(channelMultiplierDim);
-
-  for (auto &outIdx : outRange)
-  {
-    // Take into account stripped off batch dimension.
-    Index tmp_out_index{0, outIdx.at(0), outIdx.at(1), outIdx.at(2)};
-
-    for (auto &kIdx : ShapeRange(kernelShape))
-    {
-      translate(inIdx, outIdx, kIdx, strides, pads);
-
-      if (inRange.contains(inIdx))
-      {
-        // Take into account stripped off batch dimension.
-        Index tmp_in_index{0, inIdx.at(0), inIdx.at(1), inIdx.at(2)};
-        auto in = _input.at(tmp_in_index);
-        auto b = _kernel.at(kIdx);
-        Index outIdxK = tmp_out_index;
-        outIdxK.at(3) = kIdx.at(2) * channelMultiplier + kIdx.at(channelMultiplierDim);
-        resAccessor.at(outIdxK) += in * b;
+std::vector<TensorVariant> DepthwiseConv2D::operator()() {
+  const Shape& in_shape = _op.getInputShape(0);
+  const Shape& kernel_shape = _op.getInputShape(1);
+  const Shape& out_shape = _op.getOutputShape(0);
+  const Shape& strides = _op.getStrides();
+  const std::vector<int32_t>& pads = _op.getPaddingBefore();
+
+  assert(in_shape.rank() == 4);
+  assert(kernel_shape.rank() == 4);
+  assert(kernel_shape.dim(2) == in_shape.dim(3));
+  assert(in_shape.dim(3) * kernel_shape.dim(3) == out_shape.dim(3));
+  assert(strides.rank() == 2);
+  assert(pads.size() == 2);
+
+  int32_t channel_multiplier = kernel_shape.dim(3);
+
+  TensorVariant res = allocate_tensor(out_shape);
+  Tensor<float> res_accessor(res);
+
+  ShapeRange in_range(in_shape);
+  ShapeRange kernel_range(kernel_shape);
+  ShapeRange out_range(Shape{out_shape.dim(0), out_shape.dim(1), out_shape.dim(2), 1});
+
+  Index in_index;
+  in_index.resize(4);
+
+  for (const auto& out_index : out_range) {
+    Index out_index_k = out_index;
+    for (const auto& kernel_index : kernel_range) {
+      in_index.at(0) = out_index.at(0);
+      for (int i = 0; i < 2; ++i)
+        in_index.at(1 + i) = out_index.at(1 + i) * strides.dim(i) + kernel_index.at(i) - pads[i];
+      in_index.at(3) = kernel_index.at(2);
+
+      if (in_range.contains(in_index)) {
+        out_index_k.at(3) = kernel_index.at(2) * channel_multiplier + kernel_index.at(3);
+        res_accessor.at(out_index_k) += _input.at(in_index) * _kernel.at(kernel_index);
       }
     }
   }
@@ -81,12 +67,8 @@ std::vector<TensorVariant> DepthwiseConv2D::operator()()
 
 DepthwiseConv2D::DepthwiseConv2D(const TensorVariant& input,
                                  const TensorVariant& kernel,
-                                 const DepthwiseConv2DOp& op)
+                                 const ops::DepthwiseConv2DOp& op)
     : _input(input), _kernel(kernel), _op(op) {
-  assert(_op.getInputShape(0).rank() == 4);
-  assert(_input.getShape().rank() == 4);
-  assert(_kernel.getShape().rank() == 4);
-  assert(_kernel.getShape().dim(2) == _input.getShape().dim(3));
 }
 
 } // namespace nnc

diff --git a/contrib/nnc/unittests/soft_backend/CPPOperations.cpp b/contrib/nnc/unittests/soft_backend/CPPOperations.cpp
index 92f5f27..4c6fcb8 100644
--- a/contrib/nnc/unittests/soft_backend/CPPOperations.cpp
+++ b/contrib/nnc/unittests/soft_backend/CPPOperations.cpp
@@ -625,7 +625,7 @@ TEST(cpp_operations_test, conv2d) {
       for (iT output_c = 1; output_c <= 3; ++output_c)
         for (iT stride_h = 1; stride_h <= 3; ++stride_h)
           for (iT stride_w = 1; stride_w <= 3; ++stride_w) {
-            vector<int> input_shape_data{1, 5, 7, static_cast<int>(input_c)};  // NHWC
+            vector<int> input_shape_data{3, 5, 7, static_cast<int>(input_c)};  // NHWC
             vector<int> kernel_shape_data{kernel_h, kernel_w, input_c, output_c};  // HWCN
             mir::Shape strides{stride_h, stride_w};
             vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
@@ -658,7 +658,7 @@ TEST(cpp_operations_test, depthwise_conv) {
          for (iT stride_w = 1; stride_w <= 3; ++stride_w)
            for (iT stride_h = 1; stride_h <= 3; ++stride_h)
              for (iT multiplier = 1; multiplier <= 2; ++multiplier) {
-               vector<int> input_shape_data{1, 7, 6, static_cast<int>(channels)};  // NHWC
+               vector<int> input_shape_data{3, 7, 6, static_cast<int>(channels)};  // NHWC
               vector<int> kernel_shape_data{kernel_h, kernel_w, channels, multiplier};  // HWCN
               mir::Shape strides{stride_h, stride_w};
               vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
@@ -764,7 +764,7 @@ static void genericPoolTest(Func test_func, const vector
-  vector<int> shape_data{1, 5, 7, static_cast<int>(channels)};
+  vector<int> shape_data{3, 5, 7, static_cast<int>(channels)};
   mir::Shape window_shape{windowH, windowW};
   mir::Shape strides{stride_h, stride_w};
   Tensor input_atensor;
-- 
2.7.4
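
Why the ShapeRange change matters: the old constructor cast away constness and
stored a Shape&, so a range constructed from a temporary kept a reference to an
object that had already been destroyed. The new interpreter code relies on
exactly that pattern (ShapeRange out_range(Shape{...})), which is why the class
now stores the Shape by value and adds an rvalue overload. Below is a
standalone miniature of the bug and of the fix; BadRange and GoodRange are
illustrative stand-ins written for this note, not the real mir::ShapeRange.

  #include <cstddef>
  #include <iostream>
  #include <utility>
  #include <vector>

  // Minimal stand-in for mir::Shape.
  struct Shape {
    std::vector<int> dims;
  };

  // Old pattern: keeps a reference. When the constructor argument is a
  // temporary, the reference dangles as soon as the full expression ends.
  class BadRange {
  public:
    explicit BadRange(const Shape& shape) : _shape(const_cast<Shape&>(shape)) {}
    std::size_t rank() const { return _shape.dims.size(); }  // UB if dangling
  private:
    Shape& _shape;
  };

  // Fixed pattern: owns the Shape. The Shape&& overload moves temporaries in
  // instead of referencing them; lvalues are copied by the const& overload.
  class GoodRange {
  public:
    explicit GoodRange(const Shape& shape) : _shape(shape) {}
    explicit GoodRange(Shape&& shape) : _shape(std::move(shape)) {}
    std::size_t rank() const { return _shape.dims.size(); }
  private:
    Shape _shape;
  };

  int main() {
    GoodRange good(Shape{{5, 7, 3, 1}});  // temporary moved into the range
    std::cout << good.rank() << '\n';     // prints 4; well-defined

    BadRange bad(Shape{{5, 7, 3, 1}});    // temporary dies at the ';'
    // bad.rank() would now read through a dangling reference: undefined
    // behavior, and exactly what ShapeRange did before this patch.
    (void)bad;
    return 0;
  }

Without the Shape&& overload, switching _shape to a value would still be
correct but would copy every temporary argument; the move overload keeps the
common ShapeRange(Shape{...}) case cheap.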