* Change the ModelIR Conv2D kernel format from HWIO to OHWI to match TensorFlow Lite format.
* Adjust importers and backends.
Signed-off-by: Sergei Barannikov <s.barannikov@samsung.com>
namespace ops {
void Conv2DOp::inferOutputShapes() {
+ // Input shape: [N, Hi, Wi, Ci].
+ // Kernel shape: [Co, Hk, Wk, Ci].
const auto& input_shape = getInputShape(0);
const auto& kernel_shape = getInputShape(1);
assert(input_shape.rank() == 4);
assert(kernel_shape.rank() == 4);
+ assert(kernel_shape.dim(3) == input_shape.dim(3));
assert(_strides.rank() == 2);
assert(_paddingBefore.size() == 2);
assert(_paddingAfter.size() == 2);
// Batch size and number of channels.
output_shape.dim(0) = input_shape.dim(0);
- output_shape.dim(3) = kernel_shape.dim(3);
+ output_shape.dim(3) = kernel_shape.dim(0);
// Height and width.
for (int i = 0; i < 2; i++) {
// out_size = ceil((in_size - kernel_size + 1) / stride) =
// (in_size - kernel_size + 1 + stride - 1) / stride =
// (in_size - kernel_size) / stride + 1
- output_shape.dim(1 + i) = (padded_input - kernel_shape.dim(i)) / _strides.dim(i) + 1;
+ output_shape.dim(1 + i) = (padded_input - kernel_shape.dim(1 + i)) / _strides.dim(i) + 1;
}
setOutputShape(0, output_shape);
}
}
-/**
- * @brief Generate DOM for PadStrideInfo object
- * @tparam Oper Class of operation with pad and stride properties
- * @param op Operation entity to generate variable for
- * @param prefix First part of generated variable name
- * @param block Code block where insert variable declaration
- * @return generated variable
- */
-template<class Oper>
-static shared_ptr<ArtifactVariable>
- genPadStrideInfo(const Oper& op, const string& prefix, ArtifactBlock* block) {
+template <typename Op>
+shared_ptr<ArtifactVariable>
+AclCppOpGenerator::genPadStrideInfo(const Op& op, const string& prefix, ArtifactBlock* block) {
using AF = ArtifactFactory;
const Shape& strides = transposeShape<1, 0>(op.getStrides());
template <typename Op>
void AclCppOpGenerator::genConvolution(Op& op, const string& acl_func_name, const string& suffix) {
- IODescriptor ir_input = op.getPrevNodes()[0];
- IODescriptor ir_weights = op.getPrevNodes()[1];
+ IODescriptor ir_input = op.getInput(0);
+ IODescriptor ir_weights = op.getInput(1);
IODescriptor ir_output = op.getOutput(0);
auto ir_weights_op = dynamic_cast<ops::ConstantOp*>(ir_weights.op);
if (ir_weights_op == nullptr)
throw AclCppException("Unsupported operation type");
- auto ir_weights_tensor = transposeTensor<3, 2, 0, 1>(ir_weights_op->getValue());
+ auto ir_weights_tensor = ir_weights_op->getValue();
+ if (op.getType() == Operation::Type::conv2D) {
+ // [Co, Hk, Wk, Ci] -> [Co, Ci, Hk, Wk].
+ ir_weights_tensor = transposeTensor<0, 3, 1, 2>(ir_weights_tensor);
+ } else {
+ ir_weights_tensor = transposeTensor<3, 2, 0, 1>(ir_weights_tensor);
+ }
+
const Shape& ir_weights_shape = ir_weights_tensor.getShape();
// get output tensor name that is used as base for other names
std::shared_ptr<ArtifactId> genTransposeACLtoMIR(const std::string& name,
const mir::Shape& input_shape,
const std::shared_ptr<ArtifactId>& input);
+
+ /**
+ * @brief Generate DOM for PadStrideInfo object
+ * @tparam Oper Class of operation with pad and stride properties
+ * @param op Operation entity to generate variable for
+ * @param prefix First part of generated variable name
+ * @param block Code block where insert variable declaration
+ * @return generated variable
+ */
+ template <typename Op>
+ std::shared_ptr<ArtifactVariable>
+ genPadStrideInfo(const Op& op, const std::string& prefix, ArtifactBlock* block);
+
/**
* @brief The common part of the convolution and the depthwise convolution.
*/
// first we need to convert kernel of grouped convolution to appropriate ordinary kernel
if (num_groups != 1)
kernel_tensor = fixGroupedKernel(num_groups, kernel_tensor);
-
+ kernel_tensor = transposeTensor<3, 0, 1, 2>(kernel_tensor);
auto kernel = createOp<ops::ConstantOp>("Constant", kernel_tensor)->getOutput(0);
result = createOp<ops::Conv2DOp>("Conv2D", convertCaffeToMIR(inputs[0]), kernel,
stride_shape, pad_before, pad_after);
// first we need to convert kernel of grouped convolution to appropriate ordinary kernel
kernel_weights = fixGroupedKernel(params.group(), kernel_weights);
}
+ kernel_weights = transposeTensor<3, 0, 1, 2>(kernel_weights);
auto kernel = createOp<ops::ConstantOp>("", kernel_weights)->getOutput(0);
result = createOp<ops::Conv2DOp>(layer.name(), convertCaffeToMIR(inputs[0]), kernel,
strides, padding, padding);
#include "Conv2D.h"
#include "core/modelIR/ShapeRange.h"
+#include "core/modelIR/TensorUtil.h"
namespace nnc {
// ModelIR now stores the kernel as [out_channels, filter_height, filter_width, in_channels] (OHWI);
// the constructor transposes it to [filter_height, filter_width, in_channels, out_channels] (HWIO),
// which is the layout the computation below assumes.
// Refer to https://www.tensorflow.org/api_docs/python/tf/nn/conv2d for info
std::vector<TensorVariant> Conv2D::operator()() {
- const Shape& in_shape = _op.getInputShape(0);
- const Shape& kernel_shape = _op.getInputShape(1);
+ const Shape& in_shape = _input.getShape();
+ const Shape& kernel_shape = _kernel.getShape();
const Shape& out_shape = _op.getOutputShape(0);
const Shape& strides = _op.getStrides();
const std::vector<int32_t>& pads = _op.getPaddingBefore();
int32_t num_kernels = kernel_shape.dim(3);
+ Tensor<float> kernel(_kernel);
auto res = allocate_tensor(_op.getOutputShape(0));
Tensor<float> res_accessor(res);
in_index.at(3) = kernel_index.at(2);
if (in_range.contains(in_index)) {
- auto kernel_region = _kernel.getRegion(kernel_index);
+ auto kernel_region = kernel.getRegion(kernel_index);
assert(kernel_region.size() == num_kernels);
float in_val = _input.at(in_index);
for (int32_t kernel_i = 0; kernel_i < num_kernels; ++kernel_i) {
Conv2D::Conv2D(const TensorVariant& input,
const TensorVariant& kernel,
const ops::Conv2DOp& op)
- : _input(input), _kernel(kernel), _op(op) {
+ : _input(input), _kernel(transposeTensor<1, 2, 3, 0>(kernel)), _op(op) {
}
} // namespace nnc
private:
const mir::Tensor<float> _input;
- mir::Tensor<float> _kernel;
+ mir::TensorVariant _kernel;
const mir::ops::Conv2DOp& _op;
};
// first we need to convert kernel of grouped convolution to appropriate ordinary kernel
if (num_groups != 1)
kernel_tensor = fixGroupedKernel(num_groups, kernel_tensor);
+ kernel_tensor = transposeTensor<3, 0, 1, 2>(kernel_tensor);
auto kernel = createOp<ops::ConstantOp>(kernel_tensor)->getOutput(0);
result = createOp<ops::Conv2DOp>(transposed_input, kernel, cdata.strides_shape,
cdata.padding_before, cdata.padding_after);
void ModelAnalyzer::visit(ops::Conv2DOp& op) {
const auto& kernel_shape = op.getInputShape(1);
const auto& out_shape = op.getOutputShape(0);
- const int32_t tmp_size = kernel_shape.dim(0) * kernel_shape.dim(1) * kernel_shape.dim(2)
+ const int32_t tmp_size = kernel_shape.dim(1) * kernel_shape.dim(2) * kernel_shape.dim(3)
* out_shape.dim(0) * out_shape.dim(1) * out_shape.dim(2);
updateMaxTemporarySize(static_cast<size_t>(tmp_size));
appendOperationToInference(&op, "conv2d", {_temp_tensor_id});
const auto pad_h = static_cast<int16>(pads[0]);
const auto pad_w = static_cast<int16>(pads[1]);
- // Transpose the kernel from HWIO to OHWI format.
- const Shape kernel_shape = kernel.getShape();
- const RuntimeShape kernel_rt_shape = {static_cast<int>(kernel_shape[3]),
- static_cast<int>(kernel_shape[0]),
- static_cast<int>(kernel_shape[1]),
- static_cast<int>(kernel_shape[2])};
-
- const RuntimeShape out_rt_shape = shapeToRuntimeShape(out_shape);
- const RuntimeShape im2col_shape{out_rt_shape.Dims(0), //batch
- out_rt_shape.Dims(1), //height
- out_rt_shape.Dims(2), //width
- static_cast<int>(kernel_shape[2] *
- kernel_shape[0] *
- kernel_shape[1])};
+ const Shape& kernel_shape = kernel.getShape();
+ const Shape im2col_shape{out_shape[0], out_shape[1], out_shape[2],
+ kernel_shape[1] * kernel_shape[2] * kernel_shape[3]};
float* im2col_data = nullptr;
- if (stride_w != 1 || stride_h != 1 || kernel_shape[0] != 1 || kernel_shape[1] != 1) {
+ if (stride_w != 1 || stride_h != 1 || kernel_shape[1] != 1 || kernel_shape[2] != 1) {
im2col_data = temporary.getData();
}
- const ConvParams conv_params{{pad_w, pad_h}, stride_w, stride_h};
-
- unique_ptr<float[]> kernel_data(new float[kernel_rt_shape.FlatSize()]);
- TransposeParams transpose_params{4, {3, 0, 1, 2}};
- Transpose(transpose_params,
- shapeToRuntimeShape(kernel_shape), kernel.getData(),
- kernel_rt_shape, kernel_data.get());
+ const ConvParams conv_params{{pad_w, pad_h}, stride_w, stride_h};
Conv(conv_params,
shapeToRuntimeShape(input.getShape()), input.getData(),
- kernel_rt_shape, kernel_data.get(),
- out_rt_shape, out.getData(),
- im2col_shape, im2col_data);
+ shapeToRuntimeShape(kernel_shape), kernel.getData(),
+ shapeToRuntimeShape(out_shape), out.getData(),
+ shapeToRuntimeShape(im2col_shape), im2col_data);
}
void convTransposed2d(Tensor& out, const char* params, const Tensor& input, const Tensor& kernel,
? std::max(0, window_shape.dim(i) - strides.dim(i))
: std::max(0, window_shape.dim(i) - input_shape.dim(1 + i) % strides.dim(i));
padding_before[i] = padding / 2;
- padding_after[i] = (padding + 1) / 2;
+ padding_after[i] = padding - padding_before[i];
}
break;
case tflite::Padding_VALID:
auto kernel = inputs.at(1);
auto bias = inputs.at(2);
- // OHWI -> HWIO
- // TODO Insert TransposeOp instead when ACL backend is ready for that.
- const auto& kernel_tensor = mir::transposeTensor<1, 2, 3, 0>(extractTensor(kernel));
- kernel = createOp<ops::ConstantOp>(kernel_tensor)->getOutput(0);
+ kernel = createOp<ops::ConstantOp>(extractTensor(kernel))->getOutput(0);
Shape strides{opts->stride_h(), opts->stride_w()};
std::vector<int32_t> padding_before(2);
const auto& input_shape = input.getShape();
const auto& kernel_shape = kernel.getShape();
- calculatePadding(opts->padding(), input_shape, kernel_shape,
+ Shape window_shape{kernel_shape.dim(1), kernel_shape.dim(2)};
+ calculatePadding(opts->padding(), input_shape, window_shape,
strides, padding_before, padding_after);
auto result = createOp<ops::Conv2DOp>(input, kernel, strides, padding_before, padding_after);
const auto& input_shape = input.getShape();
const auto& kernel_shape = kernel.getShape();
- calculatePadding(opts->padding(), input_shape, kernel_shape,
+ Shape window_shape{kernel_shape.dim(0), kernel_shape.dim(1)};
+ calculatePadding(opts->padding(), input_shape, window_shape,
strides, padding_before, padding_after);
auto result = createOp<ops::DepthwiseConv2DOp>(input, kernel,
TEST(acl_backend_mir_to_dom, conv2d) {
const int32_t channels = 3;
- mir::Shape kernel_shape{3, 3, channels, 1}; // Height, Width, input Channels, output Channel
+ mir::Shape kernel_shape{1, 3, 3, channels}; // output Channels, Height, Width, input Channels
mir::Shape strides{1, 1};
mir::TensorVariant kernel_tensor = createTensorVariant(kernel_shape);
for (iT stride_h = 1; stride_h <= 3; ++stride_h)
for (iT stride_w = 1; stride_w <= 3; ++stride_w) {
vector<int> input_shape_data{3, 5, 7, static_cast<int>(input_c)}; // NHWC
- vector<int> kernel_shape_data{kernel_h, kernel_w, input_c, output_c}; // HWCN
+ vector<int> kernel_shape_data{output_c, kernel_h, kernel_w, input_c}; // OHWI
mir::Shape strides{stride_h, stride_w};
vector<unique_ptr<mir::TensorVariant>> input_ntensors(2);
Tensor input_atensor0;