[nnc] Support batch axis in acl backend (#2462)

author Efimov Alexander/AI Tools Lab/./Samsung Electronics <a.efimov@samsung.com>

Fri, 30 Nov 2018 17:36:50 +0000 (20:36 +0300)

committer GitHub Enterprise <noreply-CODE@samsung.com>

Fri, 30 Nov 2018 17:36:50 +0000 (20:36 +0300)
author Efimov Alexander/AI Tools Lab/./Samsung Electronics <a.efimov@samsung.com>
Fri, 30 Nov 2018 17:36:50 +0000 (20:36 +0300)
committer GitHub Enterprise <noreply-CODE@samsung.com>
Fri, 30 Nov 2018 17:36:50 +0000 (20:36 +0300)
diff --git a/contrib/nnc/include/core/modelIR/TensorUtil.h b/contrib/nnc/include/core/modelIR/TensorUtil.h

index d289b16..0685670 100644 (file)
--- a/contrib/nnc/include/core/modelIR/TensorUtil.h
+++ b/contrib/nnc/include/core/modelIR/TensorUtil.h
@@ -34,6 +34,9 @@ namespace mir
  template<int32_t... Ints>
  Shape transposeShape(const Shape& shape) {
    std::vector<int32_t> permutes{Ints...};
+
+  assert(permutes.size() == shape.rank());
+
    Shape result(shape);
      int32_t nof_permutes = std::min<int32_t>(shape.rank(), permutes.size());
  
diff --git a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp

index 06aa194..c04d3c1 100644 (file)
--- a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp
+++ b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp
@@ -81,13 +81,14 @@ const ArtifactModule& AclCppOpGenerator::generate(mir::Graph* g) {
  }
  
  void AclCppOpGenerator::visit(ops::ConcatOp& op) {
-  static const char* axis_names[] = {"arm_compute::DataLayoutDimension::HEIGHT",
+  static const char* axis_names[] = {"arm_compute::DataLayoutDimension::BATCHES",
+                                     "arm_compute::DataLayoutDimension::HEIGHT",
                                       "arm_compute::DataLayoutDimension::WIDTH",
                                       "arm_compute::DataLayoutDimension::CHANNEL"};
  
    int axis = op.getAxis() < 0 ? op.getOutputShape(0).rank() + op.getAxis() : op.getAxis();
    assert(axis < sizeof(axis_names) / sizeof(const char*));
-  auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0)));
+  auto out = genTensor(op, transposeShape<2, 1, 3, 0>(op.getOutputShape(0)));
    auto prefix = out->name() + "_concatenate_layer";
    auto inputs_var = _constrBlock->var("std::vector<arm_compute::ICLTensor*>", prefix + "_inputs");
    auto inputs = inputs_var->use();
@@ -206,7 +207,7 @@ static shared_ptr<ArtifactVariable>
      genPadStrideInfo(const Oper& op, const string& prefix, ArtifactBlock* block) {
    using AF = ArtifactFactory;
  
-  const Shape& strides = transposeShape<1, 0>(op.getStrides());
+  const Shape& strides = transposeShape<1, 0, 2>(op.getStrides());
    assert(strides.rank() == 3 && strides.dim(2) == 1);
  
    // array of paddings
@@ -273,7 +274,7 @@ void AclCppOpGenerator::visit(ops::PoolOp& op) {
        pooling_type = "arm_compute::PoolingType::AVG";
        break;
      default:
-      assert(false && "Not a supported pooling type");
+      throw AclCppException("Unsupported pooling type");
    }
    
    auto& prev_nodes = op.getPrevNodes();
@@ -281,7 +282,7 @@ void AclCppOpGenerator::visit(ops::PoolOp& op) {
  
    auto in_op = prev_nodes[0].op;
    auto in = AF::id(tensorName(in_op));
-  auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0)));
+  auto out = genTensor(op, transposeShape<2, 1, 3, 0>(op.getOutputShape(0)));
    auto prefix = out->name() + "_pooling_layer";
  
    auto pad_stride_info_var = genPadStrideInfo(op, prefix, _constrBlock);
@@ -314,7 +315,10 @@ void AclCppOpGenerator::visit(ops::FullyConnectedOp& op) {
    auto in = AF::id(tensorName(in_op));
  
    // Create the output tensor in the DOM.
-  auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0)));
+  const Shape& out_shape = op.getOutputShape(0);
+  if (out_shape.rank() != 2)
+    throw AclCppException("Unsupported number of dimensions in fc layer");
+  auto out = genTensor(op, transposeShape<1, 0>(out_shape));
    string operation_name = out->name() + "_fully_connected_layer";
  
    // Create the weights tensor in the DOM and use its id.
@@ -346,7 +350,22 @@ void AclCppOpGenerator::visit(ops::BiasAddOp& op) {
    auto in = AF::id(tensorName(in_op));
  
    // Create the output tensor in the DOM and obtain its identifier.
-  auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0)));
+  const Shape& out_shape = op.getOutputShape(0);
+  Shape transposed_shape;
+  switch (out_shape.rank()) {
+    case 4:
+      transposed_shape = transposeShape<2, 1, 3, 0>(out_shape);
+      break;
+    case 2:
+      transposed_shape = transposeShape<1, 0>(out_shape);
+      break;
+    case 1:
+      transposed_shape = out_shape;
+      break;
+    default:
+      throw AclCppException("Unsupported number of dimensions: " + to_string(out_shape.rank()));
+  }
+  shared_ptr<ArtifactId> out = genTensor(op, transposed_shape);
  
    // Prefix used for the name of variables related to the operation implementation.
    string operation_name = out->name() + "_bias_add_layer";
@@ -363,10 +382,10 @@ void AclCppOpGenerator::visit(ops::BiasAddOp& op) {
      ir_biases_shape = transposeShape<1, 0>(op.getInputShape(0));
    } else {
      // ACL CLArithmeticAddition supports input tensors broadcasting.
-    for (int i = 0; i < ir_input_shape.rank() - 1; ++i)
+    for (int i = 0; i < ir_input_shape.rank(); ++i)
        ir_biases_shape.dim(i) = 1;
  
-    ir_biases_shape.dim(-1) = ir_biases.getShape().dim(0);
+    ir_biases_shape.dim(2) = ir_biases.getShape().dim(0);
    }
    auto biases = genTensor(operation_name + "_biases", ir_biases_shape);
  
@@ -382,7 +401,7 @@ void AclCppOpGenerator::visit(ops::BiasAddOp& op) {
  }
  
  void AclCppOpGenerator::visit(ops::VariableOp& op) {
-  auto tensor = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0)));
+  auto tensor = genTensor(op, transposeShape<2, 1, 3, 0>(op.getOutputShape(0)));
    allocate(tensor);
  }
  
@@ -404,7 +423,31 @@ void AclCppOpGenerator::visit(ops::ReshapeOp& op) {
    auto in = AF::id(tensorName(in_op));
  
    // Create the output tensor in the DOM and return its id.
-  auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0)));
+  const Shape& out_shape = op.getOutputShape(0);
+
+  // This check confirms that we can "safely" reshape data
+  // The only safe configuration of output shape is (1...1, N, 1 ... 1)
+  bool found_non_one = false;
+  for (int32_t i = 0; i < out_shape.rank(); ++i) {
+    if (out_shape.dim(i) != 1) {
+      if (found_non_one)
+        throw AclCppException("Unsupported result of reshape");
+      found_non_one = true;
+    }
+  }
+
+  Shape transposed_shape;
+  switch (out_shape.rank()) {
+    case 2:
+      transposed_shape = transposeShape<1, 0>(out_shape);
+      break;
+    case 1:
+      transposed_shape = out_shape;
+      break;
+    default:
+      throw AclCppException("Unsupported number of dimensions: " + to_string(out_shape.rank()));
+  }
+  shared_ptr<ArtifactId> out = genTensor(op, transposed_shape);
  
    // Create an instance of the CLReshapeLayer class as a member of the artifact class.
    auto layer = genLayer("arm_compute::CLReshapeLayer", out->name() + "_reshape_layer",
@@ -423,22 +466,38 @@ void AclCppOpGenerator::visit(ops::ScaleOp& op) {
    // Get input tensor identifier in the generated artifact.
    auto in = AF::id(tensorName(in_op));
  
-  // Generate output tensor description in the DOM.
-  auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0)));
+  // Create the output tensor in the DOM and obtain its identifier.
+  const Shape& out_shape = op.getOutputShape(0);
+  Shape transposed_shape;
+  switch (out_shape.rank()) {
+    case 4:
+      transposed_shape = transposeShape<2, 1, 3, 0>(out_shape);
+      break;
+    case 2:
+      transposed_shape = transposeShape<1, 0>(out_shape);
+      break;
+    case 1:
+      transposed_shape = out_shape;
+      break;
+    default:
+      throw AclCppException("Unsupported number of dimensions: " + to_string(out_shape.rank()));
+  }
+  shared_ptr<ArtifactId> out = genTensor(op, transposed_shape);
+
    auto operation_name = out->name() + "_scale_layer";
  
    const auto& ir_scales = op.getWeights();
  
    // Reshape the IR scales tensor and generate the corresponding DOM tensor.
    Shape ir_scales_shape;
-  const auto ir_input_shape = transposeShape<1, 0, 2>(op.getInputShape(0));
+  const auto ir_input_shape = transposeShape<2, 1, 3, 0>(op.getInputShape(0));
    ir_scales_shape.resize(ir_input_shape.rank());
  
    // ACL CLArithmeticDivision supports input tensors broadcasting.
-  for (int i = 0; i < ir_input_shape.rank() - 1; ++i)
+  for (int i = 0; i < ir_input_shape.rank(); ++i)
      ir_scales_shape.dim(i) = 1;
  
-  ir_scales_shape.dim(-1) = ir_scales.getShape().dim(0);
+  ir_scales_shape.dim(2) = ir_scales.getShape().dim(0);
    auto scales = genTensor(operation_name + "_scales", ir_scales_shape);
  
    // We do not use the genMultiplication() function here because the input needs broadcasting.
@@ -558,7 +617,7 @@ void AclCppOpGenerator::genConvolution(Op& op, const string& acl_func_name, cons
    auto in = AF::id(tensorName(in_op));
  
    // Create the output tensor in the DOM.
-  auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0)));
+  auto out = genTensor(op, transposeShape<2, 1, 3, 0>(op.getOutputShape(0)));
    string operation_name = out->name() + suffix;
  
    // Generate a tensor for weights (kernel) in the DOM.
@@ -600,7 +659,7 @@ void AclCppOpGenerator::genActivation(mir::Operation& op, const std::string& act
    auto in = AF::id(tensorName(in_op));
  
    // Create the output tensor in the DOM and return its id.
-  auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0)));
+  auto out = genTensor(op, transposeShape<2, 1, 3, 0>(op.getOutputShape(0)));
    auto prefix = out->name() + "_activation_layer";
  
    // Create an instance of the ActivationLayerInfo class as a local variable in the artifact
diff --git a/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp b/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp

index 952a039..e62edcc 100644 (file)
--- a/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp
+++ b/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp
@@ -151,7 +151,7 @@ static int getAxisValue(const OptsType& opts) {
  
    // axis 1 represents channels in caffe, in Model ir it is second dimension for now
    if (axis == 1)
-    return 2;
+    return 3;
  
    return axis;
  }
@@ -400,15 +400,11 @@ CaffeOpCreator::convertPooling(const std::vector<IODescriptor>& inputs,
  std::vector<IODescriptor>
  CaffeOpCreator::convertSoftmax(const std::vector<IODescriptor>& inputs,
                                 const caffe::SoftmaxParameter& opts) {
+  assert(inputs.size() == 1);
    auto input = inputs[0];
    auto& input_shape = input.op->getOutputShape(input.index);
    // Workaround until we've got Transpose operation.
    assert(input_shape.rank() == 4 || input_shape.rank() == 2);
-  if (input_shape.rank() == 4) {
-    assert(input_shape.dim(0) == 1);
-    Shape new_shape{input_shape.dim(1), input_shape.dim(2), input_shape.dim(3)};
-    input = createOp<ops::ReshapeOp>(input, new_shape)->getOutput(0);
-  }
    auto softmax = createOp<ops::SoftmaxOp>(input, getAxisValue(opts));
    return {softmax->getOutput(0)};
  }
author	Efimov Alexander/AI Tools Lab/./Samsung Electronics <a.efimov@samsung.com>
	Fri, 30 Nov 2018 17:36:50 +0000 (20:36 +0300)
committer	GitHub Enterprise <noreply-CODE@samsung.com>
	Fri, 30 Nov 2018 17:36:50 +0000 (20:36 +0300)
contrib/nnc/include/core/modelIR/TensorUtil.h		patch \| blob \| history
contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp		patch \| blob \| history
contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp		patch \| blob \| history