[nnc] Prepare the ACL backend for the future changes related to adding of the Tensor...

author Сергей Баранников/AI Tools Lab /SRR/Engineer/삼성전자 <s.barannikov@samsung.com>

Mon, 28 Jan 2019 10:24:38 +0000 (13:24 +0300)

committer Efimov Alexander/AI Tools Lab/./Samsung Electronics <a.efimov@samsung.com>

Mon, 28 Jan 2019 10:24:38 +0000 (13:24 +0300)
author Сергей Баранников/AI Tools Lab /SRR/Engineer/삼성전자 <s.barannikov@samsung.com>
Mon, 28 Jan 2019 10:24:38 +0000 (13:24 +0300)
committer Efimov Alexander/AI Tools Lab/./Samsung Electronics <a.efimov@samsung.com>
Mon, 28 Jan 2019 10:24:38 +0000 (13:24 +0300)
diff --git a/contrib/nnc/include/core/modelIR/Operation.h b/contrib/nnc/include/core/modelIR/Operation.h

index 86c7b90..9794ccc 100644 (file)
--- a/contrib/nnc/include/core/modelIR/Operation.h
+++ b/contrib/nnc/include/core/modelIR/Operation.h
@@ -56,6 +56,11 @@ public:
    std::size_t getNumInputs() const { return _num_inputs; }
    std::size_t getNumOutputs() const { return _num_outputs; }
  
+  IODescriptor getInput(std::size_t index) {
+    assert(index < _inputs.size());
+    return _inputs[index];
+  }
+
    const IODescriptor getOutput(std::size_t index);
  
    const std::vector<IODescriptor>& getPrevNodes() const { return _inputs; }
diff --git a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp

index edb2733..9b745f3 100644 (file)
--- a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp
+++ b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp
@@ -92,7 +92,7 @@ const ArtifactModule& AclCppOpGenerator::generate(mir::Graph* g) {
    g->accept(this);
  
    // Generate all the deferred entities.
-  genNamed();
+  genNamed(g);
    genPersistentTensorAllocations();
    genDeserializations();
    genFillings();
@@ -104,6 +104,9 @@ const ArtifactModule& AclCppOpGenerator::generate(mir::Graph* g) {
  }
  
  void AclCppOpGenerator::visit(ops::ConcatOp& op) {
+  const auto& ir_inputs = op.getPrevNodes();
+  IODescriptor ir_output = op.getOutput(0);
+
    static const char* axis_names[] = {"arm_compute::DataLayoutDimension::BATCHES",
                                       "arm_compute::DataLayoutDimension::CHANNEL",
                                       "arm_compute::DataLayoutDimension::HEIGHT",
@@ -114,13 +117,13 @@ void AclCppOpGenerator::visit(ops::ConcatOp& op) {
           "axis outside this range is not supported in ACL");
    const char* axis_name = axis_names[axis];
  
-  auto out = genTensor(op, op.getOutputShape(0));
+  auto out = genTensor(ir_output);
    auto prefix = out->name() + "_concatenate_layer";
    auto inputs_var = _constrBlock->var("std::vector<arm_compute::ICLTensor*>", prefix + "_inputs");
    auto inputs = inputs_var->use();
  
-  for (auto i : op.getPrevNodes())
-    _constrBlock->call("push_back", {AF::ref(AF::id(tensorName(i.op)))}, inputs);
+  for (IODescriptor ir_input : ir_inputs)
+    _constrBlock->call("push_back", {AF::ref(AF::id(tensorName(ir_input)))}, inputs);
  
    auto layer = genLayer("arm_compute::CLConcatenateLayer", prefix,
                          {inputs, AF::ref(out), AF::lit(axis_name)});
@@ -138,12 +141,13 @@ void AclCppOpGenerator::visit(ops::DepthwiseConv2DOp& op) {
  }
  
  void AclCppOpGenerator::visit(ops::SoftmaxOp& op) {
-  auto& in_ops = op.getPrevNodes();
-  assert(in_ops.size() == 1);
-  auto in_op = in_ops[0].op;
-  auto in = AF::id(tensorName(in_op));
+  assert(op.getNumInputs() == 1);
+  IODescriptor ir_input = op.getInput(0);
+  IODescriptor ir_output = op.getOutput(0);
+
+  auto in = AF::id(tensorName(ir_input));
  
-  int rank = op.getOutputShape(0).rank();
+  int rank = ir_output.getShape().rank();
    // CLPermute does not support all kinds of permutations now.
    // rank can be more than 2 in our models, so we can not use CLTranspose.
    // This means we can support tensors with no more then one axis > 1.
@@ -152,7 +156,7 @@ void AclCppOpGenerator::visit(ops::SoftmaxOp& op) {
    int nof_long_axes = 0;
  
    for (int i = 0; i < rank; ++i) {
-    if (op.getOutputShape(0).dim(i) > 1)
+    if (ir_output.getShape().dim(i) > 1)
        ++nof_long_axes;
    }
  
@@ -161,9 +165,7 @@ void AclCppOpGenerator::visit(ops::SoftmaxOp& op) {
      throw AclCppException("Unsupported Softmax operation with several dimensions greater than 1");
  
    // Create the output tensor.
-  const Shape& in_out_shape = op.getOutputShape(0);
-
-  shared_ptr<ArtifactId> output = genTensor(op, in_out_shape);
+  shared_ptr<ArtifactId> output = genTensor(ir_output);
    auto layer_name_prefix = output->name();
  
    if (axis == 0) {
@@ -180,7 +182,7 @@ void AclCppOpGenerator::visit(ops::SoftmaxOp& op) {
      // Then we need two tensors for intermediate results. This is because we do a couple of auxiliary
      // reshapes: one to transform the input tensor to a unidimensional tensor and the second to
      // transorm the result of the softmax operation back to the original form.
-    Shape sm_shape(in_out_shape);
+    Shape sm_shape(ir_output.getShape());
  
      std::swap(sm_shape.dim(axis), sm_shape.dim(-1));
  
@@ -266,6 +268,10 @@ AclCppOpGenerator::genTransposeACLtoMIR(const string& name,
  }
  
  void AclCppOpGenerator::visit(ops::PoolOp& op) {
+  assert(op.getNumInputs() == 1);
+  IODescriptor ir_input = op.getInput(0);
+  IODescriptor ir_output = op.getOutput(0);
+
    const char* pooling_type = nullptr;
  
    switch (op.getPoolingType()) {
@@ -278,20 +284,16 @@ void AclCppOpGenerator::visit(ops::PoolOp& op) {
      default:
        throw AclCppException("Unsupported pooling type");
    }
-  
-  auto& prev_nodes = op.getPrevNodes();
-  assert(prev_nodes.size() == 1);
  
-  auto in_op = prev_nodes[0].op;
-  string in_name = tensorName(in_op);
+  string in_name = tensorName(ir_input);
    auto in_id = AF::id(in_name);
  
-  const string output_tensor_name = tensorName(&op);
+  const string output_tensor_name = tensorName(ir_output);
  
    // Transpose data from MIR format to format compatible with ACL
    const string transposed_input_name = output_tensor_name + "transposed_input";
    shared_ptr<ArtifactId> transposed_input =
-      genTransposeMIRtoACL(transposed_input_name, op.getInputShape(0), in_id);
+      genTransposeMIRtoACL(transposed_input_name, ir_input.getShape(), in_id);
  
    const string layer_name = output_tensor_name + "_pooling_layer";
  
@@ -315,7 +317,7 @@ void AclCppOpGenerator::visit(ops::PoolOp& op) {
    shared_ptr<ArtifactId> pooling_info = pooling_info_var->use();
  
    // Generate auxiliary tensor to hold transposed output of pool in NCHW format
-  Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(op.getOutputShape(0));
+  Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(ir_output.getShape());
    shared_ptr<ArtifactId> transposed_output =
        genTensor(layer_name + "_out_transpose", transposed_output_shape);
  
@@ -330,31 +332,28 @@ void AclCppOpGenerator::visit(ops::PoolOp& op) {
  
    genTensorDeallocation(_infBlock, transposed_input);
    genTensorDeallocation(_infBlock, transposed_output);
-
-  if (op.getNextNodes().empty())
-    _outputs.insert(&op);
  }
  
  void AclCppOpGenerator::visit(ops::FullyConnectedOp& op) {
-  auto& prev_nodes = op.getPrevNodes();
-  assert(prev_nodes.size() == 2);
+  assert(op.getNumInputs() == 2);
+  IODescriptor ir_input = op.getInput(0);
+  IODescriptor ir_weights = op.getInput(1);
+  IODescriptor ir_output = op.getOutput(0);
  
-  auto in_op = prev_nodes[0].op;
-  auto ir_weights_op = dynamic_cast<mir::ops::ConstantOp*>(prev_nodes[1].op);
+  auto ir_weights_op = dynamic_cast<mir::ops::ConstantOp*>(ir_weights.op);
    if (ir_weights_op == nullptr)
      throw AclCppException("Unsupported operation type");
  
-  const TensorVariant ir_weights = transposeTensor<1, 0>(ir_weights_op->getValue());
-  const Shape& ir_weights_shape = ir_weights.getShape();
+  const TensorVariant ir_weights_tensor = transposeTensor<1, 0>(ir_weights_op->getValue());
+  const Shape& ir_weights_shape = ir_weights_tensor.getShape();
  
    // Get the input node tensor id in the DOM.
-  auto in = AF::id(tensorName(in_op));
+  auto in = AF::id(tensorName(ir_input));
  
    // Create the output tensor in the DOM.
-  const Shape& out_shape = op.getOutputShape(0);
-  if (out_shape.rank() != 2)
+  if (ir_output.getShape().rank() != 2)
      throw AclCppException("Unsupported number of dimensions in fc layer");
-  auto out = genTensor(op, out_shape);
+  auto out = genTensor(ir_output);
    string operation_name = out->name() + "_fully_connected_layer";
  
    // Create the weights tensor in the DOM and use its id.
@@ -366,7 +365,7 @@ void AclCppOpGenerator::visit(ops::FullyConnectedOp& op) {
  
    addToPersistentTensors(weights);
    // Serialize the weights tensor and generate the function to deserialize it in the artifact.
-  serializeTensor(weights, ir_weights);
+  serializeTensor(weights, ir_weights_tensor);
    addToPersistentTensors(out);
    genLayerExecution(layer);
  }
@@ -380,27 +379,29 @@ void AclCppOpGenerator::visit(ops::CappedReluOp& op) {
  }
  
  void AclCppOpGenerator::visit(ops::BiasAddOp& op) {
-  auto& prev_nodes = op.getPrevNodes();
-  assert(prev_nodes.size() == 2);
-  auto in_op = prev_nodes[0].op;
-  auto ir_biases_op = dynamic_cast<ops::ConstantOp*>(prev_nodes[1].op);
-  if (ir_biases_op == nullptr)
+  assert(op.getNumInputs() == 2);
+  IODescriptor ir_input = op.getInput(0);
+  IODescriptor ir_weights = op.getInput(1);
+  IODescriptor ir_output = op.getOutput(0);
+
+  auto ir_weights_op = dynamic_cast<ops::ConstantOp*>(ir_weights.op);
+  if (ir_weights_op == nullptr)
      throw AclCppException("Unsupported operation type");
  
-  const auto& ir_biases = ir_biases_op->getValue();
-  assert(ir_biases.getShape().rank() == 1);
+  const auto& ir_weights_tensor = ir_weights_op->getValue();
+  assert(ir_weights_tensor.getShape().rank() == 1);
  
    // Get the input node tensor id in the DOM.
-  shared_ptr<ArtifactId> input = AF::id(tensorName(in_op));
+  shared_ptr<ArtifactId> input = AF::id(tensorName(ir_input));
  
-  const string output_tensor_name = tensorName(&op);
+  const string output_tensor_name = tensorName(ir_output);
  
    shared_ptr<ArtifactId> transposed_input;
    Shape transposed_output_shape;
    shared_ptr<ArtifactId> transposed_output;
  
    // Create the output tensor in the DOM and obtain its identifier.
-  const Shape& out_shape = op.getOutputShape(0);
+  const Shape& out_shape = ir_output.getShape();
    const string transposed_output_name = output_tensor_name + "_transposed_output";
  
    switch (out_shape.rank()) {
@@ -408,7 +409,7 @@ void AclCppOpGenerator::visit(ops::BiasAddOp& op) {
        // transpose input to NCHW format supported by ACL
        const string transposed_input_name = output_tensor_name + "_transposed_input";
        transposed_output_shape = transposeShape<0, 3, 1, 2>(out_shape);
-      transposed_input = genTransposeMIRtoACL(transposed_input_name, op.getInputShape(0), input);
+      transposed_input = genTransposeMIRtoACL(transposed_input_name, ir_input.getShape(), input);
  
        transposed_output =
            genTensor(transposed_output_name, transposed_output_shape);
@@ -418,7 +419,7 @@ void AclCppOpGenerator::visit(ops::BiasAddOp& op) {
      case 1:
        transposed_output_shape = out_shape;
        transposed_input = input;
-      transposed_output = genTensor(tensorName(&op), out_shape);
+      transposed_output = genTensor(tensorName(ir_output), out_shape);
        break;
      default:
        throw AclCppException("Unsupported number of dimensions: " + to_string(out_shape.rank()));
@@ -428,14 +429,14 @@ void AclCppOpGenerator::visit(ops::BiasAddOp& op) {
    string layer_name = transposed_output->name() + "_bias_add_layer";
  
    // Reshape the IR biases tensor and generate the corresponding DOM tensor.
-  const auto ir_input_shape = op.getInputShape(0);
+  const auto& ir_input_shape = ir_input.getShape();
    Shape ir_biases_shape(ir_input_shape.rank());
  
    // ACL CLArithmeticAddition supports input tensors broadcasting.
    for (int i = 0; i < ir_input_shape.rank(); ++i)
      ir_biases_shape.dim(i) = 1;
  
-  ir_biases_shape.dim(1) = ir_biases.getShape().dim(0);
+  ir_biases_shape.dim(1) = ir_weights_tensor.getShape().dim(0);
    auto biases = genTensor(layer_name + "_biases", ir_biases_shape);
  
    // Instantiate the CLArithmeticAddition object.
@@ -445,7 +446,7 @@ void AclCppOpGenerator::visit(ops::BiasAddOp& op) {
  
    addToPersistentTensors(biases);
    // Save the IR biases tensor to later read this in the artifact.
-  serializeTensor(biases, ir_biases);
+  serializeTensor(biases, ir_weights_tensor);
    genTensorAllocation(_infBlock, transposed_output);
    genLayerExecution(layer);
  
@@ -457,14 +458,11 @@ void AclCppOpGenerator::visit(ops::BiasAddOp& op) {
      genTensorDeallocation(_infBlock, transposed_input);
      genTensorDeallocation(_infBlock, transposed_output);
    }
-
-  if (op.getNextNodes().empty())
-    _outputs.insert(&op);
  }
  
  void AclCppOpGenerator::visit(ops::InputOp& op) {
    shared_ptr<ArtifactId> tensor;
-  tensor = genTensor(op, op.getOutputShape(0));
+  tensor = genTensor(op.getOutput(0));
    addToPersistentTensors(tensor);
  }
  
@@ -499,9 +497,8 @@ static bool shouldSerializeConstant(ops::ConstantOp& op) {
  
  void AclCppOpGenerator::visit(ops::ConstantOp& op) {
    if (shouldSerializeConstant(op)) {
-    Shape out_shape = op.getOutputShape(0);
      TensorVariant data = op.getValue();
-    shared_ptr<ArtifactId> out = genTensor(op, out_shape);
+    shared_ptr<ArtifactId> out = genTensor(op.getOutput(0));
      addToPersistentTensors(out);
      serializeTensor(out, data);
    }
@@ -512,15 +509,15 @@ void AclCppOpGenerator::visit(ops::ReluOp& op) {
  }
  
  void AclCppOpGenerator::visit(ops::ReshapeOp& op) {
-  auto& prev_nodes = op.getPrevNodes();
-  assert(prev_nodes.size() == 1);
+  assert(op.getNumInputs() == 1);
+  IODescriptor ir_input = op.getInput(0);
+  IODescriptor ir_output = op.getOutput(0);
  
    // Get the id of the input tensor in the generated artifact.
-  auto in_op = prev_nodes[0].op;
-  auto in = AF::id(tensorName(in_op));
+  auto in = AF::id(tensorName(ir_input));
  
    // Create the output tensor in the DOM and return its id.
-  const Shape& out_shape = op.getOutputShape(0);
+  const Shape& out_shape = ir_output.getShape();
  
    // This check confirms that we can "safely" reshape data
    // The only safe configuration of output shape is (1...1, N, 1 ... 1)
@@ -533,7 +530,7 @@ void AclCppOpGenerator::visit(ops::ReshapeOp& op) {
      }
    }
  
-  shared_ptr<ArtifactId> out = genTensor(op, out_shape);
+  shared_ptr<ArtifactId> out = genTensor(ir_output);
  
    // Create an instance of the CLReshapeLayer class as a member of the artifact class.
    auto layer = genLayer("arm_compute::CLReshapeLayer", out->name() + "_reshape_layer",
@@ -545,28 +542,30 @@ void AclCppOpGenerator::visit(ops::ReshapeOp& op) {
  void AclCppOpGenerator::visit(ops::ScaleOp& op) {
    // May be not a perfect implementation, using the CLPixelWiseMultiplication ACL function taking
    // two input tensors with the same shapes.
-  auto prev_nodes = op.getPrevNodes();
-  assert(prev_nodes.size() == 2);
-  auto in_op = prev_nodes[0].op;
-  auto ir_scales_op = dynamic_cast<ops::ConstantOp*>(prev_nodes[1].op);
-  if (ir_scales_op == nullptr)
+  assert(op.getNumInputs() == 2);
+  IODescriptor ir_input = op.getInput(0);
+  IODescriptor ir_weights = op.getInput(1);
+  IODescriptor ir_output = op.getOutput(0);
+
+  auto ir_weights_op = dynamic_cast<ops::ConstantOp*>(ir_weights.op);
+  if (ir_weights_op == nullptr)
      throw AclCppException("Unsupported operation type");
  
-  const auto& ir_scales = ir_scales_op->getValue();
-  assert(ir_scales.getShape().rank() == 1);
+  const auto& ir_weights_tensor = ir_weights_op->getValue();
+  assert(ir_weights_tensor.getShape().rank() == 1);
  
    // Get input tensor identifier in the generated artifact.
-  auto input = AF::id(tensorName(in_op));
+  auto input = AF::id(tensorName(ir_input));
  
-  const string output_tensor_name = tensorName(&op);
+  const string output_tensor_name = tensorName(ir_output);
  
    // transpose input to NCHW format supported by ACL
    const string transposed_input_name = output_tensor_name + "_transposed_input";
    shared_ptr<ArtifactId> transposed_input =
-      genTransposeMIRtoACL(transposed_input_name, op.getInputShape(0), input);
+      genTransposeMIRtoACL(transposed_input_name, ir_input.getShape(), input);
  
    // Create the output tensor in the DOM and obtain its identifier.
-  const Shape& out_shape = op.getOutputShape(0);
+  const Shape& out_shape = ir_output.getShape();
    Shape transposed_output_shape;
    switch (out_shape.rank()) {
      case 4:
@@ -587,14 +586,14 @@ void AclCppOpGenerator::visit(ops::ScaleOp& op) {
    auto operation_name = transposed_output->name() + "_scale_layer";
  
    // Reshape the IR scales tensor and generate the corresponding DOM tensor.
-  const Shape ir_input_shape = transposeShape<0, 3, 1, 2>(op.getInputShape(0));
+  const Shape ir_input_shape = transposeShape<0, 3, 1, 2>(ir_input.getShape());
    Shape ir_scales_shape(ir_input_shape.rank());
  
    // ACL CLArithmeticDivision supports input tensors broadcasting.
    for (int i = 0; i < ir_input_shape.rank(); ++i)
      ir_scales_shape.dim(i) = 1;
  
-  ir_scales_shape.dim(1) = ir_scales.getShape().dim(0);
+  ir_scales_shape.dim(1) = ir_weights_tensor.getShape().dim(0);
    auto scales = genTensor(operation_name + "_scales", ir_scales_shape);
  
    // We do not use the genMultiplication() function here because the input needs broadcasting.
@@ -619,7 +618,7 @@ void AclCppOpGenerator::visit(ops::ScaleOp& op) {
  
    addToPersistentTensors(scales);
    // Save the IR scales tensor to later read this in the artifact.
-  serializeTensor(scales, ir_scales);
+  serializeTensor(scales, ir_weights_tensor);
    addToPersistentTensors(unit);
    // Fill the unit tensor with the 1 value.
    fillTensor(unit, "1");
@@ -633,9 +632,6 @@ void AclCppOpGenerator::visit(ops::ScaleOp& op) {
  
    genTensorDeallocation(_infBlock, transposed_input);
    genTensorDeallocation(_infBlock, transposed_output);
-
-  if (op.getNextNodes().empty())
-    _outputs.insert(&op);
  }
  
  void AclCppOpGenerator::visit(mir::ops::SliceOp&) {
@@ -648,16 +644,17 @@ void AclCppOpGenerator::visit(ops::BatchNormOp&) {
  }
  
  void AclCppOpGenerator::visit(ops::DropoutOp& op) {
+  assert(op.getNumInputs() == 1);
+  IODescriptor ir_input = op.getInput(0);
+  IODescriptor ir_output = op.getOutput(0);
+
    // Just copy input tensor to the output one.
-  auto prev_ops = op.getPrevNodes();
-  assert(prev_ops.size() == 1);
-  Operation* in_op = prev_ops[0].op;
  
    // Get input tensor identifier in the generated artifact.
-  std::shared_ptr<ArtifactId> in = AF::id(tensorName(in_op));
+  shared_ptr<ArtifactId> in = AF::id(tensorName(ir_input));
  
    // Generate output tensor description in the DOM.
-  shared_ptr<ArtifactId> out = AF::id(tensorName(&op));
+  shared_ptr<ArtifactId> out = AF::id(tensorName(ir_output));
  
    _constrBlock->var("arm_compute::CLTensor&", out->name(), {}, {in});
  }
@@ -667,35 +664,34 @@ void AclCppOpGenerator::visit(ops::TanhOp& op) {
  }
  
  void AclCppOpGenerator::visit(ops::ElementwiseOp& op) {
+  assert(op.getNumInputs() >= 2);
+  const auto& ir_inputs = op.getPrevNodes();
+  IODescriptor ir_output = op.getOutput(0);
+
    // Create the output tensor in the DOM and obtain its identifier.
-  auto out = genTensor(op, op.getOutputShape(0));
+  auto out = genTensor(ir_output);
    addToPersistentTensors(out);
  
-  auto& prev_nodes = op.getPrevNodes();
-  assert(prev_nodes.size() >= 2);
-
-  auto in_op1 = prev_nodes[0].op;
-
    // Get the identifier of the first input tensor in the DOM.
-  auto in1 = AF::id(tensorName(in_op1));
+  auto in1 = AF::id(tensorName(ir_inputs[0]));
  
-  for (size_t i = 1; i < prev_nodes.size(); ++i) {
-    auto in_op2 = prev_nodes[i].op;
+  for (size_t i = 1; i < ir_inputs.size(); ++i) {
+    IODescriptor ir_input = ir_inputs[i];
  
      // Get the identifier of the second input tensor in the DOM.
-    auto in2 = AF::id(tensorName(in_op2));
+    auto in2 = AF::id(tensorName(ir_input));
  
      // Chaining the partial results of binary operations.
      // On the last iteration the result is saved in the node output.
      // Different ACL layers used to implement different types of elementwise operations.
      switch (op.getOpType()) {
        case ops::ElementwiseOp::OpType::mul:
-        in1 = genMultiplication(out->name() + "_" + "multiplication", i - 1, op.getInputShape(i),
-                                in1, in2, i == prev_nodes.size() - 1 ? out : nullptr);
+        in1 = genMultiplication(out->name() + "_" + "multiplication", i - 1, ir_input.getShape(),
+                                in1, in2, i == ir_inputs.size() - 1 ? out : nullptr);
          break;
        case ops::ElementwiseOp::OpType::add:
-        in1 = genAddition(out->name() + "_" + "addition", i - 1, op.getInputShape(i),
-                          in1, in2, i == prev_nodes.size() - 1 ? out : nullptr);
+        in1 = genAddition(out->name() + "_" + "addition", i - 1, ir_input.getShape(),
+                          in1, in2, i == ir_inputs.size() - 1 ? out : nullptr);
          break;
        default:
          throw AclCppException("This min elementwise operation is currently not supported");
@@ -712,12 +708,15 @@ void AclCppOpGenerator::visit(ops::EluOp&) {
  }
  
  void AclCppOpGenerator::visit(ops::PadOp& op) {
+  assert(op.getNumInputs() == 1);
+  IODescriptor ir_input = op.getInput(0);
+  IODescriptor ir_output = op.getOutput(0);
+
    // Get the id of the input tensor.
-  auto in_op = op.getPrevNodes()[0].op;
-  auto input = AF::id(tensorName(in_op));
+  auto input = AF::id(tensorName(ir_input));
  
    // Create the output tensor in the DOM
-  auto out = genTensor(op, op.getOutputShape(0));
+  auto out = genTensor(ir_output);
    addToPersistentTensors(out);
  
    // Generate PadLayer params
@@ -741,30 +740,30 @@ void AclCppOpGenerator::visit(ops::PadOp& op) {
  
  template <typename Op>
  void AclCppOpGenerator::genConvolution(Op& op, const string& acl_func_name, const string& suffix) {
-  const auto& prev_nodes = op.getPrevNodes();
-  assert(prev_nodes.size() == 2);
+  IODescriptor ir_input = op.getPrevNodes()[0];
+  IODescriptor ir_weights = op.getPrevNodes()[1];
+  IODescriptor ir_output = op.getOutput(0);
  
-  auto in_op = prev_nodes[0].op;
-  auto ir_weights_op = dynamic_cast<ops::ConstantOp*>(prev_nodes[1].op);
+  auto ir_weights_op = dynamic_cast<ops::ConstantOp*>(ir_weights.op);
    if (ir_weights_op == nullptr)
      throw AclCppException("Unsupported operation type");
  
-  auto ir_weights = transposeTensor<3, 2, 0, 1>(ir_weights_op->getValue());
-  const Shape& ir_weights_shape = ir_weights.getShape();
+  auto ir_weights_tensor = transposeTensor<3, 2, 0, 1>(ir_weights_op->getValue());
+  const Shape& ir_weights_shape = ir_weights_tensor.getShape();
  
    // get output tensor name that is used as base for other names
-  const string output_tensor_name = tensorName(&op);
+  const string output_tensor_name = tensorName(ir_output);
  
    // Get the identifier of the input tensor in the DOM.
-  auto input = AF::id(tensorName(in_op));
+  auto input = AF::id(tensorName(ir_input));
  
    // Generate auxiliary tensor to hold transposed input of convolution in NCHW format
    shared_ptr<ArtifactId> transposed_input =
-      genTransposeMIRtoACL(output_tensor_name + "_transposed_input", op.getInputShape(0), input);
+      genTransposeMIRtoACL(output_tensor_name + "_transposed_input", ir_input.getShape(), input);
  
    // Create the transposed output tensor in the DOM.
    const string transposed_output_name = output_tensor_name + "_transposed_output";
-  Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(op.getOutputShape(0));
+  Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(ir_output.getShape());
    shared_ptr<ArtifactId> transposed_output =
        genTensor(transposed_output_name, transposed_output_shape);
  
@@ -796,7 +795,7 @@ void AclCppOpGenerator::genConvolution(Op& op, const string& acl_func_name, cons
  
    addToPersistentTensors(weights);
    // Save the IR weights tensor to later read this in the artifact.
-  serializeTensor(weights, ir_weights);
+  serializeTensor(weights, ir_weights_tensor);
    genTensorAllocation(_infBlock, transposed_output);
    genLayerExecution(layer);
  
@@ -806,22 +805,19 @@ void AclCppOpGenerator::genConvolution(Op& op, const string& acl_func_name, cons
  
    genTensorDeallocation(_infBlock, transposed_input);
    genTensorDeallocation(_infBlock, transposed_output);
-
-  if (op.getNextNodes().empty())
-    _outputs.insert(&op);
  }
  
  void AclCppOpGenerator::genActivation(mir::Operation& op, const std::string& activation_name,
                                        float a, float b) {
-  auto &prev_nodes = op.getPrevNodes();
-  assert(prev_nodes.size() == 1);
+  assert(op.getNumInputs() == 1);
+  IODescriptor ir_input = op.getInput(0);
+  IODescriptor ir_output = op.getOutput(0);
  
    // Get the id of the input tensor.
-  auto in_op = prev_nodes[0].op;
-  auto in = AF::id(tensorName(in_op));
+  auto in = AF::id(tensorName(ir_input));
  
    // Create the output tensor in the DOM and return its id.
-  shared_ptr<ArtifactId> output = genTensor(op, op.getOutputShape(0));
+  shared_ptr<ArtifactId> output = genTensor(ir_output);
  
    auto prefix = output->name() + "_activation_layer";
  
@@ -841,7 +837,7 @@ void AclCppOpGenerator::genActivation(mir::Operation& op, const std::string& act
    genLayerExecution(layer);
  }
  
-shared_ptr<ArtifactId> AclCppOpGenerator::genAddition(const string& prefix, int index,
+shared_ptr<ArtifactId> AclCppOpGenerator::genAddition(const string& prefix, size_t index,
                                                        const Shape& ir_shape,
                                                        shared_ptr<ArtifactId> in1,
                                                        shared_ptr<ArtifactId> in2,
@@ -927,16 +923,19 @@ shared_ptr<ArtifactId> AclCppOpGenerator::genMultiplication(const string& prefix
    return out;
  }
  
-string AclCppOpGenerator::tensorName(const Operation* op) const {
+string AclCppOpGenerator::tensorName(IODescriptor ir_tensor) const {
    string tensor_name;
  
-  if (!op->getName().empty()) {
-    tensor_name = "_" + op->getName();
+  // TODO Use the tensor name instead of the operation name.
+  const auto& op_name = ir_tensor.op->getName();
+
+  if (!op_name.empty()) {
+    tensor_name = "_" + op_name;
      replace_if(tensor_name.begin(),
                 tensor_name.end(),
                 [](char c) { return std::isalnum(c) == 0; }, '_');
    } else {
-    tensor_name = "tensor_" + to_string(op->getId());
+    tensor_name = "tensor_" + to_string(ir_tensor.op->getId());
    }
  
    return tensor_name;
@@ -985,28 +984,24 @@ shared_ptr<ArtifactId> AclCppOpGenerator::genTensor(const string& name,
    return id;
  }
  
-shared_ptr<ArtifactId> AclCppOpGenerator::genTensor(Operation& op, const Shape& ir_shape) {
-  if (op.getType() == Operation::Type::input)
-    _inputs.insert(&op);
-
-  if (op.getNextNodes().empty() && op.getType() != Operation::Type::constant)
-    _outputs.insert(&op);
-
-  return genTensor(tensorName(&op), ir_shape, !op.getName().empty());
+shared_ptr<ArtifactId> AclCppOpGenerator::genTensor(IODescriptor ir_tensor) {
+  return genTensor(tensorName(ir_tensor), ir_tensor.getShape(), !ir_tensor.op->getName().empty());
  }
  
-void AclCppOpGenerator::genNamed() {
-  if (_inputs.size() == 1) {
+void AclCppOpGenerator::genNamed(Graph* graph) {
+  const auto& inputs = graph->collectInputs();
+  if (inputs.size() == 1) {
      auto f = _artifactClass->func(true, "arm_compute::CLTensor&", "getInput");
      auto b = f->getBlock();
-    auto id = AF::id(tensorName(*_inputs.begin()));
+    auto id = AF::id(tensorName(inputs[0]->getOutput(0)));
      b->ret(id);
    }
  
-  if (_outputs.size() == 1) {
+  const auto& outputs = graph->collectOutputs();
+  if (outputs.size() == 1) {
      auto f = _artifactClass->func(true, "arm_compute::CLTensor&", "getOutput");
      auto b = f->getBlock();
-    auto id = AF::id(tensorName(*_outputs.begin()));
+    auto id = AF::id(tensorName(outputs[0]->getOutput(0)));
      b->ret(id);
    }
  }
@@ -1137,19 +1132,19 @@ void AclCppOpGenerator::genTranspose(const std::shared_ptr<nnc::ArtifactId>& inp
  }
  
  void AclCppOpGenerator::visit(mir::ops::TransposeOp& op) {
-  auto& prev_nodes = op.getPrevNodes();
-  assert(prev_nodes.size() == 1);
-  auto in_op = prev_nodes[0].op;
+  assert(op.getNumInputs() == 1);
+  IODescriptor ir_input = op.getInput(0);
+  IODescriptor ir_output = op.getOutput(0);
  
    // Get the input node tensor id in the DOM.
-  shared_ptr<ArtifactId> input = AF::id(tensorName(in_op));
+  shared_ptr<ArtifactId> input = AF::id(tensorName(ir_input));
    const vector<size_t>& mir_axis_order = op.getAxisOrder();
  
    // Create the output tensor in the DOM.
-  if (op.getOutputShape(0).rank() != 4)
+  if (ir_output.getShape().rank() != 4)
      throw AclCppException("Unsupported number of dimensions in transpose operation");
    // TODO replace transpose shape
-  shared_ptr<ArtifactId> output = genTensor(op, op.getOutputShape(0));
+  shared_ptr<ArtifactId> output = genTensor(ir_output);
  
    // Actual generation of operation and related stuff
    genTranspose(input, output, mir_axis_order, false);
diff --git a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h

index 3e02610..6b37ff6 100644 (file)
--- a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h
+++ b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h
@@ -135,7 +135,7 @@ private:
     *                   to the left of and including the in2 term, or the operation out if in2 was
     *                   the last term in the sequence.
     */
-  std::shared_ptr<ArtifactId> genAddition(const std::string& prefix, int index,
+  std::shared_ptr<ArtifactId> genAddition(const std::string& prefix, size_t index,
                                            const mir::Shape& ir_shape,
                                            std::shared_ptr<ArtifactId> in1,
                                            std::shared_ptr<ArtifactId> in2,
@@ -170,7 +170,7 @@ private:
    /**
     * @brief Generates a unique name for the tensor.
     */
-  std::string tensorName(const mir::Operation* op) const;
+  std::string tensorName(mir::IODescriptor ir_tensor) const;
  
    /**
     * @brief Generates variables tensor shape in DOM.
@@ -198,11 +198,10 @@ private:
  
    /**
     * @brief Generates a DOM tensor.
-   * @param op - an IR operation for which this tensor is generated.
-   * @param ir_shape - a shape in IR.
+   * @param ir_tensor - the ModelIR tensor.
     * @return - a DOM identifier for the created tensor.
     */
-  std::shared_ptr<ArtifactId> genTensor(mir::Operation& op, const mir::Shape& ir_shape);
+  std::shared_ptr<ArtifactId> genTensor(mir::IODescriptor ir_tensor);
  
    /**
     * @brief generate transposing operation, @p mir_perm contains dimensions in MIR order (batch has index 0)
@@ -217,8 +216,9 @@ private:
  
    /**
     * @brief Generates accessors for the input/output tensors.
+   * @param graph - the ModelIR graph.
     */
-  void genNamed();
+  void genNamed(mir::Graph* graph);
  
    /**
     * @brief Schedule a tensor serialization.
@@ -294,16 +294,6 @@ private:
    void genLayerExecution(std::shared_ptr<ArtifactId> layer_id);
  
    /**
-   * @brief Input nodes.
-   */
-  std::set<mir::Operation*> _inputs;
-
-  /**
-   * @brief Output nodes.
-   */
-  std::set<mir::Operation*> _outputs;
-
-  /**
     * @brief All named tensors names.
     */
    std::set<std::string> _tensorNames;
diff --git a/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp b/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp

index 1a7a3ca..c403abd 100644 (file)
--- a/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp
+++ b/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp
@@ -255,12 +255,12 @@ void ModelAnalyzer::visit(ops::BiasAddOp& op) {
  }
  
  void ModelAnalyzer::visit(ops::InputOp& op) {
-  assert(op.getPrevNodes().empty());
+  assert(op.getNumInputs() == 0);
    appendOperationToInference(&op, "in");
  }
  
  void ModelAnalyzer::visit(ops::ConstantOp& op) {
-  assert(op.getPrevNodes().empty());
+  assert(op.getNumInputs() == 0);
  
    // FIXME This is to work around deserializeTensors not being able to deserialize tensors of type
    // other than float32.
diff --git a/contrib/nnc/unittests/core/operation.cpp b/contrib/nnc/unittests/core/operation.cpp

index c2efa74..ad52172 100644 (file)
--- a/contrib/nnc/unittests/core/operation.cpp
+++ b/contrib/nnc/unittests/core/operation.cpp
@@ -31,7 +31,7 @@ TEST(Operation, ConnectionTest) {
    auto op2 = new ops::ReshapeOp(op1->getOutput(0), Shape{});
    op2->setId(1);
  
-  ASSERT_EQ(op1->getId(), op2->getPrevNodes()[0].op->getId());
+  ASSERT_EQ(op1->getId(), op2->getInput(0).op->getId());
  
    delete op1;
    delete op2;
author	Сергей Баранников/AI Tools Lab /SRR/Engineer/삼성전자 <s.barannikov@samsung.com>
	Mon, 28 Jan 2019 10:24:38 +0000 (13:24 +0300)
committer	Efimov Alexander/AI Tools Lab/./Samsung Electronics <a.efimov@samsung.com>
	Mon, 28 Jan 2019 10:24:38 +0000 (13:24 +0300)
contrib/nnc/include/core/modelIR/Operation.h		patch \| blob \| history
contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp		patch \| blob \| history
contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h		patch \| blob \| history
contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp		patch \| blob \| history
contrib/nnc/unittests/core/operation.cpp		patch \| blob \| history