[nnc] Don't serialize excess constant tensors in ACL backend (#2906)
author Ivan Vagin/AI Tools Lab/SRR/Engineer/Samsung Electronics <ivan.vagin@samsung.com>
Fri, 25 Jan 2019 10:11:26 +0000 (13:11 +0300)
committer Efimov Alexander/AI Tools Lab/Samsung Electronics <a.efimov@samsung.com>
Fri, 25 Jan 2019 10:11:26 +0000 (13:11 +0300)
Reduce memory consumption by eliminating excess constant tensor serialization in the ACL backend

Signed-off-by: Ivan Vagin <ivan.vagin@samsung.com>
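
A minimal, self-contained sketch of the new rule, with placeholder types
(NodeType, Node) standing in for nnc's IR classes; only the decision logic
mirrors the shouldSerializeConstant helper introduced in this change:

    #include <map>
    #include <vector>

    enum class NodeType { constant, scale, conv2D, fullyConnected, biasAdd, relu };

    struct Node {
      NodeType type;
      std::vector<Node*> next;  // consumers of this node's output
      std::vector<Node*> prev;  // producers of this node's inputs, by input index
    };

    // True if the ConstantOp visitor itself still has to serialize the constant:
    // some consumer does not serialize its own inputs, or reads the constant at
    // an input index other than the one it serializes itself.
    static bool shouldSerializeConstant(const Node& constant) {
      static const std::map<NodeType, int> self_serializing_ops_to_inputs{
          {NodeType::scale, 1},
          {NodeType::conv2D, 1},
          {NodeType::fullyConnected, 1},
          {NodeType::biasAdd, 1}};

      for (const Node* consumer : constant.next) {
        auto it = self_serializing_ops_to_inputs.find(consumer->type);
        if (it == self_serializing_ops_to_inputs.end())
          return true;  // consumer does not serialize its weights/biases itself
        for (int i = 0; i < static_cast<int>(consumer->prev.size()); ++i) {
          if (consumer->prev[i] == &constant && i != it->second)
            return true;  // constant feeds an input the consumer will not serialize
        }
      }
      return false;  // dangling constant, or every use is covered by its consumers
    }

For example, a constant feeding only input 1 of a FullyConnected node is no
longer serialized in the ConstantOp visitor (FullyConnectedOp serializes its
weights itself), while a constant that is also consumed by, say, a ReLU still is.
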
contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp
contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h

index 97e5070..b493697 100644 (file)
@@ -362,6 +362,7 @@ void AclCppOpGenerator::visit(ops::FullyConnectedOp& op) {
   // Instantiate the CLFullyConnectedLayer object.
   auto layer = genLayer("arm_compute::CLFullyConnectedLayer", operation_name,
                         {AF::ref(in), AF::ref(weights), AF::lit("nullptr"), AF::ref(out)});
+
   addToPersistentTensors(weights);
   // Serialize the weights tensor and generate the function to deserialize it in the artifact.
   serializeTensor(weights, ir_weights);
@@ -440,6 +441,7 @@ void AclCppOpGenerator::visit(ops::BiasAddOp& op) {
   auto layer = genLayer("arm_compute::CLArithmeticAddition", layer_name,
                         {AF::ref(transposed_input), AF::ref(biases), AF::ref(transposed_output),
                          AF::lit("arm_compute::ConvertPolicy::WRAP")});
+
   addToPersistentTensors(biases);
   // Save the IR biases tensor to later read this in the artifact.
   serializeTensor(biases, ir_biases);
@@ -465,15 +467,43 @@ void AclCppOpGenerator::visit(ops::InputOp& op) {
   addToPersistentTensors(tensor);
 }
 
-void AclCppOpGenerator::visit(ops::ConstantOp& op) {
-  Shape out_shape = op.getOutputShape(0);
-  TensorVariant data = op.getValue();
+// FIXME: temporary solution
+static bool shouldSerializeConstant(ops::ConstantOp& op) {
+  // Operations listed in 'self_serializing_ops_to_inputs' serialize the tensor at the given input index
+  // themselves, so we don't serialize it here; tensors of dangling ConstantOps are not serialized either
+  static std::map<Operation::Type, int> self_serializing_ops_to_inputs{
+          {Operation::Type::scale, 1},
+          {Operation::Type::conv2D, 1},
+          {Operation::Type::fullyConnected, 1},
+          {Operation::Type::biasAdd, 1}};
+
+  for (auto& next_node : op.getNextNodes()) {
+    auto self_serializing_op_it = self_serializing_ops_to_inputs.find(next_node->getType());
+    // Serialize if the next_node type is not in 'self_serializing_ops_to_inputs'
+    if (self_serializing_op_it == self_serializing_ops_to_inputs.end())
+      return true;
+
+    // If next_node consumes the current ConstantOp, but not at the self-serialized input index -
+    // serialize the current ConstantOp here
+    int serializing_input_index = self_serializing_op_it->second;
+    auto next_node_prev_nodes = static_cast<int>(next_node->getPrevNodes().size());
+    for (int i = 0; i < next_node_prev_nodes; ++i) {
+      if (next_node->getPrevNodes()[i].op == &op && i != serializing_input_index)
+        return true;
+    }
+  }
 
-  shared_ptr<ArtifactId> out = genTensor(op, out_shape);
+  return false;
+}
 
-  addToPersistentTensors(out);
-  // Serialize the weights tensor and generate the function to deserialize it in the artifact.
-  serializeTensor(out, data);
+void AclCppOpGenerator::visit(ops::ConstantOp& op) {
+  if (shouldSerializeConstant(op)) {
+    Shape out_shape = op.getOutputShape(0);
+    TensorVariant data = op.getValue();
+    shared_ptr<ArtifactId> out = genTensor(op, out_shape);
+    addToPersistentTensors(out);
+    serializeTensor(out, data);
+  }
 }
 
 void AclCppOpGenerator::visit(ops::ReluOp& op) {
@@ -585,6 +615,7 @@ void AclCppOpGenerator::visit(ops::ScaleOp& op) {
   auto layer2 = genLayer("arm_compute::CLArithmeticDivision",
                          operation_name + "_arithmetic_div_layer_2",
                          {AF::ref(transposed_input), AF::ref(tmp), AF::ref(transposed_output)});
+
   addToPersistentTensors(scales);
   // Save the IR scales tensor to later read this in the artifact.
   serializeTensor(scales, ir_scales);
@@ -737,8 +768,8 @@ void AclCppOpGenerator::genConvolution(Op& op, const string& acl_func_name, cons
 
   // Create the convolution (/depthwise convolution/deconvolution) layer class instance.
   shared_ptr<ArtifactId> layer = genLayer(acl_func_name, operation_name, config_params);
-  addToPersistentTensors(weights);
 
+  addToPersistentTensors(weights);
   // Save the IR weights tensor to later read this in the artifact.
   serializeTensor(weights, ir_weights);
   genTensorAllocation(_infBlock, transposed_output);
@@ -978,8 +1009,8 @@ void AclCppOpGenerator::serializeIRTensor(const TensorVariant& tensor) {
 }
 
 void AclCppOpGenerator::genDeserializations() {
-  for (auto s : _serializations)
-    _constrBlock->call("deserializeTensor", {_parIn, s});
+  for (auto& tensor : _serializations)
+    _constrBlock->call("deserializeTensor", {_parIn, tensor});
 }
 
 void AclCppOpGenerator::genFillings() {
@@ -1016,7 +1047,7 @@ AclCppOpGenerator::genTensorDeallocation(ArtifactBlock* block,
 }
 
 void AclCppOpGenerator::genPersistentTensorAllocations() {
-  for (shared_ptr<ArtifactId> tensor: _persistent_tensors)
+  for (auto& tensor : _persistent_tensors)
     genTensorAllocation(_constrBlock, tensor);
 }
 
index 787c55b..3e02610 100644 (file)
@@ -189,7 +189,7 @@ private:
    * @brief Generates a DOM tensor.
    * @param name - its name.
    * @param ir_shape - IR shape used to construct the tensor.
-   * @param gen_accessor - whether to generate an accessor function for thsi tensor
+   * @param gen_accessor - whether to generate an accessor function for this tensor
    *        in the artifact class.
    * @return - a DOM identifier for the created tensor.
    */