From: Ivan Vagin/AI Tools Lab /SRR/Engineer/삼성전자 Date: Fri, 25 Jan 2019 10:11:26 +0000 (+0300) Subject: [nnc] Don't serialize excess constant tensors in ACL backend (#2906) X-Git-Tag: nncc_backup~924 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=23e28c80f7c082183923d157db335775477f4e76;p=platform%2Fcore%2Fml%2Fnnfw.git [nnc] Don't serialize excess constant tensors in ACL backend (#2906) Reduce memory consumption by elimination of excess constant tensor serialization in ACL backend Signed-off-by: Ivan Vagin --- diff --git a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp index 97e5070..b493697 100644 --- a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp +++ b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp @@ -362,6 +362,7 @@ void AclCppOpGenerator::visit(ops::FullyConnectedOp& op) { // Instantiate the CLFullyConnectedLayer object. auto layer = genLayer("arm_compute::CLFullyConnectedLayer", operation_name, {AF::ref(in), AF::ref(weights), AF::lit("nullptr"), AF::ref(out)}); + addToPersistentTensors(weights); // Serialize the weights tensor and generate the function to deserialize it in the artifact. serializeTensor(weights, ir_weights); @@ -440,6 +441,7 @@ void AclCppOpGenerator::visit(ops::BiasAddOp& op) { auto layer = genLayer("arm_compute::CLArithmeticAddition", layer_name, {AF::ref(transposed_input), AF::ref(biases), AF::ref(transposed_output), AF::lit("arm_compute::ConvertPolicy::WRAP")}); + addToPersistentTensors(biases); // Save the IR biases tensor to later read this in the artifact. 
serializeTensor(biases, ir_biases); @@ -465,15 +467,43 @@ void AclCppOpGenerator::visit(ops::InputOp& op) { addToPersistentTensors(tensor); } -void AclCppOpGenerator::visit(ops::ConstantOp& op) { - Shape out_shape = op.getOutputShape(0); - TensorVariant data = op.getValue(); +// FIXME: temporary decision +static bool shouldSerializeConstant(ops::ConstantOp& op) { + // Operations from 'self_serializing_ops_to_inputs' serialize tensors with the appropriate index themselves, + // so we don't serialize them here; we also don't serialize tensors from a dangling ConstantOp + static std::map self_serializing_ops_to_inputs{ + {Operation::Type::scale, 1}, + {Operation::Type::conv2D, 1}, + {Operation::Type::fullyConnected, 1}, + {Operation::Type::biasAdd, 1}}; + + for (auto& next_node : op.getNextNodes()) { + auto self_serializing_op_it = self_serializing_ops_to_inputs.find(next_node->getType()); + // Serialize if next_node type is not from 'self_serializing_ops_to_inputs' + if (self_serializing_op_it == self_serializing_ops_to_inputs.end()) + return true; + + // If next_node has the current ConstantOp as its previous node, but not with the appropriate index - + // serialize the current ConstantOp + int serializing_input_index = self_serializing_op_it->second; + auto next_node_prev_nodes = static_cast(next_node->getPrevNodes().size()); + for (int i = 0; i < next_node_prev_nodes; ++i) { + if (next_node->getPrevNodes()[i].op == &op && i != serializing_input_index) + return true; + } + } - shared_ptr out = genTensor(op, out_shape); + return false; +} - addToPersistentTensors(out); - // Serialize the weights tensor and generate the function to deserialize it in the artifact. 
- serializeTensor(out, data); +void AclCppOpGenerator::visit(ops::ConstantOp& op) { + if (shouldSerializeConstant(op)) { + Shape out_shape = op.getOutputShape(0); + TensorVariant data = op.getValue(); + shared_ptr out = genTensor(op, out_shape); + addToPersistentTensors(out); + serializeTensor(out, data); + } } void AclCppOpGenerator::visit(ops::ReluOp& op) { @@ -585,6 +615,7 @@ void AclCppOpGenerator::visit(ops::ScaleOp& op) { auto layer2 = genLayer("arm_compute::CLArithmeticDivision", operation_name + "_arithmetic_div_layer_2", {AF::ref(transposed_input), AF::ref(tmp), AF::ref(transposed_output)}); + addToPersistentTensors(scales); // Save the IR scales tensor to later read this in the artifact. serializeTensor(scales, ir_scales); @@ -737,8 +768,8 @@ void AclCppOpGenerator::genConvolution(Op& op, const string& acl_func_name, cons // Create the convolution (/depthwise convolution/deconvolution) layer class instance. shared_ptr layer = genLayer(acl_func_name, operation_name, config_params); - addToPersistentTensors(weights); + addToPersistentTensors(weights); // Save the IR weights tensor to later read this in the artifact. 
serializeTensor(weights, ir_weights); genTensorAllocation(_infBlock, transposed_output); @@ -978,8 +1009,8 @@ void AclCppOpGenerator::serializeIRTensor(const TensorVariant& tensor) { } void AclCppOpGenerator::genDeserializations() { - for (auto s : _serializations) - _constrBlock->call("deserializeTensor", {_parIn, s}); + for (auto& tensor : _serializations) + _constrBlock->call("deserializeTensor", {_parIn, tensor}); } void AclCppOpGenerator::genFillings() { @@ -1016,7 +1047,7 @@ AclCppOpGenerator::genTensorDeallocation(ArtifactBlock* block, } void AclCppOpGenerator::genPersistentTensorAllocations() { - for (shared_ptr tensor: _persistent_tensors) + for (auto& tensor : _persistent_tensors) genTensorAllocation(_constrBlock, tensor); } diff --git a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h index 787c55b..3e02610 100644 --- a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h +++ b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h @@ -189,7 +189,7 @@ private: * @brief Generates a DOM tensor. * @param name - its name. * @param ir_shape - IR shape used to construct the tensor. - * @param gen_accessor - whether to generate an accessor function for thsi tensor + * @param gen_accessor - whether to generate an accessor function for this tensor * in the artifact class. * @return - a DOM identifier for the created tensor. */