From 263373ee5f03d647a19ee988628040a409161089 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=EA=B9=80=EC=9A=A9=EC=84=AD/On-Device=20Lab=28SR=29/Enginee?= =?utf8?q?r/=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?= Date: Fri, 26 Jul 2019 13:18:21 +0900 Subject: [PATCH] [neurun] Enable handling subtensors for notify{First|Last}Uses (#5915) Enable handling subtensors for notify{First|Last}Uses by preVisit and postVisit. Signed-off-by: Yongseop Kim --- runtimes/neurun/backend/acl_cl/KernelGenerator.cc | 4 +- .../neurun/backend/acl_common/TemplTensorBuilder.h | 221 ++++++++++++++++++++- .../neurun/backend/acl_neon/KernelGenerator.cc | 4 +- runtimes/neurun/backend/cpu/KernelGenerator.cc | 4 + 4 files changed, 220 insertions(+), 13 deletions(-) diff --git a/runtimes/neurun/backend/acl_cl/KernelGenerator.cc b/runtimes/neurun/backend/acl_cl/KernelGenerator.cc index cc1201d..5ae4b7a 100644 --- a/runtimes/neurun/backend/acl_cl/KernelGenerator.cc +++ b/runtimes/neurun/backend/acl_cl/KernelGenerator.cc @@ -156,9 +156,9 @@ void KernelGenerator::visit(const model::Subgraph &subgraph) for (const auto &e : subgraph.operations()) { const auto &node = *(e.node); - //_tensor_builder->preVisit(node); + _tensor_builder->preVisit(node); node.accept(*this); - //_tensor_builder->postVisit(node); + _tensor_builder->postVisit(node); } } diff --git a/runtimes/neurun/backend/acl_common/TemplTensorBuilder.h b/runtimes/neurun/backend/acl_common/TemplTensorBuilder.h index e1faee6..93b3f11 100644 --- a/runtimes/neurun/backend/acl_common/TemplTensorBuilder.h +++ b/runtimes/neurun/backend/acl_common/TemplTensorBuilder.h @@ -18,6 +18,7 @@ #define __NEURUN_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__ #include +#include #include #include @@ -32,6 +33,12 @@ namespace backend namespace acl_common { +enum class UsesType +{ + FIRST, + LAST +}; + template class TemplTensorBuilder : public ITensorBuilder { @@ -67,9 +74,8 @@ public: std::shared_ptr wrapTensor(const model::OperandIndex &ind) override; void iterate(const 
IterateFunction &fn) override; - // TODO Fill these and remember that these will be for the linear executor for a while - void preVisit(const model::Operation &) override {} - void postVisit(const model::Operation &) override {} + void preVisit(const model::Operation &node) override; + void postVisit(const model::Operation &node) override; // TODO Consider removing after #5642 fixes void registerModelObject(const model::OperandIndex &ind, const model::Operand &obj) override; @@ -97,6 +103,7 @@ public: private: void buildTensors(void); void buildSubtensors(void); + void validate(void); private: model::OperandIndexMap _tensor_info_map; @@ -104,6 +111,18 @@ private: model::OperandIndexMap _apply_dim_correction_map; model::Layout _layout; std::unique_ptr _mem_mgr; + + // TODO Consider dividing TensorBuilder into Linear and others + const std::string _executor_str; + + // for linear executor + std::queue> _uses_queue; + uint32_t _first_uses_num; + model::OperandIndexMap _first_uses_visit; + + // for subtensors + model::OperandIndexMap _parent_def; + model::OperandIndexMap _parent_uses; }; } // namespace acl_common @@ -127,7 +146,8 @@ namespace acl_common template TemplTensorBuilder::TemplTensorBuilder( T_AclMemoryManager *mem_mgr) - : _mem_mgr{mem_mgr} + : _mem_mgr{mem_mgr}, _executor_str(util::getConfigString(util::config::EXECUTOR)), + _first_uses_num(0) { assert(_mem_mgr); } @@ -141,6 +161,9 @@ void TemplTensorBuilder::registerTen _tensor_info_map.insert({ind, info}); _apply_dim_correction_map.insert({ind, true}); _layout = layout; + + assert(_first_uses_visit.find(ind) == _first_uses_visit.end()); + _first_uses_visit[ind] = false; } template @@ -151,20 +174,34 @@ void TemplTensorBuilder::registerSub _subtensor_info_map.insert({ind, info}); _apply_dim_correction_map.insert({ind, true}); + + assert(_first_uses_visit.find(ind) == _first_uses_visit.end()); + _first_uses_visit[ind] = false; + + const auto &parent_ind = info.parent(); + + // parent_def + 
_parent_def[parent_ind] = 1; + + // parent_use + if (_parent_uses.find(parent_ind) == _parent_uses.end()) + _parent_uses[parent_ind] = 1; // 1 means including the parent itself + _parent_uses[parent_ind]++; } template void TemplTensorBuilder::notifyFirstUse( - const model::OperandIndex &) + const model::OperandIndex &ind) { - // TODO FILL THIS + _first_uses_num++; + _uses_queue.emplace(UsesType::FIRST, ind); } template void TemplTensorBuilder::notifyLastUse( - const model::OperandIndex &) + const model::OperandIndex &ind) { - // TODO FILL THIS + _uses_queue.emplace(UsesType::LAST, ind); } template @@ -177,8 +214,9 @@ void TemplTensorBuilder::prepare(voi template void TemplTensorBuilder::allocate(void) { - assert(_tensor_info_map.size() == _mem_mgr->tensors().size()); + validate(); + assert(_tensor_info_map.size() == _mem_mgr->tensors().size()); _mem_mgr->allocate(); } @@ -363,6 +401,166 @@ void TemplTensorBuilder::buildSubten } template +void TemplTensorBuilder::preVisit( + const model::Operation &node) +{ + // For now other executors don't need this step + if (_executor_str != "Linear") + { + return; + } + + auto start_lifetime = [this](const model::OperandIndex &ind) { + // a subtensor? + if (_subtensor_info_map.find(ind) != _subtensor_info_map.end()) + { + const auto &parent_ind = _subtensor_info_map.at(ind).parent(); + if (_parent_def[parent_ind]) + { + _parent_def[parent_ind] = 0; + _mem_mgr->startLifetime(parent_ind); + } + } + // a parent? + else if (_parent_def.find(ind) != _parent_def.end()) + { + if (_parent_def[ind]) + { + _parent_def[ind] = 0; + _mem_mgr->startLifetime(ind); + } + } + else + { + _mem_mgr->startLifetime(ind); + } + }; + + model::OperandIndexMap outputs_map; + for (const auto &ind : node.getOutputs()) + { + assert(_first_uses_visit.find(ind) != _first_uses_visit.end()); + outputs_map[ind] = _first_uses_visit[ind]; + } + + // Are all of outputs_map's elements true? 
+ auto outputs_map_all_check = [&outputs_map]() { + return std::all_of(outputs_map.begin(), outputs_map.end(), + [](std::pair it) { return it.second; }); + }; + + std::pair peak; + while (!outputs_map_all_check() && (peak = _uses_queue.front()).first == UsesType::FIRST) + { + _uses_queue.pop(); + _first_uses_num--; + + const auto &popped_idx = peak.second; + start_lifetime(popped_idx); + + outputs_map[popped_idx] = true; + _first_uses_visit[popped_idx] = true; + } +} + +template +void TemplTensorBuilder::postVisit( + const model::Operation &node) +{ + // For now other executors don't need this step + if (_executor_str != "Linear") + { + return; + } + + auto finish_lifetime = [this](const model::OperandIndex &ind) { + // a subtensor? + if (_subtensor_info_map.find(ind) != _subtensor_info_map.end()) + { + const auto &parent_ind = _subtensor_info_map.at(ind).parent(); + if (--(_parent_uses[parent_ind]) == 0) + { + _mem_mgr->finishLifetime(parent_ind); + } + } + // a parent? + else if (_parent_uses.find(ind) != _parent_uses.end()) + { + if (--(_parent_uses[ind]) == 0) + { + _mem_mgr->finishLifetime(ind); + } + } + else + { + _mem_mgr->finishLifetime(ind); + } + }; + + const auto &inputs = node.getInputs(); + std::pair peak; + while ((peak = _uses_queue.front()).first == UsesType::LAST) + { + const auto &popped_idx = peak.second; + if (inputs.contains(popped_idx)) + { + _uses_queue.pop(); + finish_lifetime(popped_idx); + } + else + { + break; + } + } + + if (_first_uses_num == 0) + { + while (!_uses_queue.empty()) + { + peak = _uses_queue.front(); + assert(peak.first == UsesType::LAST); + + _uses_queue.pop(); + + finish_lifetime(peak.second); + } + } +} + +template +void TemplTensorBuilder::validate(void) +{ + // For now other executors don't need this step + if (_executor_str != "Linear") + { + return; + } + + for (auto it : _tensor_info_map) + { + assert(_first_uses_visit.find(it.first) != _first_uses_visit.end()); + assert(_first_uses_visit[it.first]); + } + + 
for (auto it : _subtensor_info_map) + { + assert(_first_uses_visit.find(it.first) != _first_uses_visit.end()); + assert(_first_uses_visit[it.first]); + } + + assert(_uses_queue.size() == 0); + assert(_first_uses_num == 0); + + assert(std::all_of( + _parent_def.begin(), _parent_def.end(), + [](std::pair it) { return it.second == 0; })); + assert(std::all_of( + _parent_uses.begin(), _parent_uses.end(), + [](std::pair it) { return it.second == 0; })); +} + +// TODO Consider removing after #5642 fixes +template void TemplTensorBuilder::registerModelObject( const model::OperandIndex &ind, const model::Operand &obj) { @@ -371,6 +569,7 @@ void TemplTensorBuilder::registerMod (void)obj; } +// TODO Consider removing after #5642 fixes template void TemplTensorBuilder::markConstant( const model::OperandIndex &ind) @@ -379,6 +578,7 @@ void TemplTensorBuilder::markConstan (void)ind; } +// TODO Consider removing after #5642 fixes template bool TemplTensorBuilder::isConstant( const model::OperandIndex &ind) @@ -388,12 +588,14 @@ bool TemplTensorBuilder::isConstant( return false; } +// TODO Consider removing after #5642 fixes template void TemplTensorBuilder::deallocateConstants() { // TODO Fill this } +// TODO Consider removing after #5642 fixes template void TemplTensorBuilder::notifyFirstUseIf( const model::OperandIndex &ind) @@ -402,6 +604,7 @@ void TemplTensorBuilder::notifyFirst (void)ind; } +// TODO Consider removing after #5642 fixes template void TemplTensorBuilder::notifyLastUseIf( const model::OperandIndex &ind) diff --git a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc index 338bd32..bed68b6 100644 --- a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc +++ b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc @@ -165,9 +165,9 @@ void KernelGenerator::visit(const model::Subgraph &subgraph) for (const auto &e : subgraph.operations()) { const auto &node = *(e.node); - //_tensor_builder->preVisit(node); + 
_tensor_builder->preVisit(node); node.accept(*this); - //_tensor_builder->postVisit(node); + _tensor_builder->postVisit(node); } } diff --git a/runtimes/neurun/backend/cpu/KernelGenerator.cc b/runtimes/neurun/backend/cpu/KernelGenerator.cc index 5def6c9..34868ba 100644 --- a/runtimes/neurun/backend/cpu/KernelGenerator.cc +++ b/runtimes/neurun/backend/cpu/KernelGenerator.cc @@ -324,6 +324,8 @@ void KernelGenerator::visit(const model::operation::PermuteNode &node) const auto output_backend = node.param().output_backend; const auto data_type = node.getDataType(); + output_backend->tensor_builder()->preVisit(node); + auto output_object = output_backend->tensor_builder()->wrapTensor(output_index); auto input_object = input_backend->tensor_builder()->wrapTensor(input_index); @@ -358,6 +360,8 @@ void KernelGenerator::visit(const model::operation::PermuteNode &node) fn->configure(input_object, output_object, out_shape, permuteType, data_type); + input_backend->tensor_builder()->postVisit(node); + _execution_builder->append(std::move(fn)); } -- 2.7.4