From b16504defe5d6e5570d5a147907ea35b80451c51 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ivan=20Vagin/AI=20Tools=20Lab=20/SRR/Engineer/=EC=82=BC?= =?utf8?q?=EC=84=B1=EC=A0=84=EC=9E=90?= Date: Fri, 16 Aug 2019 13:12:49 +0900 Subject: [PATCH] [neurun] Refactor scheduler a little bit (#6636) * Simplified makeRank method * Renamed some variables * Fixed some typos Signed-off-by: Ivan Vagin --- runtimes/neurun/core/src/compiler/Scheduler.cc | 96 ++++++++++++-------------- runtimes/neurun/core/src/compiler/Scheduler.h | 22 +++--- 2 files changed, 53 insertions(+), 65 deletions(-) diff --git a/runtimes/neurun/core/src/compiler/Scheduler.cc b/runtimes/neurun/core/src/compiler/Scheduler.cc index 6ee692a..bbdab80 100644 --- a/runtimes/neurun/core/src/compiler/Scheduler.cc +++ b/runtimes/neurun/core/src/compiler/Scheduler.cc @@ -23,6 +23,7 @@ #include "backend/IConfig.h" #include "backend/IShapeFixer.h" #include "util/logging.h" +#include "util/Utils.h" #include "exec/FunctionSequence.h" #include #include @@ -72,9 +73,9 @@ static bool isWorkaroundSkip(const graph::Graph &graph, const backend::Backend * { const auto lhs_index{node.getInputs().at(model::operation::AddNode::Input::LHS)}; const auto rhs_index{node.getInputs().at(model::operation::AddNode::Input::RHS)}; - /* Broadcasting isn't supported on CPU: no way to differ the existing exec_time record of + /*Broadcasting isn't supported on CPU: no way to differ the existing exec_time record of * Add with and without broadcasting*/ - /*Quant is also supported: throws an exception in run(): in case of scheduling without warm-up + /*Quant is also unsupported: throws an exception in run(): in case of scheduling without warm-up it isn't catched by tryBackend()*/ if (quant || !(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape())) @@ -132,7 +133,7 @@ void Scheduler::scheduleShufflingBackends() << "Started task scheduling: uses all backends to get more metrics for data transfer" << std::endl; size_t backend_ind = 0; - for (const auto &rank : _ranks) + for (const auto &rank : _rank_to_op) { VERBOSE(Scheduler::scheduleNode) << "scheduling (" << rank.second.value() << ")" << std::endl; const auto &node = _graph->operations().at(rank.second); @@ -190,7 +191,7 @@ std::unique_ptr Scheduler::schedule(const graph::Grap if (is_profiling) { bool profile_data_transfer = true; - const auto &node = _graph->operations().at(_ranks.begin()->second); + const auto &node = _graph->operations().at(_rank_to_op.begin()->second); const bool quant = isQuant(*_graph, node); const auto size = getOperationsFlattenedIOSize(*_graph, node); for (const auto *backend : all_backends) @@ -211,7 +212,7 @@ std::unique_ptr Scheduler::schedule(const graph::Grap } // for each task select the backend with the smallest earliest finishing time(eft) - for (const auto &rank : _ranks) + for (const auto &rank : _rank_to_op) { scheduleNode(rank.second); } @@ -259,40 +260,31 @@ void Scheduler::makeRank() { VERBOSE(Scheduler::makeRank) << "task prioritizing" << std::endl; - model::OperationIndexMap calculated_rank; + _graph->operations().iterate( + [&](const model::OperationIndex &index, const model::Operation &) { DFSMaxRank(index); }); - const int NOT_SET = -1; + // Check that ranks are calculated for all operations(nodes) _graph->operations().iterate([&](const model::OperationIndex &index, const model::Operation &) { - calculated_rank[index] = NOT_SET; + UNUSED_RELEASE(index); + assert(_op_to_rank->find(index) != _op_to_rank->end()); }); - - for (const auto &it : 
calculated_rank) - { - DFSMaxRank(it.first, calculated_rank); - } - - // Rank for all of the operations(nodes) must be calculated. - assert(std::all_of(calculated_rank.begin(), calculated_rank.end(), - [](const std::pair &v) { - return v.second != NOT_SET; - })); VERBOSE(Scheduler::makeRank) << "task prioritizing finished" << std::endl; } -int64_t Scheduler::DFSMaxRank(const model::OperationIndex &index, - std::unordered_map &calculated_rank) +int64_t Scheduler::DFSMaxRank(const model::OperationIndex &index) { - if (calculated_rank.at(index) >= 0) - return calculated_rank.at(index); + auto op_to_rank_it = _op_to_rank->find(index); + if (op_to_rank_it != _op_to_rank->end()) + return op_to_rank_it->second; const auto all_backends = backend::BackendManager::instance().getAll(); const auto &node = _graph->operations().at(index); - int64_t rank = 0, std = 0; + int64_t rank = 0; const bool quant = isQuant(*_graph, node); const auto size = getOperationsFlattenedIOSize(*_graph, node); - auto all_backends_size = static_cast(all_backends.size()); + auto supported_backends_quantity = static_cast(all_backends.size()); - const auto max_child_rank = DFSChildrensMaxRank(index, calculated_rank); + const auto max_child_rank = DFSChildrenMaxRank(index); // get average exec time of this op for (const auto &backend : all_backends) @@ -309,13 +301,14 @@ int64_t Scheduler::DFSMaxRank(const model::OperationIndex &index, else { // this operation isn't supported in this backend - --all_backends_size; + --supported_backends_quantity; } } - assert((all_backends_size > 0) && "Encountered unsupported op"); - rank /= all_backends_size; + assert((supported_backends_quantity > 0) && "Encountered unsupported op"); + rank /= supported_backends_quantity; // get standard deviation + int64_t std = 0; for (const auto backend : all_backends) { const auto exec_time = getTime(backend, node.getName(), quant, size); @@ -324,27 +317,24 @@ int64_t Scheduler::DFSMaxRank(const model::OperationIndex &index, std += (exec_time - rank) * (exec_time - rank); } } - std /= all_backends_size; + std /= supported_backends_quantity; if (std > 0) { std = static_cast(std::sqrt(std)); rank *= std; } rank += max_child_rank; - calculated_rank[index] = rank; assert(rank >= 0); - _ranks.emplace(rank, index); - _indexed_ranks->emplace(index, rank); + _rank_to_op.emplace(rank, index); + _op_to_rank->emplace(index, rank); VERBOSE(Scheduler::DFSMaxRank) << "rank of operation (" << index.value() << ")" << node.getName() << " is " << rank << std::endl; return rank; } -int64_t -Scheduler::DFSChildrensMaxRank(const model::OperationIndex &index, - std::unordered_map &calculated_rank) +int64_t Scheduler::DFSChildrenMaxRank(const model::OperationIndex &index) { const auto all_backends = backend::BackendManager::instance().getAll(); const auto &node = _graph->operations().at(index); @@ -353,7 +343,7 @@ Scheduler::DFSChildrensMaxRank(const model::OperationIndex &index, { const auto &operand = _graph->operands().at(output); const bool quant = operand.typeInfo().type() == model::DataType::QUANT8_ASYMM; - // avarage data transfer cost of this operand's data + // average data transfer cost of this operand's data int64_t avg_transfer_cost = 1; for (const auto *backend : all_backends) { @@ -377,7 +367,7 @@ Scheduler::DFSChildrensMaxRank(const model::OperationIndex &index, avg_transfer_cost /= all_backends.size(); for (const auto &use : operand.getUses().list()) { - const auto cur_child_rank = DFSMaxRank(use, calculated_rank); + const auto cur_child_rank = DFSMaxRank(use); 
max_child_rank = std::max(max_child_rank, cur_child_rank + avg_transfer_cost); } } @@ -433,7 +423,7 @@ void Scheduler::scheduleNode(const model::OperationIndex &index) {prev_op_ft + it.second, prev_op_ft}); } - _parents_eft[index] = eft; + _ops_eft[index] = eft; _backends_avail_time[chosen_backend].emplace(eft, eft - selected_exec_time); _backend_resolver->setBackend(index, chosen_backend); @@ -448,7 +438,7 @@ Scheduler::ESTAndExecTime(const backend::Backend *backend, const model::Operatio { const bool is_linear_exec = "Linear" == util::getConfigString(util::config::EXECUTOR); const bool is_parallel_exec = "Parallel" == util::getConfigString(util::config::EXECUTOR); - // Permutation will cause creating a seperate subgraph that contains just this permutation node. + // Permutation will cause creating a separate subgraph that contains just this permutation node. // This isn't needed for Linear executor since it doesn't use subgraphs // Number 1 ms is picked experimentally int64_t permute_fine = 1000; @@ -476,12 +466,12 @@ Scheduler::ESTAndExecTime(const backend::Backend *backend, const model::Operatio exec_time *= CPU_DELAY; } - // get max eft of direct (one lavel above) predecessors + // get max eft of direct (one level above) predecessors auto max_pred_eft = predMaxEFT(backend, node, transfer_st_exec_time); int64_t total_transfer_cost = 0; std::vector::iterator> inserted_permutations; - // Find free time for data transfering and insert it into backend taskset. This is needed: + // Find free time for data transferring and insert it into backend taskset. This is needed: // 1. Time for multiple permutations for this node's input is found correctly // 2. If backend==cpu, then free time for this node must come after permutations auto cpu_backend = backend::BackendManager::instance().get("cpu"); @@ -537,23 +527,23 @@ int64_t Scheduler::predMaxEFT(const backend::Backend *backend, const model::Oper std::multimap &transfer_st_exec_time) { int64_t max_pred_eft = 0; - for (const auto &input : node.getInputs()) + for (const auto &input_operand_idx : node.getInputs()) { - const auto &operand = _graph->operands().at(input); - const bool quant = operand.typeInfo().type() == model::DataType::QUANT8_ASYMM; - // operations, whose output is current node's this input operand - for (const auto &defs : operand.getDef().list()) + const auto &input_operand = _graph->operands().at(input_operand_idx); + const bool quant = input_operand.typeInfo().type() == model::DataType::QUANT8_ASYMM; + + for (const auto &input_node_idx : input_operand.getDef().list()) { - // Data transfer cost from this parent's backend to current node's backend: - auto parent_backend = _backend_resolver->getBackend(defs); + // Data transfer cost from parent's node backend to current node's backend: + auto parent_backend = _backend_resolver->getBackend(input_node_idx); - max_pred_eft = std::max(max_pred_eft, _parents_eft.at(defs)); + max_pred_eft = std::max(max_pred_eft, _ops_eft.at(input_node_idx)); if (parent_backend != backend) { - // Multiply operand size by 2 because size must discribe input+output size + // Multiply operand size by 2 because size must describe input+output size int64_t transfer_cost = getTime(parent_backend, backend->config()->id(), quant, - operand.info().total_size() * 2); - transfer_st_exec_time.emplace(_parents_eft.at(defs), transfer_cost); + input_operand.info().total_size() * 2); + transfer_st_exec_time.emplace(_ops_eft.at(input_node_idx), transfer_cost); } } } diff --git 
a/runtimes/neurun/core/src/compiler/Scheduler.h b/runtimes/neurun/core/src/compiler/Scheduler.h index 2bd5a19..0dd8d80 100644 --- a/runtimes/neurun/core/src/compiler/Scheduler.h +++ b/runtimes/neurun/core/src/compiler/Scheduler.h @@ -48,8 +48,8 @@ public: * @param[in] backend_resolver backend resolver */ Scheduler(const neurun::model::Operands &operands) - : _run_cache{}, _backends_avail_time{}, _parents_eft{}, - _indexed_ranks{std::make_shared>()} + : _run_cache{}, _backends_avail_time{}, _ops_eft{}, + _op_to_rank{std::make_shared>()} { _backend_resolver = nnfw::cpp14::make_unique(operands); _exec_time = @@ -65,7 +65,7 @@ public: * https://www.hindawi.com/journals/sp/2016/3676149/ */ std::unique_ptr schedule(const graph::Graph &graph) final; - std::shared_ptr> getIndexedRanks() { return _indexed_ranks; } + std::shared_ptr> getIndexedRanks() { return _op_to_rank; } private: void scheduleNode(const model::OperationIndex &); @@ -89,7 +89,7 @@ private: * * @param[in] backend: backend, for which to return the time * @param[in] node: node to get eft of parents - * @param[out] transfer_st_exec_time: est and exec time of data tranfer operation + * @param[out] transfer_st_exec_time: est and exec time of data transfer operation * * @return earliest finishing time of parent nodes */ @@ -98,11 +98,9 @@ private: void makeRank(); - int64_t DFSMaxRank(const model::OperationIndex &index, - std::unordered_map &calculated_rank); + int64_t DFSMaxRank(const model::OperationIndex &index); - int64_t DFSChildrensMaxRank(const model::OperationIndex &index, - std::unordered_map &calculated_rank); + int64_t DFSChildrenMaxRank(const model::OperationIndex &index); /** * @brief Returns the time, when backend is available for at least given amount of time. * @@ -111,7 +109,7 @@ private: * * @param[in] backend backend, for which to return the time * @param[in] starting_time time, starting which to look for gap - * @param[in] time_amount amound of the time, for which to look gap + * @param[in] time_amount amount of the time, for which to look gap * * @return time, when backend has at least time_amount free time */ @@ -129,9 +127,9 @@ private: std::unordered_map> _run_cache; // Finishing and starting time of each backend std::unordered_map> _backends_avail_time; - model::OperationIndexMap _parents_eft; - std::multimap> _ranks; - std::shared_ptr> _indexed_ranks; + model::OperationIndexMap _ops_eft; + std::multimap> _rank_to_op; + std::shared_ptr> _op_to_rank; std::unique_ptr _backend_resolver; std::unique_ptr _exec_time; const graph::Graph *_graph{nullptr}; -- 2.7.4
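For reference, below is a minimal, self-contained C++ sketch of the rank bookkeeping this patch converges on: a memoized DFS fills an op-to-rank map, which doubles as the "already computed" check (replacing the removed NOT_SET sentinel map), plus a rank-to-op multimap that later drives the scheduling order. Everything here (OpIndex, Op, RankBuilder, own_cost, the descending-order comparator) is invented for illustration and is not the neurun model or scheduler API; the real DFSMaxRank additionally averages per-backend execution times, folds in their standard deviation, and adds estimated data-transfer costs via DFSChildrenMaxRank.

#include <algorithm>
#include <cstdint>
#include <functional>
#include <iostream>
#include <map>
#include <unordered_map>
#include <vector>

using OpIndex = uint32_t;

struct Op
{
  int64_t own_cost;              // stand-in for the averaged execution time of the op
  std::vector<OpIndex> children; // operations that consume this op's outputs
};

class RankBuilder
{
public:
  explicit RankBuilder(const std::unordered_map<OpIndex, Op> &graph) : _graph{graph} {}

  // Counterpart of the refactored makeRank(): visit every node; memoization in
  // _op_to_rank makes repeated visits cheap and removes the need for a NOT_SET sentinel.
  void makeRank()
  {
    for (const auto &it : _graph)
      dfsMaxRank(it.first);
  }

  const std::multimap<int64_t, OpIndex, std::greater<int64_t>> &rankToOp() const
  {
    return _rank_to_op;
  }

private:
  int64_t dfsMaxRank(OpIndex index)
  {
    // An already-computed rank doubles as the "visited" marker.
    const auto it = _op_to_rank.find(index);
    if (it != _op_to_rank.end())
      return it->second;

    const Op &op = _graph.at(index);
    int64_t max_child_rank = 0;
    for (const OpIndex child : op.children)
      max_child_rank = std::max(max_child_rank, dfsMaxRank(child));

    // Upward-rank style metric: own cost plus the most expensive downstream chain.
    const int64_t rank = op.own_cost + max_child_rank;
    _op_to_rank.emplace(index, rank);
    _rank_to_op.emplace(rank, index); // std::greater keeps the highest rank first
    return rank;
  }

  const std::unordered_map<OpIndex, Op> &_graph;
  std::unordered_map<OpIndex, int64_t> _op_to_rank;
  std::multimap<int64_t, OpIndex, std::greater<int64_t>> _rank_to_op;
};

int main()
{
  // Diamond-shaped toy graph: op 0 feeds ops 1 and 2, which both feed op 3.
  const std::unordered_map<OpIndex, Op> graph{
      {0, {5, {1, 2}}}, {1, {3, {3}}}, {2, {7, {3}}}, {3, {2, {}}}};

  RankBuilder builder{graph};
  builder.makeRank();

  // "Scheduling" here is just walking ranks from highest to lowest.
  for (const auto &rank : builder.rankToOp())
    std::cout << "op " << rank.second << " has rank " << rank.first << "\n";
  return 0;
}

In the sketch, the root of the diamond accumulates the costlier child chain and therefore comes out first when iterating rank_to_op. The memoization also guarantees each node is ranked exactly once, which is what lets the refactored makeRank() simply iterate all operations and call DFSMaxRank on each, then assert afterwards that every operation received a rank.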