From d6d7e350289a7e89e27613b56047372f09ae894b Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ivan=20Vagin/AI=20Tools=20Lab=20/SRR/Engineer/=EC=82=BC?= =?utf8?q?=EC=84=B1=EC=A0=84=EC=9E=90?= Date: Fri, 30 Aug 2019 10:43:27 +0900 Subject: [PATCH] [neurun] Prettified HE scheduler a little bit (#6982) * [neurun] Prettified HE scheduler a little bit * Removed backend transfer time from _run_cache * _run_cache replaced with _preferred_operations * Profiling information existence checks added Signed-off-by: Ivan Vagin * Review fixes Signed-off-by: Ivan Vagin --- runtimes/neurun/core/src/compiler/Scheduler.cc | 52 ++++++++++++++++---------- runtimes/neurun/core/src/compiler/Scheduler.h | 15 ++++++-- 2 files changed, 43 insertions(+), 24 deletions(-) diff --git a/runtimes/neurun/core/src/compiler/Scheduler.cc b/runtimes/neurun/core/src/compiler/Scheduler.cc index 0032a69..6a7ae58 100644 --- a/runtimes/neurun/core/src/compiler/Scheduler.cc +++ b/runtimes/neurun/core/src/compiler/Scheduler.cc @@ -20,7 +20,6 @@ #include "util/ConfigSource.h" #include "compiler/IExecutionBuilder.h" #include "compiler/BackendResolver.h" -#include "backend/IConfig.h" #include "backend/IShapeFixer.h" #include "util/logging.h" #include "util/Utils.h" @@ -227,40 +226,54 @@ std::unique_ptr Scheduler::schedule(const graph::Grap return std::move(_backend_resolver); } -int64_t Scheduler::getTime(const backend::Backend *backend, const std::string &operation, - bool quant, uint32_t size) +int64_t Scheduler::getOpTime(const backend::Backend *backend, const std::string &operation, + bool quant, uint32_t size) { const auto time = _exec_time->getOperationExecTime(backend, operation, quant, size); if (time != _exec_time->NOT_FOUND) - { return time; - }; - return _run_cache.at(backend).at(operation); + + return _is_supported.at(backend).at(operation) ? 
1 : _exec_time->getMax(); +} + +int64_t Scheduler::getPermuteTime(const backend::Backend *src_backend, + const backend::Backend *dst_backend, bool quant, uint32_t size) +{ + const auto time = _exec_time->getPermuteTime(src_backend, dst_backend, quant, size); + if (time != _exec_time->NOT_FOUND) + return time; + + // Makes the scheduler prefer keeping computations on one backend + return size / 200; } int64_t Scheduler::tryBackend(const model::Operation &node, const backend::Backend *backend) { - auto iter = _run_cache.find(backend); - if (iter != _run_cache.end()) + auto iter = _is_supported.find(backend); + if (iter != _is_supported.end()) { auto it2 = iter->second.find(node.getName()); if (it2 != iter->second.end()) { - return _run_cache[backend][node.getName()]; + return _is_supported[backend][node.getName()] ? 1 : _exec_time->getMax(); } } try { _backend_resolver->getBackendContext(backend)->shape_fixer->fix(node); - // always prefer the one, that is supported - _run_cache[backend][node.getName()] = 1; + + if (!util::getConfigBool(util::config::PROFILING_MODE)) + throw std::runtime_error("You are trying to run heterogeneous scheduler with disabled " + "profiling mode, while there is no profiling information about some " + "nodes. Run scheduler with enabled profiling mode first."); + + _is_supported[backend][node.getName()] = true; } catch (std::runtime_error &e) { - // Put to max so that during scheduling supported backend will be selected - _run_cache[backend][node.getName()] = _exec_time->getMax(); + _is_supported[backend][node.getName()] = false; } - return _run_cache[backend][node.getName()]; + return _is_supported[backend][node.getName()] ? 
1 : _exec_time->getMax(); } void Scheduler::makeRank() @@ -317,7 +330,7 @@ int64_t Scheduler::DFSMaxRank(const model::OperationIndex &index) int64_t std = 0; for (const auto backend : _all_backends) { - const auto exec_time = getTime(backend, node.getName(), quant, size); + const auto exec_time = getOpTime(backend, node.getName(), quant, size); if (exec_time < _exec_time->getMax()) { std += (exec_time - rank) * (exec_time - rank); @@ -362,9 +375,8 @@ int64_t Scheduler::DFSChildrenMaxRank(const model::OperationIndex &index) _exec_time->getPermuteTime(backend, other_backend, quant, operand.info().total_size()); if (transfer_cost == _exec_time->NOT_FOUND) { - // makes the scheduler prefer keeping computations on one backend + // Makes the scheduler prefer keeping computations on one backend transfer_cost = operand.info().total_size() / 100; - _run_cache[backend][other_backend->config()->id()] = transfer_cost; } avg_transfer_cost += transfer_cost; } @@ -464,7 +476,7 @@ Scheduler::ESTAndExecTime(const backend::Backend *backend, const model::Operatio return {_exec_time->getMax(), _exec_time->getMax()}; } // get average exec time of the op on this backend - auto exec_time = getTime(backend, node.getName(), quant, size); + auto exec_time = getOpTime(backend, node.getName(), quant, size); if (backend->config()->id() == "cpu" && is_parallel_exec) { exec_time *= CPU_DELAY; @@ -548,8 +560,8 @@ int64_t Scheduler::predMaxEFT(const backend::Backend *backend, const model::Oper if (parent_backend != backend) { // Multiply operand size by 2 because size must describe input+output size - int64_t transfer_cost = getTime(parent_backend, backend->config()->id(), quant, - input_operand.info().total_size() * 2); + int64_t transfer_cost = + getPermuteTime(parent_backend, backend, quant, input_operand.info().total_size() * 2); transfer_st_exec_time.emplace(_ops_eft.at(input_node_idx), transfer_cost); } } diff --git a/runtimes/neurun/core/src/compiler/Scheduler.h 
b/runtimes/neurun/core/src/compiler/Scheduler.h index a245171..80d6eaf 100644 --- a/runtimes/neurun/core/src/compiler/Scheduler.h +++ b/runtimes/neurun/core/src/compiler/Scheduler.h @@ -48,7 +48,7 @@ public: * @param[in] backend_resolver backend resolver */ Scheduler(const neurun::model::Operands &operands, std::vector backends) - : _run_cache{}, _backends_avail_time{}, _ops_eft{}, + : _is_supported{}, _backends_avail_time{}, _ops_eft{}, _op_to_rank{std::make_shared>()}, _all_backends(std::move(backends)) { @@ -126,15 +126,22 @@ private: int64_t backendAvailableTime(const backend::Backend *backend, const int64_t &starting_time, const int64_t &time_amount); - int64_t getTime(const backend::Backend *backend, const std::string &operation, bool quant, - uint32_t size); + int64_t getOpTime(const backend::Backend *backend, const std::string &operation, bool quant, + uint32_t size); + + int64_t getPermuteTime(const backend::Backend *src_backend, const backend::Backend *dst_backend, + bool quant, uint32_t size); void scheduleShufflingBackends(); int64_t tryBackend(const model::Operation &node, const backend::Backend *backend); private: - std::unordered_map> _run_cache; + // This variable stores backend/node pairs with unknown execution time, and hints the scheduler + // whether it should assign these backends to these nodes: + // * It stores false for unsupported nodes + // * During rank calculation with profiling mode enabled, it stores true for supported nodes + std::unordered_map> _is_supported; // Finishing and starting time of each backend std::unordered_map> _backends_avail_time; model::OperationIndexMap _ops_eft; -- 2.7.4