From 8a75031d6efa7d796f3e4628624ac09af9ab5e48 Mon Sep 17 00:00:00 2001
From: Dilshodzhon Poshshoev/AI Tools Lab/SRR/Engineer/Samsung Electronics
Date: Fri, 4 Oct 2019 10:01:51 +0300
Subject: [PATCH] [neurun] Schedule branch by branch instead of node (#6965)

* [neurun] Schedule branch by branch instead of node

Related issue: 6963
Schedule a whole branch (which can be seen as a subgraph) recursively
before moving to the next node

Signed-off-by: Poshshoev Dilshodzhon

* Fix CI failure

Signed-off-by: Poshshoev Dilshodzhon

* Address offline review

Signed-off-by: Poshshoev Dilshodzhon

* Fix review feedback

Signed-off-by: Poshshoev Dilshodzhon

* Address feedback: rename scheduleRecursively

Signed-off-by: Poshshoev Dilshodzhon

* Fix documentation typo

Signed-off-by: Poshshoev Dilshodzhon
---
 runtimes/neurun/core/src/compiler/HEScheduler.cc | 52 +++++++++++++++++++++++-
 runtimes/neurun/core/src/compiler/HEScheduler.h  | 15 ++++++-
 runtimes/neurun/test/core/compiler/Scheduler.cc  | 14 +++----
 3 files changed, 70 insertions(+), 11 deletions(-)

diff --git a/runtimes/neurun/core/src/compiler/HEScheduler.cc b/runtimes/neurun/core/src/compiler/HEScheduler.cc
index bbcb4d6..c2dec06 100644
--- a/runtimes/neurun/core/src/compiler/HEScheduler.cc
+++ b/runtimes/neurun/core/src/compiler/HEScheduler.cc
@@ -185,6 +185,43 @@ bool HEScheduler::isNodeProfiled(const model::Operation &node)
   return true;
 }
 
+void HEScheduler::scheduleBranch(const model::OperationIndex &index,
+                                 model::OperationIndexMap<bool> &scheduled)
+{
+  auto loc_index = index;
+  const backend::Backend *parent_backend = nullptr;
+  while (true)
+  {
+    if (scheduled[loc_index])
+    {
+      return;
+    }
+    if (!scheduleNode(loc_index, parent_backend))
+    {
+      return;
+    }
+    scheduled[loc_index] = true;
+    parent_backend = _backend_resolver->getBackend(loc_index);
+
+    const auto &node = _graph->operations().at(loc_index);
+    model::OperandIndex tmp;
+    /* get the only output operand, which is the input of the next single operation
+     * and only this node's output */
+    if (node.getOutputs().size() != 1)
+    {
+      return;
+    }
+    const auto &only_out_operand = _graph->operands().at(*node.getOutputs().begin());
+    loc_index = only_out_operand.getUses().list().front();
+    /* verify that the next node is neither the beginning nor the ending node of a branch */
+    const auto &next_node = _graph->operations().at(loc_index);
+    if (!isMergable(*_graph, next_node))
+    {
+      return;
+    }
+  }
+}
+
 std::unique_ptr<compiler::BackendResolver> HEScheduler::schedule(const graph::Graph &graph)
 {
   _graph = &graph;
@@ -217,10 +254,14 @@ std::unique_ptr<compiler::BackendResolver> HEScheduler::schedule(const graph::Gr
     }
   }
 
+  model::OperationIndexMap<bool> visited;
+  graph.operations().iterate([&](const model::OperationIndex &index, const model::Operation &) {
+    visited[index] = false;
+  });
   // for each task select the backend with the smallest earliest finishing time(eft)
   for (const auto &rank : _rank_to_op)
   {
-    scheduleNode(rank.second);
+    scheduleBranch(rank.second, visited);
   }
   VERBOSE(HEScheduler::schedule) << "task scheduling finished" << std::endl;
   return std::move(_backend_resolver);
@@ -416,7 +457,8 @@ int64_t HEScheduler::backendAvailableTime(const backend::Backend *backend,
   return prev_op_ft;
 }
 
-void HEScheduler::scheduleNode(const model::OperationIndex &index)
+bool HEScheduler::scheduleNode(const model::OperationIndex &index,
+                               const backend::Backend *parent_backend)
 {
   VERBOSE(HEScheduler::scheduleNode) << "scheduling (" << index.value() << ")" << std::endl;
   int64_t eft = std::numeric_limits<int64_t>::max(), selected_exec_time = 0;
@@ -444,6 +486,11 @@ void HEScheduler::scheduleNode(const model::OperationIndex &index)
     throw std::runtime_error{"Fail to choose backend on scheduler"};
   }
 
+  // this node is part of a branch, but a backend other than the parent's was chosen for it
+  if (parent_backend && parent_backend != chosen_backend)
+  {
+    return false;
+  }
   for (const auto &it : selected_transfer_st_exec_time)
   {
     auto prev_op_ft = backendAvailableTime(_cpu_backend, it.first, it.second);
@@ -457,6 +504,7 @@ void HEScheduler::scheduleNode(const model::OperationIndex &index)
   VERBOSE(HEScheduler::scheduleNode) << "backend for " << node.getName() << " is "
                                      << chosen_backend->config()->id() << ". Its eft: " << eft
                                      << std::endl;
+  return true;
 }
 
 std::pair<int64_t, int64_t>
diff --git a/runtimes/neurun/core/src/compiler/HEScheduler.h b/runtimes/neurun/core/src/compiler/HEScheduler.h
index ba4ebac..1657482 100644
--- a/runtimes/neurun/core/src/compiler/HEScheduler.h
+++ b/runtimes/neurun/core/src/compiler/HEScheduler.h
@@ -81,7 +81,7 @@ public:
 
 private:
   bool isNodeProfiled(const model::Operation &);
-  void scheduleNode(const model::OperationIndex &);
+  bool scheduleNode(const model::OperationIndex &, const backend::Backend *parent_backend);
   /**
    * @brief Get earliest starting time and execution time of an operation on a backend.
    *
@@ -139,6 +139,19 @@ private:
 
   int64_t tryBackend(const model::Operation &node, const backend::Backend *backend);
 
+  /**
+   * @brief Schedule a node and its successors until:
+   *            1. there is no branching or connection of multiple branches
+   *            2. for a subsequent node, a backend other than the predecessor's is preferred
+   *
+   * @param[in] index: index of an operation
+   * @param[in] scheduled: a map to check if this node has already been scheduled
+   *
+   * @return N/A
+   */
+  void scheduleBranch(const model::OperationIndex &index,
+                      model::OperationIndexMap<bool> &scheduled);
+
 private:
   // This variable stores backend/node pairs with unknown execution time, and hints scheduler
   // whether it should assign these backends to these nodes:
diff --git a/runtimes/neurun/test/core/compiler/Scheduler.cc b/runtimes/neurun/test/core/compiler/Scheduler.cc
index cf82261..2f00629 100644
--- a/runtimes/neurun/test/core/compiler/Scheduler.cc
+++ b/runtimes/neurun/test/core/compiler/Scheduler.cc
@@ -453,14 +453,12 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time)
 {
   // Increase execution time for GPU backend
   ExecTime et(_mock_backends);
-  /* for parallel executor: set a time, that is larger than branches_cnt*npu_exec_time
-     so that npu is prefered: the i+1 level node of the first branch will wait for npu
-     until it finishes the i-th nodes of all other branches in BFS order*/
-  setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 2 + 1);
-  /* for parallel executor: set ET of FC larger than Mul's to be determinant:
-     if they are equal and scheduling is done in order mul1->FC1->FC2->mul2,
-     then for mul2 gpu is selected since NPU_ET*3 > GPU_ET(which is NPU_ET * 2 + 1)*/
-  setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 2 + 2);
+  /* for parallel executor: set a time that is larger than sum_of_other_branches_nodes_cnt *
+   * npu_exec_time so that npu is preferred: the i-th branch will wait for npu until it finishes
+   * the nodes of branches [0;i-1] in DFS order. In each branch it goes deep until it encounters
+   * branching or the scheduler assigns another backend to a node */
+  setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
+  setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
   et.uploadOperationsExecTime();
 
   // Test scheduler
-- 
2.7.4