From b16504defe5d6e5570d5a147907ea35b80451c51 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ivan=20Vagin/AI=20Tools=20Lab=20/SRR/Engineer/=EC=82=BC?= =?utf8?q?=EC=84=B1=EC=A0=84=EC=9E=90?= Date: Fri, 16 Aug 2019 13:12:49 +0900 Subject: [PATCH] [neurun] Refactor scheduler a little bit (#6636) * Simplified makeRank method * Renamed some variables * Fixed some typos Signed-off-by: Ivan Vagin --- runtimes/neurun/core/src/compiler/Scheduler.cc | 96 ++++++++++++-------------- runtimes/neurun/core/src/compiler/Scheduler.h | 22 +++--- 2 files changed, 53 insertions(+), 65 deletions(-) diff --git a/runtimes/neurun/core/src/compiler/Scheduler.cc b/runtimes/neurun/core/src/compiler/Scheduler.cc index 6ee692a..bbdab80 100644 --- a/runtimes/neurun/core/src/compiler/Scheduler.cc +++ b/runtimes/neurun/core/src/compiler/Scheduler.cc @@ -23,6 +23,7 @@ #include "backend/IConfig.h" #include "backend/IShapeFixer.h" #include "util/logging.h" +#include "util/Utils.h" #include "exec/FunctionSequence.h" #include #include @@ -72,9 +73,9 @@ static bool isWorkaroundSkip(const graph::Graph &graph, const backend::Backend * { const auto lhs_index{node.getInputs().at(model::operation::AddNode::Input::LHS)}; const auto rhs_index{node.getInputs().at(model::operation::AddNode::Input::RHS)}; - /* Broadcasting isn't supported on CPU: no way to differ the existing exec_time record of + /*Broadcasting isn't supported on CPU: no way to differ the existing exec_time record of * Add with and without broadcasting*/ - /*Quant is also supported: throws an exception in run(): in case of scheduling without warm-up + /*Quant is also unsupported: throws an exception in run(): in case of scheduling without warm-up it isn't catched by tryBackend()*/ if (quant || !(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape())) @@ -132,7 +133,7 @@ void Scheduler::scheduleShufflingBackends() << "Started task scheduling: uses all backends to get more metrics for data transfer" << std::endl; size_t backend_ind = 0; - for (const auto &rank : _ranks) + for (const auto &rank : _rank_to_op) { VERBOSE(Scheduler::scheduleNode) << "scheduling (" << rank.second.value() << ")" << std::endl; const auto &node = _graph->operations().at(rank.second); @@ -190,7 +191,7 @@ std::unique_ptr Scheduler::schedule(const graph::Grap if (is_profiling) { bool profile_data_transfer = true; - const auto &node = _graph->operations().at(_ranks.begin()->second); + const auto &node = _graph->operations().at(_rank_to_op.begin()->second); const bool quant = isQuant(*_graph, node); const auto size = getOperationsFlattenedIOSize(*_graph, node); for (const auto *backend : all_backends) @@ -211,7 +212,7 @@ std::unique_ptr Scheduler::schedule(const graph::Grap } // for each task select the backend with the smallest earliest finishing time(eft) - for (const auto &rank : _ranks) + for (const auto &rank : _rank_to_op) { scheduleNode(rank.second); } @@ -259,40 +260,31 @@ void Scheduler::makeRank() { VERBOSE(Scheduler::makeRank) << "task prioritizing" << std::endl; - model::OperationIndexMap calculated_rank; + _graph->operations().iterate( + [&](const model::OperationIndex &index, const model::Operation &) { DFSMaxRank(index); }); - const int NOT_SET = -1; + // Check that ranks are calculated for all operations(nodes) _graph->operations().iterate([&](const model::OperationIndex &index, const model::Operation &) { - calculated_rank[index] = NOT_SET; + UNUSED_RELEASE(index); + assert(_op_to_rank->find(index) != _op_to_rank->end()); }); - - for (const auto &it : 
calculated_rank) - { - DFSMaxRank(it.first, calculated_rank); - } - - // Rank for all of the operations(nodes) must be calculated. - assert(std::all_of(calculated_rank.begin(), calculated_rank.end(), - [](const std::pair &v) { - return v.second != NOT_SET; - })); VERBOSE(Scheduler::makeRank) << "task prioritizing finished" << std::endl; } -int64_t Scheduler::DFSMaxRank(const model::OperationIndex &index, - std::unordered_map &calculated_rank) +int64_t Scheduler::DFSMaxRank(const model::OperationIndex &index) { - if (calculated_rank.at(index) >= 0) - return calculated_rank.at(index); + auto op_to_rank_it = _op_to_rank->find(index); + if (op_to_rank_it != _op_to_rank->end()) + return op_to_rank_it->second; const auto all_backends = backend::BackendManager::instance().getAll(); const auto &node = _graph->operations().at(index); - int64_t rank = 0, std = 0; + int64_t rank = 0; const bool quant = isQuant(*_graph, node); const auto size = getOperationsFlattenedIOSize(*_graph, node); - auto all_backends_size = static_cast(all_backends.size()); + auto supported_backends_quantity = static_cast(all_backends.size()); - const auto max_child_rank = DFSChildrensMaxRank(index, calculated_rank); + const auto max_child_rank = DFSChildrenMaxRank(index); // get average exec time of this op for (const auto &backend : all_backends) @@ -309,13 +301,14 @@ int64_t Scheduler::DFSMaxRank(const model::OperationIndex &index, else { // this operation isn't supported in this backend - --all_backends_size; + --supported_backends_quantity; } } - assert((all_backends_size > 0) && "Encountered unsupported op"); - rank /= all_backends_size; + assert((supported_backends_quantity > 0) && "Encountered unsupported op"); + rank /= supported_backends_quantity; // get standard deviation + int64_t std = 0; for (const auto backend : all_backends) { const auto exec_time = getTime(backend, node.getName(), quant, size); @@ -324,27 +317,24 @@ int64_t Scheduler::DFSMaxRank(const model::OperationIndex &index, std += (exec_time - rank) * (exec_time - rank); } } - std /= all_backends_size; + std /= supported_backends_quantity; if (std > 0) { std = static_cast(std::sqrt(std)); rank *= std; } rank += max_child_rank; - calculated_rank[index] = rank; assert(rank >= 0); - _ranks.emplace(rank, index); - _indexed_ranks->emplace(index, rank); + _rank_to_op.emplace(rank, index); + _op_to_rank->emplace(index, rank); VERBOSE(Scheduler::DFSMaxRank) << "rank of operation (" << index.value() << ")" << node.getName() << " is " << rank << std::endl; return rank; } -int64_t -Scheduler::DFSChildrensMaxRank(const model::OperationIndex &index, - std::unordered_map &calculated_rank) +int64_t Scheduler::DFSChildrenMaxRank(const model::OperationIndex &index) { const auto all_backends = backend::BackendManager::instance().getAll(); const auto &node = _graph->operations().at(index); @@ -353,7 +343,7 @@ Scheduler::DFSChildrensMaxRank(const model::OperationIndex &index, { const auto &operand = _graph->operands().at(output); const bool quant = operand.typeInfo().type() == model::DataType::QUANT8_ASYMM; - // avarage data transfer cost of this operand's data + // average data transfer cost of this operand's data int64_t avg_transfer_cost = 1; for (const auto *backend : all_backends) { @@ -377,7 +367,7 @@ Scheduler::DFSChildrensMaxRank(const model::OperationIndex &index, avg_transfer_cost /= all_backends.size(); for (const auto &use : operand.getUses().list()) { - const auto cur_child_rank = DFSMaxRank(use, calculated_rank); + const auto cur_child_rank = DFSMaxRank(use); 
max_child_rank = std::max(max_child_rank, cur_child_rank + avg_transfer_cost); } } @@ -433,7 +423,7 @@ void Scheduler::scheduleNode(const model::OperationIndex &index) {prev_op_ft + it.second, prev_op_ft}); } - _parents_eft[index] = eft; + _ops_eft[index] = eft; _backends_avail_time[chosen_backend].emplace(eft, eft - selected_exec_time); _backend_resolver->setBackend(index, chosen_backend); @@ -448,7 +438,7 @@ Scheduler::ESTAndExecTime(const backend::Backend *backend, const model::Operatio { const bool is_linear_exec = "Linear" == util::getConfigString(util::config::EXECUTOR); const bool is_parallel_exec = "Parallel" == util::getConfigString(util::config::EXECUTOR); - // Permutation will cause creating a seperate subgraph that contains just this permutation node. + // Permutation will cause creating a separate subgraph that contains just this permutation node. // This isn't needed for Linear executor since it doesn't use subgraphs // Number 1 ms is picked experimentally int64_t permute_fine = 1000; @@ -476,12 +466,12 @@ Scheduler::ESTAndExecTime(const backend::Backend *backend, const model::Operatio exec_time *= CPU_DELAY; } - // get max eft of direct (one lavel above) predecessors + // get max eft of direct (one level above) predecessors auto max_pred_eft = predMaxEFT(backend, node, transfer_st_exec_time); int64_t total_transfer_cost = 0; std::vector::iterator> inserted_permutations; - // Find free time for data transfering and insert it into backend taskset. This is needed: + // Find free time for data transferring and insert it into backend taskset. This is needed: // 1. Time for multiple permutations for this node's input is found correctly // 2. If backend==cpu, then free time for this node must come after permutations auto cpu_backend = backend::BackendManager::instance().get("cpu"); @@ -537,23 +527,23 @@ int64_t Scheduler::predMaxEFT(const backend::Backend *backend, const model::Oper std::multimap &transfer_st_exec_time) { int64_t max_pred_eft = 0; - for (const auto &input : node.getInputs()) + for (const auto &input_operand_idx : node.getInputs()) { - const auto &operand = _graph->operands().at(input); - const bool quant = operand.typeInfo().type() == model::DataType::QUANT8_ASYMM; - // operations, whose output is current node's this input operand - for (const auto &defs : operand.getDef().list()) + const auto &input_operand = _graph->operands().at(input_operand_idx); + const bool quant = input_operand.typeInfo().type() == model::DataType::QUANT8_ASYMM; + + for (const auto &input_node_idx : input_operand.getDef().list()) { - // Data transfer cost from this parent's backend to current node's backend: - auto parent_backend = _backend_resolver->getBackend(defs); + // Data transfer cost from parent's node backend to current node's backend: + auto parent_backend = _backend_resolver->getBackend(input_node_idx); - max_pred_eft = std::max(max_pred_eft, _parents_eft.at(defs)); + max_pred_eft = std::max(max_pred_eft, _ops_eft.at(input_node_idx)); if (parent_backend != backend) { - // Multiply operand size by 2 because size must discribe input+output size + // Multiply operand size by 2 because size must describe input+output size int64_t transfer_cost = getTime(parent_backend, backend->config()->id(), quant, - operand.info().total_size() * 2); - transfer_st_exec_time.emplace(_parents_eft.at(defs), transfer_cost); + input_operand.info().total_size() * 2); + transfer_st_exec_time.emplace(_ops_eft.at(input_node_idx), transfer_cost); } } } diff --git 
a/runtimes/neurun/core/src/compiler/Scheduler.h b/runtimes/neurun/core/src/compiler/Scheduler.h index 2bd5a19..0dd8d80 100644 --- a/runtimes/neurun/core/src/compiler/Scheduler.h +++ b/runtimes/neurun/core/src/compiler/Scheduler.h @@ -48,8 +48,8 @@ public: * @param[in] backend_resolver backend resolver */ Scheduler(const neurun::model::Operands &operands) - : _run_cache{}, _backends_avail_time{}, _parents_eft{}, - _indexed_ranks{std::make_shared>()} + : _run_cache{}, _backends_avail_time{}, _ops_eft{}, + _op_to_rank{std::make_shared>()} { _backend_resolver = nnfw::cpp14::make_unique(operands); _exec_time = @@ -65,7 +65,7 @@ public: * https://www.hindawi.com/journals/sp/2016/3676149/ */ std::unique_ptr schedule(const graph::Graph &graph) final; - std::shared_ptr> getIndexedRanks() { return _indexed_ranks; } + std::shared_ptr> getIndexedRanks() { return _op_to_rank; } private: void scheduleNode(const model::OperationIndex &); @@ -89,7 +89,7 @@ private: * * @param[in] backend: backend, for which to return the time * @param[in] node: node to get eft of parents - * @param[out] transfer_st_exec_time: est and exec time of data tranfer operation + * @param[out] transfer_st_exec_time: est and exec time of data transfer operation * * @return earliest finishing time of parent nodes */ @@ -98,11 +98,9 @@ private: void makeRank(); - int64_t DFSMaxRank(const model::OperationIndex &index, - std::unordered_map &calculated_rank); + int64_t DFSMaxRank(const model::OperationIndex &index); - int64_t DFSChildrensMaxRank(const model::OperationIndex &index, - std::unordered_map &calculated_rank); + int64_t DFSChildrenMaxRank(const model::OperationIndex &index); /** * @brief Returns the time, when backend is available for at least given amount of time. * @@ -111,7 +109,7 @@ private: * * @param[in] backend backend, for which to return the time * @param[in] starting_time time, starting which to look for gap - * @param[in] time_amount amound of the time, for which to look gap + * @param[in] time_amount amount of the time, for which to look gap * * @return time, when backend has at least time_amount free time */ @@ -129,9 +127,9 @@ private: std::unordered_map> _run_cache; // Finishing and starting time of each backend std::unordered_map> _backends_avail_time; - model::OperationIndexMap _parents_eft; - std::multimap> _ranks; - std::shared_ptr> _indexed_ranks; + model::OperationIndexMap _ops_eft; + std::multimap> _rank_to_op; + std::shared_ptr> _op_to_rank; std::unique_ptr _backend_resolver; std::unique_ptr _exec_time; const graph::Graph *_graph{nullptr}; -- 2.7.4
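For reference, below is a minimal, self-contained C++ sketch of the rank bookkeeping this patch converges on: a memoized DFS fills an op-to-rank map, which doubles as the "already computed" check (replacing the removed NOT_SET sentinel map), plus a rank-to-op multimap that later drives the scheduling order. Everything here (OpIndex, Op, RankBuilder, own_cost, the descending-order comparator) is invented for illustration and is not the neurun model or scheduler API; the real DFSMaxRank additionally averages per-backend execution times, folds in their standard deviation, and adds estimated data-transfer costs via DFSChildrenMaxRank.

#include <algorithm>
#include <cstdint>
#include <functional>
#include <iostream>
#include <map>
#include <unordered_map>
#include <vector>

using OpIndex = uint32_t;

struct Op
{
  int64_t own_cost;              // stand-in for the averaged execution time of the op
  std::vector<OpIndex> children; // operations that consume this op's outputs
};

class RankBuilder
{
public:
  explicit RankBuilder(const std::unordered_map<OpIndex, Op> &graph) : _graph{graph} {}

  // Counterpart of the refactored makeRank(): visit every node; memoization in
  // _op_to_rank makes repeated visits cheap and removes the need for a NOT_SET sentinel.
  void makeRank()
  {
    for (const auto &it : _graph)
      dfsMaxRank(it.first);
  }

  const std::multimap<int64_t, OpIndex, std::greater<int64_t>> &rankToOp() const
  {
    return _rank_to_op;
  }

private:
  int64_t dfsMaxRank(OpIndex index)
  {
    // An already-computed rank doubles as the "visited" marker.
    const auto it = _op_to_rank.find(index);
    if (it != _op_to_rank.end())
      return it->second;

    const Op &op = _graph.at(index);
    int64_t max_child_rank = 0;
    for (const OpIndex child : op.children)
      max_child_rank = std::max(max_child_rank, dfsMaxRank(child));

    // Upward-rank style metric: own cost plus the most expensive downstream chain.
    const int64_t rank = op.own_cost + max_child_rank;
    _op_to_rank.emplace(index, rank);
    _rank_to_op.emplace(rank, index); // std::greater keeps the highest rank first
    return rank;
  }

  const std::unordered_map<OpIndex, Op> &_graph;
  std::unordered_map<OpIndex, int64_t> _op_to_rank;
  std::multimap<int64_t, OpIndex, std::greater<int64_t>> _rank_to_op;
};

int main()
{
  // Diamond-shaped toy graph: op 0 feeds ops 1 and 2, which both feed op 3.
  const std::unordered_map<OpIndex, Op> graph{
      {0, {5, {1, 2}}}, {1, {3, {3}}}, {2, {7, {3}}}, {3, {2, {}}}};

  RankBuilder builder{graph};
  builder.makeRank();

  // "Scheduling" here is just walking ranks from highest to lowest.
  for (const auto &rank : builder.rankToOp())
    std::cout << "op " << rank.second << " has rank " << rank.first << "\n";
  return 0;
}

In the sketch, the root of the diamond accumulates the costlier child chain and therefore comes out first when iterating rank_to_op. The memoization also guarantees each node is ranked exactly once, which is what lets the refactored makeRank() simply iterate all operations and call DFSMaxRank on each, then assert afterwards that every operation received a rank.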