#include "backend/IConfig.h"
#include "backend/IShapeFixer.h"
#include "util/logging.h"
+#include "util/Utils.h"
#include "exec/FunctionSequence.h"
#include <cassert>
#include <cmath>
{
const auto lhs_index{node.getInputs().at(model::operation::AddNode::Input::LHS)};
const auto rhs_index{node.getInputs().at(model::operation::AddNode::Input::RHS)};
- /* Broadcasting isn't supported on CPU: no way to differ the existing exec_time record of
+ /*Broadcasting isn't supported on CPU: no way to differ the existing exec_time record of
* Add with and without broadcasting*/
- /*Quant is also supported: throws an exception in run(): in case of scheduling without warm-up
+ /*Quant is also unsupported: throws an exception in run(): in case of scheduling without warm-up
it isn't caught by tryBackend()*/
if (quant ||
!(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
<< "Started task scheduling: uses all backends to get more metrics for data transfer"
<< std::endl;
size_t backend_ind = 0;
- for (const auto &rank : _ranks)
+ for (const auto &rank : _rank_to_op)
{
VERBOSE(Scheduler::scheduleNode) << "scheduling (" << rank.second.value() << ")" << std::endl;
const auto &node = _graph->operations().at(rank.second);
if (is_profiling)
{
bool profile_data_transfer = true;
- const auto &node = _graph->operations().at(_ranks.begin()->second);
+ const auto &node = _graph->operations().at(_rank_to_op.begin()->second);
const bool quant = isQuant(*_graph, node);
const auto size = getOperationsFlattenedIOSize(*_graph, node);
for (const auto *backend : all_backends)
}
// for each task select the backend with the smallest earliest finishing time (eft)
- for (const auto &rank : _ranks)
+ for (const auto &rank : _rank_to_op)
{
scheduleNode(rank.second);
}
{
VERBOSE(Scheduler::makeRank) << "task prioritizing" << std::endl;
- model::OperationIndexMap<int64_t> calculated_rank;
+ _graph->operations().iterate(
+ [&](const model::OperationIndex &index, const model::Operation &) { DFSMaxRank(index); });
- const int NOT_SET = -1;
+ // Check that ranks are calculated for all operations (nodes)
_graph->operations().iterate([&](const model::OperationIndex &index, const model::Operation &) {
- calculated_rank[index] = NOT_SET;
+ UNUSED_RELEASE(index);
+ assert(_op_to_rank->find(index) != _op_to_rank->end());
});
-
- for (const auto &it : calculated_rank)
- {
- DFSMaxRank(it.first, calculated_rank);
- }
-
- // Rank for all of the operations(nodes) must be calculated.
- assert(std::all_of(calculated_rank.begin(), calculated_rank.end(),
- [](const std::pair<const model::OperationIndex, int64_t> &v) {
- return v.second != NOT_SET;
- }));
VERBOSE(Scheduler::makeRank) << "task prioritizing finished" << std::endl;
}
-int64_t Scheduler::DFSMaxRank(const model::OperationIndex &index,
- std::unordered_map<model::OperationIndex, int64_t> &calculated_rank)
+int64_t Scheduler::DFSMaxRank(const model::OperationIndex &index)
{
- if (calculated_rank.at(index) >= 0)
- return calculated_rank.at(index);
+ auto op_to_rank_it = _op_to_rank->find(index);
+ if (op_to_rank_it != _op_to_rank->end())
+ return op_to_rank_it->second;
const auto all_backends = backend::BackendManager::instance().getAll();
const auto &node = _graph->operations().at(index);
- int64_t rank = 0, std = 0;
+ int64_t rank = 0;
const bool quant = isQuant(*_graph, node);
const auto size = getOperationsFlattenedIOSize(*_graph, node);
- auto all_backends_size = static_cast<int64_t>(all_backends.size());
+ auto supported_backends_quantity = static_cast<int64_t>(all_backends.size());
- const auto max_child_rank = DFSChildrensMaxRank(index, calculated_rank);
+ const auto max_child_rank = DFSChildrenMaxRank(index);
// get average exec time of this op
for (const auto &backend : all_backends)
else
{
// this operation isn't supported in this backend
- --all_backends_size;
+ --supported_backends_quantity;
}
}
- assert((all_backends_size > 0) && "Encountered unsupported op");
- rank /= all_backends_size;
+ assert((supported_backends_quantity > 0) && "Encountered unsupported op");
+ rank /= supported_backends_quantity;
// get standard deviation
+ int64_t std = 0;
for (const auto backend : all_backends)
{
const auto exec_time = getTime(backend, node.getName(), quant, size);
std += (exec_time - rank) * (exec_time - rank);
}
}
- std /= all_backends_size;
+ std /= supported_backends_quantity;
if (std > 0)
{
std = static_cast<int>(std::sqrt(std));
rank *= std;
}
rank += max_child_rank;
- calculated_rank[index] = rank;
assert(rank >= 0);
- _ranks.emplace(rank, index);
- _indexed_ranks->emplace(index, rank);
+ _rank_to_op.emplace(rank, index);
+ _op_to_rank->emplace(index, rank);
VERBOSE(Scheduler::DFSMaxRank) << "rank of operation (" << index.value() << ")" << node.getName()
<< " is " << rank << std::endl;
return rank;
}
-int64_t
-Scheduler::DFSChildrensMaxRank(const model::OperationIndex &index,
- std::unordered_map<model::OperationIndex, int64_t> &calculated_rank)
+int64_t Scheduler::DFSChildrenMaxRank(const model::OperationIndex &index)
{
const auto all_backends = backend::BackendManager::instance().getAll();
const auto &node = _graph->operations().at(index);
{
const auto &operand = _graph->operands().at(output);
const bool quant = operand.typeInfo().type() == model::DataType::QUANT8_ASYMM;
- // avarage data transfer cost of this operand's data
+ // average data transfer cost of this operand's data
int64_t avg_transfer_cost = 1;
for (const auto *backend : all_backends)
{
avg_transfer_cost /= all_backends.size();
for (const auto &use : operand.getUses().list())
{
- const auto cur_child_rank = DFSMaxRank(use, calculated_rank);
+ const auto cur_child_rank = DFSMaxRank(use);
max_child_rank = std::max(max_child_rank, cur_child_rank + avg_transfer_cost);
}
}
{prev_op_ft + it.second, prev_op_ft});
}
- _parents_eft[index] = eft;
+ _ops_eft[index] = eft;
_backends_avail_time[chosen_backend].emplace(eft, eft - selected_exec_time);
_backend_resolver->setBackend(index, chosen_backend);
{
const bool is_linear_exec = "Linear" == util::getConfigString(util::config::EXECUTOR);
const bool is_parallel_exec = "Parallel" == util::getConfigString(util::config::EXECUTOR);
- // Permutation will cause creating a seperate subgraph that contains just this permutation node.
+ // Permutation will cause creating a separate subgraph that contains just this permutation node.
// This isn't needed for Linear executor since it doesn't use subgraphs
// The value of 1 ms was picked experimentally
int64_t permute_fine = 1000;
exec_time *= CPU_DELAY;
}
- // get max eft of direct (one lavel above) predecessors
+ // get max eft of direct (one level above) predecessors
auto max_pred_eft = predMaxEFT(backend, node, transfer_st_exec_time);
int64_t total_transfer_cost = 0;
std::vector<std::multimap<int64_t, int64_t>::iterator> inserted_permutations;
- // Find free time for data transfering and insert it into backend taskset. This is needed:
+ // Find free time for data transferring and insert it into backend taskset. This is needed:
// 1. Time for multiple permutations for this node's input is found correctly
// 2. If backend==cpu, then free time for this node must come after permutations
auto cpu_backend = backend::BackendManager::instance().get("cpu");
std::multimap<int64_t, int64_t> &transfer_st_exec_time)
{
int64_t max_pred_eft = 0;
- for (const auto &input : node.getInputs())
+ for (const auto &input_operand_idx : node.getInputs())
{
- const auto &operand = _graph->operands().at(input);
- const bool quant = operand.typeInfo().type() == model::DataType::QUANT8_ASYMM;
- // operations, whose output is current node's this input operand
- for (const auto &defs : operand.getDef().list())
+ const auto &input_operand = _graph->operands().at(input_operand_idx);
+ const bool quant = input_operand.typeInfo().type() == model::DataType::QUANT8_ASYMM;
+
+ for (const auto &input_node_idx : input_operand.getDef().list())
{
- // Data transfer cost from this parent's backend to current node's backend:
- auto parent_backend = _backend_resolver->getBackend(defs);
+ // Data transfer cost from parent's node backend to current node's backend:
+ auto parent_backend = _backend_resolver->getBackend(input_node_idx);
- max_pred_eft = std::max(max_pred_eft, _parents_eft.at(defs));
+ max_pred_eft = std::max(max_pred_eft, _ops_eft.at(input_node_idx));
if (parent_backend != backend)
{
- // Multiply operand size by 2 because size must discribe input+output size
+ // Multiply operand size by 2 because size must describe input+output size
int64_t transfer_cost = getTime(parent_backend, backend->config()->id(), quant,
- operand.info().total_size() * 2);
- transfer_st_exec_time.emplace(_parents_eft.at(defs), transfer_cost);
+ input_operand.info().total_size() * 2);
+ transfer_st_exec_time.emplace(_ops_eft.at(input_node_idx), transfer_cost);
}
}
}
* @param[in] backend_resolver backend resolver
*/
Scheduler(const neurun::model::Operands &operands)
- : _run_cache{}, _backends_avail_time{}, _parents_eft{},
- _indexed_ranks{std::make_shared<model::OperationIndexMap<int64_t>>()}
+ : _run_cache{}, _backends_avail_time{}, _ops_eft{},
+ _op_to_rank{std::make_shared<model::OperationIndexMap<int64_t>>()}
{
_backend_resolver = nnfw::cpp14::make_unique<compiler::BackendResolver>(operands);
_exec_time =
* https://www.hindawi.com/journals/sp/2016/3676149/
*/
std::unique_ptr<compiler::BackendResolver> schedule(const graph::Graph &graph) final;
- std::shared_ptr<model::OperationIndexMap<int64_t>> getIndexedRanks() { return _indexed_ranks; }
+ std::shared_ptr<model::OperationIndexMap<int64_t>> getIndexedRanks() { return _op_to_rank; }
private:
void scheduleNode(const model::OperationIndex &);
*
* @param[in] backend: backend, for which to return the time
* @param[in] node: node to get eft of parents
- * @param[out] transfer_st_exec_time: est and exec time of data tranfer operation
+ * @param[out] transfer_st_exec_time: est and exec time of data transfer operation
*
* @return earliest finishing time of parent nodes
*/
void makeRank();
- int64_t DFSMaxRank(const model::OperationIndex &index,
- std::unordered_map<model::OperationIndex, int64_t> &calculated_rank);
+ int64_t DFSMaxRank(const model::OperationIndex &index);
- int64_t DFSChildrensMaxRank(const model::OperationIndex &index,
- std::unordered_map<model::OperationIndex, int64_t> &calculated_rank);
+ int64_t DFSChildrenMaxRank(const model::OperationIndex &index);
/**
* @brief Returns the time, when backend is available for at least given amount of time.
*
*
* @param[in] backend backend, for which to return the time
* @param[in] starting_time time, starting which to look for gap
- * @param[in] time_amount amound of the time, for which to look gap
+ * @param[in] time_amount amount of the time, for which to look gap
*
* @return time, when backend has at least time_amount free time
*/
std::unordered_map<const backend::Backend *, std::unordered_map<std::string, int>> _run_cache;
// Finishing and starting time of each backend
std::unordered_map<const backend::Backend *, std::map<int64_t, int64_t>> _backends_avail_time;
- model::OperationIndexMap<int64_t> _parents_eft;
- std::multimap<int64_t, model::OperationIndex, std::greater<int64_t>> _ranks;
- std::shared_ptr<model::OperationIndexMap<int64_t>> _indexed_ranks;
+ model::OperationIndexMap<int64_t> _ops_eft;
+ std::multimap<int64_t, model::OperationIndex, std::greater<int64_t>> _rank_to_op;
+ std::shared_ptr<model::OperationIndexMap<int64_t>> _op_to_rank;
std::unique_ptr<compiler::BackendResolver> _backend_resolver;
std::unique_ptr<backend::ExecTime> _exec_time;
const graph::Graph *_graph{nullptr};