[neurun] Add fine for scheduling into another backend (#5880)

author Дилшоджон Умронхонович Пошшоев/AI Tools Lab /SRR/Engineer/삼성전자 <d.poshshoev@samsung.com>

Mon, 29 Jul 2019 12:54:11 +0000 (15:54 +0300)

committer 이한종/On-Device Lab(SR)/Engineer/삼성전자 <hanjoung.lee@samsung.com>

Mon, 29 Jul 2019 12:54:11 +0000 (21:54 +0900)
author Дилшоджон Умронхонович Пошшоев/AI Tools Lab /SRR/Engineer/삼성전자 <d.poshshoev@samsung.com>
Mon, 29 Jul 2019 12:54:11 +0000 (15:54 +0300)
committer 이한종/On-Device Lab(SR)/Engineer/삼성전자 <hanjoung.lee@samsung.com>
Mon, 29 Jul 2019 12:54:11 +0000 (21:54 +0900)
diff --git a/runtimes/neurun/core/src/compiler/Scheduler.cc b/runtimes/neurun/core/src/compiler/Scheduler.cc

index 6835609..041d5c5 100644 (file)
--- a/runtimes/neurun/core/src/compiler/Scheduler.cc
+++ b/runtimes/neurun/core/src/compiler/Scheduler.cc
@@ -100,6 +100,30 @@ static bool isWorkaroundSkip(const graph::Graph &graph, const backend::Backend *
    return false;
  }
  
+// Returns true if the node can be merged into a subgraph
+static bool isMergable(const graph::Graph &graph, const model::Operation &node)
+{
+  size_t prev_op_cnt = 0;
+  for (const auto &input : node.getInputs())
+  {
+    // Only valid inputs are considered: constant operands are skipped
+    const auto &operand = graph.operands().at(input);
+    if (operand.isConstant())
+      continue;
+
+    // This operand is produced by another operation; it is not a weight or bias
+    if (operand.getDef().list().size() > 0)
+      ++prev_op_cnt;
+
+    // The current node either has multiple inputs (like concat) or is at the beginning of a separate branch
+    if (prev_op_cnt > 1 || operand.getUses().list().size() > 1)
+    {
+      return false;
+    }
+  }
+  return true;
+}
+
  void Scheduler::scheduleShufflingBackends()
  {
    const auto all_backends = _backend_resolver->getAllBackends();
@@ -359,9 +383,9 @@ int64_t Scheduler::backendAvailableTime(const backend::Backend *backend,
    // finishing time of an op, that will come before current op
    auto prev_op_ft = starting_time;
    // until reach the "hole/gap", that is enough to run this op
-  while (next_op_fst != backend_times.end() && next_op_fst->second - prev_op_ft < time_amount)
+  while (next_op_fst != backend_times.end() && next_op_fst->second - prev_op_ft <= time_amount)
    {
-    prev_op_ft = next_op_fst->first;
+    prev_op_ft = next_op_fst->first + 1;
      ++next_op_fst;
    }
    return prev_op_ft;
@@ -414,6 +438,10 @@ Scheduler::ESTAndExecTime(const backend::Backend *backend, const model::Operatio
                            std::multimap<int64_t, int64_t> &transfer_st_exec_time)
  {
    const bool is_linear_exec = "Linear" == util::getConfigString(util::config::EXECUTOR);
+  // A permutation causes a separate subgraph to be created that contains just this permutation node.
+  // This isn't needed for the Linear executor since it doesn't use subgraphs.
+  // The value of 1 ms was picked experimentally
+  int64_t permute_fine = 1000;
    // Multiply cpu operations' exec time by 2 because in parallel executor it might be busy with
    // permutation on other branches or non-nnfw specific tasks and have to wait for it.
    // Number 2 is picked experimentally
@@ -421,6 +449,12 @@ Scheduler::ESTAndExecTime(const backend::Backend *backend, const model::Operatio
    const auto &node = _graph->operations().at(index);
    const bool quant = isQuant(*_graph, node);
    const auto size = getOperationsFlattenedIOSize(*_graph, node);
+  // If this node can be part of a subgraph, then assigning a different backend causes another
+  // subgraph to be created
+  if (isMergable(*_graph, node))
+  {
+    permute_fine *= 2;
+  }
    if (isWorkaroundSkip(*_graph, backend, node, quant))
    {
      return {_exec_time->getMax(), _exec_time->getMax()};
@@ -433,7 +467,7 @@ Scheduler::ESTAndExecTime(const backend::Backend *backend, const model::Operatio
    }
  
    // get max eft of direct (one lavel above) predecessors
-  auto max_pred_eft = predMaxEFT(backend, node, quant, transfer_st_exec_time);
+  auto max_pred_eft = predMaxEFT(backend, node, transfer_st_exec_time);
  
    int64_t total_transfer_cost = 0;
    std::vector<std::multimap<int64_t, int64_t>::iterator> inserted_permutations;
@@ -445,6 +479,7 @@ Scheduler::ESTAndExecTime(const backend::Backend *backend, const model::Operatio
      if (!is_linear_exec)
      {
        it.second *= CPU_DELAY;
+      it.second += permute_fine;
      }
      total_transfer_cost += it.second;
  
@@ -469,29 +504,31 @@ Scheduler::ESTAndExecTime(const backend::Backend *backend, const model::Operatio
    // In case linear executor measure just exec time and data transfer time
    if (is_linear_exec)
    {
-    VERBOSE(Scheduler::scheduleNode)
+    VERBOSE(Scheduler::ESTAndExecTime)
          << "exec_time of (" << index.value() << ") " << node.getName() << " quant==" << quant
          << " on " << backend->config()->id() << " is " << exec_time
          << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;
+
      return {total_transfer_cost, exec_time};
    }
-  VERBOSE(Scheduler::scheduleNode) << "exec_time of (" << index.value() << ") " << node.getName()
-                                   << " quant==" << quant << " on " << backend->config()->id()
-                                   << ": " << exec_time
-                                   << " microseconds. Backend available time: " << prev_op_ft
-                                   << " Parent's max eft: " << max_pred_eft - total_transfer_cost
-                                   << " data transfer cost: " << total_transfer_cost << std::endl;
+  VERBOSE(Scheduler::ESTAndExecTime) << "exec_time of (" << index.value() << ") " << node.getName()
+                                     << " quant==" << quant << " on " << backend->config()->id()
+                                     << ": " << exec_time
+                                     << " microseconds. Backend available time: " << prev_op_ft
+                                     << " Parent's max eft: " << max_pred_eft - total_transfer_cost
+                                     << " data transfer cost: " << total_transfer_cost << std::endl;
  
    return {prev_op_ft, exec_time};
  }
  
  int64_t Scheduler::predMaxEFT(const backend::Backend *backend, const model::Operation &node,
-                              bool quant, std::multimap<int64_t, int64_t> &transfer_st_exec_time)
+                              std::multimap<int64_t, int64_t> &transfer_st_exec_time)
  {
    int64_t max_pred_eft = 0;
    for (const auto &input : node.getInputs())
    {
      const auto &operand = _graph->operands().at(input);
+    const bool quant = operand.typeInfo().type() == model::DataType::QUANT8_ASYMM;
      // operations, whose output is current node's this input operand
      for (const auto &defs : operand.getDef().list())
      {
diff --git a/runtimes/neurun/core/src/compiler/Scheduler.h b/runtimes/neurun/core/src/compiler/Scheduler.h

index 5ce6c1a..b87ad1e 100644 (file)
--- a/runtimes/neurun/core/src/compiler/Scheduler.h
+++ b/runtimes/neurun/core/src/compiler/Scheduler.h
@@ -89,12 +89,11 @@ private:
     *
     * @param[in] backend: backend, for which to return the time
     * @param[in] node: node to get eft of parents
-   * @param[in] quant: if input data is quantized
     * @param[out] transfer_st_exec_time: est and exec time of data tranfer operation
     *
     * @return earliest finishing time of parent nodes
     */
-  int64_t predMaxEFT(const backend::Backend *backend, const model::Operation &node, bool quant,
+  int64_t predMaxEFT(const backend::Backend *backend, const model::Operation &node,
                       std::multimap<int64_t, int64_t> &transfer_st_exec_time);
  
    void makeRank();
author	Дилшоджон Умронхонович Пошшоев/AI Tools Lab /SRR/Engineer/삼성전자 <d.poshshoev@samsung.com>
	Mon, 29 Jul 2019 12:54:11 +0000 (15:54 +0300)
committer	이한종/On-Device Lab(SR)/Engineer/삼성전자 <hanjoung.lee@samsung.com>
	Mon, 29 Jul 2019 12:54:11 +0000 (21:54 +0900)
runtimes/neurun/core/src/compiler/Scheduler.cc		patch \| blob \| history
runtimes/neurun/core/src/compiler/Scheduler.h		patch \| blob \| history