[neurun] Schedule branch by branch instead of node (#6965)

author Дилшоджон Пошшоев/AI Tools Lab /SRR/Engineer/삼성전자 <d.poshshoev@samsung.com>

Fri, 4 Oct 2019 07:01:51 +0000 (10:01 +0300)

committer 이춘석/On-Device Lab(SR)/Staff Engineer/삼성전자 <chunseok.lee@samsung.com>

Fri, 4 Oct 2019 07:01:51 +0000 (16:01 +0900)
author Дилшоджон Пошшоев/AI Tools Lab /SRR/Engineer/삼성전자 <d.poshshoev@samsung.com>
Fri, 4 Oct 2019 07:01:51 +0000 (10:01 +0300)
committer 이춘석/On-Device Lab(SR)/Staff Engineer/삼성전자 <chunseok.lee@samsung.com>
Fri, 4 Oct 2019 07:01:51 +0000 (16:01 +0900)
diff --git a/runtimes/neurun/core/src/compiler/HEScheduler.cc b/runtimes/neurun/core/src/compiler/HEScheduler.cc

index bbcb4d6..c2dec06 100644 (file)
--- a/runtimes/neurun/core/src/compiler/HEScheduler.cc
+++ b/runtimes/neurun/core/src/compiler/HEScheduler.cc
@@ -185,6 +185,43 @@ bool HEScheduler::isNodeProfiled(const model::Operation &node)
    return true;
  }
  
+void HEScheduler::scheduleBranch(const model::OperationIndex &index,
+                                 model::OperationIndexMap<bool> &scheduled)
+{
+  auto loc_index = index;
+  const backend::Backend *parent_backend = nullptr;
+  while (true)
+  {
+    if (scheduled[loc_index])
+    {
+      return;
+    }
+    if (!scheduleNode(loc_index, parent_backend))
+    {
+      return;
+    }
+    scheduled[loc_index] = true;
+    parent_backend = _backend_resolver->getBackend(loc_index);
+
+    const auto &node = _graph->operations().at(loc_index);
+    model::OperandIndex tmp;
+    /* get the only output operand, that is input of the next single operation
+     *   and just this nodes output.*/
+    if (node.getOutputs().size() != 1)
+    {
+      return;
+    }
+    const auto &only_out_operand = _graph->operands().at(*node.getOutputs().begin());
+    loc_index = only_out_operand.getUses().list().front();
+    /* verify, that next node is neither beginning nor ending node of a branch*/
+    const auto &next_node = _graph->operations().at(loc_index);
+    if (!isMergable(*_graph, next_node))
+    {
+      return;
+    }
+  }
+}
+
  std::unique_ptr<compiler::BackendResolver> HEScheduler::schedule(const graph::Graph &graph)
  {
    _graph = &graph;
@@ -217,10 +254,14 @@ std::unique_ptr<compiler::BackendResolver> HEScheduler::schedule(const graph::Gr
      }
    }
  
+  model::OperationIndexMap<bool> visited;
+  graph.operations().iterate([&](const model::OperationIndex &index, const model::Operation &) {
+    visited[index] = false;
+  });
    // for each task select the backend with the smallest earliest finishing time(eft)
    for (const auto &rank : _rank_to_op)
    {
-    scheduleNode(rank.second);
+    scheduleBranch(rank.second, visited);
    }
    VERBOSE(HEScheduler::schedule) << "task scheduling finished" << std::endl;
    return std::move(_backend_resolver);
@@ -416,7 +457,8 @@ int64_t HEScheduler::backendAvailableTime(const backend::Backend *backend,
    return prev_op_ft;
  }
  
-void HEScheduler::scheduleNode(const model::OperationIndex &index)
+bool HEScheduler::scheduleNode(const model::OperationIndex &index,
+                               const backend::Backend *parent_backend)
  {
    VERBOSE(HEScheduler::scheduleNode) << "scheduling (" << index.value() << ")" << std::endl;
    int64_t eft = std::numeric_limits<int64_t>::max(), selected_exec_time = 0;
@@ -444,6 +486,11 @@ void HEScheduler::scheduleNode(const model::OperationIndex &index)
      throw std::runtime_error{"Fail to choose backend on scheduler"};
    }
  
+  // this is part of a branch and it is assigned another backend
+  if (parent_backend && parent_backend != chosen_backend)
+  {
+    return false;
+  }
    for (const auto &it : selected_transfer_st_exec_time)
    {
      auto prev_op_ft = backendAvailableTime(_cpu_backend, it.first, it.second);
@@ -457,6 +504,7 @@ void HEScheduler::scheduleNode(const model::OperationIndex &index)
    VERBOSE(HEScheduler::scheduleNode) << "backend for " << node.getName() << " is "
                                       << chosen_backend->config()->id() << ". Its eft: " << eft
                                       << std::endl;
+  return true;
  }
  
  std::pair<int64_t, int64_t>
diff --git a/runtimes/neurun/core/src/compiler/HEScheduler.h b/runtimes/neurun/core/src/compiler/HEScheduler.h

index ba4ebac..1657482 100644 (file)
--- a/runtimes/neurun/core/src/compiler/HEScheduler.h
+++ b/runtimes/neurun/core/src/compiler/HEScheduler.h
@@ -81,7 +81,7 @@ public:
  private:
    bool isNodeProfiled(const model::Operation &);
  
-  void scheduleNode(const model::OperationIndex &);
+  bool scheduleNode(const model::OperationIndex &, const backend::Backend *parent_backend);
    /**
     * @brief   Get earliest starting time and execution time of an operation on a backend.
     *
@@ -139,6 +139,19 @@ private:
  
    int64_t tryBackend(const model::Operation &node, const backend::Backend *backend);
  
+  /**
+   * @brief   Schedule a node and its successor until:
+   *            1. there is no branching or connection of multiple branches
+   *            2. for subsequent nodes: other than predecessor's backend is prefered
+   *
+   * @param[in] index: index of an operation
+   * @param[in] scheduled: a map to check if this node has already been scheduled
+   *
+   * @return N/A
+   */
+  void scheduleBranch(const model::OperationIndex &index,
+                      model::OperationIndexMap<bool> &scheduled);
+
  private:
    // This variable stores backend/node pairs with unknown execution time, and hints scheduler
    // whether it should assign these backends to these nodes:
diff --git a/runtimes/neurun/test/core/compiler/Scheduler.cc b/runtimes/neurun/test/core/compiler/Scheduler.cc

index cf82261..2f00629 100644 (file)
--- a/runtimes/neurun/test/core/compiler/Scheduler.cc
+++ b/runtimes/neurun/test/core/compiler/Scheduler.cc
@@ -453,14 +453,12 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time)
    {
      // Increase execution time for GPU backend
      ExecTime et(_mock_backends);
-    /* for parallel executor: set a time, that is larger than branches_cnt*npu_exec_time
-        so that npu is prefered: the i+1 level node of the first branch will wait for npu
-        until it finishes the i-th nodes of all other branches in BFS order*/
-    setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 2 + 1);
-    /* for parallel executor: set ET of FC larger than Mul's to be determinant:
-        if they are equal and scheduling is done in order mul1->FC1->FC2->mul2,
-        then for mul2 gpu is selected since NPU_ET*3 > GPU_ET(which is NPU_ET * 2 + 1)*/
-    setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 2 + 2);
+    /* for parallel executor: set a time, that is larger than sum_of_other_branches_nodes_cnt *
+     * npu_exec_time so that npu is prefered: the ith branch will wait for npu until it finishes the
+     * [0;i-1] branches nodes in DFS order. In each branch it goes deep intul doesn't encounter
+     * branching or scheduler assigns another backend to a node*/
+    setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
+    setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
      et.uploadOperationsExecTime();
  
      // Test scheduler
author	Дилшоджон Пошшоев/AI Tools Lab /SRR/Engineer/삼성전자 <d.poshshoev@samsung.com>
	Fri, 4 Oct 2019 07:01:51 +0000 (10:01 +0300)
committer	이춘석/On-Device Lab(SR)/Staff Engineer/삼성전자 <chunseok.lee@samsung.com>
	Fri, 4 Oct 2019 07:01:51 +0000 (16:01 +0900)
runtimes/neurun/core/src/compiler/HEScheduler.cc		patch \| blob \| history
runtimes/neurun/core/src/compiler/HEScheduler.h		patch \| blob \| history
runtimes/neurun/test/core/compiler/Scheduler.cc		patch \| blob \| history