return false;
}
-void Scheduler::scheduleShufflingBackends(const graph::Graph &graph)
+void Scheduler::scheduleShufflingBackends()
{
const auto all_backends = _backend_resolver->getAllBackends();
VERBOSE(Scheduler::scheduleNode)
for (const auto &rank : _ranks)
{
VERBOSE(Scheduler::scheduleNode) << "scheduling (" << rank.second.value() << ")" << std::endl;
- const auto &node = graph.operations().at(rank.second);
- bool quant = isQuant(graph, node);
- auto size = getOperationsFlattenedIOSize(graph, node);
+ const auto &node = _graph->operations().at(rank.second);
+ const bool quant = isQuant(*_graph, node);
+ const auto size = getOperationsFlattenedIOSize(*_graph, node);
for (size_t i = 0;; ++i)
{
if (i == all_backends.size())
{
backend_ind = 0;
}
- if (isWorkaroundSkip(graph, all_backends[backend_ind], node, quant))
+ if (isWorkaroundSkip(*_graph, all_backends[backend_ind], node, quant))
{
++backend_ind;
continue;
}
- auto exec_time =
+ const auto exec_time =
_exec_time->getOperationExecTime(all_backends[backend_ind], node.getName(), quant, size);
    // Scheduling to measure data transfer must be done after measuring all backends separately
assert(exec_time != _exec_time->NOT_FOUND);
std::unique_ptr<compiler::BackendResolver> Scheduler::schedule(const graph::Graph &graph)
{
+ _graph = &graph;
VERBOSE(Scheduler::schedule) << "task scheduling started" << std::endl;
  // Make ranks and save them in descending order
- makeRank(graph);
+ makeRank();
const auto all_backends = _backend_resolver->getAllBackends();
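+  // seed each backend's availability map ({finish time -> start time}) with an empty slot at 0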
for (const auto *backend : all_backends)
_backends_avail_time.emplace(backend, std::map<int64_t, int64_t>{{0, 0}});
}
- bool is_profiling = util::getConfigBool(util::config::PROFILING_MODE);
+ const bool is_profiling = util::getConfigBool(util::config::PROFILING_MODE);
if (is_profiling)
{
bool profile_data_transfer = true;
- const auto &node = graph.operations().at(_ranks.begin()->second);
- bool quant = isQuant(graph, node);
- auto size = getOperationsFlattenedIOSize(graph, node);
+ const auto &node = _graph->operations().at(_ranks.begin()->second);
+ const bool quant = isQuant(*_graph, node);
+ const auto size = getOperationsFlattenedIOSize(*_graph, node);
for (const auto *backend : all_backends)
{
- auto exec_time = _exec_time->getOperationExecTime(backend, node.getName(), quant, size);
+ const auto exec_time = _exec_time->getOperationExecTime(backend, node.getName(), quant, size);
if (exec_time == _exec_time->NOT_FOUND)
{
profile_data_transfer = false;
}
if (profile_data_transfer)
{
- scheduleShufflingBackends(graph);
+ scheduleShufflingBackends();
VERBOSE(Scheduler::schedule) << "task scheduling finished" << std::endl;
return nnfw::cpp14::make_unique<BackendResolver>(*_backend_resolver);
}
  // for each task select the backend with the smallest earliest finishing time (eft)
for (const auto &rank : _ranks)
{
- scheduleNode(graph, rank.second);
+ scheduleNode(rank.second);
}
VERBOSE(Scheduler::schedule) << "task scheduling finished" << std::endl;
return nnfw::cpp14::make_unique<BackendResolver>(*_backend_resolver);
int64_t Scheduler::getTime(const backend::Backend *backend, const std::string &operation,
bool quant, uint32_t size)
{
- auto time = _exec_time->getOperationExecTime(backend, operation, quant, size);
+ const auto time = _exec_time->getOperationExecTime(backend, operation, quant, size);
if (time != _exec_time->NOT_FOUND)
{
return time;
}
// TODO: Too large function: needs code refactoring
-void Scheduler::makeRank(const graph::Graph &model)
+void Scheduler::makeRank()
{
VERBOSE(Scheduler::makeRank) << "task prioritizing" << std::endl;
std::unordered_map<model::OperationIndex, int64_t> calculated_rank;
const int NOT_SET = -1;
- model.operations().iterate([&](const model::OperationIndex &index, const model::Operation &) {
+ _graph->operations().iterate([&](const model::OperationIndex &index, const model::Operation &) {
calculated_rank[index] = NOT_SET;
});
[&](const model::OperationIndex &index, const model::Operation &node) -> int64_t {
int64_t rank = 0, max_child_rank = 0, std = 0;
- bool quant = isQuant(model, node);
+ const bool quant = isQuant(*_graph, node);
if (calculated_rank.at(index) >= 0)
return calculated_rank.at(index);
// get children's max rank
for (const auto &output : node.getOutputs())
{
- const auto &operand = model.operands().at(output);
+ const auto &operand = _graph->operands().at(output);
      // average data transfer cost of this operand's data
int64_t avg_transfer_cost = 1;
for (const auto *backend : all_backends)
avg_transfer_cost /= all_backends.size();
for (const auto &use : operand.getUses().list())
{
- auto cur_child_rank = dfs_recursive(use, model.operations().at(use));
+ const auto cur_child_rank = dfs_recursive(use, _graph->operations().at(use));
max_child_rank = std::max(max_child_rank, cur_child_rank + avg_transfer_cost);
}
}
- auto size = getOperationsFlattenedIOSize(model, node);
+ const auto size = getOperationsFlattenedIOSize(*_graph, node);
auto all_backends_size = static_cast<int64_t>(all_backends.size());
// get average exec time of this op
for (const auto &backend : all_backends)
// get standard deviation
for (const auto backend : all_backends)
{
- auto exec_time = getTime(backend, node.getName(), quant, size);
+ const auto exec_time = getTime(backend, node.getName(), quant, size);
if (exec_time < _exec_time->getMax())
{
std += (exec_time - rank) * (exec_time - rank);
};
for (const auto &it : calculated_rank)
{
- const auto &node = model.operations().at(it.first);
+ const auto &node = _graph->operations().at(it.first);
dfs_recursive(it.first, node);
}
int64_t Scheduler::backendAvailableTime(const backend::Backend *backend,
const int64_t &starting_time, const int64_t &time_amount)
{
- auto backend_times = _backends_avail_time.at(backend);
+ const auto backend_times = _backends_avail_time.at(backend);
  // finishing and starting times of the op that will come after the current op
  auto next_op_fst = backend_times.upper_bound(starting_time);
  // finishing time of the op that will come before the current op
return prev_op_ft;
}
-// TODO: Too large function: need code refactoring
-void Scheduler::scheduleNode(const graph::Graph &model, const model::OperationIndex &index)
+void Scheduler::scheduleNode(const model::OperationIndex &index)
{
- const bool is_linear_exec = "Linear" == util::getConfigString(util::config::EXECUTOR);
- // Multiply cpu operations' exec time by 2 because in parallel executor it might be busy with
- // permutation on other branches or non-nnfw specific tasks and have to wait for it.
- // Number 2 is picked experimentally
- const auto CPU_DELAY = 2;
VERBOSE(Scheduler::scheduleNode) << "scheduling (" << index.value() << ")" << std::endl;
+
const auto all_backends = _backend_resolver->getAllBackends();
int64_t eft = std::numeric_limits<int64_t>::max(), selected_exec_time = 0;
- const auto &node = model.operations().at(index);
- const auto &obj = model.operands().at(node.getInputs().at(0));
- bool quant = isQuant(model, node);
+ const auto &node = _graph->operations().at(index);
- auto size = getOperationsFlattenedIOSize(model, node);
- std::multimap<int64_t, int64_t> selected_permute_avail_time;
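+  // {earliest transfer start time, transfer cost} entries for the backend finally chosen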
+ std::multimap<int64_t, int64_t> selected_transfer_st_exec_time;
// select the backend with the smallest eft of this task
const backend::Backend *chosen_backend = nullptr;
for (const auto *backend : all_backends)
{
- std::multimap<int64_t, int64_t> permute_avail_time;
- if (isWorkaroundSkip(model, backend, node, quant))
- {
- continue;
- }
- // get average exec time of the op on this backend
- auto exec_time = getTime(backend, node.getName(), quant, size);
- if (backend->config()->id() == "cpu" && !is_linear_exec)
- {
- exec_time *= CPU_DELAY;
- }
- // get max eft of direct (one lavel above) predecessors
- int64_t max_pred_eft = 0;
- int64_t total_transfer_cost = 0;
- for (const auto &input : node.getInputs())
- {
- const auto &operand = model.operands().at(input);
- // operations, whose output is current node's this input operand
- for (const auto &defs : operand.getDef().list())
- {
- // Data transfer cost from this parent's backend to current node's backend:
- auto parent_backend = _backend_resolver->getBackend(defs);
-
- if (parent_backend == backend)
- {
- max_pred_eft = std::max(max_pred_eft, _parents_eft.at(defs));
- }
- else
- {
- // Multiply obj size by 2 because size my discribe input+output size
- int64_t transfer_cost =
- getTime(parent_backend, backend->config()->id(), quant, obj.info().total_size() * 2);
- if (!is_linear_exec)
- {
- transfer_cost *= CPU_DELAY;
- }
- total_transfer_cost += transfer_cost;
- permute_avail_time.insert({_parents_eft.at(defs), transfer_cost});
- }
- }
- }
-
- std::vector<std::multimap<int64_t, int64_t>::iterator> inserted_permutations;
- // Find free time for data transfering and insert it into backend taskset. This is needed:
- // 1. Time for multiple permutations for this node's input is found correctly
- // 2. If backend==cpu, then free time for this node must come after permutations
- for (const auto &it : permute_avail_time)
- {
- auto prev_op_ft =
- backendAvailableTime(_backend_resolver->getBackend("cpu"), it.first, it.second);
- max_pred_eft = std::max(max_pred_eft, prev_op_ft + it.second);
-
- auto tmp = _backends_avail_time[_backend_resolver->getBackend("cpu")].insert(
- {prev_op_ft + it.second, prev_op_ft});
- inserted_permutations.push_back(tmp.first);
- }
- // find the hole/gap, where this op can be put or the finishing time of the last assigned op
- auto prev_op_ft = backendAvailableTime(backend, max_pred_eft, exec_time);
+ std::multimap<int64_t, int64_t> transfer_st_exec_time;
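+    // est_and_et = {earliest start time, execution time} of this node on the candidate backend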
+ const auto est_and_et = ESTAndExecTime(backend, index, transfer_st_exec_time);
- // Remove inserted permutation from cpu's task set
- for (const auto &it : inserted_permutations)
- {
- _backends_avail_time[_backend_resolver->getBackend("cpu")].erase(it);
- }
- // In case linear executor measure just exec time and data transfer time
- if (is_linear_exec)
- {
- prev_op_ft = total_transfer_cost;
- VERBOSE(Scheduler::scheduleNode)
- << "exec_time of (" << index.value() << ") " << node.getName() << " quant==" << quant
- << " on " << backend->config()->id() << " is " << exec_time
- << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;
- }
- else
+ if (eft > est_and_et.first + est_and_et.second)
{
- VERBOSE(Scheduler::scheduleNode)
- << "exec_time of (" << index.value() << ") " << node.getName() << " quant==" << quant
- << " on " << backend->config()->id() << ": " << exec_time
- << " microseconds. Backend available time: " << prev_op_ft
- << " Parent's max eft: " << max_pred_eft - total_transfer_cost
- << " data transfer cost: " << total_transfer_cost << std::endl;
- }
- if (eft > prev_op_ft + exec_time)
- {
- eft = prev_op_ft + exec_time;
- selected_exec_time = exec_time;
+ eft = est_and_et.first + est_and_et.second;
+ selected_exec_time = est_and_et.second;
chosen_backend = backend;
- selected_permute_avail_time = permute_avail_time;
+ selected_transfer_st_exec_time = transfer_st_exec_time;
}
}
- for (const auto &it : selected_permute_avail_time)
+
+ for (const auto &it : selected_transfer_st_exec_time)
{
auto prev_op_ft =
backendAvailableTime(_backend_resolver->getBackend("cpu"), it.first, it.second);
_backends_avail_time[_backend_resolver->getBackend("cpu")].insert(
{prev_op_ft + it.second, prev_op_ft});
}
- VERBOSE(Scheduler::scheduleNode) << "backend for " << node.getName() << " is "
- << chosen_backend->config()->id() << ". Its eft: " << eft
- << std::endl;
_parents_eft[index] = eft;
_backends_avail_time[chosen_backend].insert({eft, eft - selected_exec_time});
_backend_resolver->setBackend(index, chosen_backend);
+
+ VERBOSE(Scheduler::scheduleNode) << "backend for " << node.getName() << " is "
+ << chosen_backend->config()->id() << ". Its eft: " << eft
+ << std::endl;
+}
+
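+// Estimates the earliest start time (est) and execution time of the node at `index` on `backend`.
+// Returns {est, exec time}; for the linear executor the first element is the total data transfer
+// cost instead. Fills `transfer_st_exec_time` with one {transfer start time, transfer cost} entry
+// per input that has to be permuted from another backend.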
+std::pair<int64_t, int64_t>
+Scheduler::ESTAndExecTime(const backend::Backend *backend, const model::OperationIndex &index,
+ std::multimap<int64_t, int64_t> &transfer_st_exec_time)
+{
+ const bool is_linear_exec = "Linear" == util::getConfigString(util::config::EXECUTOR);
+  // Multiply cpu operations' exec time by 2 because in the parallel executor the cpu backend
+  // might be busy with permutations on other branches or with non-nnfw-specific tasks, and this
+  // op would have to wait for it. The factor 2 was picked experimentally.
+ const int64_t CPU_DELAY = 2;
+ const auto &node = _graph->operations().at(index);
+ const bool quant = isQuant(*_graph, node);
+ const auto size = getOperationsFlattenedIOSize(*_graph, node);
+ if (isWorkaroundSkip(*_graph, backend, node, quant))
+ {
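+    // rule this backend out by returning the largest possible times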
+ return {_exec_time->getMax(), _exec_time->getMax()};
+ }
+ // get average exec time of the op on this backend
+ auto exec_time = getTime(backend, node.getName(), quant, size);
+ if (backend->config()->id() == "cpu" && !is_linear_exec)
+ {
+ exec_time *= CPU_DELAY;
+ }
+
+  // get max eft of direct (one level above) predecessors
+ auto max_pred_eft = predMaxEFT(backend, node, quant, transfer_st_exec_time);
+
+ int64_t total_transfer_cost = 0;
+ std::vector<std::multimap<int64_t, int64_t>::iterator> inserted_permutations;
+  // Find free time for the data transfers and insert them into the backend's task set. This is
+  // needed so that:
+  // 1. the time for multiple permutations of this node's inputs is found correctly;
+  // 2. if backend == cpu, the free slot for this node comes after the permutations
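+  // each entry: {earliest time the transfer can start (parent's eft), transfer cost}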
+ for (auto &it : transfer_st_exec_time)
+ {
+ if (!is_linear_exec)
+ {
+ it.second *= CPU_DELAY;
+ }
+ total_transfer_cost += it.second;
+
+ const auto prev_op_ft =
+ backendAvailableTime(_backend_resolver->getBackend("cpu"), it.first, it.second);
+
+ max_pred_eft = std::max(max_pred_eft, prev_op_ft + it.second);
+
+ const auto tmp = _backends_avail_time[_backend_resolver->getBackend("cpu")].insert(
+ {prev_op_ft + it.second, prev_op_ft});
+ inserted_permutations.push_back(tmp.first);
+ }
+  // find the gap where this op can be put, or the finishing time of the last assigned op
+ auto prev_op_ft = backendAvailableTime(backend, max_pred_eft, exec_time);
+
+  // Remove inserted permutations from cpu's task set
+ for (const auto &it : inserted_permutations)
+ {
+ _backends_avail_time[_backend_resolver->getBackend("cpu")].erase(it);
+ }
+
+  // In case of the linear executor, measure just the exec time and the data transfer time
+ if (is_linear_exec)
+ {
+ VERBOSE(Scheduler::scheduleNode)
+ << "exec_time of (" << index.value() << ") " << node.getName() << " quant==" << quant
+ << " on " << backend->config()->id() << " is " << exec_time
+ << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;
+ return {total_transfer_cost, exec_time};
+ }
+ VERBOSE(Scheduler::scheduleNode) << "exec_time of (" << index.value() << ") " << node.getName()
+ << " quant==" << quant << " on " << backend->config()->id()
+ << ": " << exec_time
+ << " microseconds. Backend available time: " << prev_op_ft
+ << " Parent's max eft: " << max_pred_eft - total_transfer_cost
+ << " data transfer cost: " << total_transfer_cost << std::endl;
+
+ return {prev_op_ft, exec_time};
+}
+
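+// Returns the max eft of the node's direct predecessors. For every predecessor scheduled on a
+// backend other than `backend`, also records {parent's eft, data transfer cost} in
+// `transfer_st_exec_time`.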
+int64_t Scheduler::predMaxEFT(const backend::Backend *backend, const model::Operation &node,
+ bool quant, std::multimap<int64_t, int64_t> &transfer_st_exec_time)
+{
+ int64_t max_pred_eft = 0;
+ for (const auto &input : node.getInputs())
+ {
+ const auto &operand = _graph->operands().at(input);
+    // operations whose output is this input operand of the current node
+ for (const auto &defs : operand.getDef().list())
+ {
+ // Data transfer cost from this parent's backend to current node's backend:
+ auto parent_backend = _backend_resolver->getBackend(defs);
+
+ max_pred_eft = std::max(max_pred_eft, _parents_eft.at(defs));
+ if (parent_backend != backend)
+ {
+        // Multiply operand size by 2 because size must describe input+output size
+ int64_t transfer_cost = getTime(parent_backend, backend->config()->id(), quant,
+ operand.info().total_size() * 2);
+ transfer_st_exec_time.insert({_parents_eft.at(defs), transfer_cost});
+ }
+ }
+ }
+ return max_pred_eft;
}
} // namespace compiler