[neurun] Prettified HE scheduler a little bit (#6982)

author Ivan Vagin/AI Tools Lab /SRR/Engineer/삼성전자 <ivan.vagin@samsung.com>

Fri, 30 Aug 2019 01:43:27 +0000 (10:43 +0900)

committer 이한종/On-Device Lab(SR)/Engineer/삼성전자 <hanjoung.lee@samsung.com>

Fri, 30 Aug 2019 01:43:27 +0000 (10:43 +0900)
author Ivan Vagin/AI Tools Lab /SRR/Engineer/삼성전자 <ivan.vagin@samsung.com>
Fri, 30 Aug 2019 01:43:27 +0000 (10:43 +0900)
committer 이한종/On-Device Lab(SR)/Engineer/삼성전자 <hanjoung.lee@samsung.com>
Fri, 30 Aug 2019 01:43:27 +0000 (10:43 +0900)
diff --git a/runtimes/neurun/core/src/compiler/Scheduler.cc b/runtimes/neurun/core/src/compiler/Scheduler.cc

index 0032a69..6a7ae58 100644 (file)
--- a/runtimes/neurun/core/src/compiler/Scheduler.cc
+++ b/runtimes/neurun/core/src/compiler/Scheduler.cc
@@ -20,7 +20,6 @@
  #include "util/ConfigSource.h"
  #include "compiler/IExecutionBuilder.h"
  #include "compiler/BackendResolver.h"
-#include "backend/IConfig.h"
  #include "backend/IShapeFixer.h"
  #include "util/logging.h"
  #include "util/Utils.h"
@@ -227,40 +226,54 @@ std::unique_ptr<compiler::BackendResolver> Scheduler::schedule(const graph::Grap
    return std::move(_backend_resolver);
  }
  
-int64_t Scheduler::getTime(const backend::Backend *backend, const std::string &operation,
-                           bool quant, uint32_t size)
+int64_t Scheduler::getOpTime(const backend::Backend *backend, const std::string &operation,
+                             bool quant, uint32_t size)
  {
    const auto time = _exec_time->getOperationExecTime(backend, operation, quant, size);
    if (time != _exec_time->NOT_FOUND)
-  {
      return time;
-  };
-  return _run_cache.at(backend).at(operation);
+
+  return _is_supported.at(backend).at(operation) ? 1 : _exec_time->getMax();
+}
+
+int64_t Scheduler::getPermuteTime(const backend::Backend *src_backend,
+                                  const backend::Backend *dst_backend, bool quant, uint32_t size)
+{
+  const auto time = _exec_time->getPermuteTime(src_backend, dst_backend, quant, size);
+  if (time != _exec_time->NOT_FOUND)
+    return time;
+
+  // Makes the scheduler prefer keeping computations on one backend
+  return size / 200;
  }
  
  int64_t Scheduler::tryBackend(const model::Operation &node, const backend::Backend *backend)
  {
-  auto iter = _run_cache.find(backend);
-  if (iter != _run_cache.end())
+  auto iter = _is_supported.find(backend);
+  if (iter != _is_supported.end())
    {
      auto it2 = iter->second.find(node.getName());
      if (it2 != iter->second.end())
      {
-      return _run_cache[backend][node.getName()];
+      return _is_supported[backend][node.getName()] ? 1 : _exec_time->getMax();
      }
    }
    try
    {
      _backend_resolver->getBackendContext(backend)->shape_fixer->fix(node);
-    // always prefer the one, that is supported
-    _run_cache[backend][node.getName()] = 1;
+
+    if (!util::getConfigBool(util::config::PROFILING_MODE))
+      throw std::runtime_error("You are trying to run heterogeneous scheduler with disabled "
+                               "profiling mode, while there is no profiling information about some "
+                               "nodes. Run scheduler with enabled profiling mode first.");
+
+    _is_supported[backend][node.getName()] = true;
    }
    catch (std::runtime_error &e)
    {
-    // Put to max so that during scheduling supported backend will be selected
-    _run_cache[backend][node.getName()] = _exec_time->getMax();
+    _is_supported[backend][node.getName()] = false;
    }
-  return _run_cache[backend][node.getName()];
+  return _is_supported[backend][node.getName()] ? 1 : _exec_time->getMax();
  }
  
  void Scheduler::makeRank()
@@ -317,7 +330,7 @@ int64_t Scheduler::DFSMaxRank(const model::OperationIndex &index)
    int64_t std = 0;
    for (const auto backend : _all_backends)
    {
-    const auto exec_time = getTime(backend, node.getName(), quant, size);
+    const auto exec_time = getOpTime(backend, node.getName(), quant, size);
      if (exec_time < _exec_time->getMax())
      {
        std += (exec_time - rank) * (exec_time - rank);
@@ -362,9 +375,8 @@ int64_t Scheduler::DFSChildrenMaxRank(const model::OperationIndex &index)
              _exec_time->getPermuteTime(backend, other_backend, quant, operand.info().total_size());
          if (transfer_cost == _exec_time->NOT_FOUND)
          {
-          // makes the scheduler prefer keeping computations on one backend
+          // Makes the scheduler prefer keeping computations on one backend
            transfer_cost = operand.info().total_size() / 100;
-          _run_cache[backend][other_backend->config()->id()] = transfer_cost;
          }
          avg_transfer_cost += transfer_cost;
        }
@@ -464,7 +476,7 @@ Scheduler::ESTAndExecTime(const backend::Backend *backend, const model::Operatio
      return {_exec_time->getMax(), _exec_time->getMax()};
    }
    // get average exec time of the op on this backend
-  auto exec_time = getTime(backend, node.getName(), quant, size);
+  auto exec_time = getOpTime(backend, node.getName(), quant, size);
    if (backend->config()->id() == "cpu" && is_parallel_exec)
    {
      exec_time *= CPU_DELAY;
@@ -548,8 +560,8 @@ int64_t Scheduler::predMaxEFT(const backend::Backend *backend, const model::Oper
        if (parent_backend != backend)
        {
          // Multiply operand size by 2 because size must describe input+output size
-        int64_t transfer_cost = getTime(parent_backend, backend->config()->id(), quant,
-                                        input_operand.info().total_size() * 2);
+        int64_t transfer_cost =
+            getPermuteTime(parent_backend, backend, quant, input_operand.info().total_size() * 2);
          transfer_st_exec_time.emplace(_ops_eft.at(input_node_idx), transfer_cost);
        }
      }
diff --git a/runtimes/neurun/core/src/compiler/Scheduler.h b/runtimes/neurun/core/src/compiler/Scheduler.h

index a245171..80d6eaf 100644 (file)
--- a/runtimes/neurun/core/src/compiler/Scheduler.h
+++ b/runtimes/neurun/core/src/compiler/Scheduler.h
@@ -48,7 +48,7 @@ public:
     * @param[in] backend_resolver backend resolver
     */
    Scheduler(const neurun::model::Operands &operands, std::vector<const backend::Backend *> backends)
-      : _run_cache{}, _backends_avail_time{}, _ops_eft{},
+      : _is_supported{}, _backends_avail_time{}, _ops_eft{},
          _op_to_rank{std::make_shared<model::OperationIndexMap<int64_t>>()},
          _all_backends(std::move(backends))
    {
@@ -126,15 +126,22 @@ private:
    int64_t backendAvailableTime(const backend::Backend *backend, const int64_t &starting_time,
                                 const int64_t &time_amount);
  
-  int64_t getTime(const backend::Backend *backend, const std::string &operation, bool quant,
-                  uint32_t size);
+  int64_t getOpTime(const backend::Backend *backend, const std::string &operation, bool quant,
+                    uint32_t size);
+
+  int64_t getPermuteTime(const backend::Backend *src_backend, const backend::Backend *dst_backend,
+                         bool quant, uint32_t size);
  
    void scheduleShufflingBackends();
  
    int64_t tryBackend(const model::Operation &node, const backend::Backend *backend);
  
  private:
-  std::unordered_map<const backend::Backend *, std::unordered_map<std::string, int>> _run_cache;
+  // This variable stores backend/node pairs with unknown execution time, and hints scheduler
+  // whether it should assign these backends to these nodes:
+  // * It stores false for unsupported nodes
+  // * During rank calculation with enabled profiling mode it stores true for supported nodes
+  std::unordered_map<const backend::Backend *, std::unordered_map<std::string, bool>> _is_supported;
    // Finishing and starting time of each backend
    std::unordered_map<const backend::Backend *, std::map<int64_t, int64_t>> _backends_avail_time;
    model::OperationIndexMap<int64_t> _ops_eft;
author	Ivan Vagin/AI Tools Lab /SRR/Engineer/삼성전자 <ivan.vagin@samsung.com>
	Fri, 30 Aug 2019 01:43:27 +0000 (10:43 +0900)
committer	이한종/On-Device Lab(SR)/Engineer/삼성전자 <hanjoung.lee@samsung.com>
	Fri, 30 Aug 2019 01:43:27 +0000 (10:43 +0900)
runtimes/neurun/core/src/compiler/Scheduler.cc		patch | blob | history
runtimes/neurun/core/src/compiler/Scheduler.h		patch | blob | history