[GraphRuntime] Debug graph runtime (#3232)

author hlu1 <14827759+hlu1@users.noreply.github.com>

Thu, 23 May 2019 17:13:11 +0000 (10:13 -0700)

committer Tianqi Chen <tqchen@users.noreply.github.com>

Thu, 23 May 2019 17:13:11 +0000 (10:13 -0700)
author hlu1 <14827759+hlu1@users.noreply.github.com>
Thu, 23 May 2019 17:13:11 +0000 (10:13 -0700)
committer Tianqi Chen <tqchen@users.noreply.github.com>
Thu, 23 May 2019 17:13:11 +0000 (10:13 -0700)
diff --git a/python/tvm/contrib/debugger/debug_result.py b/python/tvm/contrib/debugger/debug_result.py

index c53a2c2..882364d 100644 (file)
--- a/python/tvm/contrib/debugger/debug_result.py
+++ b/python/tvm/contrib/debugger/debug_result.py
@@ -207,10 +207,8 @@ class DebugResult(object):
      def display_debug_result(self):
          """Displays the debugger result"
          """
-        header = ["Node Name", "Ops", "Time(us)", "Time(%)", "Start Time", \
-                    "End Time", "Shape", "Inputs", "Outputs"]
-        lines = ["---------", "---", "--------", "-------", "----------", \
-                    "--------", "-----", "------", "-------"]
+        header = ["Node Name", "Ops", "Time(us)", "Time(%)", "Shape", "Inputs", "Outputs"]
+        lines = ["---------", "---", "--------", "-------", "-----", "------", "-------"]
          eid = 0
          data = []
          total_time = sum(time[0] for time in self._time_list)
@@ -223,12 +221,11 @@ class DebugResult(object):
                      continue
                  name = node['name']
                  shape = str(self._output_tensor_list[eid].shape)
-                time_us = round(time[0] * 1000000, 2)
-                time_percent = round(((time[0] / total_time) * 100), 2)
+                time_us = round(time[0] * 1000000, 3)
+                time_percent = round(((time[0] / total_time) * 100), 3)
                  inputs = str(node['attrs']['num_inputs'])
                  outputs = str(node['attrs']['num_outputs'])
-                node_data = [name, op, time_us, time_percent, str(time[1]), str(time[2]), \
-                             shape, inputs, outputs]
+                node_data = [name, op, time_us, time_percent, shape, inputs, outputs]
                  data.append(node_data)
                  eid += 1
          fmt = ""
diff --git a/python/tvm/contrib/debugger/debug_runtime.py b/python/tvm/contrib/debugger/debug_runtime.py

index 01cda35..f77a927 100644 (file)
--- a/python/tvm/contrib/debugger/debug_runtime.py
+++ b/python/tvm/contrib/debugger/debug_runtime.py
@@ -19,7 +19,6 @@
  import os
  import tempfile
  import shutil
-from datetime import datetime
  from tvm._ffi.base import string_types
  from tvm._ffi.function import get_global_func
  from tvm.contrib import graph_runtime
@@ -30,6 +29,7 @@ from . import debug_result
  _DUMP_ROOT_PREFIX = "tvmdbg_"
  _DUMP_PATH_PREFIX = "_tvmdbg_"
  
+
  def create(graph_json_str, libmod, ctx, dump_root=None):
      """Create a runtime executor module given a graph and module.
  
@@ -62,17 +62,23 @@ def create(graph_json_str, libmod, ctx, dump_root=None):
      try:
          fcreate = get_global_func("tvm.graph_runtime_debug.create")
      except ValueError:
-        raise ValueError("Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in " \
-                         "config.cmake and rebuild TVM to enable debug mode")
+        raise ValueError(
+            "Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in "
+            "config.cmake and rebuild TVM to enable debug mode"
+        )
  
      ctx, num_rpc_ctx, device_type_id = graph_runtime.get_device_ctx(libmod, ctx)
      if num_rpc_ctx == len(ctx):
          libmod = rpc_base._ModuleHandle(libmod)
          try:
-            fcreate = ctx[0]._rpc_sess.get_function("tvm.graph_runtime_debug.remote_create")
+            fcreate = ctx[0]._rpc_sess.get_function(
+                "tvm.graph_runtime_debug.remote_create"
+            )
          except ValueError:
-            raise ValueError("Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in " \
-                             "config.cmake and rebuild TVM to enable debug mode")
+            raise ValueError(
+                "Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in "
+                "config.cmake and rebuild TVM to enable debug mode"
+            )
      func_obj = fcreate(graph_json_str, libmod, *device_type_id)
      return GraphModuleDebug(func_obj, ctx, graph_json_str, dump_root)
  
@@ -100,10 +106,10 @@ class GraphModuleDebug(graph_runtime.GraphModule):
          To select which folder the outputs should be kept.
          None will make a temp folder in /tmp/tvmdbg<rand_string> and does the dumping
      """
+
      def __init__(self, module, ctx, graph_json_str, dump_root):
          self._dump_root = dump_root
          self._dump_path = None
-        self._debug_run = module["debug_run"]
          self._get_output_by_layer = module["get_output_by_layer"]
          self._run_individual = module["run_individual"]
          graph_runtime.GraphModule.__init__(self, module)
@@ -181,13 +187,10 @@ class GraphModuleDebug(graph_runtime.GraphModule):
          Time consumed for each execution will be set as debug output.
  
          """
-        self.debug_datum._time_list = []
-
+        self.debug_datum._time_list = [
+            [float(t) * 1e-6] for t in self.run_individual(10, 1, 1)
+        ]
          for i, node in enumerate(self.debug_datum.get_graph_nodes()):
-            start_time = datetime.now().time()
-            time_stamp = self._debug_run(i)
-            end_time = datetime.now().time()
-            self.debug_datum._time_list.append([time_stamp, start_time, end_time])
              num_outputs = self.debug_datum.get_graph_node_output_num(node)
              for j in range(num_outputs):
                  out_tensor = self._get_output_by_layer(i, j)
@@ -212,8 +215,13 @@ class GraphModuleDebug(graph_runtime.GraphModule):
                  ret = output_tensors[node]
              except:
                  node_list = output_tensors.keys()
-                raise RuntimeError("Node " + node + " not found, available nodes are: "
-                                   + str(node_list) + ".")
+                raise RuntimeError(
+                    "Node "
+                    + node
+                    + " not found, available nodes are: "
+                    + str(node_list)
+                    + "."
+                )
          elif isinstance(node, int):
              output_tensors = self.debug_datum._output_tensor_list
              ret = output_tensors[node]
@@ -242,7 +250,9 @@ class GraphModuleDebug(graph_runtime.GraphModule):
          self.debug_datum.display_debug_result()
  
      def run_individual(self, number, repeat=1, min_repeat_ms=0):
-        self._run_individual(number, repeat, min_repeat_ms)
+        ret = self._run_individual(number, repeat, min_repeat_ms)
+        return ret.strip(",").split(",") if ret else []
+
  
      def exit(self):
          """Exits the dump folder and all its contents"""
diff --git a/src/runtime/graph/debug/graph_runtime_debug.cc b/src/runtime/graph/debug/graph_runtime_debug.cc

index 560bf3d..2b26ae5 100644 (file)
--- a/src/runtime/graph/debug/graph_runtime_debug.cc
+++ b/src/runtime/graph/debug/graph_runtime_debug.cc
@@ -6,9 +6,9 @@
   * to you under the Apache License, Version 2.0 (the
   * "License"); you may not use this file except in compliance
   * with the License.  You may obtain a copy of the License at
- * 
+ *
   *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
   * Unless required by applicable law or agreed to in writing,
   * software distributed under the License is distributed on an
   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -24,7 +24,9 @@
  #include <tvm/runtime/packed_func.h>
  #include <tvm/runtime/registry.h>
  #include <tvm/runtime/ndarray.h>
+
  #include <chrono>
+#include <sstream>
  #include "../graph_runtime.h"
  
  namespace tvm {
@@ -39,40 +41,23 @@ namespace runtime {
  class GraphRuntimeDebug : public GraphRuntime {
   public:
    /*!
-   * \brief Run each operation and get the output.
-   * \param index The index of op which needs to be run.
-   * \return the elapsed time.
-   */
-  double DebugRun(size_t index) {
-    CHECK(index < op_execs_.size());
-    TVMContext ctx = data_entry_[entry_id(index, 0)]->ctx;
-    auto tbegin = std::chrono::high_resolution_clock::now();
-    if (op_execs_[index]) {
-      op_execs_[index]();
-    }
-    TVMSynchronize(ctx.device_type, ctx.device_id, nullptr);
-    auto tend = std::chrono::high_resolution_clock::now();
-    double time = std::chrono::duration_cast<std::chrono::duration<double> >(
-        tend - tbegin).count();
-    return time;
-  }
-
-  /*!
-   * \brief Run each operation in the graph and print out the runtime per op.
+   * \brief Run each operation in the graph and get the time per op for all ops.
     * \param number The number of times to run this function for taking average.
     * \param repeat The number of times to repeat the measurement.
-            In total, the function will be invoked (1 + number x repeat) times,
-            where the first one is warmed up and will be discarded in case
-            there is lazy initialization.
+   *        In total, the function will be invoked (1 + number x repeat) times,
+   *        where the first one is warmed up and will be discarded in case
+   *        there is lazy initialization.
     * \param min_repeat_ms The minimum duration of one `repeat` in milliseconds.
-            By default, one `repeat` contains `number` runs. If this parameter is set,
-            the parameters `number` will be dynamically adjusted to meet the
-            minimum duration requirement of one `repeat`.
+   *        By default, one `repeat` contains `number` runs. If this parameter is set,
+   *        the parameters `number` will be dynamically adjusted to meet the
+   *        minimum duration requirement of one `repeat`.
+   * \return Comma seperated string containing the elapsed time per op for the last
+   *         iteration only, because returning a long string over rpc can be expensive.
     */
-  void RunIndividual(int number, int repeat, int min_repeat_ms) {
+  std::string RunIndividual(int number, int repeat, int min_repeat_ms) {
      // warmup run
      GraphRuntime::Run();
-
+    std::ostringstream os;
      std::vector<double> time_per_op(op_execs_.size(), 0);
      for (int i = 0; i < repeat; ++i) {
        std::chrono::time_point<
@@ -96,7 +81,7 @@ class GraphRuntimeDebug : public GraphRuntime {
                auto op_tend = std::chrono::high_resolution_clock::now();
                double op_duration = std::chrono::duration_cast<
                    std::chrono::duration<double> >(op_tend - op_tbegin).count();
-              time_per_op[index] += op_duration * 1000;  // ms
+              time_per_op[index] += op_duration * 1e6;  // us
              }
            }
          }
@@ -105,16 +90,20 @@ class GraphRuntimeDebug : public GraphRuntime {
              (tend - tbegin).count() * 1000;
        } while (duration_ms < min_repeat_ms);
  
-      LOG(INFO) << "Repeat: " << i;
+      LOG(INFO) << "Iteration: " << i;
        int op = 0;
        for (size_t index = 0; index < time_per_op.size(); index++) {
          if (op_execs_[index]) {
            time_per_op[index] /= number;
            LOG(INFO) << "Op #" << op++ << " " << GetNodeName(index) << ": "
-            << time_per_op[index] << " ms/iter";
+            << time_per_op[index] << " us/iter";
          }
        }
      }
+    for (size_t index = 0; index < time_per_op.size(); index++) {
+      os << time_per_op[index] << ",";
+    }
+    return os.str();
    }
  
    /*!
@@ -182,11 +171,7 @@ PackedFunc GraphRuntimeDebug::GetFunction(
      const std::string& name,
      const std::shared_ptr<ModuleNode>& sptr_to_self) {
    // return member functions during query.
-  if (name == "debug_run") {
-    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
-        *rv = this->DebugRun(static_cast<size_t>(args[0].operator int64_t()));
-      });
-  } else if (name == "get_output_by_layer") {
+  if (name == "get_output_by_layer") {
      return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
          *rv = this->GetOutputByLayer(args[0], args[1]);
        });
@@ -206,7 +191,7 @@ PackedFunc GraphRuntimeDebug::GetFunction(
        CHECK_GT(number, 0);
        CHECK_GT(repeat, 0);
        CHECK_GE(min_repeat_ms, 0);
-      this->RunIndividual(number, repeat, min_repeat_ms);
+      *rv = this->RunIndividual(number, repeat, min_repeat_ms);
      });
    } else {
      return GraphRuntime::GetFunction(name, sptr_to_self);
diff --git a/tests/python/unittest/test_runtime_graph_debug.py b/tests/python/unittest/test_runtime_graph_debug.py

index 3de2707..717b23c 100644 (file)
--- a/tests/python/unittest/test_runtime_graph_debug.py
+++ b/tests/python/unittest/test_runtime_graph_debug.py
@@ -100,9 +100,6 @@ def test_graph_simple():
          out = mod.get_output(0, tvm.nd.empty((n,)))
          np.testing.assert_equal(out.asnumpy(), a + 1)
  
-        #test individual run
-        mod.run_individual(20, 2, 1)
-
          mod.exit()
          #verify dump root delete after cleanup
          assert(not os.path.exists(directory))
@@ -129,7 +126,6 @@ def test_graph_simple():
          mod.run(x=tvm.nd.array(a, ctx))
          out = tvm.nd.empty((n,), ctx=ctx)
          out = mod.get_output(0, out)
-        mod.run_individual(20, 2, 1)
          np.testing.assert_equal(out.asnumpy(), a + 1)
  
      check_verify()
author	hlu1 <14827759+hlu1@users.noreply.github.com>
	Thu, 23 May 2019 17:13:11 +0000 (10:13 -0700)
committer	Tianqi Chen <tqchen@users.noreply.github.com>
	Thu, 23 May 2019 17:13:11 +0000 (10:13 -0700)
python/tvm/contrib/debugger/debug_result.py		patch \| blob \| history
python/tvm/contrib/debugger/debug_runtime.py		patch \| blob \| history
src/runtime/graph/debug/graph_runtime_debug.cc		patch \| blob \| history
tests/python/unittest/test_runtime_graph_debug.py		patch \| blob \| history