[relay] use time_evaluator for measurement (#4191)

author Zhi <5145158+zhiics@users.noreply.github.com>

Thu, 24 Oct 2019 16:56:58 +0000 (09:56 -0700)

committer Tianqi Chen <tqchen@users.noreply.github.com>

Thu, 24 Oct 2019 16:56:58 +0000 (09:56 -0700)
author Zhi <5145158+zhiics@users.noreply.github.com>
Thu, 24 Oct 2019 16:56:58 +0000 (09:56 -0700)
committer Tianqi Chen <tqchen@users.noreply.github.com>
Thu, 24 Oct 2019 16:56:58 +0000 (09:56 -0700)
diff --git a/tests/python/relay/benchmarking/benchmark_vm.py b/tests/python/relay/benchmarking/benchmark_vm.py
index b1d8b9c..cfb3fd4 100644
--- a/tests/python/relay/benchmarking/benchmark_vm.py
+++ b/tests/python/relay/benchmarking/benchmark_vm.py
@@ -21,16 +21,20 @@ import tvm
  from tvm.contrib import graph_runtime
  from tvm import relay
  from tvm.relay import testing
+from tvm.relay import vm
+from tvm.relay import vmobj as _obj
  
  
  def benchmark_execution(mod,
                          params,
-                        measure=False,
+                        measure=True,
                          data_shape=(1, 3, 224, 224),
                          out_shape=(1, 1000),
-                        dtype='float32'):
-    def get_tvm_output(mod, data, params, target, ctx, dtype='float32'):
-        with relay.build_config(opt_level=1):
+                        dtype='float32',
+                        model="unknown"):
+    def get_graph_runtime_output(mod, data, params, target, ctx,
+                                 dtype='float32', number=2, repeat=20):
+        with relay.build_config(opt_level=3):
              graph, lib, params = relay.build(mod, target, params=params)
  
          m = graph_runtime.create(graph, lib, ctx)
@@ -41,18 +45,34 @@ def benchmark_execution(mod,
          out = m.get_output(0, tvm.nd.empty(out_shape, dtype))
  
          if measure:
-            print("Evaluate graph runtime inference time cost...")
+            print("Evaluate graph runtime inference cost of {} on "
+                  "{}".format(model, repr(ctx)))
              ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=20)
              # Measure in millisecond.
              prof_res = np.array(ftimer().results) * 1000
-            print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
+            print("Mean graph runtime inference time (std dev): %.2f ms (%.2f ms)" %
                    (np.mean(prof_res), np.std(prof_res)))
  
          return out.asnumpy()
  
-    def get_tvm_vm_output(mod, data, params, target, ctx, dtype='float32'):
-        ex = relay.create_executor('vm', mod=mod, ctx=ctx)
-        result = ex.evaluate()(data, **params)
+    def get_vm_output(mod, data, params, target, ctx, dtype='float32',
+                      number=2, repeat=20):
+        with relay.build_config(opt_level=3):
+            exe = vm.compile(mod, target, params=params)
+            rly_vm = vm.VirtualMachine(exe)
+            rly_vm.init(ctx)
+            result = rly_vm.run(data)
+
+        if measure:
+            print("Evaluate vm inference cost of {} on {}".format(model,
+                                                                  repr(ctx)))
+            ftimer = rly_vm.mod.time_evaluator("invoke", ctx, number=number,
+                                               repeat=repeat)
+            # Measure in millisecond.
+            prof_res = np.array(ftimer("main", _obj.Tensor(data)).results) * 1000
+            print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" %
+                  (np.mean(prof_res), np.std(prof_res)))
+            
          return result.asnumpy().astype(dtype)
  
      # random input
@@ -60,41 +80,46 @@ def benchmark_execution(mod,
      target = "llvm"
      ctx = tvm.cpu(0)
  
-    tvm_out = get_tvm_output(mod, tvm.nd.array(data.astype(dtype)), params,
-                             target, ctx, dtype)
-    vm_out = get_tvm_vm_output(mod, tvm.nd.array(data.astype(dtype)), params,
-                               target, ctx, dtype)
+    tvm_out = get_graph_runtime_output(mod, tvm.nd.array(data.astype(dtype)),
+                                       params, target, ctx, dtype)
+    vm_out = get_vm_output(mod, tvm.nd.array(data.astype(dtype)), params,
+                           target, ctx, dtype)
      tvm.testing.assert_allclose(vm_out, tvm_out, rtol=1e-5, atol=1e-5)
  
  
  def test_mlp():
      image_shape = (1, 1, 28, 28)
      mod, params = testing.mlp.get_workload(1)
-    benchmark_execution(mod, params, data_shape=image_shape, out_shape=(1, 10))
+    benchmark_execution(mod, params, data_shape=image_shape, out_shape=(1, 10),
+                       model="mlp")
  
  
  def test_vgg():
      for n in [11, 16]:
          mod, params = testing.vgg.get_workload(1, num_layers=n)
-        benchmark_execution(mod, params)
+        model = "vgg" + str(n)
+        benchmark_execution(mod, params, model=model)
  
  
  def test_resnet():
      for n in [18, 50]:
          mod, params = testing.resnet.get_workload(batch_size=1, num_layers=n)
-        benchmark_execution(mod, params, True)
+        model = "resnet" + str(n)
+        benchmark_execution(mod, params, model=model)
  
  
  def test_squeezenet():
      for version in ['1.0', '1.1']:
          mod, params = testing.squeezenet.get_workload(version=version)
-        benchmark_execution(mod, params)
+        model = "squeezenet" + version
+        benchmark_execution(mod, params, model=model)
  
  
  def test_inception_v3():
      image_shape = (3, 299, 299)
      mod, params = testing.inception_v3.get_workload(image_shape=image_shape)
-    benchmark_execution(mod, params, data_shape=(1, 3, 299, 299))
+    benchmark_execution(mod, params, data_shape=(1, 3, 299, 299),
+                        model="inception_v3")
  
  
  def test_dqn():
@@ -112,7 +137,7 @@ def test_dcgan():
  
  def test_mobilenet():
      mod, params = testing.mobilenet.get_workload(batch_size=1)
-    benchmark_execution(mod, params)
+    benchmark_execution(mod, params, model="mobilenet")
  
  # TODO: enable when the low building performance (several minutes) fixed.
  def test_mobilenet_nhwc():
@@ -124,7 +149,7 @@ def test_mobilenet_nhwc():
  
  def test_densenet():
      mod, params = testing.densenet.get_workload(batch_size=1)
-    benchmark_execution(mod, params)
+    benchmark_execution(mod, params, model="densenet")
  
  
  if __name__ == '__main__':
author	Zhi <5145158+zhiics@users.noreply.github.com>
	Thu, 24 Oct 2019 16:56:58 +0000 (09:56 -0700)
committer	Tianqi Chen <tqchen@users.noreply.github.com>
	Thu, 24 Oct 2019 16:56:58 +0000 (09:56 -0700)