Add a collect_trace option to run_op_benchmark for cases when callers just want
to pass RunOptions.FULL_TRACE but don't want to store trace in extras.

author     Anna R <annarev@google.com>
           Thu, 3 May 2018 00:41:26 +0000 (17:41 -0700)
committer  TensorFlower Gardener <gardener@tensorflow.org>
           Thu, 3 May 2018 00:44:23 +0000 (17:44 -0700)

PiperOrigin-RevId: 195181533

tensorflow/python/kernel_tests/benchmark_test.py
tensorflow/python/platform/benchmark.py

tensorflow/python/kernel_tests/benchmark_test.py
index 6233436..78b6e38 100644 (file)
@@ -67,7 +67,7 @@ class TestReportingBenchmark(test.Benchmark):
     with session.Session() as sess:
       a = constant_op.constant(0.0)
       a_plus_a = a + a
-      self.run_op_benchmark(
+      return self.run_op_benchmark(
           sess, a_plus_a, min_iters=1000, store_trace=True, name="op_benchmark")
 
 
@@ -148,7 +148,7 @@ class BenchmarkTest(test.TestCase):
       reporting = TestReportingBenchmark()
       reporting.benchmarkReport1()  # This should write
       reporting.benchmarkReport2()  # This should write
-      reporting.benchmark_times_an_op()  # This should write
+      benchmark_values3 = reporting.benchmark_times_an_op()  # This should write
 
       # Check the files were written
       self.assertTrue(gfile.Exists(expected_output_file))
@@ -186,8 +186,12 @@ class BenchmarkTest(test.TestCase):
       self.assertEquals(expected_3.name, read_benchmark_3.name)
       self.assertEquals(expected_3.iters, read_benchmark_3.iters)
       self.assertGreater(read_benchmark_3.wall_time, 0)
-      full_trace = read_benchmark_3.extras["full_trace_chrome_format"]
-      json_trace = json.loads(full_trace.string_value)
+
+      # The trace is not stored in the benchmark entry. Instead, we get it
+      # from the return value of the `run_op_benchmark` call.
+      full_trace = benchmark_values3["extras"]["full_trace_chrome_format"]
+      json_trace = json.loads(full_trace)
+
       self.assertTrue(isinstance(json_trace, dict))
       self.assertTrue("traceEvents" in json_trace.keys())
       allocator_keys = [k for k in read_benchmark_3.extras.keys()
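
Taken together, the test changes show the new calling pattern: keep the return
value of `run_op_benchmark` and read the trace from it. A minimal standalone
sketch of the same pattern using the public `tf.test.Benchmark` API (class and
method names here are illustrative, TF 1.x style):

    import json

    import tensorflow as tf


    class AddBenchmark(tf.test.Benchmark):

      def benchmark_add(self):
        with tf.Session() as sess:
          a = tf.constant(0.0)
          a_plus_a = a + a
          # store_trace=True now puts the Chrome-format trace into the
          # returned extras rather than into the reported benchmark entry.
          values = self.run_op_benchmark(
              sess, a_plus_a, min_iters=1000, store_trace=True,
              name="op_benchmark")
        trace = json.loads(values["extras"]["full_trace_chrome_format"])
        assert "traceEvents" in trace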
tensorflow/python/platform/benchmark.py
index 12dae94..eba2baa 100644 (file)
@@ -213,9 +213,10 @@ class TensorFlowBenchmark(Benchmark):
       burn_iters: Number of burn-in iterations to run.
       min_iters: Minimum number of iterations to use for timing.
       store_trace: Boolean, whether to run an extra untimed iteration and
-        store the trace of iteration in the benchmark report.
+        store the trace of that iteration in the returned extras.
         The trace will be stored as a string in Google Chrome trace format
-        in the extras field "full_trace_chrome_format".
+        in the extras field "full_trace_chrome_format". Note that the trace
+        will not be stored in the test_log_pb2.TestResults proto.
       store_memory_usage: Boolean, whether to run an extra untimed iteration,
         calculate memory usage, and store that in extras fields.
       name: (optional) Override the BenchmarkEntry name with `name`.
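
Since the trace now lives only in the return value, a caller that wants to
inspect it typically writes the string out and opens it in chrome://tracing.
A short sketch, assuming `sess` and `op_or_tensor` are in scope as in the
docstring above (the output path is illustrative):

    values = self.run_op_benchmark(
        sess, op_or_tensor, store_trace=True, name="traced_benchmark")
    # The value is already a JSON string in Chrome trace format; write it
    # out unmodified and load the file via chrome://tracing.
    with open("/tmp/benchmark_trace.json", "w") as f:
      f.write(values["extras"]["full_trace_chrome_format"])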
@@ -227,7 +228,9 @@ class TensorFlowBenchmark(Benchmark):
 
     Returns:
       A `dict` containing the key-value pairs that were passed to
-      `report_benchmark`.
+      `report_benchmark`. If the `store_trace` option is used, then
+      `full_trace_chrome_format` will be included in the returned dict even
+      though it is not passed to `report_benchmark` via the `extras` argument.
     """
     for _ in range(burn_iters):
       sess.run(op_or_tensor, feed_dict=feed_dict)
@@ -242,6 +245,7 @@ class TensorFlowBenchmark(Benchmark):
       deltas[i] = delta
 
     extras = extras if extras is not None else {}
+    unreported_extras = {}
     if store_trace or store_memory_usage:
       run_options = config_pb2.RunOptions(
           trace_level=config_pb2.RunOptions.FULL_TRACE)
@@ -251,7 +255,8 @@ class TensorFlowBenchmark(Benchmark):
       tl = timeline.Timeline(run_metadata.step_stats)
 
       if store_trace:
-        extras["full_trace_chrome_format"] = tl.generate_chrome_trace_format()
+        unreported_extras["full_trace_chrome_format"] = (
+            tl.generate_chrome_trace_format())
 
       if store_memory_usage:
         step_stats_analysis = tl.analyze_step_stats(show_memory=True)
@@ -277,6 +282,7 @@ class TensorFlowBenchmark(Benchmark):
         "throughput": mbs / median_delta
     }
     self.report_benchmark(**benchmark_values)
+    benchmark_values["extras"].update(unreported_extras)
     return benchmark_values
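
The ordering in this final hunk is the point of the change: `report_benchmark`
is called before `unreported_extras` is merged in, so the trace is visible to
the caller but never reaches the reported extras. A sketch of the resulting
contract (variable names hypothetical):

    values = bench.run_op_benchmark(
        sess, op, store_trace=True, extras={"example_metric": 1.0},
        name="contract_check")
    # Present in the return value...
    assert "full_trace_chrome_format" in values["extras"]
    # ...but only "example_metric" (plus any memory-usage fields) was in
    # the extras actually passed to report_benchmark and serialized to
    # the TestResults proto.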