Let benchmark runner add summary traces.

author machenbach@chromium.org <machenbach@chromium.org>

Thu, 28 Aug 2014 14:42:24 +0000 (14:42 +0000)

committer machenbach@chromium.org <machenbach@chromium.org>

Thu, 28 Aug 2014 14:42:24 +0000 (14:42 +0000)
author machenbach@chromium.org <machenbach@chromium.org>
Thu, 28 Aug 2014 14:42:24 +0000 (14:42 +0000)
committer machenbach@chromium.org <machenbach@chromium.org>
Thu, 28 Aug 2014 14:42:24 +0000 (14:42 +0000)
diff --git a/tools/run_benchmarks.py b/tools/run_benchmarks.py

index b74f358..cc0bb2c 100755 (executable)
--- a/tools/run_benchmarks.py
+++ b/tools/run_benchmarks.py
@@ -92,6 +92,7 @@ Path pieces are concatenated. D8 is always run with the suite's path as cwd.
  """
  
  import json
+import math
  import optparse
  import os
  import re
@@ -116,6 +117,16 @@ SUPPORTED_ARCHS = ["android_arm",
  GENERIC_RESULTS_RE = re.compile(
      r"^Trace\(([^\)]+)\), Result\(([^\)]+)\), StdDev\(([^\)]+)\)$")
  
+
+def GeometricMean(values):
+  """Returns the geometric mean of a list of values as a string.
+
+  Computed as exp(mean(log(v))) to avoid overflow; values must be positive.
+  """
+  values = map(float, values)
+  return str(math.exp(sum(map(math.log, values)) / len(values)))
+
+
  class Results(object):
    """Place holder for result traces."""
    def __init__(self, traces=None, errors=None):
@@ -160,6 +171,7 @@ class DefaultSentinel(Node):
      self.results_regexp = None
      self.stddev_regexp = None
      self.units = "score"
+    self.total = False
  
  
  class Graph(Node):
@@ -187,6 +199,7 @@ class Graph(Node):
      self.run_count = suite.get("run_count", parent.run_count)
      self.run_count = suite.get("run_count_%s" % arch, self.run_count)
      self.units = suite.get("units", parent.units)
+    self.total = suite.get("total", parent.total)
  
      # A regular expression for results. If the parent graph provides a
      # regexp and the current suite has none, a string place holder for the
@@ -276,8 +289,29 @@ class Runnable(Graph):
      for stdout in runner():
        for trace in self._children:
          trace.ConsumeOutput(stdout)
-    return reduce(lambda r, t: r + t.GetResults(), self._children, Results())
-
+    res = reduce(lambda r, t: r + t.GetResults(), self._children, Results())
+
+    if not res.traces or not self.total:
+      return res
+
+    # Totals require that all traces have the same number of results.
+    if len(set(map(lambda t: len(t["results"]), res.traces))) != 1:
+      res.errors.append("Not all traces have the same number of results.")
+      return res
+
+    # Calculate the geometric means for all traces. Above we made sure that
+    # there is at least one trace and that the number of results is the same
+    # for each trace.
+    n_results = len(res.traces[0]["results"])
+    total_results = [GeometricMean(t["results"][i] for t in res.traces)
+                     for i in range(0, n_results)]
+    res.traces.append({
+      "graphs": self.graphs + ["Total"],
+      "units": res.traces[0]["units"],
+      "results": total_results,
+      "stddev": "",
+    })
+    return res
  
  class RunnableTrace(Trace, Runnable):
    """Represents a runnable benchmark suite definition that is a leaf."""
diff --git a/tools/unittests/run_benchmarks_test.py b/tools/unittests/run_benchmarks_test.py

index 0545220..d170252 100644 (file)
--- a/tools/unittests/run_benchmarks_test.py
+++ b/tools/unittests/run_benchmarks_test.py
@@ -293,6 +293,35 @@ class BenchmarksTest(unittest.TestCase):
      self._VerifyErrors([])
      self._VerifyMock(path.join("out", "Release", "d7"), "--flag", "run.js")
  
+  def testBuildbotWithTotal(self):
+    test_input = dict(V8_JSON)
+    test_input["total"] = True
+    self._WriteTestInput(test_input)
+    self._MockCommand(["."], ["Richards: 1.234\nDeltaBlue: 10657567\n"])
+    self.assertEquals(0, self._CallMain("--buildbot"))
+    self._VerifyResults("test", "score", [
+      {"name": "Richards", "results": ["1.234"], "stddev": ""},
+      {"name": "DeltaBlue", "results": ["10657567"], "stddev": ""},
+      {"name": "Total", "results": ["3626.49109719"], "stddev": ""},
+    ])
+    self._VerifyErrors([])
+    self._VerifyMock(path.join("out", "Release", "d7"), "--flag", "run.js")
+
+  def testBuildbotWithTotalAndErrors(self):
+    test_input = dict(V8_JSON)
+    test_input["total"] = True
+    self._WriteTestInput(test_input)
+    self._MockCommand(["."], ["x\nRichaards: 1.234\nDeltaBlue: 10657567\ny\n"])
+    self.assertEquals(1, self._CallMain("--buildbot"))
+    self._VerifyResults("test", "score", [
+      {"name": "Richards", "results": [], "stddev": ""},
+      {"name": "DeltaBlue", "results": ["10657567"], "stddev": ""},
+    ])
+    self._VerifyErrors(
+        ["Regexp \"^Richards: (.+)$\" didn't match for benchmark Richards.",
+         "Not all traces have the same number of results."])
+    self._VerifyMock(path.join("out", "Release", "d7"), "--flag", "run.js")
+
    def testRegexpNoMatch(self):
      self._WriteTestInput(V8_JSON)
      self._MockCommand(["."], ["x\nRichaards: 1.234\nDeltaBlue: 10657567\ny\n"])
author	machenbach@chromium.org <machenbach@chromium.org>
	Thu, 28 Aug 2014 14:42:24 +0000 (14:42 +0000)
committer	machenbach@chromium.org <machenbach@chromium.org>
	Thu, 28 Aug 2014 14:42:24 +0000 (14:42 +0000)
tools/run_benchmarks.py		patch | blob | history
tools/unittests/run_benchmarks_test.py		patch | blob | history