From 522beebb1a4d6b3eb4d7938ee893e1773a8a9c66 Mon Sep 17 00:00:00 2001
From: "machenbach@chromium.org"
Date: Wed, 16 Jul 2014 08:53:46 +0000
Subject: [PATCH] Allow benchmarks to provide the standard deviation.

Some benchmarks include their own runner, which provides an overall
average and a standard deviation. This enables extraction of that value
in the same way as the other measurements.

These benchmarks should only be run once. If a benchmark specifies
multiple runs and provides a standard deviation, a warning is issued
that makes the build fail on the buildbot side.

TEST=python -m unittest run_benchmarks_test
BUG=393947
LOG=n
R=jkummerow@chromium.org

Review URL: https://codereview.chromium.org/395633012

git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@22424 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
---
 tools/run_benchmarks.py                | 20 ++++++++++
 tools/unittests/run_benchmarks_test.py | 71 +++++++++++++++++++++++++++-------
 2 files changed, 76 insertions(+), 15 deletions(-)

diff --git a/tools/run_benchmarks.py b/tools/run_benchmarks.py
index 1a07025..4c72eeb 100755
--- a/tools/run_benchmarks.py
+++ b/tools/run_benchmarks.py
@@ -156,6 +156,7 @@ class DefaultSentinel(Node):
     self.flags = []
     self.resources = []
     self.results_regexp = None
+    self.stddev_regexp = None
     self.units = "score"
@@ -196,6 +197,13 @@ class Graph(Node):
       regexp_default = None
     self.results_regexp = suite.get("results_regexp", regexp_default)

+    # A similar regular expression for the standard deviation (optional).
+    if parent.stddev_regexp:
+      stddev_default = parent.stddev_regexp % suite["name"]
+    else:
+      stddev_default = None
+    self.stddev_regexp = suite.get("stddev_regexp", stddev_default)
+

 class Trace(Graph):
   """Represents a leaf in the benchmark suite tree structure.
@@ -207,6 +215,7 @@
     assert self.results_regexp
     self.results = []
     self.errors = []
+    self.stddev = ""

   def ConsumeOutput(self, stdout):
     try:
@@ -216,11 +225,22 @@
       self.errors.append("Regexp \"%s\" didn't match for benchmark %s."
                          % (self.results_regexp, self.graphs[-1]))

+    try:
+      if self.stddev_regexp and self.stddev:
+        self.errors.append("Benchmark %s should only run once since a stddev "
+                           "is provided by the benchmark." % self.graphs[-1])
+      if self.stddev_regexp:
+        self.stddev = re.search(self.stddev_regexp, stdout, re.M).group(1)
+    except:
+      self.errors.append("Regexp \"%s\" didn't match for benchmark %s."
+                         % (self.stddev_regexp, self.graphs[-1]))
+
   def GetResults(self):
     return Results([{
       "graphs": self.graphs,
       "units": self.units,
       "results": self.results,
+      "stddev": self.stddev,
     }], self.errors)
diff --git a/tools/unittests/run_benchmarks_test.py b/tools/unittests/run_benchmarks_test.py
index f627d43..37a816e 100644
--- a/tools/unittests/run_benchmarks_test.py
+++ b/tools/unittests/run_benchmarks_test.py
@@ -135,8 +135,9 @@ class BenchmarksTest(unittest.TestCase):
     self.assertEquals([
       {"units": units,
        "graphs": [suite, trace["name"]],
-       "results": trace["results"]} for trace in traces],
-      self._LoadResults()["traces"])
+       "results": trace["results"],
+       "stddev": trace["stddev"]} for trace in traces],
+      self._LoadResults()["traces"])

   def _VerifyErrors(self, errors):
     self.assertEquals(errors, self._LoadResults()["errors"])
@@ -159,8 +160,8 @@ class BenchmarksTest(unittest.TestCase):
     self._MockCommand(["."], ["x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n"])
     self.assertEquals(0, self._CallMain())
     self._VerifyResults("test", "score", [
-      {"name": "Richards", "results": ["1.234"]},
-      {"name": "DeltaBlue", "results": ["10657567"]},
+      {"name": "Richards", "results": ["1.234"], "stddev": ""},
+      {"name": "DeltaBlue", "results": ["10657567"], "stddev": ""},
     ])
     self._VerifyErrors([])
     self._VerifyMock(path.join("out", "x64.release", "d7"), "--flag", "run.js")
@@ -176,8 +177,8 @@ class BenchmarksTest(unittest.TestCase):
                               "Richards: 50\nDeltaBlue: 300\n"])
     self.assertEquals(0, self._CallMain())
     self._VerifyResults("v8", "ms", [
-      {"name": "Richards", "results": ["50", "100"]},
-      {"name": "DeltaBlue", "results": ["300", "200"]},
+      {"name": "Richards", "results": ["50", "100"], "stddev": ""},
+      {"name": "DeltaBlue", "results": ["300", "200"], "stddev": ""},
     ])
     self._VerifyErrors([])
     self._VerifyMock(path.join("out", "x64.release", "d7"), "--flag", "run.js")
@@ -194,8 +195,8 @@ class BenchmarksTest(unittest.TestCase):
                               "Richards: 50\nDeltaBlue: 300\n"])
     self.assertEquals(0, self._CallMain())
     self._VerifyResults("test", "score", [
-      {"name": "Richards", "results": ["50", "100"]},
-      {"name": "DeltaBlue", "results": ["300", "200"]},
+      {"name": "Richards", "results": ["50", "100"], "stddev": ""},
+      {"name": "DeltaBlue", "results": ["300", "200"], "stddev": ""},
     ])
     self._VerifyErrors([])
     self._VerifyMock(path.join("out", "x64.release", "d7"), "--flag", "run.js")
@@ -213,13 +214,16 @@ class BenchmarksTest(unittest.TestCase):
     self.assertEquals([
       {"units": "score",
       "graphs": ["test", "Richards"],
-      "results": ["50", "100"]},
+      "results": ["50", "100"],
+      "stddev": ""},
       {"units": "ms",
       "graphs": ["test", "Sub", "Leaf"],
-      "results": ["3", "2", "1"]},
+      "results": ["3", "2", "1"],
+      "stddev": ""},
       {"units": "score",
       "graphs": ["test", "DeltaBlue"],
-      "results": ["200"]},
+      "results": ["200"],
+      "stddev": ""},
     ], self._LoadResults()["traces"])
     self._VerifyErrors([])
     self._VerifyMockMultiple(
@@ -232,13 +236,50 @@ class BenchmarksTest(unittest.TestCase):
         (path.join("out", "x64.release", "d8"), "--flag", "run.js"),
         (path.join("out", "x64.release", "d8"), "--flag", "--flag2", "run.js"))

+  def testOneRunStdDevRegExp(self):
+    test_input = dict(V8_JSON)
+    test_input["stddev_regexp"] = "^%s\-stddev: (.+)$"
+    self._WriteTestInput(test_input)
+    self._MockCommand(["."], ["Richards: 1.234\nRichards-stddev: 0.23\n"
+                              "DeltaBlue: 10657567\nDeltaBlue-stddev: 106\n"])
+    self.assertEquals(0, self._CallMain())
+    self._VerifyResults("test", "score", [
+      {"name": "Richards", "results": ["1.234"], "stddev": "0.23"},
+      {"name": "DeltaBlue", "results": ["10657567"], "stddev": "106"},
+    ])
+    self._VerifyErrors([])
+    self._VerifyMock(path.join("out", "x64.release", "d7"), "--flag", "run.js")
+
+  def testTwoRunsStdDevRegExp(self):
+    test_input = dict(V8_JSON)
+    test_input["stddev_regexp"] = "^%s\-stddev: (.+)$"
+    test_input["run_count"] = 2
+    self._WriteTestInput(test_input)
+    self._MockCommand(["."], ["Richards: 3\nRichards-stddev: 0.7\n"
+                              "DeltaBlue: 6\nDeltaBlue-boom: 0.9\n",
+                              "Richards: 2\nRichards-stddev: 0.5\n"
+                              "DeltaBlue: 5\nDeltaBlue-stddev: 0.8\n"])
+    self.assertEquals(1, self._CallMain())
+    self._VerifyResults("test", "score", [
+      {"name": "Richards", "results": ["2", "3"], "stddev": "0.7"},
+      {"name": "DeltaBlue", "results": ["5", "6"], "stddev": "0.8"},
+    ])
+    self._VerifyErrors(
+        ["Benchmark Richards should only run once since a stddev is provided "
+         "by the benchmark.",
+         "Benchmark DeltaBlue should only run once since a stddev is provided "
+         "by the benchmark.",
+         "Regexp \"^DeltaBlue\-stddev: (.+)$\" didn't match for benchmark "
+         "DeltaBlue."])
+    self._VerifyMock(path.join("out", "x64.release", "d7"), "--flag", "run.js")
+
   def testBuildbot(self):
     self._WriteTestInput(V8_JSON)
     self._MockCommand(["."], ["Richards: 1.234\nDeltaBlue: 10657567\n"])
     self.assertEquals(0, self._CallMain("--buildbot"))
     self._VerifyResults("test", "score", [
-      {"name": "Richards", "results": ["1.234"]},
-      {"name": "DeltaBlue", "results": ["10657567"]},
+      {"name": "Richards", "results": ["1.234"], "stddev": ""},
+      {"name": "DeltaBlue", "results": ["10657567"], "stddev": ""},
     ])
     self._VerifyErrors([])
     self._VerifyMock(path.join("out", "Release", "d7"), "--flag", "run.js")
@@ -248,8 +289,8 @@ class BenchmarksTest(unittest.TestCase):
     self._MockCommand(["."], ["x\nRichaards: 1.234\nDeltaBlue: 10657567\ny\n"])
     self.assertEquals(1, self._CallMain())
     self._VerifyResults("test", "score", [
-      {"name": "Richards", "results": []},
-      {"name": "DeltaBlue", "results": ["10657567"]},
+      {"name": "Richards", "results": [], "stddev": ""},
+      {"name": "DeltaBlue", "results": ["10657567"], "stddev": ""},
     ])
     self._VerifyErrors(
         ["Regexp \"^Richards: (.+)$\" didn't match for benchmark Richards."])
-- 
2.7.4