Add new benchmark suite runner.
author    machenbach@chromium.org <machenbach@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Wed, 28 May 2014 13:05:17 +0000 (13:05 +0000)
committer machenbach@chromium.org <machenbach@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Wed, 28 May 2014 13:05:17 +0000 (13:05 +0000)
TEST=./tools/run_benchmarks.py benchmarks/v8.json
TEST=cd tools/unittests; python -m unittest run_benchmarks_test

Does not support custom results processors yet. Will implement that in a future CL.

BUG=374740
LOG=n
R=jkummerow@chromium.org

Review URL: https://codereview.chromium.org/293023006

git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@21570 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

benchmarks/v8.json [new file with mode: 0644]
tools/run_benchmarks.py [new file with mode: 0755]
tools/unittests/run_benchmarks_test.py [new file with mode: 0644]

diff --git a/benchmarks/v8.json b/benchmarks/v8.json
new file mode 100644 (file)
index 0000000..f4210d9
--- /dev/null
@@ -0,0 +1,16 @@
+{
+  "path": ["."],
+  "main": "run.js",
+  "run_count": 2,
+  "results_regexp": "^%s: (.+)$",
+  "benchmarks": [
+    {"name": "Richards"},
+    {"name": "DeltaBlue"},
+    {"name": "Crypto"},
+    {"name": "RayTrace"},
+    {"name": "EarleyBoyer"},
+    {"name": "RegExp"},
+    {"name": "Splay"},
+    {"name": "NavierStokes"}
+  ]
+}
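
Illustration of how the runner consumes the suite above: a minimal sketch mirroring
Trace.ConsumeOutput from tools/run_benchmarks.py below. The d8 output and the scores
are made up for the example.

import re

# The runner executes out/<arch>.release/d8 run.js from the benchmarks/
# directory (the suite's "path"), run_count times.
stdout = "Richards: 26425\nDeltaBlue: 29204\n"  # hypothetical d8 output

# The suite-level results_regexp "^%s: (.+)$" is instantiated per benchmark
# name before matching against the output.
for name in ["Richards", "DeltaBlue"]:
  regexp = "^%s: (.+)$" % name
  print name, re.search(regexp, stdout, re.M).group(1)
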
diff --git a/tools/run_benchmarks.py b/tools/run_benchmarks.py
new file mode 100755 (executable)
index 0000000..42b5bf5
--- /dev/null
@@ -0,0 +1,399 @@
+#!/usr/bin/env python
+# Copyright 2014 the V8 project authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""
+Performance runner for d8.
+
+Call e.g. with tools/run_benchmarks.py --arch ia32 some_suite.json
+
+The suite json format is expected to be:
+{
+  "path": <relative path chunks to benchmark resources and main file>,
+  "name": <optional suite name, file name is default>,
+  "archs": [<architecture name for which this suite is run>, ...],
+  "binary": <name of binary to run, default "d8">,
+  "flags": [<flag to d8>, ...],
+  "run_count": <how often will this suite run (optional)>,
+  "run_count_XXX": <how often will this suite run for arch XXX (optional)>,
+  "resources": [<js file to be loaded before main>, ...]
+  "main": <main js benchmark runner file>,
+  "results_regexp": <optional regexp>,
+  "results_processor": <optional python results processor script>,
+  "units": <the unit specification for the performance dashboard>,
+  "benchmarks": [
+    {
+      "name": <name of the benchmark>,
+      "results_regexp": <optional more specific regexp>,
+      "results_processor": <optional python results processor script>,
+      "units": <the unit specification for the performance dashboard>,
+    }, ...
+  ]
+}
+
+The benchmarks field can also nest other suites to arbitrary depth. A suite
+with a "main" file is a leaf suite that can contain one more level of
+benchmarks.
+
+A suite's results_regexp is expected to have one string placeholder
+"%s" for the benchmark name. A benchmark's results_regexp overrides the
+suite default.
+
+A suite's results_processor may point to an optional python script. If
+specified, it is called after running the benchmarks like this (with a path
+relative to the suite level's path):
+<results_processor file> <same flags as for d8> <suite level name> <output>
+
+The <output> is a temporary file containing d8 output. The results_regexp will
+be applied to the output of this script.
+
+A suite without "benchmarks" is considered a benchmark itself.
+
+Full example (suite with one runner):
+{
+  "path": ["."],
+  "flags": ["--expose-gc"],
+  "archs": ["ia32", "x64"],
+  "run_count": 5,
+  "run_count_ia32": 3,
+  "main": "run.js",
+  "results_regexp": "^%s: (.+)$",
+  "units": "score",
+  "benchmarks": [
+    {"name": "Richards"},
+    {"name": "DeltaBlue"},
+    {"name": "NavierStokes",
+     "results_regexp": "^NavierStokes: (.+)$"}
+  ]
+}
+
+Full example (suite with several runners):
+{
+  "path": ["."],
+  "flags": ["--expose-gc"],
+  "archs": ["ia32", "x64"],
+  "run_count": 5,
+  "units": "score",
+  "benchmarks": [
+    {"name": "Richards",
+     "path": ["richards"],
+     "main": "run.js",
+     "run_count": 3,
+     "results_regexp": "^Richards: (.+)$"},
+    {"name": "NavierStokes",
+     "path": ["navier_stokes"],
+     "main": "run.js",
+     "results_regexp": "^NavierStokes: (.+)$"}
+  ]
+}
+
+Path pieces are concatenated. D8 is always run with the suite's path as cwd.
+"""
+
+import json
+import optparse
+import os
+import re
+import sys
+
+from testrunner.local import commands
+from testrunner.local import utils
+
+ARCH_GUESS = utils.DefaultArch()
+SUPPORTED_ARCHS = ["android_arm",
+                   "android_arm64",
+                   "android_ia32",
+                   "arm",
+                   "ia32",
+                   "mips",
+                   "mipsel",
+                   "nacl_ia32",
+                   "nacl_x64",
+                   "x64",
+                   "arm64"]
+
+
+class Results(object):
+  """Place holder for result traces."""
+  def __init__(self, traces=None, errors=None):
+    self.traces = traces or []
+    self.errors = errors or []
+
+  def ToDict(self):
+    return {"traces": self.traces, "errors": self.errors}
+
+  def WriteToFile(self, file_name):
+    with open(file_name, "w") as f:
+      f.write(json.dumps(self.ToDict()))
+
+  def __add__(self, other):
+    self.traces += other.traces
+    self.errors += other.errors
+    return self
+
+  def __str__(self):  # pragma: no cover
+    return str(self.ToDict())
+
+
+class Node(object):
+  """Represents a node in the benchmark suite tree structure."""
+  def __init__(self, *args):
+    self._children = []
+
+  def AppendChild(self, child):
+    self._children.append(child)
+
+
+class DefaultSentinel(Node):
+  """Fake parent node with all default values."""
+  def __init__(self):
+    super(DefaultSentinel, self).__init__()
+    self.binary = "d8"
+    self.run_count = 10
+    self.path = []
+    self.graphs = []
+    self.flags = []
+    self.resources = []
+    self.results_regexp = None
+    self.units = "score"
+
+
+class Graph(Node):
+  """Represents a benchmark suite definition.
+
+  Can either be a leaf or an inner node that provides default values.
+  """
+  def __init__(self, suite, parent, arch):
+    super(Graph, self).__init__()
+    self._suite = suite
+
+    assert isinstance(suite.get("path", []), list)
+    assert isinstance(suite["name"], basestring)
+    assert isinstance(suite.get("flags", []), list)
+    assert isinstance(suite.get("resources", []), list)
+
+    # Accumulated values.
+    self.path = parent.path[:] + suite.get("path", [])
+    self.graphs = parent.graphs[:] + [suite["name"]]
+    self.flags = parent.flags[:] + suite.get("flags", [])
+    self.resources = parent.resources[:] + suite.get("resources", [])
+
+    # Discrete values (with parent defaults).
+    self.binary = suite.get("binary", parent.binary)
+    self.run_count = suite.get("run_count", parent.run_count)
+    self.run_count = suite.get("run_count_%s" % arch, self.run_count)
+    self.units = suite.get("units", parent.units)
+
+    # A regular expression for results. If the parent graph provides a
+    # regexp and the current suite has none, a string placeholder for the
+    # suite name is expected.
+    # TODO(machenbach): Currently this only makes sense for the leaf level.
+    # Multiple placeholders for multiple levels are not supported.
+    if parent.results_regexp:
+      regexp_default = parent.results_regexp % suite["name"]
+    else:
+      regexp_default = None
+    self.results_regexp = suite.get("results_regexp", regexp_default)
+
+
+class Trace(Graph):
+  """Represents a leaf in the benchmark suite tree structure.
+
+  Handles collection of measurements.
+  """
+  def __init__(self, suite, parent, arch):
+    super(Trace, self).__init__(suite, parent, arch)
+    assert self.results_regexp
+    self.results = []
+    self.errors = []
+
+  def ConsumeOutput(self, stdout):
+    try:
+      self.results.append(
+          re.search(self.results_regexp, stdout, re.M).group(1))
+    except:
+      self.errors.append("Regexp \"%s\" didn't match for benchmark %s."
+                         % (self.results_regexp, self.graphs[-1]))
+
+  def GetResults(self):
+    return Results([{
+      "graphs": self.graphs,
+      "units": self.units,
+      "results": self.results,
+    }], self.errors)
+
+
+class Runnable(Graph):
+  """Represents a runnable benchmark suite definition (i.e. has a main file).
+  """
+  @property
+  def main(self):
+    return self._suite["main"]
+
+  def ChangeCWD(self, suite_path):
+    """Changes the cwd to to path defined in the current graph.
+
+    The benchmarks are supposed to be relative to the suite configuration.
+    """
+    suite_dir = os.path.abspath(os.path.dirname(suite_path))
+    bench_dir = os.path.normpath(os.path.join(*self.path))
+    os.chdir(os.path.join(suite_dir, bench_dir))
+
+  def GetCommand(self, shell_dir):
+    # TODO(machenbach): This requires +.exe if run on windows.
+    return (
+      [os.path.join(shell_dir, self.binary)] +
+      self.flags +
+      self.resources +
+      [self.main]
+    )
+
+  def Run(self, runner):
+    """Iterates over several runs and handles the output for all traces."""
+    for stdout in runner():
+      for trace in self._children:
+        trace.ConsumeOutput(stdout)
+    return reduce(lambda r, t: r + t.GetResults(), self._children, Results())
+
+
+class RunnableTrace(Trace, Runnable):
+  """Represents a runnable benchmark suite definition that is a leaf."""
+  def __init__(self, suite, parent, arch):
+    super(RunnableTrace, self).__init__(suite, parent, arch)
+
+  def Run(self, runner):
+    """Iterates over several runs and handles the output."""
+    for stdout in runner():
+      self.ConsumeOutput(stdout)
+    return self.GetResults()
+
+
+def MakeGraph(suite, arch, parent):
+  """Factory method for making graph objects."""
+  if isinstance(parent, Runnable):
+    # Below a runnable there can only be traces.
+    return Trace(suite, parent, arch)
+  elif suite.get("main"):
+    # A main file makes this graph runnable.
+    if suite.get("benchmarks"):
+      # This graph has subbenchmarks (traces).
+      return Runnable(suite, parent, arch)
+    else:
+      # This graph has no subbenchmarks, it's a leaf.
+      return RunnableTrace(suite, parent, arch)
+  elif suite.get("benchmarks"):
+    # This is neither a leaf nor a runnable.
+    return Graph(suite, parent, arch)
+  else:  # pragma: no cover
+    raise Exception("Invalid benchmark suite configuration.")
+
+
+def BuildGraphs(suite, arch, parent=None):
+  """Builds a tree structure of graph objects that corresponds to the suite
+  configuration.
+  """
+  parent = parent or DefaultSentinel()
+
+  # TODO(machenbach): Implement notion of cpu type?
+  if arch not in suite.get("archs", ["ia32", "x64"]):
+    return None
+
+  graph = MakeGraph(suite, arch, parent)
+  for subsuite in suite.get("benchmarks", []):
+    BuildGraphs(subsuite, arch, graph)
+  parent.AppendChild(graph)
+  return graph
+
+
+def FlattenRunnables(node):
+  """Generator that traverses the tree structure and iterates over all
+  runnables.
+  """
+  if isinstance(node, Runnable):
+    yield node
+  elif isinstance(node, Node):
+    for child in node._children:
+      for result in FlattenRunnables(child):
+        yield result
+  else:  # pragma: no cover
+    raise Exception("Invalid benchmark suite configuration.")
+
+
+# TODO: Implement results_processor.
+def Main(args):
+  parser = optparse.OptionParser()
+  parser.add_option("--arch",
+                    help=("The architecture to run tests for, "
+                          "'auto' or 'native' for auto-detect"),
+                    default="x64")
+  parser.add_option("--buildbot",
+                    help="Adapt to path structure used on buildbots",
+                    default=False, action="store_true")
+  parser.add_option("--json-test-results",
+                    help="Path to a file for storing json results.")
+  parser.add_option("--outdir", help="Base directory with compile output",
+                    default="out")
+  (options, args) = parser.parse_args(args)
+
+  if len(args) == 0:  # pragma: no cover
+    parser.print_help()
+    return 1
+
+  if options.arch in ["auto", "native"]:  # pragma: no cover
+    options.arch = ARCH_GUESS
+
+  if options.arch not in SUPPORTED_ARCHS:  # pragma: no cover
+    print "Unknown architecture %s" % options.arch
+    return 1
+
+  workspace = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+
+  if options.buildbot:
+    shell_dir = os.path.join(workspace, options.outdir, "Release")
+  else:
+    shell_dir = os.path.join(workspace, options.outdir,
+                             "%s.release" % options.arch)
+
+  results = Results()
+  for path in args:
+    path = os.path.abspath(path)
+
+    if not os.path.exists(path):  # pragma: no cover
+      results.errors.append("Benchmark file %s does not exist." % path)
+      continue
+
+    with open(path) as f:
+      suite = json.loads(f.read())
+
+    # If no name is given, default to the file name without .json.
+    suite.setdefault("name", os.path.splitext(os.path.basename(path))[0])
+
+    for runnable in FlattenRunnables(BuildGraphs(suite, options.arch)):
+      print ">>> Running suite: %s" % "/".join(runnable.graphs)
+      runnable.ChangeCWD(path)
+
+      def Runner():
+        """Output generator that reruns several times."""
+        for i in xrange(0, max(1, runnable.run_count)):
+          # TODO(machenbach): Make timeout configurable in the suite definition.
+          # Allow timeout per arch like with run_count per arch.
+          output = commands.Execute(runnable.GetCommand(shell_dir), timeout=60)
+          print ">>> Stdout (#%d):" % (i + 1)
+          print output.stdout
+          if output.stderr:  # pragma: no cover
+            # Print stderr for debugging.
+            print ">>> Stderr (#%d):" % (i + 1)
+            print output.stderr
+          yield output.stdout
+
+      # Let runnable iterate over all runs and handle output.
+      results += runnable.Run(Runner)
+
+  if options.json_test_results:
+    results.WriteToFile(options.json_test_results)
+  else:  # pragma: no cover
+    print results
+
+if __name__ == "__main__":  # pragma: no cover
+  sys.exit(Main(sys.argv[1:]))
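
For reference, the file written via --json-test-results follows the shape produced by
Results.ToDict and Trace.GetResults; a sketch with made-up values (suite name and
scores are placeholders, not real measurements):

{
  "traces": [
    {"graphs": ["v8", "Richards"], "units": "score", "results": ["26425", "26136"]},
    {"graphs": ["v8", "DeltaBlue"], "units": "score", "results": ["29204", "28922"]}
  ],
  "errors": []
}
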
diff --git a/tools/unittests/run_benchmarks_test.py b/tools/unittests/run_benchmarks_test.py
new file mode 100644 (file)
index 0000000..746d0ea
--- /dev/null
@@ -0,0 +1,256 @@
+#!/usr/bin/env python
+# Copyright 2014 the V8 project authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+from collections import namedtuple
+import coverage
+import json
+from mock import DEFAULT
+from mock import MagicMock
+import os
+from os import path, sys
+import shutil
+import tempfile
+import unittest
+
+# Requires python-coverage and python-mock. Native python coverage
+# version >= 3.7.1 should be installed to get the best speed.
+
+TEST_WORKSPACE = path.join(tempfile.gettempdir(), "test-v8-run-benchmarks")
+
+V8_JSON = {
+  "path": ["."],
+  "binary": "d7",
+  "flags": ["--flag"],
+  "main": "run.js",
+  "run_count": 1,
+  "results_regexp": "^%s: (.+)$",
+  "benchmarks": [
+    {"name": "Richards"},
+    {"name": "DeltaBlue"},
+  ]
+}
+
+V8_NESTED_SUITES_JSON = {
+  "path": ["."],
+  "flags": ["--flag"],
+  "run_count": 1,
+  "units": "score",
+  "benchmarks": [
+    {"name": "Richards",
+     "path": ["richards"],
+     "binary": "d7",
+     "main": "run.js",
+     "resources": ["file1.js", "file2.js"],
+     "run_count": 2,
+     "results_regexp": "^Richards: (.+)$"},
+    {"name": "Sub",
+     "path": ["sub"],
+     "benchmarks": [
+       {"name": "Leaf",
+        "path": ["leaf"],
+        "run_count_x64": 3,
+        "units": "ms",
+        "main": "run.js",
+        "results_regexp": "^Simple: (.+) ms.$"},
+     ]
+    },
+    {"name": "DeltaBlue",
+     "path": ["delta_blue"],
+     "main": "run.js",
+     "flags": ["--flag2"],
+     "results_regexp": "^DeltaBlue: (.+)$"},
+    {"name": "ShouldntRun",
+     "path": ["."],
+     "archs": ["arm"],
+     "main": "run.js"},
+  ]
+}
+
+Output = namedtuple("Output", "stdout, stderr")
+
+class BenchmarksTest(unittest.TestCase):
+  @classmethod
+  def setUpClass(cls):
+    cls.base = path.dirname(path.dirname(path.abspath(__file__)))
+    sys.path.append(cls.base)
+    cls._cov = coverage.coverage(
+        include=([os.path.join(cls.base, "run_benchmarks.py")]))
+    cls._cov.start()
+    import run_benchmarks
+    from testrunner.local import commands
+    global commands
+    global run_benchmarks
+
+  @classmethod
+  def tearDownClass(cls):
+    cls._cov.stop()
+    print ""
+    print cls._cov.report()
+
+  def setUp(self):
+    self.maxDiff = None
+    if path.exists(TEST_WORKSPACE):
+      shutil.rmtree(TEST_WORKSPACE)
+    os.makedirs(TEST_WORKSPACE)
+
+  def tearDown(self):
+    if path.exists(TEST_WORKSPACE):
+      shutil.rmtree(TEST_WORKSPACE)
+
+  def _WriteTestInput(self, json_content):
+    self._test_input = path.join(TEST_WORKSPACE, "test.json")
+    with open(self._test_input, "w") as f:
+      f.write(json.dumps(json_content))
+
+  def _MockCommand(self, *args):
+    # Fake output for each benchmark run.
+    benchmark_outputs = [Output(stdout=arg, stderr=None) for arg in args[1]]
+    def execute(*args, **kwargs):
+      return benchmark_outputs.pop()
+    commands.Execute = MagicMock(side_effect=execute)
+
+    # Check that d8 is called from the correct cwd for each benchmark run.
+    dirs = [path.join(TEST_WORKSPACE, arg) for arg in args[0]]
+    def chdir(*args, **kwargs):
+      self.assertEquals(dirs.pop(), args[0])
+    os.chdir = MagicMock(side_effect=chdir)
+
+  def _CallMain(self, *args):
+    self._test_output = path.join(TEST_WORKSPACE, "results.json")
+    all_args = [
+      "--json-test-results",
+      self._test_output,
+      self._test_input,
+    ]
+    all_args += args
+    run_benchmarks.Main(all_args)
+
+  def _LoadResults(self):
+    with open(self._test_output) as f:
+      return json.load(f)
+
+  def _VerifyResults(self, suite, units, traces):
+    self.assertEquals([
+      {"units": units,
+       "graphs": [suite, trace["name"]],
+       "results": trace["results"]} for trace in traces],
+        self._LoadResults()["traces"])
+
+  def _VerifyErrors(self, errors):
+    self.assertEquals(errors, self._LoadResults()["errors"])
+
+  def _VerifyMock(self, binary, *args):
+    arg = [path.join(path.dirname(self.base), binary)]
+    arg += args
+    commands.Execute.assert_called_with(arg, timeout=60)
+
+  def _VerifyMockMultiple(self, *args):
+    expected = []
+    for arg in args:
+      a = [path.join(path.dirname(self.base), arg[0])]
+      a += arg[1:]
+      expected.append(((a,), {"timeout": 60}))
+    self.assertEquals(expected, commands.Execute.call_args_list)
+
+  def testOneRun(self):
+    self._WriteTestInput(V8_JSON)
+    self._MockCommand(["."], ["x\nRichards: 1.234\nDeltaBlue: 10657567\ny\n"])
+    self._CallMain()
+    self._VerifyResults("test", "score", [
+      {"name": "Richards", "results": ["1.234"]},
+      {"name": "DeltaBlue", "results": ["10657567"]},
+    ])
+    self._VerifyErrors([])
+    self._VerifyMock(path.join("out", "x64.release", "d7"), "--flag", "run.js")
+
+  def testTwoRuns_Units_SuiteName(self):
+    test_input = dict(V8_JSON)
+    test_input["run_count"] = 2
+    test_input["name"] = "v8"
+    test_input["units"] = "ms"
+    self._WriteTestInput(test_input)
+    self._MockCommand([".", "."],
+                      ["Richards: 100\nDeltaBlue: 200\n",
+                       "Richards: 50\nDeltaBlue: 300\n"])
+    self._CallMain()
+    self._VerifyResults("v8", "ms", [
+      {"name": "Richards", "results": ["50", "100"]},
+      {"name": "DeltaBlue", "results": ["300", "200"]},
+    ])
+    self._VerifyErrors([])
+    self._VerifyMock(path.join("out", "x64.release", "d7"), "--flag", "run.js")
+
+  def testTwoRuns_SubRegexp(self):
+    test_input = dict(V8_JSON)
+    test_input["run_count"] = 2
+    del test_input["results_regexp"]
+    test_input["benchmarks"][0]["results_regexp"] = "^Richards: (.+)$"
+    test_input["benchmarks"][1]["results_regexp"] = "^DeltaBlue: (.+)$"
+    self._WriteTestInput(test_input)
+    self._MockCommand([".", "."],
+                      ["Richards: 100\nDeltaBlue: 200\n",
+                       "Richards: 50\nDeltaBlue: 300\n"])
+    self._CallMain()
+    self._VerifyResults("test", "score", [
+      {"name": "Richards", "results": ["50", "100"]},
+      {"name": "DeltaBlue", "results": ["300", "200"]},
+    ])
+    self._VerifyErrors([])
+    self._VerifyMock(path.join("out", "x64.release", "d7"), "--flag", "run.js")
+
+  def testNestedSuite(self):
+    self._WriteTestInput(V8_NESTED_SUITES_JSON)
+    self._MockCommand(["delta_blue", "sub/leaf", "richards"],
+                      ["DeltaBlue: 200\n",
+                       "Simple: 1 ms.\n",
+                       "Simple: 2 ms.\n",
+                       "Simple: 3 ms.\n",
+                       "Richards: 100\n",
+                       "Richards: 50\n"])
+    self._CallMain()
+    self.assertEquals([
+      {"units": "score",
+       "graphs": ["test", "Richards"],
+       "results": ["50", "100"]},
+      {"units": "ms",
+       "graphs": ["test", "Sub", "Leaf"],
+       "results": ["3", "2", "1"]},
+      {"units": "score",
+       "graphs": ["test", "DeltaBlue"],
+       "results": ["200"]},
+      ], self._LoadResults()["traces"])
+    self._VerifyErrors([])
+    self._VerifyMockMultiple(
+        (path.join("out", "x64.release", "d7"), "--flag", "file1.js",
+         "file2.js", "run.js"),
+        (path.join("out", "x64.release", "d7"), "--flag", "file1.js",
+         "file2.js", "run.js"),
+        (path.join("out", "x64.release", "d8"), "--flag", "run.js"),
+        (path.join("out", "x64.release", "d8"), "--flag", "run.js"),
+        (path.join("out", "x64.release", "d8"), "--flag", "run.js"),
+        (path.join("out", "x64.release", "d8"), "--flag", "--flag2", "run.js"))
+
+  def testBuildbot(self):
+    self._WriteTestInput(V8_JSON)
+    self._MockCommand(["."], ["Richards: 1.234\nDeltaBlue: 10657567\n"])
+    self._CallMain("--buildbot")
+    self._VerifyResults("test", "score", [
+      {"name": "Richards", "results": ["1.234"]},
+      {"name": "DeltaBlue", "results": ["10657567"]},
+    ])
+    self._VerifyErrors([])
+    self._VerifyMock(path.join("out", "Release", "d7"), "--flag", "run.js")
+
+  def testRegexpNoMatch(self):
+    self._WriteTestInput(V8_JSON)
+    self._MockCommand(["."], ["x\nRichaards: 1.234\nDeltaBlue: 10657567\ny\n"])
+    self._CallMain()
+    self._VerifyResults("test", "score", [
+      {"name": "Richards", "results": []},
+      {"name": "DeltaBlue", "results": ["10657567"]},
+    ])
+    self._VerifyErrors(
+        ["Regexp \"^Richards: (.+)$\" didn't match for benchmark Richards."])
+    self._VerifyMock(path.join("out", "x64.release", "d7"), "--flag", "run.js")
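
Note on ordering in the tests above: _MockCommand serves the fake outputs via
list.pop(), i.e. from the end of the list, so the runs consume them in reverse order.
That is why e.g. testTwoRuns_Units_SuiteName expects ["50", "100"] for Richards even
though the outputs list "100" first. A standalone sketch with hypothetical strings:

outputs = ["first run stdout", "second run stdout"]

def execute(*args, **kwargs):
  # pop() without an index removes from the end of the list.
  return outputs.pop()

print execute()  # -> second run stdout
print execute()  # -> first run stdout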