Let test runner rerun failures to test for flakes.
author machenbach@chromium.org <machenbach@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Wed, 2 Jul 2014 08:15:44 +0000 (08:15 +0000)
committer machenbach@chromium.org <machenbach@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Wed, 2 Jul 2014 08:15:44 +0000 (08:15 +0000)
When enabled, the test runner dynamically adds jobs for tests that had failures back to the worker pool. Special JSON output for flakes will be handled in a separate CL.

BUG=374134
LOG=n
R=jkummerow@chromium.org

Review URL: https://codereview.chromium.org/360113003

git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@22143 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

tools/run-deopt-fuzzer.py
tools/run-tests.py
tools/testrunner/local/execution.py
tools/testrunner/objects/context.py
tools/testrunner/objects/testcase.py
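
As a rough sketch of the rerun mechanism described above (hypothetical code, not
part of this patch; all names are illustrative, with MAX_RERUNS_PER_TEST playing
the role of --rerun-failures-count and MAX_TOTAL_RERUNS that of
--rerun-failures-max):

    # Hypothetical sketch of rerunning failures via a simple work queue.
    MAX_RERUNS_PER_TEST = 2
    MAX_TOTAL_RERUNS = 100

    def run_with_reruns(tests, run_one):
      reran_total = 0
      runs = dict((t, 1) for t in tests)   # nth execution of each test
      queue = list(tests)
      while queue:
        test = queue.pop(0)
        if run_one(test):                  # passed, nothing to do
          continue
        if runs[test] > MAX_RERUNS_PER_TEST:
          continue                         # per-test rerun budget exhausted
        if runs[test] == 1:
          if reran_total >= MAX_TOTAL_RERUNS:
            continue                       # overall rerun budget exhausted
          reran_total += 1
        runs[test] += 1
        queue.append(test)                 # add the failed job back to the pool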

diff --git a/tools/run-deopt-fuzzer.py b/tools/run-deopt-fuzzer.py
index eafed01..2b9fe1e 100755 (executable)
@@ -369,9 +369,11 @@ def Execute(arch, mode, args, options, suites, workspace):
                         timeout, options.isolates,
                         options.command_prefix,
                         options.extra_flags,
-                        False,
+                        False,  # Keep i18n on by default.
                         options.random_seed,
-                        True)
+                        True,  # No sorting of test cases.
+                        0,  # Don't rerun failing tests.
+                        0)  # No use of a rerun-failing-tests maximum.
 
   # Find available test suites and read test cases from them.
   variables = {
diff --git a/tools/run-tests.py b/tools/run-tests.py
index fbbe416..03ad716 100755 (executable)
@@ -181,6 +181,13 @@ def BuildOptions():
                     default=False, action="store_true")
   result.add_option("--json-test-results",
                     help="Path to a file for storing json results.")
+  result.add_option("--rerun-failures-count",
+                    help=("Number of times to rerun each failing test case. "
+                          "Very slow tests will be rerun only once."),
+                    default=0, type="int")
+  result.add_option("--rerun-failures-max",
+                    help="Maximum number of failing test cases to rerun.",
+                    default=100, type="int")
   result.add_option("--shard-count",
                     help="Split testsuites into this number of shards",
                     default=1, type="int")
@@ -416,7 +423,9 @@ def Execute(arch, mode, args, options, suites, workspace):
                         options.extra_flags,
                         options.no_i18n,
                         options.random_seed,
-                        options.no_sorting)
+                        options.no_sorting,
+                        options.rerun_failures_count,
+                        options.rerun_failures_max)
 
   # TODO(all): Combine "simulator" and "simulator_run".
   simulator_run = not options.dont_skip_simulator_slow_tests and \
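
A hypothetical invocation using the two options added above (the suite name and
any other arguments are illustrative, not prescribed by this change):

    tools/run-tests.py --rerun-failures-count=2 --rerun-failures-max=50 mjsunit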
diff --git a/tools/testrunner/local/execution.py b/tools/testrunner/local/execution.py
index 79f856c..939995c 100644 (file)
@@ -81,6 +81,7 @@ class Runner(object):
     self.remaining = num_tests
     self.failed = []
     self.crashed = 0
+    self.reran_tests = 0
 
   def _RunPerfSafe(self, fun):
     try:
@@ -89,6 +90,42 @@ class Runner(object):
       print("PerfData exception: %s" % e)
       self.perf_failures = True
 
+  def _GetJob(self, test):
+    command = self.GetCommand(test)
+    timeout = self.context.timeout
+    if ("--stress-opt" in test.flags or
+        "--stress-opt" in self.context.mode_flags or
+        "--stress-opt" in self.context.extra_flags):
+      timeout *= 4
+    if test.dependency is not None:
+      dep_command = [ c.replace(test.path, test.dependency) for c in command ]
+    else:
+      dep_command = None
+    return Job(command, dep_command, test.id, timeout, self.context.verbose)
+
+  def _MaybeRerun(self, pool, test):
+    if test.run <= self.context.rerun_failures_count:
+      # Possibly rerun this test if its run count is below the maximum per
+      # test.
+      if test.run == 1:
+        # Count the overall number of rerun tests on the first rerun.
+        if self.reran_tests < self.context.rerun_failures_max:
+          self.reran_tests += 1
+        else:
+          # Don't rerun this test if the maximum number of rerun tests has
+          # been reached.
+          return
+      if test.run >= 2 and test.duration > self.context.timeout / 20:
+        # Rerun slow tests at most once.
+        return
+
+      # Rerun this test.
+      test.duration = None
+      test.output = None
+      test.run += 1
+      pool.add([self._GetJob(test)])
+      self.remaining += 1
+
   def Run(self, jobs):
     self.indicator.Starting()
     self._RunInternal(jobs)
@@ -109,23 +146,12 @@ class Runner(object):
       assert test.id >= 0
       test_map[test.id] = test
       try:
-        command = self.GetCommand(test)
+        queue.append([self._GetJob(test)])
       except Exception, e:
         # If this failed, save the exception and re-raise it later (after
         # all other tests have had a chance to run).
         queued_exception = e
         continue
-      timeout = self.context.timeout
-      if ("--stress-opt" in test.flags or
-          "--stress-opt" in self.context.mode_flags or
-          "--stress-opt" in self.context.extra_flags):
-        timeout *= 4
-      if test.dependency is not None:
-        dep_command = [ c.replace(test.path, test.dependency) for c in command ]
-      else:
-        dep_command = None
-      job = Job(command, dep_command, test.id, timeout, self.context.verbose)
-      queue.append([job])
     try:
       it = pool.imap_unordered(RunTest, queue)
       for result in it:
@@ -143,6 +169,9 @@ class Runner(object):
           self.succeeded += 1
         self.remaining -= 1
         self.indicator.HasRun(test, has_unexpected_output)
+        if has_unexpected_output:
+          # Rerun test failures after the indicator has processed the results.
+          self._MaybeRerun(pool, test)
     finally:
       pool.terminate()
       self._RunPerfSafe(lambda: self.perf_data_manager.close())
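
The decision implemented by _MaybeRerun above can be summarized as the following
predicate (a hedged restatement for readability, not the committed code; ctx is
a stand-in for the Context object carrying rerun_failures_count,
rerun_failures_max and timeout):

    def should_rerun(run, duration, reran_tests, ctx):
      if run > ctx.rerun_failures_count:
        return False                       # per-test rerun budget used up
      if run == 1 and reran_tests >= ctx.rerun_failures_max:
        return False                       # overall rerun budget used up
      if run >= 2 and duration > ctx.timeout / 20.0:
        return False                       # rerun slow tests at most once
      return True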
diff --git a/tools/testrunner/objects/context.py b/tools/testrunner/objects/context.py
index f8f764b..d288e0e 100644 (file)
@@ -29,7 +29,7 @@
 class Context():
   def __init__(self, arch, mode, shell_dir, mode_flags, verbose, timeout,
                isolates, command_prefix, extra_flags, noi18n, random_seed,
-               no_sorting):
+               no_sorting, rerun_failures_count, rerun_failures_max):
     self.arch = arch
     self.mode = mode
     self.shell_dir = shell_dir
@@ -42,15 +42,18 @@ class Context():
     self.noi18n = noi18n
     self.random_seed = random_seed
     self.no_sorting = no_sorting
+    self.rerun_failures_count = rerun_failures_count
+    self.rerun_failures_max = rerun_failures_max
 
   def Pack(self):
     return [self.arch, self.mode, self.mode_flags, self.timeout, self.isolates,
             self.command_prefix, self.extra_flags, self.noi18n,
-            self.random_seed, self.no_sorting]
+            self.random_seed, self.no_sorting, self.rerun_failures_count,
+            self.rerun_failures_max]
 
   @staticmethod
   def Unpack(packed):
     # For the order of the fields, refer to Pack() above.
     return Context(packed[0], packed[1], None, packed[2], False,
                    packed[3], packed[4], packed[5], packed[6], packed[7],
-                   packed[8], packed[9])
+                   packed[8], packed[9], packed[10], packed[11])
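
Pack() and Unpack() serialize the Context positionally, so both new fields must
be appended in the same order in both methods. A minimal consistency check
(hypothetical, not part of the patch; argument values are arbitrary and the
import path assumes tools/ is on the Python path in a V8 checkout):

    from testrunner.objects.context import Context

    ctx = Context("x64", "release", "out/x64.release", [], False, 60,
                  1, [], [], False, None, False, 2, 100)
    restored = Context.Unpack(ctx.Pack())
    assert restored.rerun_failures_count == 2
    assert restored.rerun_failures_max == 100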
diff --git a/tools/testrunner/objects/testcase.py b/tools/testrunner/objects/testcase.py
index cfc522e..ca82606 100644 (file)
@@ -38,6 +38,7 @@ class TestCase(object):
     self.output = None
     self.id = None  # int, used to map result back to TestCase instance
     self.duration = None  # assigned during execution
+    self.run = 1  # The nth time this test is executed.
 
   def CopyAddingFlags(self, flags):
     copy = TestCase(self.suite, self.path, self.flags + flags, self.dependency)
@@ -60,6 +61,7 @@ class TestCase(object):
     test = TestCase(str(task[0]), task[1], task[2], task[3])
     test.outcomes = set(task[4])
     test.id = task[5]
+    test.run = 1
     return test
 
   def SetSuiteObject(self, suites):