From ae017be69cee5da92256b8a9a2258c26562238aa Mon Sep 17 00:00:00 2001
From: "machenbach@chromium.org"
Date: Wed, 2 Jul 2014 08:15:44 +0000
Subject: [PATCH] Let test runner rerun failures to test for flakes.

When enabled, this dynamically adds jobs that had failures back to the
pool. Special json output for flakes will be handled in a separate CL.

BUG=374134
LOG=n
R=jkummerow@chromium.org

Review URL: https://codereview.chromium.org/360113003

git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@22143 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
---
 tools/run-deopt-fuzzer.py            |  6 ++--
 tools/run-tests.py                   | 11 +++++++-
 tools/testrunner/local/execution.py  | 53 ++++++++++++++++++++++++++++--------
 tools/testrunner/objects/context.py  |  9 ++++--
 tools/testrunner/objects/testcase.py |  2 ++
 5 files changed, 63 insertions(+), 18 deletions(-)

diff --git a/tools/run-deopt-fuzzer.py b/tools/run-deopt-fuzzer.py
index eafed01..2b9fe1e 100755
--- a/tools/run-deopt-fuzzer.py
+++ b/tools/run-deopt-fuzzer.py
@@ -369,9 +369,11 @@ def Execute(arch, mode, args, options, suites, workspace):
                         timeout, options.isolates,
                         options.command_prefix,
                         options.extra_flags,
-                        False,
+                        False,  # Keep i18n on by default.
                         options.random_seed,
-                        True)
+                        True,  # No sorting of test cases.
+                        0,  # Don't rerun failing tests.
+                        0)  # No use of a rerun-failing-tests maximum.
 
   # Find available test suites and read test cases from them.
   variables = {
diff --git a/tools/run-tests.py b/tools/run-tests.py
index fbbe416..03ad716 100755
--- a/tools/run-tests.py
+++ b/tools/run-tests.py
@@ -181,6 +181,13 @@ def BuildOptions():
                     default=False, action="store_true")
   result.add_option("--json-test-results",
                     help="Path to a file for storing json results.")
+  result.add_option("--rerun-failures-count",
+                    help=("Number of times to rerun each failing test case. "
+                          "Very slow tests will be rerun only once."),
+                    default=0, type="int")
+  result.add_option("--rerun-failures-max",
+                    help="Maximum number of failing test cases to rerun.",
+                    default=100, type="int")
   result.add_option("--shard-count",
                     help="Split testsuites into this number of shards",
                     default=1, type="int")
@@ -416,7 +423,9 @@ def Execute(arch, mode, args, options, suites, workspace):
                         options.extra_flags,
                         options.no_i18n,
                         options.random_seed,
-                        options.no_sorting)
+                        options.no_sorting,
+                        options.rerun_failures_count,
+                        options.rerun_failures_max)
 
   # TODO(all): Combine "simulator" and "simulator_run".
   simulator_run = not options.dont_skip_simulator_slow_tests and \
diff --git a/tools/testrunner/local/execution.py b/tools/testrunner/local/execution.py
index 79f856c..939995c 100644
--- a/tools/testrunner/local/execution.py
+++ b/tools/testrunner/local/execution.py
@@ -81,6 +81,7 @@ class Runner(object):
     self.remaining = num_tests
     self.failed = []
     self.crashed = 0
+    self.reran_tests = 0
 
   def _RunPerfSafe(self, fun):
     try:
@@ -89,6 +90,42 @@ class Runner(object):
       print("PerfData exception: %s" % e)
       self.perf_failures = True
 
+  def _GetJob(self, test):
+    command = self.GetCommand(test)
+    timeout = self.context.timeout
+    if ("--stress-opt" in test.flags or
+        "--stress-opt" in self.context.mode_flags or
+        "--stress-opt" in self.context.extra_flags):
+      timeout *= 4
+    if test.dependency is not None:
+      dep_command = [ c.replace(test.path, test.dependency) for c in command ]
+    else:
+      dep_command = None
+    return Job(command, dep_command, test.id, timeout, self.context.verbose)
+
+  def _MaybeRerun(self, pool, test):
+    if test.run <= self.context.rerun_failures_count:
+      # Possibly rerun this test if its run count is below the maximum per
+      # test.
+      if test.run == 1:
+        # Count the overall number of reran tests on the first rerun.
+        if self.reran_tests < self.context.rerun_failures_max:
+          self.reran_tests += 1
+        else:
+          # Don't rerun this if the overall number of rerun tests has been
+          # reached.
+          return
+      if test.run >= 2 and test.duration > self.context.timeout / 20:
+        # Rerun slow tests at most once.
+        return
+
+      # Rerun this test.
+      test.duration = None
+      test.output = None
+      test.run += 1
+      pool.add([self._GetJob(test)])
+      self.remaining += 1
+
   def Run(self, jobs):
     self.indicator.Starting()
     self._RunInternal(jobs)
@@ -109,23 +146,12 @@ class Runner(object):
       assert test.id >= 0
       test_map[test.id] = test
       try:
-        command = self.GetCommand(test)
+        queue.append([self._GetJob(test)])
       except Exception, e:
         # If this failed, save the exception and re-raise it later (after
         # all other tests have had a chance to run).
         queued_exception = e
         continue
-      timeout = self.context.timeout
-      if ("--stress-opt" in test.flags or
-          "--stress-opt" in self.context.mode_flags or
-          "--stress-opt" in self.context.extra_flags):
-        timeout *= 4
-      if test.dependency is not None:
-        dep_command = [ c.replace(test.path, test.dependency) for c in command ]
-      else:
-        dep_command = None
-      job = Job(command, dep_command, test.id, timeout, self.context.verbose)
-      queue.append([job])
     try:
       it = pool.imap_unordered(RunTest, queue)
       for result in it:
@@ -143,6 +169,9 @@ class Runner(object):
           self.succeeded += 1
         self.remaining -= 1
         self.indicator.HasRun(test, has_unexpected_output)
+        if has_unexpected_output:
+          # Rerun test failures after the indicator has processed the results.
+          self._MaybeRerun(pool, test)
     finally:
       pool.terminate()
       self._RunPerfSafe(lambda: self.perf_data_manager.close())
diff --git a/tools/testrunner/objects/context.py b/tools/testrunner/objects/context.py
index f8f764b..d288e0e 100644
--- a/tools/testrunner/objects/context.py
+++ b/tools/testrunner/objects/context.py
@@ -29,7 +29,7 @@
 class Context():
   def __init__(self, arch, mode, shell_dir, mode_flags, verbose, timeout,
                isolates, command_prefix, extra_flags, noi18n, random_seed,
-               no_sorting):
+               no_sorting, rerun_failures_count, rerun_failures_max):
     self.arch = arch
     self.mode = mode
     self.shell_dir = shell_dir
@@ -42,15 +42,18 @@ class Context():
     self.noi18n = noi18n
     self.random_seed = random_seed
     self.no_sorting = no_sorting
+    self.rerun_failures_count = rerun_failures_count
+    self.rerun_failures_max = rerun_failures_max
 
   def Pack(self):
     return [self.arch, self.mode, self.mode_flags, self.timeout, self.isolates,
             self.command_prefix, self.extra_flags, self.noi18n,
-            self.random_seed, self.no_sorting]
+            self.random_seed, self.no_sorting, self.rerun_failures_count,
+            self.rerun_failures_max]
 
   @staticmethod
   def Unpack(packed):
     # For the order of the fields, refer to Pack() above.
     return Context(packed[0], packed[1], None, packed[2], False, packed[3],
                    packed[4], packed[5], packed[6], packed[7],
-                   packed[8], packed[9])
+                   packed[8], packed[9], packed[10], packed[11])
diff --git a/tools/testrunner/objects/testcase.py b/tools/testrunner/objects/testcase.py
index cfc522e..ca82606 100644
--- a/tools/testrunner/objects/testcase.py
+++ b/tools/testrunner/objects/testcase.py
@@ -38,6 +38,7 @@ class TestCase(object):
     self.output = None
     self.id = None  # int, used to map result back to TestCase instance
     self.duration = None  # assigned during execution
+    self.run = 1  # The nth time this test is executed.
 
   def CopyAddingFlags(self, flags):
     copy = TestCase(self.suite, self.path, self.flags + flags, self.dependency)
@@ -60,6 +61,7 @@ class TestCase(object):
     test = TestCase(str(task[0]), task[1], task[2], task[3])
     test.outcomes = set(task[4])
     test.id = task[5]
+    test.run = 1
     return test
 
   def SetSuiteObject(self, suites):
-- 
2.7.4
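
The rerun behavior introduced by this patch is off by default and is driven by
two knobs: --rerun-failures-count bounds how often a single failing test is
re-queued, --rerun-failures-max bounds how many distinct failing tests are
re-queued overall, and a test slower than a twentieth of the timeout is rerun
at most once. The standalone sketch below mirrors that decision logic for
illustration only; the function name and argument names are placeholders, not
part of the test runner's API.

# Illustrative sketch of the rerun decision made in Runner._MaybeRerun above.
def should_rerun(run, duration, reran_tests,
                 rerun_failures_count, rerun_failures_max, timeout):
  # run: how many times the test has executed so far (starts at 1).
  # duration: seconds the last run took.
  # reran_tests: how many distinct failing tests were already re-queued.
  if run > rerun_failures_count:
    return False  # Per-test rerun budget exhausted.
  if run == 1 and reran_tests >= rerun_failures_max:
    return False  # Overall rerun budget exhausted.
  if run >= 2 and duration > timeout / 20.0:
    return False  # Very slow tests are rerun only once.
  return True

# With --rerun-failures-count=2 a fast failing test is re-queued twice,
# i.e. it executes at most three times in total.
assert should_rerun(1, 0.5, 0, 2, 100, 60)
assert should_rerun(2, 0.5, 1, 2, 100, 60)
assert not should_rerun(3, 0.5, 1, 2, 100, 60)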