# Copyright (C) 2011 Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import logging
import re
import time

from webkitpy.layout_tests.controllers import repaint_overlay
from webkitpy.layout_tests.controllers import test_result_writer
from webkitpy.layout_tests.port.driver import DeviceFailure, DriverInput, DriverOutput
from webkitpy.layout_tests.models import test_expectations
from webkitpy.layout_tests.models import test_failures
from webkitpy.layout_tests.models.test_results import TestResult
from webkitpy.layout_tests.models import testharness_results


_log = logging.getLogger(__name__)


def run_single_test(port, options, results_directory, worker_name, driver, test_input, stop_when_done):
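    """Runs one test with the given driver, converting device failures into a failed TestResult."""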
    runner = SingleTestRunner(port, options, results_directory, worker_name, driver, test_input, stop_when_done)
    try:
        return runner.run()
    except DeviceFailure as e:
        _log.error("device failed: %s", str(e))
        return TestResult(test_input.test_name, device_failed=True)


class SingleTestRunner(object):
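    """Runs a single layout test and compares its output against baselines or reference files."""

    # The possible destinations for a newly written baseline file.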
    (ALONGSIDE_TEST, PLATFORM_DIR, VERSION_DIR, UPDATE) = ('alongside', 'platform', 'version', 'update')

    def __init__(self, port, options, results_directory, worker_name, driver, test_input, stop_when_done):
        self._port = port
        self._filesystem = port.host.filesystem
        self._options = options
        self._results_directory = results_directory
        self._driver = driver
        self._timeout = test_input.timeout
        self._worker_name = worker_name
        self._test_name = test_input.test_name
        self._should_run_pixel_test = test_input.should_run_pixel_test
        self._reference_files = test_input.reference_files
        self._should_add_missing_baselines = test_input.should_add_missing_baselines
        self._stop_when_done = stop_when_done

        if self._reference_files:
            # Detect and report a test which has a wrong combination of expectation files.
            # For example, if 'foo.html' has two expectation files, 'foo-expected.html' and
            # 'foo-expected.txt', we should warn users. One test file must be used exclusively
            # in either layout tests or reftests, but not in both.
            for suffix in ('.txt', '.png', '.wav'):
                expected_filename = self._port.expected_filename(self._test_name, suffix)
                if self._filesystem.exists(expected_filename):
                    _log.error('%s is a reftest, but has an unused expectation file. Please remove %s.',
                        self._test_name, expected_filename)

    def _expected_driver_output(self):
        return DriverOutput(self._port.expected_text(self._test_name),
                            self._port.expected_image(self._test_name),
                            self._port.expected_checksum(self._test_name),
                            self._port.expected_audio(self._test_name))

    def _should_fetch_expected_checksum(self):
        return self._should_run_pixel_test and not (self._options.new_baseline or self._options.reset_results)

    def _driver_input(self):
        # The image hash is used to avoid doing an image dump if the
        # checksums match, so it should be set to a blank value if we
        # are generating a new baseline. (Otherwise, an image from a
        # previous run would be copied into the baseline.)
        image_hash = None
        if self._should_fetch_expected_checksum():
            image_hash = self._port.expected_checksum(self._test_name)

        test_base = self._port.lookup_virtual_test_base(self._test_name)
        if test_base:
            # If the file actually exists under the virtual dir, we want to use it (largely for virtual references),
            # but we want to use the extra command line args either way.
            if self._filesystem.exists(self._port.abspath_for_test(self._test_name)):
                test_name = self._test_name
            else:
                test_name = test_base
            args = self._port.lookup_virtual_test_args(self._test_name)
        else:
            test_name = self._test_name
            args = self._port.lookup_physical_test_args(self._test_name)
        return DriverInput(test_name, self._timeout, image_hash, self._should_run_pixel_test, args)

    def run(self):
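        """Runs the test and returns its TestResult.

        Dispatch order: sanitizer runs only check for crashes and timeouts;
        reftests are compared against their references (or marked SKIP when
        results are being reset); otherwise the test is either rebaselined
        (when resetting results) or compared against its expected output.
        """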
        if self._options.enable_sanitizer:
            return self._run_sanitized_test()
        if self._reference_files:
            if self._options.reset_results:
                reftest_type = set([reference_file[0] for reference_file in self._reference_files])
                result = TestResult(self._test_name, reftest_type=reftest_type)
                result.type = test_expectations.SKIP
                return result
            return self._run_reftest()
        if self._options.reset_results:
            return self._run_rebaseline()
        return self._run_compare_test()

    def _run_sanitized_test(self):
        # Running a sanitized test means that we ignore the actual test output and just look
        # for timeouts and crashes (real or forced by the driver). Most crashes should
        # indicate problems found by a sanitizer (ASAN, LSAN, etc.), but we report
        # other crashes and timeouts as well in order to detect at least *some* basic failures.
        driver_output = self._driver.run_test(self._driver_input(), self._stop_when_done)
        expected_driver_output = self._expected_driver_output()
        failures = self._handle_error(driver_output)
        test_result = TestResult(self._test_name, failures, driver_output.test_time, driver_output.has_stderr(),
                                 pid=driver_output.pid)
        test_result_writer.write_test_result(self._filesystem, self._port, self._results_directory, self._test_name, driver_output, expected_driver_output, test_result.failures)
        return test_result

    def _run_compare_test(self):
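        """Runs the test and compares its output against the expected baselines."""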
        driver_output = self._driver.run_test(self._driver_input(), self._stop_when_done)
        expected_driver_output = self._expected_driver_output()

        test_result = self._compare_output(expected_driver_output, driver_output)
        if self._should_add_missing_baselines:
            self._add_missing_baselines(test_result, driver_output)
        test_result_writer.write_test_result(self._filesystem, self._port, self._results_directory, self._test_name, driver_output, expected_driver_output, test_result.failures)
        return test_result

    def _run_rebaseline(self):
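        """Runs the test and overwrites the existing baselines with its output."""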
        driver_output = self._driver.run_test(self._driver_input(), self._stop_when_done)
        failures = self._handle_error(driver_output)
        test_result_writer.write_test_result(self._filesystem, self._port, self._results_directory, self._test_name, driver_output, None, failures)
        # FIXME: If the test crashed or timed out, it might be better to avoid
        # writing new baselines.
        self._overwrite_baselines(driver_output)
        return TestResult(self._test_name, failures, driver_output.test_time, driver_output.has_stderr(),
                          pid=driver_output.pid)

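    # Matches the first line of a render tree dump, e.g. "layer at (0,0) size 800x600".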
    _render_tree_dump_pattern = re.compile(r"^layer at \(\d+,\d+\) size \d+x\d+\n")

    def _add_missing_baselines(self, test_result, driver_output):
        missing_image = test_result.has_failure_matching_types(test_failures.FailureMissingImage, test_failures.FailureMissingImageHash)
        if test_result.has_failure_matching_types(test_failures.FailureMissingResult):
            self._save_baseline_data(driver_output.text, '.txt', self._location_for_new_baseline(driver_output.text, '.txt'))
        if test_result.has_failure_matching_types(test_failures.FailureMissingAudio):
            self._save_baseline_data(driver_output.audio, '.wav', self._location_for_new_baseline(driver_output.audio, '.wav'))
        if missing_image:
            self._save_baseline_data(driver_output.image, '.png', self._location_for_new_baseline(driver_output.image, '.png'))

    def _location_for_new_baseline(self, data, extension):
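        """Picks the directory type a new baseline should be written to.

        If the add_platform_exceptions option is set, baselines go into the
        version-specific directory. Otherwise pixel results and render tree
        dumps go into the platform directory, while other text and audio
        results go alongside the test.
        """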
        if self._options.add_platform_exceptions:
            return self.VERSION_DIR
        if extension == '.png':
            return self.PLATFORM_DIR
        if extension == '.wav':
            return self.ALONGSIDE_TEST
        if extension == '.txt' and self._render_tree_dump_pattern.match(data):
            return self.PLATFORM_DIR
        return self.ALONGSIDE_TEST

    def _overwrite_baselines(self, driver_output):
        location = self.VERSION_DIR if self._options.add_platform_exceptions else self.UPDATE
        self._save_baseline_data(driver_output.text, '.txt', location)
        self._save_baseline_data(driver_output.audio, '.wav', location)
        if self._should_run_pixel_test:
            self._save_baseline_data(driver_output.image, '.png', location)

    def _save_baseline_data(self, data, extension, location):
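        """Writes data as a new baseline with the given extension at the given location; no-op when data is None."""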
        if data is None:
            return
        port = self._port
        fs = self._filesystem
        if location == self.ALONGSIDE_TEST:
            output_dir = fs.dirname(port.abspath_for_test(self._test_name))
        elif location == self.VERSION_DIR:
            output_dir = fs.join(port.baseline_version_dir(), fs.dirname(self._test_name))
        elif location == self.PLATFORM_DIR:
            output_dir = fs.join(port.baseline_platform_dir(), fs.dirname(self._test_name))
        elif location == self.UPDATE:
            output_dir = fs.dirname(port.expected_filename(self._test_name, extension))
        else:
            raise AssertionError('unrecognized baseline location: %s' % location)

        fs.maybe_make_directory(output_dir)
        output_basename = fs.basename(fs.splitext(self._test_name)[0] + "-expected" + extension)
        output_path = fs.join(output_dir, output_basename)
        _log.info('Writing new expected result "%s"', port.relative_test_filename(output_path))
        port.update_baseline(output_path, data)

    def _handle_error(self, driver_output, reference_filename=None):
        """Returns a list of test failures for any unusual errors (timeouts, crashes, leaks) in the driver's run.

        Args:
          driver_output: The output from the driver.
          reference_filename: The full path to the reference file which produced the driver_output.
              This arg is optional and should be used only in reftests until we have a better way to know
              which html file is used for producing the driver_output.
        """
        failures = []
        if driver_output.timeout:
            failures.append(test_failures.FailureTimeout(bool(reference_filename)))

        if reference_filename:
            testname = self._port.relative_test_filename(reference_filename)
        else:
            testname = self._test_name

        if driver_output.crash:
            failures.append(test_failures.FailureCrash(bool(reference_filename),
                                                       driver_output.crashed_process_name,
                                                       driver_output.crashed_pid,
                                                       bool(driver_output.crash_log and 'No crash log found' not in driver_output.crash_log)))
            if driver_output.error:
                _log.debug("%s %s crashed, (stderr lines):" % (self._worker_name, testname))
            else:
                _log.debug("%s %s crashed, (no stderr)" % (self._worker_name, testname))
        elif driver_output.leak:
            failures.append(test_failures.FailureLeak(bool(reference_filename),
                                                      driver_output.leak_log))
            _log.debug("%s %s leaked" % (self._worker_name, testname))
        elif driver_output.error:
            _log.debug("%s %s output stderr lines:" % (self._worker_name, testname))
        for line in driver_output.error.splitlines():
            _log.debug("  %s" % line)
        return failures

    def _compare_output(self, expected_driver_output, driver_output):
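        """Compares actual driver output with expected output and returns a TestResult."""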
        failures = []
        failures.extend(self._handle_error(driver_output))

        if driver_output.crash:
            # Don't continue if we already have a crash.
            # In case of timeouts, we continue since we still want to see the text and image output.
            return TestResult(self._test_name, failures, driver_output.test_time, driver_output.has_stderr(),
                              pid=driver_output.pid)

        is_testharness_test, testharness_failures = self._compare_testharness_test(driver_output, expected_driver_output)
        if is_testharness_test:
            failures.extend(testharness_failures)
        else:
            failures.extend(self._compare_text(expected_driver_output.text, driver_output.text))
            failures.extend(self._compare_audio(expected_driver_output.audio, driver_output.audio))
            if self._should_run_pixel_test:
                failures.extend(self._compare_image(expected_driver_output, driver_output))
        has_repaint_overlay = (repaint_overlay.result_contains_repaint_rects(expected_driver_output.text) or
                               repaint_overlay.result_contains_repaint_rects(driver_output.text))
        return TestResult(self._test_name, failures, driver_output.test_time, driver_output.has_stderr(),
                          pid=driver_output.pid, has_repaint_overlay=has_repaint_overlay)

    def _compare_testharness_test(self, driver_output, expected_driver_output):
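        """Determines whether this is a testharness.js test and, if so, whether it passed.

        A test is treated as a testharness test only when it has no baselines
        of any kind and produces text-only output that looks like testharness
        output. Returns a (is_testharness_test, failures) pair.
        """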
        if expected_driver_output.image or expected_driver_output.audio or expected_driver_output.text:
            return False, []

        if driver_output.image or driver_output.audio or self._is_render_tree(driver_output.text):
            return False, []

        text = driver_output.text or ''

        if not testharness_results.is_testharness_output(text):
            return False, []
        if not testharness_results.is_testharness_output_passing(text):
            return True, [test_failures.FailureTestHarnessAssertion()]
        return True, []

    def _is_render_tree(self, text):
        return text and "layer at (0,0) size 800x600" in text

    def _compare_text(self, expected_text, actual_text):
        failures = []
        if (expected_text and actual_text and
            # Assuming expected_text is already normalized.
            self._port.do_text_results_differ(expected_text, self._get_normalized_output_text(actual_text))):
            failures.append(test_failures.FailureTextMismatch())
        elif actual_text and not expected_text:
            failures.append(test_failures.FailureMissingResult())
        return failures

    def _compare_audio(self, expected_audio, actual_audio):
        failures = []
        if (expected_audio and actual_audio and
            self._port.do_audio_results_differ(expected_audio, actual_audio)):
            failures.append(test_failures.FailureAudioMismatch())
        elif actual_audio and not expected_audio:
            failures.append(test_failures.FailureMissingAudio())
        return failures

    def _get_normalized_output_text(self, output):
        """Returns the normalized text output, i.e. the output in which
        the end-of-line characters are normalized to "\n"."""
        # Running tests on Windows produces "\r\n".  The "\n" part is helpfully
        # changed to "\r\n" by our system (Python/Cygwin), resulting in
        # "\r\r\n", when, in fact, we wanted to compare the text output with
        # the normalized text expectation files.
        return output.replace("\r\r\n", "\r\n").replace("\r\n", "\n")

    # FIXME: This function also creates the image diff. Maybe that work should
    # be handled elsewhere?
    def _compare_image(self, expected_driver_output, driver_output):
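        """Returns any image failures: a missing baseline image or hash, or a pixel mismatch."""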
        failures = []
        # If we didn't produce a hash file, this test must be text-only.
        if driver_output.image_hash is None:
            return failures
        if not expected_driver_output.image:
            failures.append(test_failures.FailureMissingImage())
        elif not expected_driver_output.image_hash:
            failures.append(test_failures.FailureMissingImageHash())
        elif driver_output.image_hash != expected_driver_output.image_hash:
            diff, err_str = self._port.diff_image(expected_driver_output.image, driver_output.image)
            if err_str:
                _log.warning('  %s : %s' % (self._test_name, err_str))
                failures.append(test_failures.FailureImageHashMismatch())
                driver_output.error = (driver_output.error or '') + err_str
            else:
                driver_output.image_diff = diff
                if driver_output.image_diff:
                    failures.append(test_failures.FailureImageHashMismatch())
                else:
                    # See https://bugs.webkit.org/show_bug.cgi?id=69444 for why this isn't a full failure.
                    _log.warning('  %s -> pixel hash failed (but diff passed)' % self._test_name)
        return failures

    def _run_reftest(self):
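        """Runs a reftest: renders the test and compares it against each of its references, mismatches first."""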
        test_output = self._driver.run_test(self._driver_input(), self._stop_when_done)
        total_test_time = 0
        reference_output = None
        test_result = None

        # If the test crashed, or timed out, there's no point in running the reference at all.
        # This can save a lot of execution time if we have a lot of crashes or timeouts.
        if test_output.crash or test_output.timeout:
            expected_driver_output = DriverOutput(text=None, image=None, image_hash=None, audio=None)
            return self._compare_output(expected_driver_output, test_output)

        # A reftest can have multiple match references and multiple mismatch references;
        # the test fails if any mismatch reference renders identically to the test, or if
        # none of the match references does. To minimize the number of references we have
        # to check, we run all of the mismatches first, then the matches, and short-circuit
        # out as soon as we can. Note that sorting by the expectation sorts "!=" before
        # "==", so this is easy to do.

        put_all_mismatches_before_matches = sorted
        reference_test_names = []
        for expectation, reference_filename in put_all_mismatches_before_matches(self._reference_files):
            if self._port.lookup_virtual_test_base(self._test_name):
                args = self._port.lookup_virtual_test_args(self._test_name)
            else:
                args = self._port.lookup_physical_test_args(self._test_name)
            reference_test_name = self._port.relative_test_filename(reference_filename)
            reference_test_names.append(reference_test_name)
            driver_input = DriverInput(reference_test_name, self._timeout, image_hash=None, should_run_pixel_test=True, args=args)
            reference_output = self._driver.run_test(driver_input, self._stop_when_done)
            test_result = self._compare_output_with_reference(reference_output, test_output, reference_filename, expectation == '!=')

            if (expectation == '!=' and test_result.failures) or (expectation == '==' and not test_result.failures):
                break
            total_test_time += test_result.test_run_time

        assert reference_output
        test_result_writer.write_test_result(self._filesystem, self._port, self._results_directory, self._test_name, test_output, reference_output, test_result.failures)

        # FIXME: We don't really deal with a mix of reftest types properly. We pass in a set() to reftest_type
        # and only really handle the first of the references in the result.
        reftest_type = list(set([reference_file[0] for reference_file in self._reference_files]))
        return TestResult(self._test_name, test_result.failures, total_test_time + test_result.test_run_time,
                          test_result.has_stderr, reftest_type=reftest_type, pid=test_result.pid,
                          references=reference_test_names)

    def _compare_output_with_reference(self, reference_driver_output, actual_driver_output, reference_filename, mismatch):
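        """Compares the test's output against a single reference's output.

        If mismatch is True (a "!=" reference), the images are expected to
        differ; otherwise they are expected to match. Returns a TestResult.
        """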
        total_test_time = reference_driver_output.test_time + actual_driver_output.test_time
        has_stderr = reference_driver_output.has_stderr() or actual_driver_output.has_stderr()
        failures = []
        failures.extend(self._handle_error(actual_driver_output))
        if failures:
            # Don't continue if we already have a crash or timeout.
            return TestResult(self._test_name, failures, total_test_time, has_stderr)
        failures.extend(self._handle_error(reference_driver_output, reference_filename=reference_filename))
        if failures:
            return TestResult(self._test_name, failures, total_test_time, has_stderr, pid=actual_driver_output.pid)

        if not reference_driver_output.image_hash and not actual_driver_output.image_hash:
            failures.append(test_failures.FailureReftestNoImagesGenerated(reference_filename))
        elif mismatch:
            if reference_driver_output.image_hash == actual_driver_output.image_hash:
                diff, err_str = self._port.diff_image(reference_driver_output.image, actual_driver_output.image)
                if not diff:
                    failures.append(test_failures.FailureReftestMismatchDidNotOccur(reference_filename))
                elif err_str:
                    _log.error(err_str)
                else:
                    _log.warning("  %s -> ref test hashes matched but diff failed" % self._test_name)

        elif reference_driver_output.image_hash != actual_driver_output.image_hash:
            diff, err_str = self._port.diff_image(reference_driver_output.image, actual_driver_output.image)
            if diff:
                failures.append(test_failures.FailureReftestMismatch(reference_filename))
            elif err_str:
                _log.error(err_str)
            else:
                _log.warning("  %s -> ref test hashes didn't match but diff passed" % self._test_name)

        return TestResult(self._test_name, failures, total_test_time, has_stderr, pid=actual_driver_output.pid)