src/third_party/skia/gm/rebaseline_server/compare_to_expectations.py

   1 #!/usr/bin/python
   2
   3 """
   4 Copyright 2013 Google Inc.
   5
   6 Use of this source code is governed by a BSD-style license that can be
   7 found in the LICENSE file.
   8
   9 Repackage expected/actual GM results as needed by our HTML rebaseline viewer.
  10 """
  11
  12 # System-level imports
  13 import argparse
  14 import fnmatch
  15 import logging
  16 import os
  17 import time
  18
  19 # Must fix up PYTHONPATH before importing from within Skia
  20 import fix_pythonpath  # pylint: disable=W0611
  21
  22 # Imports from within Skia
  23 from py.utils import url_utils
  24 import column
  25 import gm_json
  26 import imagediffdb
  27 import imagepair
  28 import imagepairset
  29 import results
  30
# Expectation fields that ExpectationComparisons copies unchanged: from the
# expected-results dictionaries into each image pair's expectations, and from
# submitted modifications back into the expected-results dictionaries.
EXPECTATION_FIELDS_PASSED_THRU_VERBATIM = [
    results.KEY__EXPECTATIONS__BUGS,
    results.KEY__EXPECTATIONS__IGNOREFAILURE,
    results.KEY__EXPECTATIONS__REVIEWED,
]
# Extra columns whose header is created with use_freeform_filter=True.
FREEFORM_COLUMN_IDS = [
    results.KEY__EXTRACOLUMNS__BUILDER,
    results.KEY__EXTRACOLUMNS__TEST,
]
# Column order passed as column_ids_in_order when the image pair sets are
# converted to dictionaries.
ORDERED_COLUMN_IDS = [
    results.KEY__EXTRACOLUMNS__RESULT_TYPE,
    results.KEY__EXTRACOLUMNS__BUILDER,
    results.KEY__EXTRACOLUMNS__TEST,
    results.KEY__EXTRACOLUMNS__CONFIG,
]

# The directory two levels above the one that contains this file.
TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
# Default root directory of the expected-results JSON files.
DEFAULT_EXPECTATIONS_DIR = os.path.join(TRUNK_DIRECTORY, 'expectations', 'gm')
# Default name of the ignore-failures file; it is joined onto the expectations
# root, not onto the current working directory.
DEFAULT_IGNORE_FAILURES_FILE = 'ignored-tests.txt'

# Descriptions handed to every ImagePairSet built in this module.
IMAGEPAIR_SET_DESCRIPTIONS = ('expected image', 'actual image')
  52
  53
  54 class ExpectationComparisons(results.BaseComparisons):
  55   """Loads actual and expected GM results into an ImagePairSet.
  56
  57   Loads actual and expected results from all builders, except for those skipped
  58   by _ignore_builder().
  59
  60   Once this object has been constructed, the results (in self._results[])
  61   are immutable.  If you want to update the results based on updated JSON
  62   file contents, you will need to create a new ExpectationComparisons object."""
  63
  64   def __init__(self, image_diff_db, actuals_root=results.DEFAULT_ACTUALS_DIR,
  65                expected_root=DEFAULT_EXPECTATIONS_DIR,
  66                ignore_failures_file=DEFAULT_IGNORE_FAILURES_FILE,
  67                diff_base_url=None, builder_regex_list=None):
  68     """
  69     Args:
  70       image_diff_db: instance of ImageDiffDB we use to cache the image diffs
  71       actuals_root: root directory containing all actual-results.json files
  72       expected_root: root directory containing all expected-results.json files
  73       ignore_failures_file: if a file with this name is found within
  74           expected_root, ignore failures for any tests listed in the file
  75       diff_base_url: base URL within which the client should look for diff
  76           images; if not specified, defaults to a "file:///" URL representation
  77           of image_diff_db's storage_root
  78       builder_regex_list: List of regular expressions specifying which builders
  79           we will process. If None, process all builders.
  80     """
  81     super(ExpectationComparisons, self).__init__()
  82     time_start = int(time.time())
  83     if builder_regex_list != None:
  84       self.set_match_builders_pattern_list(builder_regex_list)
  85     self._image_diff_db = image_diff_db
  86     self._diff_base_url = (
  87         diff_base_url or
  88         url_utils.create_filepath_url(image_diff_db.storage_root))
  89     self._actuals_root = actuals_root
  90     self._expected_root = expected_root
  91     self._ignore_failures_on_these_tests = []
  92     if ignore_failures_file:
  93       self._ignore_failures_on_these_tests = (
  94           ExpectationComparisons._read_noncomment_lines(
  95               os.path.join(expected_root, ignore_failures_file)))
  96     self._load_actual_and_expected()
  97     self._timestamp = int(time.time())
  98     logging.info('Results complete; took %d seconds.' %
  99                  (self._timestamp - time_start))
 100
 101   def edit_expectations(self, modifications):
 102     """Edit the expectations stored within this object and write them back
 103     to disk.
 104
 105     Note that this will NOT update the results stored in self._results[] ;
 106     in order to see those updates, you must instantiate a new
 107     ExpectationComparisons object based on the (now updated) files on disk.
 108
 109     Args:
 110       modifications: a list of dictionaries, one for each expectation to update:
 111
 112          [
 113            {
 114              imagepair.KEY__IMAGEPAIRS__EXPECTATIONS: {
 115                results.KEY__EXPECTATIONS__BUGS: [123, 456],
 116                results.KEY__EXPECTATIONS__IGNOREFAILURE: false,
 117                results.KEY__EXPECTATIONS__REVIEWED: true,
 118              },
 119              imagepair.KEY__IMAGEPAIRS__EXTRACOLUMNS: {
 120                results.KEY__EXTRACOLUMNS__BUILDER: 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug',
 121                results.KEY__EXTRACOLUMNS__CONFIG: '8888',
 122                results.KEY__EXTRACOLUMNS__TEST: 'bigmatrix',
 123              },
 124              results.KEY__IMAGEPAIRS__IMAGE_B_URL: 'bitmap-64bitMD5/bigmatrix/10894408024079689926.png',
 125            },
 126            ...
 127          ]
 128
 129     """
 130     expected_builder_dicts = self._read_builder_dicts_from_root(
 131         self._expected_root)
 132     for mod in modifications:
 133       image_name = results.IMAGE_FILENAME_FORMATTER % (
 134           mod[imagepair.KEY__IMAGEPAIRS__EXTRACOLUMNS]
 135              [results.KEY__EXTRACOLUMNS__TEST],
 136           mod[imagepair.KEY__IMAGEPAIRS__EXTRACOLUMNS]
 137              [results.KEY__EXTRACOLUMNS__CONFIG])
 138       _, hash_type, hash_digest = gm_json.SplitGmRelativeUrl(
 139           mod[imagepair.KEY__IMAGEPAIRS__IMAGE_B_URL])
 140       allowed_digests = [[hash_type, int(hash_digest)]]
 141       new_expectations = {
 142           gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS: allowed_digests,
 143       }
 144       for field in EXPECTATION_FIELDS_PASSED_THRU_VERBATIM:
 145         value = mod[imagepair.KEY__IMAGEPAIRS__EXPECTATIONS].get(field)
 146         if value is not None:
 147           new_expectations[field] = value
 148       builder_dict = expected_builder_dicts[
 149           mod[imagepair.KEY__IMAGEPAIRS__EXTRACOLUMNS]
 150              [results.KEY__EXTRACOLUMNS__BUILDER]]
 151       builder_expectations = builder_dict.get(gm_json.JSONKEY_EXPECTEDRESULTS)
 152       if not builder_expectations:
 153         builder_expectations = {}
 154         builder_dict[gm_json.JSONKEY_EXPECTEDRESULTS] = builder_expectations
 155       builder_expectations[image_name] = new_expectations
 156     ExpectationComparisons._write_dicts_to_root(
 157         expected_builder_dicts, self._expected_root)
 158
 159   @staticmethod
 160   def _write_dicts_to_root(meta_dict, root, pattern='*.json'):
 161     """Write all per-builder dictionaries within meta_dict to files under
 162     the root path.
 163
 164     Security note: this will only write to files that already exist within
 165     the root path (as found by os.walk() within root), so we don't need to
 166     worry about malformed content writing to disk outside of root.
 167     However, the data written to those files is not double-checked, so it
 168     could contain poisonous data.
 169
 170     Args:
 171       meta_dict: a builder-keyed meta-dictionary containing all the JSON
 172                  dictionaries we want to write out
 173       root: path to root of directory tree within which to write files
 174       pattern: which files to write within root (fnmatch-style pattern)
 175
 176     Raises:
 177       IOError if root does not refer to an existing directory
 178       KeyError if the set of per-builder dictionaries written out was
 179                different than expected
 180     """
 181     if not os.path.isdir(root):
 182       raise IOError('no directory found at path %s' % root)
 183     actual_builders_written = []
 184     for dirpath, _, filenames in os.walk(root):
 185       for matching_filename in fnmatch.filter(filenames, pattern):
 186         builder = os.path.basename(dirpath)
 187         per_builder_dict = meta_dict.get(builder)
 188         if per_builder_dict is not None:
 189           fullpath = os.path.join(dirpath, matching_filename)
 190           gm_json.WriteToFile(per_builder_dict, fullpath)
 191           actual_builders_written.append(builder)
 192
 193     # Check: did we write out the set of per-builder dictionaries we
 194     # expected to?
 195     expected_builders_written = sorted(meta_dict.keys())
 196     actual_builders_written.sort()
 197     if expected_builders_written != actual_builders_written:
 198       raise KeyError(
 199           'expected to write dicts for builders %s, but actually wrote them '
 200           'for builders %s' % (
 201               expected_builders_written, actual_builders_written))
 202
 203   def _load_actual_and_expected(self):
 204     """Loads the results of all tests, across all builders (based on the
 205     files within self._actuals_root and self._expected_root),
 206     and stores them in self._results.
 207     """
 208     logging.info('Reading actual-results JSON files from %s...' %
 209                  self._actuals_root)
 210     actual_builder_dicts = self._read_builder_dicts_from_root(
 211         self._actuals_root)
 212     logging.info('Reading expected-results JSON files from %s...' %
 213                  self._expected_root)
 214     expected_builder_dicts = self._read_builder_dicts_from_root(
 215         self._expected_root)
 216
 217     all_image_pairs = imagepairset.ImagePairSet(
 218         descriptions=IMAGEPAIR_SET_DESCRIPTIONS,
 219         diff_base_url=self._diff_base_url)
 220     failing_image_pairs = imagepairset.ImagePairSet(
 221         descriptions=IMAGEPAIR_SET_DESCRIPTIONS,
 222         diff_base_url=self._diff_base_url)
 223
 224     # Override settings for columns that should be filtered using freeform text.
 225     for column_id in FREEFORM_COLUMN_IDS:
 226       factory = column.ColumnHeaderFactory(
 227           header_text=column_id, use_freeform_filter=True)
 228       all_image_pairs.set_column_header_factory(
 229           column_id=column_id, column_header_factory=factory)
 230       failing_image_pairs.set_column_header_factory(
 231           column_id=column_id, column_header_factory=factory)
 232
 233     all_image_pairs.ensure_extra_column_values_in_summary(
 234         column_id=results.KEY__EXTRACOLUMNS__RESULT_TYPE, values=[
 235             results.KEY__RESULT_TYPE__FAILED,
 236             results.KEY__RESULT_TYPE__FAILUREIGNORED,
 237             results.KEY__RESULT_TYPE__NOCOMPARISON,
 238             results.KEY__RESULT_TYPE__SUCCEEDED,
 239         ])
 240     failing_image_pairs.ensure_extra_column_values_in_summary(
 241         column_id=results.KEY__EXTRACOLUMNS__RESULT_TYPE, values=[
 242             results.KEY__RESULT_TYPE__FAILED,
 243             results.KEY__RESULT_TYPE__FAILUREIGNORED,
 244             results.KEY__RESULT_TYPE__NOCOMPARISON,
 245         ])
 246
 247     # Only consider builders we have both expected and actual results for.
 248     # Fixes http://skbug.com/2486 ('rebaseline_server shows actual results
 249     # (but not expectations) for Test-Ubuntu12-ShuttleA-NoGPU-x86_64-Debug
 250     # builder')
 251     actual_builder_set = set(actual_builder_dicts.keys())
 252     expected_builder_set = set(expected_builder_dicts.keys())
 253     builders = sorted(actual_builder_set.intersection(expected_builder_set))
 254
 255     num_builders = len(builders)
 256     builder_num = 0
 257     for builder in builders:
 258       builder_num += 1
 259       logging.info('Generating pixel diffs for builder #%d of %d, "%s"...' %
 260                    (builder_num, num_builders, builder))
 261       actual_results_for_this_builder = (
 262           actual_builder_dicts[builder][gm_json.JSONKEY_ACTUALRESULTS])
 263       for result_type in sorted(actual_results_for_this_builder.keys()):
 264         results_of_this_type = actual_results_for_this_builder[result_type]
 265         if not results_of_this_type:
 266           continue
 267         for image_name in sorted(results_of_this_type.keys()):
 268           (test, config) = results.IMAGE_FILENAME_RE.match(image_name).groups()
 269           actual_image_relative_url = (
 270               ExpectationComparisons._create_relative_url(
 271                   hashtype_and_digest=results_of_this_type[image_name],
 272                   test_name=test))
 273
 274           # Default empty expectations; overwrite these if we find any real ones
 275           expectations_per_test = None
 276           expected_image_relative_url = None
 277           expectations_dict = None
 278           try:
 279             expectations_per_test = (
 280                 expected_builder_dicts
 281                 [builder][gm_json.JSONKEY_EXPECTEDRESULTS][image_name])
 282             # TODO(epoger): assumes a single allowed digest per test, which is
 283             # fine; see https://code.google.com/p/skia/issues/detail?id=1787
 284             expected_image_hashtype_and_digest = (
 285                 expectations_per_test
 286                 [gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS][0])
 287             expected_image_relative_url = (
 288                 ExpectationComparisons._create_relative_url(
 289                     hashtype_and_digest=expected_image_hashtype_and_digest,
 290                     test_name=test))
 291             expectations_dict = {}
 292             for field in EXPECTATION_FIELDS_PASSED_THRU_VERBATIM:
 293               expectations_dict[field] = expectations_per_test.get(field)
 294           except (KeyError, TypeError):
 295             # There are several cases in which we would expect to find
 296             # no expectations for a given test:
 297             #
 298             # 1. result_type == NOCOMPARISON
 299             #   There are no expectations for this test yet!
 300             #
 301             # 2. alternate rendering mode failures (e.g. serialized)
 302             #   In cases like
 303             #   https://code.google.com/p/skia/issues/detail?id=1684
 304             #   ('tileimagefilter GM test failing in serialized render mode'),
 305             #   the gm-actuals will list a failure for the alternate
 306             #   rendering mode even though we don't have explicit expectations
 307             #   for the test (the implicit expectation is that it must
 308             #   render the same in all rendering modes).
 309             #
 310             # Don't log type 1, because it is common.
 311             # Log other types, because they are rare and we should know about
 312             # them, but don't throw an exception, because we need to keep our
 313             # tools working in the meanwhile!
 314             if result_type != results.KEY__RESULT_TYPE__NOCOMPARISON:
 315               logging.warning('No expectations found for test: %s' % {
 316                   results.KEY__EXTRACOLUMNS__BUILDER: builder,
 317                   results.KEY__EXTRACOLUMNS__RESULT_TYPE: result_type,
 318                   'image_name': image_name,
 319                   })
 320
 321           # If this test was recently rebaselined, it will remain in
 322           # the 'failed' set of actuals until all the bots have
 323           # cycled (although the expectations have indeed been set
 324           # from the most recent actuals).  Treat these as successes
 325           # instead of failures.
 326           #
 327           # TODO(epoger): Do we need to do something similar in
 328           # other cases, such as when we have recently marked a test
 329           # as ignoreFailure but it still shows up in the 'failed'
 330           # category?  Maybe we should not rely on the result_type
 331           # categories recorded within the gm_actuals AT ALL, and
 332           # instead evaluate the result_type ourselves based on what
 333           # we see in expectations vs actual checksum?
 334           if expected_image_relative_url == actual_image_relative_url:
 335             updated_result_type = results.KEY__RESULT_TYPE__SUCCEEDED
 336           elif ((result_type == results.KEY__RESULT_TYPE__FAILED) and
 337                 (test in self._ignore_failures_on_these_tests)):
 338             updated_result_type = results.KEY__RESULT_TYPE__FAILUREIGNORED
 339           else:
 340             updated_result_type = result_type
 341           extra_columns_dict = {
 342               results.KEY__EXTRACOLUMNS__RESULT_TYPE: updated_result_type,
 343               results.KEY__EXTRACOLUMNS__BUILDER: builder,
 344               results.KEY__EXTRACOLUMNS__TEST: test,
 345               results.KEY__EXTRACOLUMNS__CONFIG: config,
 346           }
 347           try:
 348             image_pair = imagepair.ImagePair(
 349                 image_diff_db=self._image_diff_db,
 350                 base_url=gm_json.GM_ACTUALS_ROOT_HTTP_URL,
 351                 imageA_relative_url=expected_image_relative_url,
 352                 imageB_relative_url=actual_image_relative_url,
 353                 expectations=expectations_dict,
 354                 extra_columns=extra_columns_dict)
 355             all_image_pairs.add_image_pair(image_pair)
 356             if updated_result_type != results.KEY__RESULT_TYPE__SUCCEEDED:
 357               failing_image_pairs.add_image_pair(image_pair)
 358           except Exception:
 359             logging.exception('got exception while creating new ImagePair')
 360
 361     # pylint: disable=W0201
 362     self._results = {
 363       results.KEY__HEADER__RESULTS_ALL: all_image_pairs.as_dict(
 364           column_ids_in_order=ORDERED_COLUMN_IDS),
 365       results.KEY__HEADER__RESULTS_FAILURES: failing_image_pairs.as_dict(
 366           column_ids_in_order=ORDERED_COLUMN_IDS),
 367     }
 368
 369
 370 def main():
 371   logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
 372                       datefmt='%m/%d/%Y %H:%M:%S',
 373                       level=logging.INFO)
 374   parser = argparse.ArgumentParser()
 375   parser.add_argument(
 376       '--actuals', default=results.DEFAULT_ACTUALS_DIR,
 377       help='Directory containing all actual-result JSON files; defaults to '
 378       '\'%(default)s\' .')
 379   parser.add_argument(
 380       '--expectations', default=DEFAULT_EXPECTATIONS_DIR,
 381       help='Directory containing all expected-result JSON files; defaults to '
 382       '\'%(default)s\' .')
 383   parser.add_argument(
 384       '--ignore-failures-file', default=DEFAULT_IGNORE_FAILURES_FILE,
 385       help='If a file with this name is found within the EXPECTATIONS dir, '
 386       'ignore failures for any tests listed in the file; defaults to '
 387       '\'%(default)s\' .')
 388   parser.add_argument(
 389       '--outfile', required=True,
 390       help='File to write result summary into, in JSON format.')
 391   parser.add_argument(
 392       '--results', default=results.KEY__HEADER__RESULTS_FAILURES,
 393       help='Which result types to include. Defaults to \'%(default)s\'; '
 394       'must be one of ' +
 395       str([results.KEY__HEADER__RESULTS_FAILURES,
 396            results.KEY__HEADER__RESULTS_ALL]))
 397   parser.add_argument(
 398       '--workdir', default=results.DEFAULT_GENERATED_IMAGES_ROOT,
 399       help='Directory within which to download images and generate diffs; '
 400       'defaults to \'%(default)s\' .')
 401   args = parser.parse_args()
 402   image_diff_db = imagediffdb.ImageDiffDB(storage_root=args.workdir)
 403   results_obj = ExpectationComparisons(
 404       image_diff_db=image_diff_db,
 405       actuals_root=args.actuals,
 406       expected_root=args.expectations,
 407       ignore_failures_file=args.ignore_failures_file)
 408   gm_json.WriteToFile(
 409       results_obj.get_packaged_results_of_type(results_type=args.results),
 410       args.outfile)
 411
 412
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()