src/third_party/skia/gm/rebaseline_server/compare_rendered_pictures.py

   1 #!/usr/bin/python
   2
   3 """
   4 Copyright 2014 Google Inc.
   5
   6 Use of this source code is governed by a BSD-style license that can be
   7 found in the LICENSE file.
   8
   9 Compare results of two render_pictures runs.
  10
  11 TODO(epoger): Start using this module to compare ALL images (whether they
  12 were generated from GMs or SKPs), and rename it accordingly.
  13 """
  14
  15 # System-level imports
  16 import logging
  17 import os
  18 import shutil
  19 import subprocess
  20 import tempfile
  21 import time
  22
  23 # Must fix up PYTHONPATH before importing from within Skia
  24 import fix_pythonpath  # pylint: disable=W0611
  25
  26 # Imports from within Skia
  27 from py.utils import git_utils
  28 from py.utils import gs_utils
  29 from py.utils import url_utils
  30 import buildbot_globals
  31 import column
  32 import gm_json
  33 import imagediffdb
  34 import imagepair
  35 import imagepairset
  36 import results
  37
  38 # URL under which all render_pictures images can be found in Google Storage.
  39 #
  40 # TODO(epoger): In order to allow live-view of GMs and other images, read this
  41 # from the input summary files, or allow the caller to set it within the
  42 # GET_live_results call.
  43 DEFAULT_IMAGE_BASE_GS_URL = 'gs://' + buildbot_globals.Get('skp_images_bucket')
  44
  45 # Column descriptors, and display preferences for them.
  46 COLUMN__RESULT_TYPE = results.KEY__EXTRACOLUMNS__RESULT_TYPE
  47 COLUMN__SOURCE_SKP = 'sourceSkpFile'
  48 COLUMN__TILED_OR_WHOLE = 'tiledOrWhole'
  49 COLUMN__TILENUM = 'tilenum'
  50 FREEFORM_COLUMN_IDS = [
  51     COLUMN__SOURCE_SKP,
  52     COLUMN__TILENUM,
  53 ]
  54 ORDERED_COLUMN_IDS = [
  55     COLUMN__RESULT_TYPE,
  56     COLUMN__SOURCE_SKP,
  57     COLUMN__TILED_OR_WHOLE,
  58     COLUMN__TILENUM,
  59 ]
  60
  61 # A special "repo:" URL type that we use to refer to Skia repo contents.
  62 # (Useful for comparing against expectations files we store in our repo.)
  63 REPO_URL_PREFIX = 'repo:'
  64 REPO_BASEPATH = os.path.abspath(os.path.join(
  65     os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir))
  66
  67 # Which sections within a JSON summary file can contain results.
  68 ALLOWED_SECTION_NAMES = [
  69     gm_json.JSONKEY_ACTUALRESULTS,
  70     gm_json.JSONKEY_EXPECTEDRESULTS,
  71 ]
  72
  73
  74 class RenderedPicturesComparisons(results.BaseComparisons):
  75   """Loads results from multiple render_pictures runs into an ImagePairSet.
  76   """
  77
  def __init__(self,
               setA_dirs, setB_dirs,
               setA_section, setB_section,
               image_diff_db,
               image_base_gs_url=DEFAULT_IMAGE_BASE_GS_URL, diff_base_url=None,
               setA_label=None, setB_label=None,
               gs=None, truncate_results=False, prefetch_only=False,
               download_all_images=False):
    """Constructor: gathers JSON summaries for both sets and requests diffs.

    Construction may take a while.  Afterwards,
    get_packaged_results_of_type() can be called to quickly retrieve the
    results... unless prefetch_only was True, in which case the ImageDiffDB
    cache is warmed up asynchronously and self._results is left empty.

    All summaries are staged in a temporary directory, which is deleted
    before the constructor returns (whether or not it succeeds).

    Args:
      setA_dirs: list of root directories whose JSON summaries make up setA;
          each may be a gs:// URL, a special "repo:" URL, or a local filepath.
      setB_dirs: same as setA_dirs, but for setB.
      setA_section: section of the setA summaries to examine; must be one of
          ALLOWED_SECTION_NAMES
      setB_section: section of the setB summaries to examine; must be one of
          ALLOWED_SECTION_NAMES
      image_diff_db: ImageDiffDB instance
      image_base_gs_url: "gs://" URL of the Google Storage bucket/dir holding
          all render_pictures result images.  Images are read from here for
          comparison, and the location is included in the ImagePairSet (as an
          HTTP URL) so its consumers know where to download images from.
      diff_base_url: base URL under which the client should look for diff
          images; if not specified, a "file:///" URL representation of
          image_diff_db's storage_root is used
      setA_label: description of the results in setA; if None, a reasonable
          default is chosen
      setB_label: description of the results in setB; if None, a reasonable
          default is chosen
      gs: GSUtils instance used to download summary files
      truncate_results: FOR MANUAL TESTING: if True, only a subset of the
          images is processed, to speed up testing.
      prefetch_only: if True, return as quickly as possible with empty
          self._results (files are queued for processing but not waited on);
          otherwise, block until the results have been assembled and
          recorded in self._results.
      download_all_images: if True, download every image, even those not
          needed to generate diffs.  Much slower, but useful for warming up
          the bitmap cache on local disk.
    """
    super(RenderedPicturesComparisons, self).__init__()
    self._image_diff_db = image_diff_db
    self._image_base_gs_url = image_base_gs_url
    if diff_base_url:
      self._diff_base_url = diff_base_url
    else:
      self._diff_base_url = url_utils.create_filepath_url(
          image_diff_db.storage_root)
    self._gs = gs
    self.truncate_results = truncate_results
    self._prefetch_only = prefetch_only
    self._download_all_images = download_all_images

    # When the two sets come from different section types, the section names
    # themselves make good default labels.
    if setA_section == setB_section:
      fallback_label_A, fallback_label_B = 'setA', 'setB'
    else:
      fallback_label_A, fallback_label_B = setA_section, setB_section
    self._setA_label = setA_label or fallback_label_A
    self._setB_label = setB_label or fallback_label_B

    def _fetch_set(source_dirs, dest_root):
      # Copies each source dir into dest_root, in order, and returns whatever
      # _get_repo_revision() reported for the last one (None if no dirs).
      revision = None
      for one_dir in source_dirs:
        self._copy_dir_contents(source_dir=one_dir, dest_dir=dest_root)
        # TODO(stephana): There is a potential race condition here... we copy
        # the contents out of the source dir, and THEN we get its commithash.
        # If the source dir points at a git checkout, and that checkout is
        # updated (by a different thread/process) during this operation,
        # then the contents and commithash will be out of sync.
        revision = self._get_repo_revision(
            source_dir=one_dir, assert_if_not=revision)
      return revision

    scratch_dir = tempfile.mkdtemp()
    try:
      setA_root = os.path.join(scratch_dir, 'setA')
      setB_root = os.path.join(scratch_dir, 'setB')
      setA_repo_revision = _fetch_set(setA_dirs, setA_root)
      setB_repo_revision = _fetch_set(setB_dirs, setB_root)

      self._setA_descriptions = {
          results.KEY__SET_DESCRIPTIONS__DIR: setA_dirs,
          results.KEY__SET_DESCRIPTIONS__REPO_REVISION: setA_repo_revision,
          results.KEY__SET_DESCRIPTIONS__SECTION: setA_section,
      }
      self._setB_descriptions = {
          results.KEY__SET_DESCRIPTIONS__DIR: setB_dirs,
          results.KEY__SET_DESCRIPTIONS__REPO_REVISION: setB_repo_revision,
          results.KEY__SET_DESCRIPTIONS__SECTION: setB_section,
      }

      started_at = int(time.time())
      self._results = self._load_result_pairs(
          setA_root=setA_root, setB_root=setB_root,
          setA_section=setA_section, setB_section=setB_section)
      # The timestamp is only recorded when there are results to report
      # (there are none in prefetch_only mode).
      if self._results:
        self._timestamp = int(time.time())
        logging.info('Number of download file collisions: %s' %
                     imagediffdb.global_file_collisions)
        logging.info('Results complete; took %d seconds.' %
                     (self._timestamp - started_at))
    finally:
      shutil.rmtree(scratch_dir)
 192
 193   def _load_result_pairs(self, setA_root, setB_root,
 194                          setA_section, setB_section):
 195     """Loads all JSON image summaries from 2 directory trees and compares them.
 196
 197     TODO(stephana): This method is only called from within __init__(); it might
 198     make more sense to just roll the content of this method into __init__().
 199
 200     Args:
 201       setA_root: root directory containing JSON summaries of rendering results
 202       setB_root: root directory containing JSON summaries of rendering results
 203       setA_section: which section (gm_json.JSONKEY_ACTUALRESULTS or
 204           gm_json.JSONKEY_EXPECTEDRESULTS) to load from the summaries in setA
 205       setB_section: which section (gm_json.JSONKEY_ACTUALRESULTS or
 206           gm_json.JSONKEY_EXPECTEDRESULTS) to load from the summaries in setB
 207
 208     Returns the summary of all image diff results (or None, depending on
 209     self._prefetch_only).
 210     """
 211     logging.info('Reading JSON image summaries from dirs %s and %s...' % (
 212         setA_root, setB_root))
 213     setA_dicts = self._read_dicts_from_root(setA_root)
 214     setB_dicts = self._read_dicts_from_root(setB_root)
 215     logging.info('Comparing summary dicts...')
 216
 217     all_image_pairs = imagepairset.ImagePairSet(
 218         descriptions=(self._setA_label, self._setB_label),
 219         diff_base_url=self._diff_base_url)
 220     failing_image_pairs = imagepairset.ImagePairSet(
 221         descriptions=(self._setA_label, self._setB_label),
 222         diff_base_url=self._diff_base_url)
 223
 224     # Override settings for columns that should be filtered using freeform text.
 225     for column_id in FREEFORM_COLUMN_IDS:
 226       factory = column.ColumnHeaderFactory(
 227           header_text=column_id, use_freeform_filter=True)
 228       all_image_pairs.set_column_header_factory(
 229           column_id=column_id, column_header_factory=factory)
 230       failing_image_pairs.set_column_header_factory(
 231           column_id=column_id, column_header_factory=factory)
 232
 233     all_image_pairs.ensure_extra_column_values_in_summary(
 234         column_id=COLUMN__RESULT_TYPE, values=[
 235             results.KEY__RESULT_TYPE__FAILED,
 236             results.KEY__RESULT_TYPE__NOCOMPARISON,
 237             results.KEY__RESULT_TYPE__SUCCEEDED,
 238         ])
 239     failing_image_pairs.ensure_extra_column_values_in_summary(
 240         column_id=COLUMN__RESULT_TYPE, values=[
 241             results.KEY__RESULT_TYPE__FAILED,
 242             results.KEY__RESULT_TYPE__NOCOMPARISON,
 243         ])
 244
 245     logging.info('Starting to add imagepairs to queue.')
 246     self._image_diff_db.log_queue_size_if_changed(limit_verbosity=False)
 247
 248     union_dict_paths = sorted(set(setA_dicts.keys() + setB_dicts.keys()))
 249     num_union_dict_paths = len(union_dict_paths)
 250     dict_num = 0
 251     for dict_path in union_dict_paths:
 252       dict_num += 1
 253       logging.info(
 254           'Asynchronously requesting pixel diffs for dict #%d of %d, "%s"...' %
 255           (dict_num, num_union_dict_paths, dict_path))
 256
 257       dictA = self.get_default(setA_dicts, None, dict_path)
 258       self._validate_dict_version(dictA)
 259       dictA_results = self.get_default(dictA, {}, setA_section)
 260
 261       dictB = self.get_default(setB_dicts, None, dict_path)
 262       self._validate_dict_version(dictB)
 263       dictB_results = self.get_default(dictB, {}, setB_section)
 264
 265       skp_names = sorted(set(dictA_results.keys() + dictB_results.keys()))
 266       # Just for manual testing... truncate to an arbitrary subset.
 267       if self.truncate_results:
 268         skp_names = skp_names[1:3]
 269       for skp_name in skp_names:
 270         imagepairs_for_this_skp = []
 271
 272         whole_image_A = self.get_default(
 273             dictA_results, None,
 274             skp_name, gm_json.JSONKEY_SOURCE_WHOLEIMAGE)
 275         whole_image_B = self.get_default(
 276             dictB_results, None,
 277             skp_name, gm_json.JSONKEY_SOURCE_WHOLEIMAGE)
 278         imagepairs_for_this_skp.append(self._create_image_pair(
 279             image_dict_A=whole_image_A, image_dict_B=whole_image_B,
 280             source_skp_name=skp_name, tilenum=None))
 281
 282         tiled_images_A = self.get_default(
 283             dictA_results, [],
 284             skp_name, gm_json.JSONKEY_SOURCE_TILEDIMAGES)
 285         tiled_images_B = self.get_default(
 286             dictB_results, [],
 287             skp_name, gm_json.JSONKEY_SOURCE_TILEDIMAGES)
 288         if tiled_images_A or tiled_images_B:
 289           num_tiles_A = len(tiled_images_A)
 290           num_tiles_B = len(tiled_images_B)
 291           num_tiles = max(num_tiles_A, num_tiles_B)
 292           for tile_num in range(num_tiles):
 293             imagepairs_for_this_skp.append(self._create_image_pair(
 294                 image_dict_A=(tiled_images_A[tile_num]
 295                               if tile_num < num_tiles_A else None),
 296                 image_dict_B=(tiled_images_B[tile_num]
 297                               if tile_num < num_tiles_B else None),
 298                 source_skp_name=skp_name, tilenum=tile_num))
 299
 300         for one_imagepair in imagepairs_for_this_skp:
 301           if one_imagepair:
 302             all_image_pairs.add_image_pair(one_imagepair)
 303             result_type = one_imagepair.extra_columns_dict\
 304                 [COLUMN__RESULT_TYPE]
 305             if result_type != results.KEY__RESULT_TYPE__SUCCEEDED:
 306               failing_image_pairs.add_image_pair(one_imagepair)
 307
 308     logging.info('Finished adding imagepairs to queue.')
 309     self._image_diff_db.log_queue_size_if_changed(limit_verbosity=False)
 310
 311     if self._prefetch_only:
 312       return None
 313     else:
 314       return {
 315           results.KEY__HEADER__RESULTS_ALL: all_image_pairs.as_dict(
 316               column_ids_in_order=ORDERED_COLUMN_IDS),
 317           results.KEY__HEADER__RESULTS_FAILURES: failing_image_pairs.as_dict(
 318               column_ids_in_order=ORDERED_COLUMN_IDS),
 319       }
 320
 321   def _validate_dict_version(self, result_dict):
 322     """Raises Exception if the dict is not the type/version we know how to read.
 323
 324     Args:
 325       result_dict: dictionary holding output of render_pictures; if None,
 326           this method will return without raising an Exception
 327     """
 328     expected_header_type = 'ChecksummedImages'
 329     expected_header_revision = 1
 330
 331     if result_dict == None:
 332       return
 333     header = result_dict[gm_json.JSONKEY_HEADER]
 334     header_type = header[gm_json.JSONKEY_HEADER_TYPE]
 335     if header_type != expected_header_type:
 336       raise Exception('expected header_type "%s", but got "%s"' % (
 337           expected_header_type, header_type))
 338     header_revision = header[gm_json.JSONKEY_HEADER_REVISION]
 339     if header_revision != expected_header_revision:
 340       raise Exception('expected header_revision %d, but got %d' % (
 341           expected_header_revision, header_revision))
 342
 343   def _create_image_pair(self, image_dict_A, image_dict_B, source_skp_name,
 344                          tilenum):
 345     """Creates an ImagePair object for this pair of images.
 346
 347     Args:
 348       image_dict_A: dict with JSONKEY_IMAGE_* keys, or None if no image
 349       image_dict_B: dict with JSONKEY_IMAGE_* keys, or None if no image
 350       source_skp_name: string; name of the source SKP file
 351       tilenum: which tile, or None if a wholeimage
 352
 353     Returns:
 354       An ImagePair object, or None if both image_dict_A and image_dict_B are
 355       None.
 356     """
 357     if (not image_dict_A) and (not image_dict_B):
 358       return None
 359
 360     def _checksum_and_relative_url(dic):
 361       if dic:
 362         return ((dic[gm_json.JSONKEY_IMAGE_CHECKSUMALGORITHM],
 363                  dic[gm_json.JSONKEY_IMAGE_CHECKSUMVALUE]),
 364                 dic[gm_json.JSONKEY_IMAGE_FILEPATH])
 365       else:
 366         return None, None
 367
 368     imageA_checksum, imageA_relative_url = _checksum_and_relative_url(
 369         image_dict_A)
 370     imageB_checksum, imageB_relative_url = _checksum_and_relative_url(
 371         image_dict_B)
 372
 373     if not imageA_checksum:
 374       result_type = results.KEY__RESULT_TYPE__NOCOMPARISON
 375     elif not imageB_checksum:
 376       result_type = results.KEY__RESULT_TYPE__NOCOMPARISON
 377     elif imageA_checksum == imageB_checksum:
 378       result_type = results.KEY__RESULT_TYPE__SUCCEEDED
 379     else:
 380       result_type = results.KEY__RESULT_TYPE__FAILED
 381
 382     extra_columns_dict = {
 383         COLUMN__RESULT_TYPE: result_type,
 384         COLUMN__SOURCE_SKP: source_skp_name,
 385     }
 386     if tilenum == None:
 387       extra_columns_dict[COLUMN__TILED_OR_WHOLE] = 'whole'
 388       extra_columns_dict[COLUMN__TILENUM] = 'N/A'
 389     else:
 390       extra_columns_dict[COLUMN__TILED_OR_WHOLE] = 'tiled'
 391       extra_columns_dict[COLUMN__TILENUM] = str(tilenum)
 392
 393     try:
 394       return imagepair.ImagePair(
 395           image_diff_db=self._image_diff_db,
 396           base_url=self._image_base_gs_url,
 397           imageA_relative_url=imageA_relative_url,
 398           imageB_relative_url=imageB_relative_url,
 399           extra_columns=extra_columns_dict,
 400           download_all_images=self._download_all_images)
 401     except (KeyError, TypeError):
 402       logging.exception(
 403           'got exception while creating ImagePair for'
 404           ' urlPair=("%s","%s"), source_skp_name="%s", tilenum="%s"' % (
 405               imageA_relative_url, imageB_relative_url, source_skp_name,
 406               tilenum))
 407       return None
 408
 409   def _copy_dir_contents(self, source_dir, dest_dir):
 410     """Copy all contents of source_dir into dest_dir, recursing into subdirs.
 411
 412     Args:
 413       source_dir: path to source dir (GS URL, local filepath, or a special
 414           "repo:" URL type that points at a file within our Skia checkout)
 415       dest_dir: path to destination dir (local filepath)
 416
 417     The copy operates as a "merge with overwrite": any files in source_dir will
 418     be "overlaid" on top of the existing content in dest_dir.  Existing files
 419     with the same names will be overwritten.
 420     """
 421     if gs_utils.GSUtils.is_gs_url(source_dir):
 422       (bucket, path) = gs_utils.GSUtils.split_gs_url(source_dir)
 423       self._gs.download_dir_contents(source_bucket=bucket, source_dir=path,
 424                                      dest_dir=dest_dir)
 425     elif source_dir.lower().startswith(REPO_URL_PREFIX):
 426       repo_dir = os.path.join(REPO_BASEPATH, source_dir[len(REPO_URL_PREFIX):])
 427       shutil.copytree(repo_dir, dest_dir)
 428     else:
 429       shutil.copytree(source_dir, dest_dir)
 430
 431   def _get_repo_revision(self, source_dir, assert_if_not=None):
 432     """Get the commit hash of source_dir, IF it refers to a git checkout.
 433
 434     Args:
 435       source_dir: path to source dir (GS URL, local filepath, or a special
 436           "repo:" URL type that points at a file within our Skia checkout;
 437           only the "repo:" URL type will have a commit hash.
 438       assert_if_not: if not None, raise an Exception if source_dir has a
 439           commit hash and that hash is not equal to this
 440     """
 441     if source_dir.lower().startswith(REPO_URL_PREFIX):
 442       repo_dir = os.path.join(REPO_BASEPATH, source_dir[len(REPO_URL_PREFIX):])
 443       revision = subprocess.check_output(
 444           args=[git_utils.GIT, 'rev-parse', 'HEAD'], cwd=repo_dir).strip()
 445       if assert_if_not and revision != assert_if_not:
 446         raise Exception('found revision %s that did not match %s' % (
 447             revision, assert_if_not))
 448       return revision
 449     else:
 450       return None