4 Copyright 2014 Google Inc.
6 Use of this source code is governed by a BSD-style license that can be
7 found in the LICENSE file.
9 Compare results of two render_pictures runs.
11 TODO(epoger): Start using this module to compare ALL images (whether they
12 were generated from GMs or SKPs), and rename it accordingly.
15 # System-level imports
23 # Must fix up PYTHONPATH before importing from within Skia
24 import fix_pythonpath # pylint: disable=W0611
26 # Imports from within Skia
27 from py.utils import git_utils
28 from py.utils import gs_utils
29 from py.utils import url_utils
30 import buildbot_globals
38 # URL under which all render_pictures images can be found in Google Storage.
40 # TODO(epoger): In order to allow live-view of GMs and other images, read this
41 # from the input summary files, or allow the caller to set it within the
42 # GET_live_results call.
43 DEFAULT_IMAGE_BASE_GS_URL = 'gs://' + buildbot_globals.Get('skp_images_bucket')
45 # Column descriptors, and display preferences for them.
46 COLUMN__RESULT_TYPE = results.KEY__EXTRACOLUMNS__RESULT_TYPE
47 COLUMN__SOURCE_SKP = 'sourceSkpFile'
48 COLUMN__TILED_OR_WHOLE = 'tiledOrWhole'
49 COLUMN__TILENUM = 'tilenum'
50 FREEFORM_COLUMN_IDS = [
54 ORDERED_COLUMN_IDS = [
57 COLUMN__TILED_OR_WHOLE,
61 # A special "repo:" URL type that we use to refer to Skia repo contents.
62 # (Useful for comparing against expectations files we store in our repo.)
63 REPO_URL_PREFIX = 'repo:'
64 REPO_BASEPATH = os.path.abspath(os.path.join(
65 os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir))
67 # Which sections within a JSON summary file can contain results.
68 ALLOWED_SECTION_NAMES = [
69 gm_json.JSONKEY_ACTUALRESULTS,
70 gm_json.JSONKEY_EXPECTEDRESULTS,
74 class RenderedPicturesComparisons(results.BaseComparisons):
75 """Loads results from multiple render_pictures runs into an ImagePairSet.
80 setA_section, setB_section,
82 image_base_gs_url=DEFAULT_IMAGE_BASE_GS_URL, diff_base_url=None,
83 setA_label=None, setB_label=None,
84 gs=None, truncate_results=False, prefetch_only=False,
85 download_all_images=False):
86 """Constructor: downloads images and generates diffs.
88 Once the object has been created (which may take a while), you can call its
89 get_packaged_results_of_type() method to quickly retrieve the results...
90 unless you have set prefetch_only to True, in which case we will
91 asynchronously warm up the ImageDiffDB cache but not fill in self._results.
94 setA_dirs: list of root directories to copy all JSON summaries from,
95 and to use as setA within the comparisons. These directories may be
96 gs:// URLs, special "repo:" URLs, or local filepaths.
97 setB_dirs: list of root directories to copy all JSON summaries from,
98 and to use as setB within the comparisons. These directories may be
99 gs:// URLs, special "repo:" URLs, or local filepaths.
100 setA_section: which section within setA to examine; must be one of
101 ALLOWED_SECTION_NAMES
102 setB_section: which section within setB to examine; must be one of
103 ALLOWED_SECTION_NAMES
104 image_diff_db: ImageDiffDB instance
105 image_base_gs_url: "gs://" URL pointing at the Google Storage bucket/dir
106 under which all render_pictures result images can
107 be found; this will be used to read images for comparison within
108 this code, and included in the ImagePairSet (as an HTTP URL) so its
109 consumers know where to download the images from
110 diff_base_url: base URL within which the client should look for diff
111 images; if not specified, defaults to a "file:///" URL representation
112 of image_diff_db's storage_root
113 setA_label: description to use for results in setA; if None, will be
114 set to a reasonable default
115 setB_label: description to use for results in setB; if None, will be
116 set to a reasonable default
117 gs: instance of GSUtils object we can use to download summary files
118 truncate_results: FOR MANUAL TESTING: if True, truncate the set of images
119 we process, to speed up testing.
120 prefetch_only: if True, return the new object as quickly as possible
121 with empty self._results (just queue up all the files to process,
122 don't wait around for them to be processed and recorded); otherwise,
123 block until the results have been assembled and recorded in
125 download_all_images: if True, download all images, even if we don't
126 need them to generate diffs. This will take much longer to complete,
127 but is useful for warming up the bitmap cache on local disk.
129 super(RenderedPicturesComparisons, self).__init__()
130 self._image_diff_db = image_diff_db
131 self._image_base_gs_url = image_base_gs_url
132 self._diff_base_url = (
134 url_utils.create_filepath_url(image_diff_db.storage_root))
136 self.truncate_results = truncate_results
137 self._prefetch_only = prefetch_only
138 self._download_all_images = download_all_images
140 # If we are comparing two different section types, we can use those
141 # as the default labels for setA and setB.
142 if setA_section != setB_section:
143 self._setA_label = setA_label or setA_section
144 self._setB_label = setB_label or setB_section
146 self._setA_label = setA_label or 'setA'
147 self._setB_label = setB_label or 'setB'
149 tempdir = tempfile.mkdtemp()
151 setA_root = os.path.join(tempdir, 'setA')
152 setB_root = os.path.join(tempdir, 'setB')
153 setA_repo_revision = None
154 setB_repo_revision = None
155 for source_dir in setA_dirs:
156 self._copy_dir_contents(source_dir=source_dir, dest_dir=setA_root)
157 # TODO(stephana): There is a potential race condition here... we copy
158 # the contents out of the source_dir, and THEN we get the commithash
159 # of source_dir. If source_dir points at a git checkout, and that
160 # checkout is updated (by a different thread/process) during this
161 # operation, then the contents and commithash will be out of sync.
162 setA_repo_revision = self._get_repo_revision(
163 source_dir=source_dir, assert_if_not=setA_repo_revision)
164 for source_dir in setB_dirs:
165 self._copy_dir_contents(source_dir=source_dir, dest_dir=setB_root)
166 setB_repo_revision = self._get_repo_revision(
167 source_dir=source_dir, assert_if_not=setB_repo_revision)
169 self._setA_descriptions = {
170 results.KEY__SET_DESCRIPTIONS__DIR: setA_dirs,
171 results.KEY__SET_DESCRIPTIONS__REPO_REVISION: setA_repo_revision,
172 results.KEY__SET_DESCRIPTIONS__SECTION: setA_section,
174 self._setB_descriptions = {
175 results.KEY__SET_DESCRIPTIONS__DIR: setB_dirs,
176 results.KEY__SET_DESCRIPTIONS__REPO_REVISION: setB_repo_revision,
177 results.KEY__SET_DESCRIPTIONS__SECTION: setB_section,
180 time_start = int(time.time())
181 self._results = self._load_result_pairs(
182 setA_root=setA_root, setB_root=setB_root,
183 setA_section=setA_section, setB_section=setB_section)
185 self._timestamp = int(time.time())
186 logging.info('Number of download file collisions: %s' %
187 imagediffdb.global_file_collisions)
188 logging.info('Results complete; took %d seconds.' %
189 (self._timestamp - time_start))
191 shutil.rmtree(tempdir)
193 def _load_result_pairs(self, setA_root, setB_root,
194 setA_section, setB_section):
195 """Loads all JSON image summaries from 2 directory trees and compares them.
197 TODO(stephana): This method is only called from within __init__(); it might
198 make more sense to just roll the content of this method into __init__().
201 setA_root: root directory containing JSON summaries of rendering results
202 setB_root: root directory containing JSON summaries of rendering results
203 setA_section: which section (gm_json.JSONKEY_ACTUALRESULTS or
204 gm_json.JSONKEY_EXPECTEDRESULTS) to load from the summaries in setA
205 setB_section: which section (gm_json.JSONKEY_ACTUALRESULTS or
206 gm_json.JSONKEY_EXPECTEDRESULTS) to load from the summaries in setB
208 Returns the summary of all image diff results (or None, depending on
209 self._prefetch_only).
211 logging.info('Reading JSON image summaries from dirs %s and %s...' % (
212 setA_root, setB_root))
213 setA_dicts = self._read_dicts_from_root(setA_root)
214 setB_dicts = self._read_dicts_from_root(setB_root)
215 logging.info('Comparing summary dicts...')
217 all_image_pairs = imagepairset.ImagePairSet(
218 descriptions=(self._setA_label, self._setB_label),
219 diff_base_url=self._diff_base_url)
220 failing_image_pairs = imagepairset.ImagePairSet(
221 descriptions=(self._setA_label, self._setB_label),
222 diff_base_url=self._diff_base_url)
224 # Override settings for columns that should be filtered using freeform text.
225 for column_id in FREEFORM_COLUMN_IDS:
226 factory = column.ColumnHeaderFactory(
227 header_text=column_id, use_freeform_filter=True)
228 all_image_pairs.set_column_header_factory(
229 column_id=column_id, column_header_factory=factory)
230 failing_image_pairs.set_column_header_factory(
231 column_id=column_id, column_header_factory=factory)
233 all_image_pairs.ensure_extra_column_values_in_summary(
234 column_id=COLUMN__RESULT_TYPE, values=[
235 results.KEY__RESULT_TYPE__FAILED,
236 results.KEY__RESULT_TYPE__NOCOMPARISON,
237 results.KEY__RESULT_TYPE__SUCCEEDED,
239 failing_image_pairs.ensure_extra_column_values_in_summary(
240 column_id=COLUMN__RESULT_TYPE, values=[
241 results.KEY__RESULT_TYPE__FAILED,
242 results.KEY__RESULT_TYPE__NOCOMPARISON,
245 logging.info('Starting to add imagepairs to queue.')
246 self._image_diff_db.log_queue_size_if_changed(limit_verbosity=False)
248 union_dict_paths = sorted(set(setA_dicts.keys() + setB_dicts.keys()))
249 num_union_dict_paths = len(union_dict_paths)
251 for dict_path in union_dict_paths:
254 'Asynchronously requesting pixel diffs for dict #%d of %d, "%s"...' %
255 (dict_num, num_union_dict_paths, dict_path))
257 dictA = self.get_default(setA_dicts, None, dict_path)
258 self._validate_dict_version(dictA)
259 dictA_results = self.get_default(dictA, {}, setA_section)
261 dictB = self.get_default(setB_dicts, None, dict_path)
262 self._validate_dict_version(dictB)
263 dictB_results = self.get_default(dictB, {}, setB_section)
265 skp_names = sorted(set(dictA_results.keys() + dictB_results.keys()))
266 # Just for manual testing... truncate to an arbitrary subset.
267 if self.truncate_results:
268 skp_names = skp_names[1:3]
269 for skp_name in skp_names:
270 imagepairs_for_this_skp = []
272 whole_image_A = self.get_default(
274 skp_name, gm_json.JSONKEY_SOURCE_WHOLEIMAGE)
275 whole_image_B = self.get_default(
277 skp_name, gm_json.JSONKEY_SOURCE_WHOLEIMAGE)
278 imagepairs_for_this_skp.append(self._create_image_pair(
279 image_dict_A=whole_image_A, image_dict_B=whole_image_B,
280 source_skp_name=skp_name, tilenum=None))
282 tiled_images_A = self.get_default(
284 skp_name, gm_json.JSONKEY_SOURCE_TILEDIMAGES)
285 tiled_images_B = self.get_default(
287 skp_name, gm_json.JSONKEY_SOURCE_TILEDIMAGES)
288 if tiled_images_A or tiled_images_B:
289 num_tiles_A = len(tiled_images_A)
290 num_tiles_B = len(tiled_images_B)
291 num_tiles = max(num_tiles_A, num_tiles_B)
292 for tile_num in range(num_tiles):
293 imagepairs_for_this_skp.append(self._create_image_pair(
294 image_dict_A=(tiled_images_A[tile_num]
295 if tile_num < num_tiles_A else None),
296 image_dict_B=(tiled_images_B[tile_num]
297 if tile_num < num_tiles_B else None),
298 source_skp_name=skp_name, tilenum=tile_num))
300 for one_imagepair in imagepairs_for_this_skp:
302 all_image_pairs.add_image_pair(one_imagepair)
303 result_type = one_imagepair.extra_columns_dict\
304 [COLUMN__RESULT_TYPE]
305 if result_type != results.KEY__RESULT_TYPE__SUCCEEDED:
306 failing_image_pairs.add_image_pair(one_imagepair)
308 logging.info('Finished adding imagepairs to queue.')
309 self._image_diff_db.log_queue_size_if_changed(limit_verbosity=False)
311 if self._prefetch_only:
315 results.KEY__HEADER__RESULTS_ALL: all_image_pairs.as_dict(
316 column_ids_in_order=ORDERED_COLUMN_IDS),
317 results.KEY__HEADER__RESULTS_FAILURES: failing_image_pairs.as_dict(
318 column_ids_in_order=ORDERED_COLUMN_IDS),
def _validate_dict_version(self, result_dict):
    """Raises Exception if the dict is not the type/version we know how to read.

    Args:
      result_dict: dictionary holding output of render_pictures; if None,
          this method will return without raising an Exception

    Raises:
      Exception: if the header type or the header revision found in
          result_dict differs from the values expected here.
      KeyError: if result_dict has no header entry, or the header lacks the
          type or revision field.
    """
    expected_header_type = 'ChecksummedImages'
    expected_header_revision = 1

    # Identity test: a None result_dict is accepted without any checks, while
    # an empty dict still goes through validation (and fails on the missing
    # header).
    if result_dict is None:
        return

    header = result_dict[gm_json.JSONKEY_HEADER]
    header_type = header[gm_json.JSONKEY_HEADER_TYPE]
    if header_type != expected_header_type:
        raise Exception('expected header_type "%s", but got "%s"' % (
            expected_header_type, header_type))

    header_revision = header[gm_json.JSONKEY_HEADER_REVISION]
    if header_revision != expected_header_revision:
        # Format with %s, not %d: a non-numeric revision would make %d raise
        # TypeError instead of this message, and a float revision would be
        # truncated (1.5 would be reported as "1").
        raise Exception('expected header_revision %s, but got %s' % (
            expected_header_revision, header_revision))
343 def _create_image_pair(self, image_dict_A, image_dict_B, source_skp_name,
345 """Creates an ImagePair object for this pair of images.
348 image_dict_A: dict with JSONKEY_IMAGE_* keys, or None if no image
349 image_dict_B: dict with JSONKEY_IMAGE_* keys, or None if no image
350 source_skp_name: string; name of the source SKP file
351 tilenum: which tile, or None if a wholeimage
354 An ImagePair object, or None if both image_dict_A and image_dict_B are
357 if (not image_dict_A) and (not image_dict_B):
360 def _checksum_and_relative_url(dic):
362 return ((dic[gm_json.JSONKEY_IMAGE_CHECKSUMALGORITHM],
363 dic[gm_json.JSONKEY_IMAGE_CHECKSUMVALUE]),
364 dic[gm_json.JSONKEY_IMAGE_FILEPATH])
368 imageA_checksum, imageA_relative_url = _checksum_and_relative_url(
370 imageB_checksum, imageB_relative_url = _checksum_and_relative_url(
373 if not imageA_checksum:
374 result_type = results.KEY__RESULT_TYPE__NOCOMPARISON
375 elif not imageB_checksum:
376 result_type = results.KEY__RESULT_TYPE__NOCOMPARISON
377 elif imageA_checksum == imageB_checksum:
378 result_type = results.KEY__RESULT_TYPE__SUCCEEDED
380 result_type = results.KEY__RESULT_TYPE__FAILED
382 extra_columns_dict = {
383 COLUMN__RESULT_TYPE: result_type,
384 COLUMN__SOURCE_SKP: source_skp_name,
387 extra_columns_dict[COLUMN__TILED_OR_WHOLE] = 'whole'
388 extra_columns_dict[COLUMN__TILENUM] = 'N/A'
390 extra_columns_dict[COLUMN__TILED_OR_WHOLE] = 'tiled'
391 extra_columns_dict[COLUMN__TILENUM] = str(tilenum)
394 return imagepair.ImagePair(
395 image_diff_db=self._image_diff_db,
396 base_url=self._image_base_gs_url,
397 imageA_relative_url=imageA_relative_url,
398 imageB_relative_url=imageB_relative_url,
399 extra_columns=extra_columns_dict,
400 download_all_images=self._download_all_images)
401 except (KeyError, TypeError):
403 'got exception while creating ImagePair for'
404 ' urlPair=("%s","%s"), source_skp_name="%s", tilenum="%s"' % (
405 imageA_relative_url, imageB_relative_url, source_skp_name,
def _copy_dir_contents(self, source_dir, dest_dir):
    """Copy all contents of source_dir into dest_dir, recursing into subdirs.

    Args:
      source_dir: path to source dir (GS URL, local filepath, or a special
          "repo:" URL type that points at a file within our Skia checkout)
      dest_dir: path to destination dir (local filepath)

    The copy operates as a "merge with overwrite": any files in source_dir will
    be "overlaid" on top of the existing content in dest_dir.  Existing files
    with the same names will be overwritten.

    Raises:
      OSError: if a local or "repo:" source directory cannot be listed.
    """
    def _merge_tree(src, dst):
        """Recursively overlay directory src onto dst, creating dst if needed."""
        # List src first, so a missing source raises before dst is created.
        names = os.listdir(src)
        if not os.path.isdir(dst):
            os.makedirs(dst)
        for name in names:
            src_path = os.path.join(src, name)
            dst_path = os.path.join(dst, name)
            if os.path.isdir(src_path):
                _merge_tree(src_path, dst_path)
            else:
                # copy2 overwrites an existing file of the same name.
                shutil.copy2(src_path, dst_path)

    if gs_utils.GSUtils.is_gs_url(source_dir):
        (bucket, path) = gs_utils.GSUtils.split_gs_url(source_dir)
        # NOTE(review): presumably download_dir_contents() also merges into an
        # already-populated dest_dir -- confirm against gs_utils.
        self._gs.download_dir_contents(source_bucket=bucket, source_dir=path,
                                       dest_dir=dest_dir)
    elif source_dir.lower().startswith(REPO_URL_PREFIX):
        repo_dir = os.path.join(
            REPO_BASEPATH, source_dir[len(REPO_URL_PREFIX):])
        # shutil.copytree() refuses to copy into an existing directory, which
        # breaks the documented merge behavior as soon as a second source dir
        # is copied into the same dest_dir; overlay the tree instead.
        _merge_tree(repo_dir, dest_dir)
    else:
        _merge_tree(source_dir, dest_dir)
431 def _get_repo_revision(self, source_dir, assert_if_not=None):
432 """Get the commit hash of source_dir, IF it refers to a git checkout.
435 source_dir: path to source dir (GS URL, local filepath, or a special
436 "repo:" URL type that points at a file within our Skia checkout;
437 only the "repo:" URL type will have a commit hash.
438 assert_if_not: if not None, raise an Exception if source_dir has a
439 commit hash and that hash is not equal to this
441 if source_dir.lower().startswith(REPO_URL_PREFIX):
442 repo_dir = os.path.join(REPO_BASEPATH, source_dir[len(REPO_URL_PREFIX):])
443 revision = subprocess.check_output(
444 args=[git_utils.GIT, 'rev-parse', 'HEAD'], cwd=repo_dir).strip()
445 if assert_if_not and revision != assert_if_not:
446 raise Exception('found revision %s that did not match %s' % (
447 revision, assert_if_not))