rebaseline_server: add "prefetch" directive that just warms the cache without awaitin...

author epoger <epoger@google.com>

Wed, 6 Aug 2014 17:56:50 +0000 (10:56 -0700)

committer Commit bot <commit-bot@chromium.org>

Wed, 6 Aug 2014 17:56:51 +0000 (10:56 -0700)
author epoger <epoger@google.com>
Wed, 6 Aug 2014 17:56:50 +0000 (10:56 -0700)
committer Commit bot <commit-bot@chromium.org>
Wed, 6 Aug 2014 17:56:51 +0000 (10:56 -0700)
diff --git a/gm/rebaseline_server/compare_rendered_pictures.py b/gm/rebaseline_server/compare_rendered_pictures.py

index ed36c7c9dfb17024d959b3d0da5d5f3c874ebb49..4f8f6cd0769a861ffffd0c2dafc49e7cfb1b6ddb 100755 (executable)
--- a/gm/rebaseline_server/compare_rendered_pictures.py
+++ b/gm/rebaseline_server/compare_rendered_pictures.py
@@ -70,8 +70,15 @@ class RenderedPicturesComparisons(results.BaseComparisons):
                 image_base_gs_url=DEFAULT_IMAGE_BASE_GS_URL,
                 diff_base_url=None, setA_label='setA',
                 setB_label='setB', gs=None,
-               truncate_results=False):
-    """
+               truncate_results=False, prefetch_only=False,
+               download_all_images=False):
+    """Constructor: downloads images and generates diffs.
+
+    Once the object has been created (which may take a while), you can call its
+    get_packaged_results_of_type() method to quickly retrieve the results...
+    unless you have set prefetch_only to True, in which case we will
+    asynchronously warm up the ImageDiffDB cache but not fill in self._results.
+
      Args:
        setA_dirs: list of root directories to copy all JSON summaries from,
            and to use as setA within the comparisons. These directories may be
@@ -93,6 +100,14 @@ class RenderedPicturesComparisons(results.BaseComparisons):
        gs: instance of GSUtils object we can use to download summary files
        truncate_results: FOR MANUAL TESTING: if True, truncate the set of images
            we process, to speed up testing.
+      prefetch_only: if True, return the new object as quickly as possible
+          with empty self._results (just queue up all the files to process,
+          don't wait around for them to be processed and recorded); otherwise,
+          block until the results have been assembled and recorded in
+          self._results.
+      download_all_images: if True, download all images, even if we don't
+          need them to generate diffs.  This will take much longer to complete,
+          but is useful for warming up the bitmap cache on local disk.
      """
      super(RenderedPicturesComparisons, self).__init__()
      self._image_diff_db = image_diff_db
@@ -104,6 +119,8 @@ class RenderedPicturesComparisons(results.BaseComparisons):
      self._setB_label = setB_label
      self._gs = gs
      self.truncate_results = truncate_results
+    self._prefetch_only = prefetch_only
+    self._download_all_images = download_all_images
  
      tempdir = tempfile.mkdtemp()
      try:
@@ -121,11 +138,12 @@ class RenderedPicturesComparisons(results.BaseComparisons):
        self._results = self._load_result_pairs(
            setA_root=setA_root, setA_section=gm_json.JSONKEY_ACTUALRESULTS,
            setB_root=setB_root, setB_section=gm_json.JSONKEY_ACTUALRESULTS)
-      self._timestamp = int(time.time())
-      logging.info('Number of download file collisions: %s' %
-                   imagediffdb.global_file_collisions)
-      logging.info('Results complete; took %d seconds.' %
-                   (self._timestamp - time_start))
+      if self._results:
+        self._timestamp = int(time.time())
+        logging.info('Number of download file collisions: %s' %
+                     imagediffdb.global_file_collisions)
+        logging.info('Results complete; took %d seconds.' %
+                     (self._timestamp - time_start))
      finally:
        shutil.rmtree(tempdir)
  
@@ -141,7 +159,8 @@ class RenderedPicturesComparisons(results.BaseComparisons):
        setB_section: which section (gm_json.JSONKEY_ACTUALRESULTS or
            gm_json.JSONKEY_EXPECTEDRESULTS) to load from the summaries in setB
  
-    Returns the summary of all image diff results.
+    Returns the summary of all image diff results (or None, depending on
+    self._prefetch_only).
      """
      logging.info('Reading JSON image summaries from dirs %s and %s...' % (
          setA_root, setB_root))
@@ -182,8 +201,9 @@ class RenderedPicturesComparisons(results.BaseComparisons):
      dict_num = 0
      for dict_path in union_dict_paths:
        dict_num += 1
-      logging.info('Generating pixel diffs for dict #%d of %d, "%s"...' %
-                   (dict_num, num_union_dict_paths, dict_path))
+      logging.info(
+          'Asynchronously requesting pixel diffs for dict #%d of %d, "%s"...' %
+          (dict_num, num_union_dict_paths, dict_path))
  
        dictA = self.get_default(setA_dicts, None, dict_path)
        self._validate_dict_version(dictA)
@@ -235,12 +255,15 @@ class RenderedPicturesComparisons(results.BaseComparisons):
              if result_type != results.KEY__RESULT_TYPE__SUCCEEDED:
                failing_image_pairs.add_image_pair(one_imagepair)
  
-    return {
-      results.KEY__HEADER__RESULTS_ALL: all_image_pairs.as_dict(
-          column_ids_in_order=ORDERED_COLUMN_IDS),
-      results.KEY__HEADER__RESULTS_FAILURES: failing_image_pairs.as_dict(
-          column_ids_in_order=ORDERED_COLUMN_IDS),
-    }
+    if self._prefetch_only:
+      return None
+    else:
+      return {
+          results.KEY__HEADER__RESULTS_ALL: all_image_pairs.as_dict(
+              column_ids_in_order=ORDERED_COLUMN_IDS),
+          results.KEY__HEADER__RESULTS_FAILURES: failing_image_pairs.as_dict(
+              column_ids_in_order=ORDERED_COLUMN_IDS),
+      }
  
    def _validate_dict_version(self, result_dict):
      """Raises Exception if the dict is not the type/version we know how to read.
@@ -320,7 +343,8 @@ class RenderedPicturesComparisons(results.BaseComparisons):
            base_url=self._image_base_gs_url,
            imageA_relative_url=imageA_relative_url,
            imageB_relative_url=imageB_relative_url,
-          extra_columns=extra_columns_dict)
+          extra_columns=extra_columns_dict,
+          download_all_images=self._download_all_images)
      except (KeyError, TypeError):
        logging.exception(
            'got exception while creating ImagePair for'
diff --git a/gm/rebaseline_server/imagediffdb.py b/gm/rebaseline_server/imagediffdb.py

index fbe71211401f812775bc7fe6a55bdacda971e13c..1f939407a8a2e93b2af7d89031f0506a8b39f307 100644 (file)
--- a/gm/rebaseline_server/imagediffdb.py
+++ b/gm/rebaseline_server/imagediffdb.py
@@ -101,30 +101,29 @@ class DiffRecord(object):
      actual_image_file = os.path.join(
          storage_root, actual_images_subdir,
          str(actual_image_locator) + image_suffix)
-    try:
-      _download_file(gs, expected_image_file, expected_image_url)
-    except Exception:
-      logging.exception('unable to download expected_image_url %s to file %s' %
-                        (expected_image_url, expected_image_file))
-      raise
-    try:
-      _download_file(gs, actual_image_file, actual_image_url)
-    except Exception:
-      logging.exception('unable to download actual_image_url %s to file %s' %
-                        (actual_image_url, actual_image_file))
-      raise
-
-    # Get all diff images and values from skpdiff binary.
+    for image_file, image_url in [
+        (expected_image_file, expected_image_url),
+        (actual_image_file, actual_image_url)]:
+      if image_file and image_url:
+        try:
+          _download_file(gs, image_file, image_url)
+        except Exception:
+          logging.exception('unable to download image_url %s to file %s' %
+                            (image_url, image_file))
+          raise
+
+    # Return early if we do not need to generate diffs.
+    if (expected_image_url == actual_image_url or
+        not expected_image_url or not actual_image_url):
+      return
+
+    # Get all diff images and values using the skpdiff binary.
      skpdiff_output_dir = tempfile.mkdtemp()
      try:
        skpdiff_summary_file = os.path.join(skpdiff_output_dir,
                                            'skpdiff-output.json')
        skpdiff_rgbdiff_dir = os.path.join(skpdiff_output_dir, 'rgbDiff')
        skpdiff_whitediff_dir = os.path.join(skpdiff_output_dir, 'whiteDiff')
-      expected_img = os.path.join(storage_root, expected_images_subdir,
-                                  str(expected_image_locator) + image_suffix)
-      actual_img = os.path.join(storage_root, actual_images_subdir,
-                                str(actual_image_locator) + image_suffix)
  
        # TODO(epoger): Consider calling skpdiff ONCE for all image pairs,
        # instead of calling it separately for each image pair.
@@ -133,7 +132,7 @@ class DiffRecord(object):
        # Con: we would have to wait until all image pairs were loaded before
        # generating any of the diffs?
        find_run_binary.run_command(
-          [SKPDIFF_BINARY, '-p', expected_img, actual_img,
+          [SKPDIFF_BINARY, '-p', expected_image_file, actual_image_file,
             '--jsonp', 'false',
             '--output', skpdiff_summary_file,
             '--differs', 'perceptual', 'different_pixels',
@@ -261,7 +260,9 @@ class ImageDiffDB(object):
  
      # Set up the queue for asynchronously loading DiffRecords, and start the
      # worker threads reading from it.
-    self._tasks_queue = Queue.Queue(maxsize=2*num_worker_threads)
+    # The queue maxsize must be 0 (infinite size queue), so that asynchronous
+    # calls can return as soon as possible.
+    self._tasks_queue = Queue.Queue(maxsize=0)
      self._workers = []
      for i in range(num_worker_threads):
        worker = threading.Thread(target=self.worker, args=(i,))
@@ -310,6 +311,9 @@ class ImageDiffDB(object):
      If we already have a DiffRecord for this particular image pair, no work
      will be done.
  
+    If expected_image_url (or its locator) is None, just download actual_image.
+    If actual_image_url (or its locator) is None, just download expected_image.
+
      Args:
        expected_image_url: file, GS, or HTTP url from which we will download the
            expected image
@@ -435,10 +439,14 @@ def _sanitize_locator(locator):
    characters will have special meaning in filenames).
  
    Args:
-    locator: string, or something that can be represented as a string
+    locator: string, or something that can be represented as a string.
+        If None or '', it is returned without modification, because empty
+        locators have a particular meaning ("there is no image for this")
    """
-  return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator))
-
+  if locator:
+    return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator))
+  else:
+    return locator
  
  def _get_difference_locator(expected_image_locator, actual_image_locator):
    """Returns the locator string used to look up the diffs between expected_image
diff --git a/gm/rebaseline_server/imagepair.py b/gm/rebaseline_server/imagepair.py

index 12d6718857aa68c5afb221aa4286ee3e15ce36ae..d9c4cb82b98bd13fb855d2ff9f6e1abf8d26c6cc 100644 (file)
--- a/gm/rebaseline_server/imagepair.py
+++ b/gm/rebaseline_server/imagepair.py
@@ -32,7 +32,8 @@ class ImagePair(object):
  
    def __init__(self, image_diff_db,
                 base_url, imageA_relative_url, imageB_relative_url,
-               expectations=None, extra_columns=None):
+               expectations=None, extra_columns=None,
+               download_all_images=False):
      """
      Args:
        image_diff_db: ImageDiffDB instance we use to generate/store image diffs
@@ -45,6 +46,9 @@ class ImagePair(object):
            metadata (ignore-failure, bug numbers, etc.)
        extra_columns: optional dictionary containing more metadata (test name,
            builder name, etc.)
+      download_all_images: if True, download any images associated with this
+          image pair, even if we don't need them to generate diffs
+          (imageA == imageB, or one of them is missing)
      """
      self._image_diff_db = image_diff_db
      self.base_url = base_url
@@ -63,11 +67,13 @@ class ImagePair(object):
        # Later on, we will call image_diff_db.get_diff_record() to find it.
        self._is_different = True
        self._diff_record = _DIFF_RECORD_STILL_LOADING
+
+    if self._diff_record != None or download_all_images:
        image_diff_db.add_image_pair(
            expected_image_locator=imageA_relative_url,
-          expected_image_url=posixpath.join(base_url, imageA_relative_url),
+          expected_image_url=self.posixpath_join(base_url, imageA_relative_url),
            actual_image_locator=imageB_relative_url,
-          actual_image_url=posixpath.join(base_url, imageB_relative_url))
+          actual_image_url=self.posixpath_join(base_url, imageB_relative_url))
  
    def as_dict(self):
      """Returns a dictionary describing this ImagePair.
@@ -97,3 +103,14 @@ class ImagePair(object):
      if self._diff_record != None:
        asdict[KEY__IMAGEPAIRS__DIFFERENCES] = self._diff_record.as_dict()
      return asdict
+
+  @staticmethod
+  def posixpath_join(*args):
+    """Wrapper around posixpath.join().
+
+    Returns posixpath.join(*args), or None if any arg is None.
+    """
+    for arg in args:
+      if arg == None:
+        return None
+    return posixpath.join(*args)
diff --git a/gm/rebaseline_server/imagepair_test.py b/gm/rebaseline_server/imagepair_test.py

index ef7695acacc80af6e9873cb4e3ea8aba42de36ba..5a9f42484834818750475861076a0235a1d8606c 100755 (executable)
--- a/gm/rebaseline_server/imagepair_test.py
+++ b/gm/rebaseline_server/imagepair_test.py
@@ -161,6 +161,23 @@ class ImagePairTest(unittest.TestCase):
                  'isDifferent': True,
              },
          ],
+
+        # One of the two images is missing, but download_all_images=True so we
+        # should download it anyway.
+        [
+            # inputs:
+            None,
+            'arcofzorro/13786535001616823825.png',
+            None,
+            None,
+            # expected output:
+            {
+                'imageAUrl': None,
+                'imageBUrl': 'arcofzorro/13786535001616823825.png',
+                'isDifferent': True,
+            },
+        ],
+
      ]
  
      db = imagediffdb.ImageDiffDB(self.temp_dir)
@@ -171,7 +188,8 @@ class ImagePairTest(unittest.TestCase):
            imageA_relative_url=selftest[0],
            imageB_relative_url=selftest[1],
            expectations=selftest[2],
-          extra_columns=selftest[3])
+          extra_columns=selftest[3],
+          download_all_images=True)
        self.assertEqual(image_pair.as_dict(), selftest[4])
  
  
diff --git a/gm/rebaseline_server/server.py b/gm/rebaseline_server/server.py

index 6271f97dd2fa97dbd0da68456b24f5f1fd255e57..03bbe74b01e3b327494e62976e9c3abe079cf0dd 100755 (executable)
--- a/gm/rebaseline_server/server.py
+++ b/gm/rebaseline_server/server.py
@@ -73,12 +73,7 @@ DEFAULT_PORT = 8888
  
  PARENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
  TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(PARENT_DIRECTORY))
-# Directory, relative to PARENT_DIRECTORY, within which the server will serve
-# out image diff data from within the precomputed _SERVER.results .
-PRECOMPUTED_RESULTS_SUBDIR = 'results'
-# Directory, relative to PARENT_DIRECTORY, within which the server will serve
-# out live-generated image diff data.
-LIVE_RESULTS_SUBDIR = 'live-results'
+
  # Directory, relative to PARENT_DIRECTORY, within which the server will serve
  # out static files.
  STATIC_CONTENTS_SUBDIR = 'static'
@@ -87,10 +82,19 @@ GENERATED_HTML_SUBDIR = 'generated-html'
  GENERATED_IMAGES_SUBDIR = 'generated-images'
  GENERATED_JSON_SUBDIR = 'generated-json'
  
+# Directives associated with various HTTP GET requests.
+GET__LIVE_RESULTS = 'live-results'
+GET__PRECOMPUTED_RESULTS = 'results'
+GET__PREFETCH_RESULTS = 'prefetch'
+GET__STATIC_CONTENTS = 'static'
+
  # Parameters we use within do_GET_live_results()
  LIVE_PARAM__SET_A_DIR = 'setADir'
  LIVE_PARAM__SET_B_DIR = 'setBDir'
  
+# Parameters we use within do_GET_prefetch_results()
+PREFETCH_PARAM__DOWNLOAD_ONLY_DIFFERING = 'downloadOnlyDifferingImages'
+
  # How often (in seconds) clients should reload while waiting for initial
  # results to load.
  RELOAD_INTERVAL_UNTIL_READY = 10
@@ -170,11 +174,11 @@ def _create_index(file_path, config_pairs):
        file_handle.write('<li>Expectations vs Actuals</li><ul>')
        for summary_type in SUMMARY_TYPES:
          file_handle.write(
-            '<li><a href="/{static_subdir}/view.html#/view.html?'
-            'resultsToLoad=/{results_subdir}/{summary_type}">'
+            '<li><a href="/{static_directive}/view.html#/view.html?'
+            'resultsToLoad=/{results_directive}/{summary_type}">'
              '{summary_type}</a></li>'.format(
-                results_subdir=PRECOMPUTED_RESULTS_SUBDIR,
-                static_subdir=STATIC_CONTENTS_SUBDIR,
+                results_directive=GET__PRECOMPUTED_RESULTS,
+                static_directive=GET__STATIC_CONTENTS,
                  summary_type=summary_type))
        file_handle.write('</ul>')
      if config_pairs:
@@ -185,7 +189,7 @@ def _create_index(file_path, config_pairs):
            file_handle.write(
                ' <a href="/%s/view.html#/view.html?'
                'resultsToLoad=/%s/%s/%s-vs-%s_%s.json">%s</a>' % (
-                  STATIC_CONTENTS_SUBDIR, STATIC_CONTENTS_SUBDIR,
+                  GET__STATIC_CONTENTS, GET__STATIC_CONTENTS,
                    GENERATED_JSON_SUBDIR, config_pair[0], config_pair[1],
                    summary_type, summary_type))
          file_handle.write('</li>')
@@ -461,10 +465,10 @@ class HTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
        logging.debug('do_GET: path="%s"' % self.path)
        if self.path == '' or self.path == '/' or self.path == '/index.html' :
          self.redirect_to('/%s/%s/index.html' % (
-            STATIC_CONTENTS_SUBDIR, GENERATED_HTML_SUBDIR))
+            GET__STATIC_CONTENTS, GENERATED_HTML_SUBDIR))
          return
        if self.path == '/favicon.ico' :
-        self.redirect_to('/%s/favicon.ico' % STATIC_CONTENTS_SUBDIR)
+        self.redirect_to('/%s/favicon.ico' % GET__STATIC_CONTENTS)
          return
  
        # All requests must be of this form:
@@ -474,9 +478,10 @@ class HTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
        normpath = posixpath.normpath(self.path)
        (dispatcher_name, remainder) = PATHSPLIT_RE.match(normpath).groups()
        dispatchers = {
-          PRECOMPUTED_RESULTS_SUBDIR: self.do_GET_precomputed_results,
-          LIVE_RESULTS_SUBDIR: self.do_GET_live_results,
-          STATIC_CONTENTS_SUBDIR: self.do_GET_static,
+          GET__LIVE_RESULTS: self.do_GET_live_results,
+          GET__PRECOMPUTED_RESULTS: self.do_GET_precomputed_results,
+          GET__PREFETCH_RESULTS: self.do_GET_prefetch_results,
+          GET__STATIC_CONTENTS: self.do_GET_static,
        }
        dispatcher = dispatchers[dispatcher_name]
        dispatcher(remainder)
@@ -537,6 +542,26 @@ class HTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
      self.send_json_dict(results_obj.get_packaged_results_of_type(
          results_mod.KEY__HEADER__RESULTS_ALL))
  
+  def do_GET_prefetch_results(self, url_remainder):
+    """ Prefetch image diff data for a future do_GET_live_results() call.
+
+    Args:
+      url_remainder: string indicating which image diffs to generate
+    """
+    logging.debug('do_GET_prefetch_results: url_remainder="%s"' % url_remainder)
+    param_dict = urlparse.parse_qs(url_remainder)
+    download_all_images = (
+        param_dict.get(PREFETCH_PARAM__DOWNLOAD_ONLY_DIFFERING, [''])[0].lower()
+        not in ['1', 'true'])
+    compare_rendered_pictures.RenderedPicturesComparisons(
+        setA_dirs=param_dict[LIVE_PARAM__SET_A_DIR],
+        setB_dirs=param_dict[LIVE_PARAM__SET_B_DIR],
+        image_diff_db=_SERVER.image_diff_db,
+        diff_base_url='/static/generated-images',
+        gs=_SERVER.gs, truncate_results=_SERVER.truncate_results,
+        prefetch_only=True, download_all_images=download_all_images)
+    self.send_response(200)
+
    def do_GET_static(self, path):
      """ Handle a GET request for a file under STATIC_CONTENTS_SUBDIR .
      Only allow serving of files within STATIC_CONTENTS_SUBDIR that is a
author	epoger <epoger@google.com>
	Wed, 6 Aug 2014 17:56:50 +0000 (10:56 -0700)
committer	Commit bot <commit-bot@chromium.org>
	Wed, 6 Aug 2014 17:56:51 +0000 (10:56 -0700)
gm/rebaseline_server/compare_rendered_pictures.py		patch \| blob \| history
gm/rebaseline_server/imagediffdb.py		patch \| blob \| history
gm/rebaseline_server/imagepair.py		patch \| blob \| history
gm/rebaseline_server/imagepair_test.py		patch \| blob \| history
gm/rebaseline_server/server.py		patch \| blob \| history