src/tools/telemetry/telemetry/page/page_set_archive_info.py

   1 # Copyright 2013 The Chromium Authors. All rights reserved.
   2 # Use of this source code is governed by a BSD-style license that can be
   3 # found in the LICENSE file.
   4
   5 import json
   6 import logging
   7 import os
   8 import re
   9 import shutil
  10 import tempfile
  11
  12 from telemetry.util import cloud_storage
  13
  14
  15 class PageSetArchiveInfo(object):
  16   def __init__(self, file_path, data, ignore_archive=False):
  17     self._file_path = file_path
  18     self._base_dir = os.path.dirname(file_path)
  19
  20     # Ensure directory exists.
  21     if not os.path.exists(self._base_dir):
  22       os.makedirs(self._base_dir)
  23
  24     # Download all .wpr files.
  25     if not ignore_archive:
  26       # TODO(tbarzic): Remove this once http://crbug.com/351143 is diagnosed.
  27       log_cloud_storage_exception = True
  28       for archive_path in data['archives']:
  29         archive_path = self._WprFileNameToPath(archive_path)
  30         try:
  31           cloud_storage.GetIfChanged(archive_path)
  32         except (cloud_storage.CredentialsError,
  33                 cloud_storage.PermissionError) as e:
  34           if os.path.exists(archive_path):
  35             # If the archive exists, assume the user recorded their own and
  36             # simply warn.
  37             logging.warning('Need credentials to update WPR archive: %s',
  38                             archive_path)
  39           elif log_cloud_storage_exception:
  40             # Log access errors only once, as they should stay the same in other
  41             # iterations.
  42             log_cloud_storage_exception = False
  43             logging.warning('Error getting WPR archive %s: %s ' %
  44                                 (archive_path, str(e)))
  45             logging.info(
  46                 'HOME: "%s"; USER: "%s"' %
  47                 (os.environ.get('HOME', ''), os.environ.get('USER', '')))
  48
  49     # Map from the relative path (as it appears in the metadata file) of the
  50     # .wpr file to a list of page names it supports.
  51     self._wpr_file_to_page_names = data['archives']
  52
  53     # Map from the page name to a relative path (as it appears in the metadata
  54     # file) of the .wpr file.
  55     self._page_name_to_wpr_file = dict()
  56     # Find out the wpr file names for each page.
  57     for wpr_file in data['archives']:
  58       page_names = data['archives'][wpr_file]
  59       for page_name in page_names:
  60         self._page_name_to_wpr_file[page_name] = wpr_file
  61     self.temp_target_wpr_file_path = None
  62
  63   @classmethod
  64   def FromFile(cls, file_path, ignore_archive=False):
  65     if os.path.exists(file_path):
  66       with open(file_path, 'r') as f:
  67         data = json.load(f)
  68         return cls(file_path, data, ignore_archive=ignore_archive)
  69     # TODO(tbarzic): Remove this once http://crbug.com/351143 is diagnosed.
  70     logging.warning('Page set archives not found: %s' % file_path)
  71     return cls(file_path, {'archives': {}}, ignore_archive=ignore_archive)
  72
  73   def WprFilePathForPage(self, page):
  74     if self.temp_target_wpr_file_path:
  75       return self.temp_target_wpr_file_path
  76     wpr_file = self._page_name_to_wpr_file.get(page.display_name, None)
  77     if wpr_file is None:
  78       # Some old page sets always use the URL to identify a page rather than the
  79       # display_name, so try to look for that.
  80       wpr_file = self._page_name_to_wpr_file.get(page.url, None)
  81     if wpr_file:
  82       return self._WprFileNameToPath(wpr_file)
  83     return None
  84
  85   def AddNewTemporaryRecording(self, temp_wpr_file_path=None):
  86     if temp_wpr_file_path is None:
  87       temp_wpr_file_handle, temp_wpr_file_path = tempfile.mkstemp()
  88       os.close(temp_wpr_file_handle)
  89     self.temp_target_wpr_file_path = temp_wpr_file_path
  90
  91   def AddRecordedPages(self, pages):
  92     if not pages:
  93       os.remove(self.temp_target_wpr_file_path)
  94       return
  95
  96     (target_wpr_file, target_wpr_file_path) = self._NextWprFileName()
  97     for page in pages:
  98       self._SetWprFileForPage(page.display_name, target_wpr_file)
  99     shutil.move(self.temp_target_wpr_file_path, target_wpr_file_path)
 100
 101     # Update the hash file.
 102     with open(target_wpr_file_path + '.sha1', 'wb') as f:
 103       f.write(cloud_storage.CalculateHash(target_wpr_file_path))
 104       f.flush()
 105
 106     self._WriteToFile()
 107     self._DeleteAbandonedWprFiles()
 108
 109   def _DeleteAbandonedWprFiles(self):
 110     # Update the metadata so that the abandoned wpr files don't have empty page
 111     # name arrays.
 112     abandoned_wpr_files = self._AbandonedWprFiles()
 113     for wpr_file in abandoned_wpr_files:
 114       del self._wpr_file_to_page_names[wpr_file]
 115       # Don't fail if we're unable to delete some of the files.
 116       wpr_file_path = self._WprFileNameToPath(wpr_file)
 117       try:
 118         os.remove(wpr_file_path)
 119       except Exception:
 120         logging.warning('Failed to delete file: %s' % wpr_file_path)
 121
 122   def _AbandonedWprFiles(self):
 123     abandoned_wpr_files = []
 124     for wpr_file, page_names in self._wpr_file_to_page_names.iteritems():
 125       if not page_names:
 126         abandoned_wpr_files.append(wpr_file)
 127     return abandoned_wpr_files
 128
 129   def _WriteToFile(self):
 130     """Writes the metadata into the file passed as constructor parameter."""
 131     metadata = dict()
 132     metadata['description'] = (
 133         'Describes the Web Page Replay archives for a page set. Don\'t edit by '
 134         'hand! Use record_wpr for updating.')
 135     metadata['archives'] = self._wpr_file_to_page_names.copy()
 136     # Don't write data for abandoned archives.
 137     abandoned_wpr_files = self._AbandonedWprFiles()
 138     for wpr_file in abandoned_wpr_files:
 139       del metadata['archives'][wpr_file]
 140
 141     with open(self._file_path, 'w') as f:
 142       json.dump(metadata, f, indent=4)
 143       f.flush()
 144
 145   def _WprFileNameToPath(self, wpr_file):
 146     return os.path.abspath(os.path.join(self._base_dir, wpr_file))
 147
 148   def _NextWprFileName(self):
 149     """Creates a new file name for a wpr archive file."""
 150     # The names are of the format "some_thing_number.wpr". Read the numbers.
 151     highest_number = -1
 152     base = None
 153     for wpr_file in self._wpr_file_to_page_names:
 154       match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', wpr_file)
 155       if not match:
 156         raise Exception('Illegal wpr file name ' + wpr_file)
 157       highest_number = max(int(match.groupdict()['NUMBER']), highest_number)
 158       if base and match.groupdict()['BASE'] != base:
 159         raise Exception('Illegal wpr file name ' + wpr_file +
 160                         ', doesn\'t begin with ' + base)
 161       base = match.groupdict()['BASE']
 162     if not base:
 163       # If we're creating a completely new info file, use the base name of the
 164       # page set file.
 165       base = os.path.splitext(os.path.basename(self._file_path))[0]
 166     new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
 167     return new_filename, self._WprFileNameToPath(new_filename)
 168
 169   def _SetWprFileForPage(self, page_name, wpr_file):
 170     """For modifying the metadata when we're going to record a new archive."""
 171     old_wpr_file = self._page_name_to_wpr_file.get(page_name, None)
 172     if old_wpr_file:
 173       self._wpr_file_to_page_names[old_wpr_file].remove(page_name)
 174     self._page_name_to_wpr_file[page_name] = wpr_file
 175     if wpr_file not in self._wpr_file_to_page_names:
 176       self._wpr_file_to_page_names[wpr_file] = []
 177     self._wpr_file_to_page_names[wpr_file].append(page_name)