# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import json
import logging
import os
import re
import shutil
import tempfile

from telemetry.util import cloud_storage
class PageSetArchiveInfo(object):
  """Metadata linking the pages of a page set to their .wpr archive files.

  The metadata lives in a JSON file whose 'archives' entry maps each .wpr file
  name (relative to the metadata file's directory) to the list of page names
  recorded in it. This class keeps that mapping and its inverse in memory,
  allocates names for new recordings, and writes the metadata back.

  NOTE(review): the listing this class was reviewed from was missing several
  short lines (bare `try:` / `if ...:` headers and similar). They were
  reconstructed from the surrounding syntax and comments; spots where the
  reconstruction is a real guess are marked NOTE(review) below. Confirm them
  against the upstream file.
  """

  def __init__(self, file_path, data, ignore_archive=False):
    """Builds the page <-> archive maps and optionally fetches the archives.

    Args:
      file_path: Path of the metadata file. Archive file names are resolved
          relative to its directory, which is created if it does not exist.
      data: Dict with an 'archives' key mapping .wpr file names to lists of
          page names. The mapping is kept by reference (not copied) and is
          mutated by later recordings.
      ignore_archive: If true, skip downloading the .wpr files.
    """
    self._file_path = file_path
    self._base_dir = os.path.dirname(file_path)

    # Ensure directory exists. A bare file name has an empty dirname, which
    # means "current directory": there is nothing to create, and
    # os.makedirs('') would raise.
    if self._base_dir and not os.path.exists(self._base_dir):
      os.makedirs(self._base_dir)

    # Download all .wpr files.
    if not ignore_archive:
      self._DownloadArchives(data['archives'])

    # Map from the relative path (as it appears in the metadata file) of the
    # .wpr file to a list of page names it supports.
    self._wpr_file_to_page_names = data['archives']

    # Map from the page name to a relative path (as it appears in the metadata
    # file) of the .wpr file.
    self._page_name_to_wpr_file = dict()
    for wpr_file, page_names in data['archives'].items():
      for page_name in page_names:
        self._page_name_to_wpr_file[page_name] = wpr_file

    # When set, WprFilePathForPage returns this path for every page.
    self.temp_target_wpr_file_path = None

  def _DownloadArchives(self, wpr_files):
    """Calls cloud_storage.GetIfChanged for each archive, logging auth errors.

    Credential and permission errors are logged instead of raised.

    Args:
      wpr_files: Iterable of .wpr file names relative to the metadata file.
    """
    # TODO(tbarzic): Remove this once http://crbug.com/351143 is diagnosed.
    log_cloud_storage_exception = True
    for wpr_file in wpr_files:
      archive_path = self._WprFileNameToPath(wpr_file)
      try:
        cloud_storage.GetIfChanged(archive_path)
      except (cloud_storage.CredentialsError,
              cloud_storage.PermissionError) as e:
        if os.path.exists(archive_path):
          # If the archive exists, assume the user recorded their own and
          # only warn.
          logging.warning('Need credentials to update WPR archive: %s',
                          archive_path)
        elif log_cloud_storage_exception:
          # Log access errors only once, as they should stay the same in
          # other iterations.
          log_cloud_storage_exception = False
          logging.warning('Error getting WPR archive %s: %s ',
                          archive_path, e)
          # NOTE(review): the call that wraps this message was not visible in
          # the listing; logging.info is assumed.
          logging.info('HOME: "%s"; USER: "%s"',
                       os.environ.get('HOME', ''),
                       os.environ.get('USER', ''))

  @classmethod
  def FromFile(cls, file_path, ignore_archive=False):
    """Creates an instance from the metadata file at file_path.

    Args:
      file_path: Path of the JSON metadata file.
      ignore_archive: Forwarded to the constructor.

    Returns:
      An instance built from the file's contents, or one with no archives if
      the file does not exist.
    """
    if os.path.exists(file_path):
      # Read and close the file before constructing, so it is not held open
      # while the constructor runs.
      with open(file_path, 'r') as f:
        data = json.load(f)
      return cls(file_path, data, ignore_archive=ignore_archive)
    # TODO(tbarzic): Remove this once http://crbug.com/351143 is diagnosed.
    logging.warning('Page set archives not found: %s', file_path)
    return cls(file_path, {'archives': {}}, ignore_archive=ignore_archive)

  def WprFilePathForPage(self, page):
    """Returns the absolute .wpr path for page, or None if it has no archive.

    While a temporary recording target is set, that path is returned for
    every page.

    Args:
      page: Object exposing `display_name` and `url` attributes.
    """
    if self.temp_target_wpr_file_path:
      return self.temp_target_wpr_file_path
    wpr_file = self._page_name_to_wpr_file.get(page.display_name, None)
    if wpr_file is None:
      # Some old page sets always use the URL to identify a page rather than
      # the display_name, so try to look for that.
      wpr_file = self._page_name_to_wpr_file.get(page.url, None)
    if wpr_file:
      return self._WprFileNameToPath(wpr_file)
    return None

  def AddNewTemporaryRecording(self, temp_wpr_file_path=None):
    """Sets the temporary recording target, creating a temp file if needed.

    Args:
      temp_wpr_file_path: Path to record into. If None, a new file is created
          with tempfile.mkstemp().
    """
    if temp_wpr_file_path is None:
      temp_wpr_file_handle, temp_wpr_file_path = tempfile.mkstemp()
      # Only the path is needed; close the descriptor mkstemp() opened.
      os.close(temp_wpr_file_handle)
    self.temp_target_wpr_file_path = temp_wpr_file_path

  def AddRecordedPages(self, pages):
    """Moves the temporary recording to a new archive and assigns pages to it.

    Args:
      pages: Pages (objects with a `display_name` attribute) contained in the
          temporary recording. If empty, the temporary file is deleted.
    """
    # NOTE(review): this guard and its early return were reconstructed; only
    # the os.remove() call was visible in the listing.
    if not pages:
      os.remove(self.temp_target_wpr_file_path)
      return

    target_wpr_file, target_wpr_file_path = self._NextWprFileName()
    for page in pages:
      self._SetWprFileForPage(page.display_name, target_wpr_file)
    shutil.move(self.temp_target_wpr_file_path, target_wpr_file_path)

    # Update the hash file.
    with open(target_wpr_file_path + '.sha1', 'wb') as f:
      f.write(cloud_storage.CalculateHash(target_wpr_file_path))

    # NOTE(review): the _WriteToFile() call was reconstructed; it is the only
    # plausible place the updated metadata gets written.
    self._WriteToFile()
    self._DeleteAbandonedWprFiles()

  def _DeleteAbandonedWprFiles(self):
    """Drops archives that no longer hold any page and deletes their files."""
    # Update the metadata so that the abandoned wpr files don't have empty
    # page name arrays.
    for wpr_file in self._AbandonedWprFiles():
      del self._wpr_file_to_page_names[wpr_file]
      # Don't fail if we're unable to delete some of the files.
      wpr_file_path = self._WprFileNameToPath(wpr_file)
      try:
        os.remove(wpr_file_path)
      # NOTE(review): the except clause was not visible in the listing;
      # OSError is what os.remove() raises for filesystem failures.
      except OSError:
        logging.warning('Failed to delete file: %s', wpr_file_path)

  def _AbandonedWprFiles(self):
    """Returns the list of .wpr file names that have no pages left."""
    # .items() rather than .iteritems() so this works on Python 2 and 3.
    return [wpr_file
            for wpr_file, page_names in self._wpr_file_to_page_names.items()
            if not page_names]

  def _WriteToFile(self):
    """Writes the metadata into the file passed as constructor parameter."""
    metadata = dict()
    metadata['description'] = (
        'Describes the Web Page Replay archives for a page set. Don\'t edit by '
        'hand! Use record_wpr for updating.')
    metadata['archives'] = self._wpr_file_to_page_names.copy()
    # Don't write data for abandoned archives.
    for wpr_file in self._AbandonedWprFiles():
      del metadata['archives'][wpr_file]

    with open(self._file_path, 'w') as f:
      json.dump(metadata, f, indent=4)

  def _WprFileNameToPath(self, wpr_file):
    """Returns the absolute path of wpr_file inside the metadata directory."""
    return os.path.abspath(os.path.join(self._base_dir, wpr_file))

  def _NextWprFileName(self):
    """Creates a new file name for a wpr archive file.

    Returns:
      A (file_name, absolute_path) tuple whose number is one above the highest
      number among the known archives.

    Raises:
      Exception: If a known archive name is not of the form
          "<base>_<number>.wpr", or the archives do not share one base.
    """
    # The names are of the format "some_thing_number.wpr". Read the numbers.
    # NOTE(review): the initial values were not visible in the listing; -1 is
    # assumed so that the first archive is numbered 000.
    highest_number = -1
    base = None
    for wpr_file in self._wpr_file_to_page_names:
      match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', wpr_file)
      if not match:
        raise Exception('Illegal wpr file name ' + wpr_file)
      highest_number = max(int(match.group('NUMBER')), highest_number)
      if base and match.group('BASE') != base:
        raise Exception('Illegal wpr file name ' + wpr_file +
                        ', doesn\'t begin with ' + base)
      base = match.group('BASE')
    if not base:
      # If we're creating a completely new info file, use the base name of the
      # metadata file.
      base = os.path.splitext(os.path.basename(self._file_path))[0]
    new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
    return new_filename, self._WprFileNameToPath(new_filename)

  def _SetWprFileForPage(self, page_name, wpr_file):
    """For modifying the metadata when we're going to record a new archive.

    Removes page_name from the archive it previously belonged to (if any) and
    registers it under wpr_file in both maps.
    """
    old_wpr_file = self._page_name_to_wpr_file.get(page_name, None)
    if old_wpr_file:
      self._wpr_file_to_page_names[old_wpr_file].remove(page_name)
    self._page_name_to_wpr_file[page_name] = wpr_file
    if wpr_file not in self._wpr_file_to_page_names:
      self._wpr_file_to_page_names[wpr_file] = []
    self._wpr_file_to_page_names[wpr_file].append(page_name)