# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import csv
import json
import logging
import os

from telemetry.page import cloud_storage
from telemetry.page import page as page_module
from telemetry.page import page_set_archive_info
class PageSet(object):
  """A collection of Page objects plus shared archive/serving configuration.

  NOTE(review): several lines of this listing appear to be elided (loop
  bodies, `else:`/`try:` introducers, `self.pages = []`); the hedged
  comments below mark each suspected gap -- confirm against the full file.
  """

  def __init__(self, file_path='', attributes=None):
    # Path of the page-set definition file this object was built from.
    self.file_path = file_path

    # These attributes can be set dynamically by the page set.
    self.archive_data_file = ''
    self.credentials_path = None
    self.user_agent_type = None
    self.make_javascript_deterministic = True
    self.navigate_steps = {'action': 'navigate'}

    # Copy keys from the attributes dict onto this instance.
    # NOTE(review): the loop body (presumably `setattr(self, k, v)`) and a
    # guarding `if attributes:` are not visible in this listing -- confirm.
    for k, v in attributes.iteritems():

    # Create a PageSetArchiveInfo object.
    if self.archive_data_file:
      self.wpr_archive_info = page_set_archive_info.PageSetArchiveInfo.FromFile(
          os.path.join(self._base_dir, self.archive_data_file))
      # NOTE(review): an `else:` introducing the next line appears elided.
      self.wpr_archive_info = None

    # Create a Page object for every page.
    # NOTE(review): `self.pages = []` is not visible above this loop,
    # though `self.pages.append` below requires it -- confirm.
    if attributes and 'pages' in attributes:
      for page_attributes in attributes['pages']:
        # 'url' is popped so remaining attributes apply to the Page itself.
        url = page_attributes.pop('url')
        page = page_module.Page(
            url, self, attributes=page_attributes, base_dir=self._base_dir)
        self.pages.append(page)

    # Prepend _base_dir to our serving dirs.
    # Always use realpath to ensure no duplicates in set.
    self.serving_dirs = set()
    if attributes and 'serving_dirs' in attributes:
      if not isinstance(attributes['serving_dirs'], list):
        raise ValueError('serving_dirs must be a list.')
      for serving_dir in attributes['serving_dirs']:
        self.serving_dirs.add(
            os.path.realpath(os.path.join(self._base_dir, serving_dir)))

    # Attempt to download the credentials file.
    if self.credentials_path:
      # NOTE(review): a `try:` introducing the call below appears elided
      # (the `except` clause follows with no visible opener).
      cloud_storage.GetIfChanged(
          os.path.join(self._base_dir, self.credentials_path))
      # Best-effort: a missing/unreadable credentials file is only a warning.
      except (cloud_storage.CredentialsError,
              cloud_storage.PermissionError):
        logging.warning('Cannot retrieve credential file: %s',
                        self.credentials_path)

    # Scan every serving directory for .sha1 files
    # and download them from Cloud Storage. Assume all data is public.
    all_serving_dirs = self.serving_dirs.copy()
    # Add individual page dirs to all serving dirs.
    # NOTE(review): the `for page in ...:` header (and any guard) for this
    # add() call is not visible in this listing -- confirm.
    all_serving_dirs.add(page.serving_dir)
    # Scan all serving dirs.
    for serving_dir in all_serving_dirs:
      # Refuse to walk (and later serve) the filesystem root.
      if os.path.splitdrive(serving_dir)[1] == '/':
        raise ValueError('Trying to serve root directory from HTTP server.')
      for dirpath, _, filenames in os.walk(serving_dir):
        for filename in filenames:
          # `path` is the filename with its extension stripped, so a
          # foo.sha1 stub yields the cloud-storage path for foo.
          path, extension = os.path.splitext(
              os.path.join(dirpath, filename))
          # NOTE(review): a `continue` under this condition appears elided.
          if extension != '.sha1':
          cloud_storage.GetIfChanged(path)
88 def FromFile(cls, file_path):
89 with open(file_path, 'r') as f:
91 data = json.loads(contents)
92 return cls.FromDict(data, file_path)
95 def FromDict(cls, data, file_path):
96 return cls(file_path, data)
    # NOTE(review): the `@property` decorator and `def _base_dir(self):`
    # header expected above these lines are not visible in this listing.
    # Base directory used to resolve relative paths (archive data,
    # credentials, serving dirs): the page-set file's directory when
    # file_path names a file, otherwise file_path itself.
    if os.path.isfile(self.file_path):
      return os.path.dirname(self.file_path)
    # NOTE(review): an `else:` (or `elif`) branch may be elided before this.
    return self.file_path
  def ContainsOnlyFileURLs(self):
    """Whether every page in this set is served from a local file URL.

    NOTE(review): the loop body and the final return are elided in this
    listing (presumably an early `return False` for any non-file page,
    then `return True`) -- confirm against the full file.
    """
    for page in self.pages:
  def ReorderPageSet(self, results_file):
    """Reorders this page set based on the results of a past run."""
    # NOTE(review): initialization of `page_set_dict` (a url -> Page map)
    # is not visible in this listing -- confirm.
    for page in self.pages:
      page_set_dict[page.url] = page

    # NOTE(review): initialization of `pages` (the reordered list) is not
    # visible in this listing -- confirm.
    # Open in binary mode per Python 2 csv module requirements.
    with open(results_file, 'rb') as csv_file:
      csv_reader = csv.reader(csv_file)
      # First row is the header (Python 2 iterator .next()).
      csv_header = csv_reader.next()

      if 'url' not in csv_header:
        raise Exception('Unusable results_file.')

      url_index = csv_header.index('url')

      # Rebuild the page order from the order urls appear in the results.
      for csv_row in csv_reader:
        if csv_row[url_index] in page_set_dict:
          pages.append(page_set_dict[csv_row[url_index]])
        # NOTE(review): an `else:` introducing the raise below appears
        # elided, and the method's final statement(s) (returning or
        # storing `pages`) are not visible -- confirm.
        raise Exception('Unusable results_file.')
  def WprFilePathForPage(self, page):
    """Path of the WPR archive for `page`, looked up via the archive info.

    NOTE(review): the body of the `if` below is elided in this listing
    (presumably `return None` when no archive info is configured).
    """
    if not self.wpr_archive_info:
    return self.wpr_archive_info.WprFilePathForPage(page)
    # NOTE(review): the `def __iter__(self):` header expected above this
    # line is not visible in this listing. Iterates the backing page list.
    return self.pages.__iter__()
    # NOTE(review): the `def __len__(self):` header expected above this
    # line is not visible in this listing. Number of pages in the set.
    return len(self.pages)
146 def __getitem__(self, key):
147 return self.pages[key]
149 def __setitem__(self, key, value):
150 self.pages[key] = value