1 # Copyright (C) 2011, Google Inc. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import copy
import logging

from webkitpy.common.memoized import memoized
# Module-level logger; BaselineOptimizer reports its progress through it.
_log = logging.getLogger(__name__)
# FIXME: Should this function be somewhere more general?
def _invert_dictionary(dictionary):
    """Group the keys of `dictionary` by the value they map to.

    Args:
        dictionary: a mapping whose values are hashable.

    Returns:
        A new dict mapping each distinct value to the list of keys that had
        that value, in the iteration order of `dictionary`.
    """
    inverted_dictionary = {}
    for key, value in dictionary.items():
        # setdefault keeps every key that shares a value instead of letting a
        # later key overwrite the list built so far.
        inverted_dictionary.setdefault(value, []).append(key)
    return inverted_dictionary
class BaselineOptimizer(object):
    """Removes redundant expected-result files ("baselines") for a test.

    Each port looks for a baseline along an ordered list of directories (its
    fallback path) that ends at a shared root directory. A baseline is
    redundant when the next baseline found later on the same fallback path has
    identical content. The optimizer deletes such files, and promotes a
    baseline to the root when every directory immediately before the root
    holds the same result.

    Results are compared via the SHA-1 values returned by the host filesystem.
    """

    ROOT_LAYOUT_TESTS_DIRECTORY = 'LayoutTests'

    # Label returned by _platform() for paths that are not under <root>/platform/.
    _GENERIC_PLATFORM = '(generic)'

    def __init__(self, host, port, port_names, skip_scm_commands):
        """Sets up the optimizer.

        Args:
            host: object providing `filesystem`, `scm()` and `port_factory`.
            port: default port; used for virtual-suite lookups and to find
                the checkout and layout-tests directories.
            port_names: names of the ports whose fallback paths are considered.
            skip_scm_commands: when true, SCM adds/deletes are not executed;
                the affected paths are recorded and returned by optimize().
        """
        self._filesystem = host.filesystem
        self._skip_scm_commands = skip_scm_commands
        self._files_to_delete = []
        self._files_to_add = []
        self._scm = host.scm()
        self._default_port = port
        # Must exist before it is filled in below.
        self._ports = {}
        for port_name in port_names:
            self._ports[port_name] = host.port_factory.get(port_name)

        self._webkit_base = port.webkit_base()
        self._layout_tests_dir = port.layout_tests_dir()

        # Only used by unittests.
        self.new_results_by_directory = []

    def _baseline_root(self, baseline_name):
        """Returns the root directory for `baseline_name`, relative to the checkout.

        For a baseline in a virtual suite this is the suite's directory under
        the layout tests root; otherwise it is the layout tests root itself.
        """
        virtual_suite = self._virtual_suite(baseline_name)
        if virtual_suite:
            return self._filesystem.join(self.ROOT_LAYOUT_TESTS_DIRECTORY, virtual_suite.name)
        return self.ROOT_LAYOUT_TESTS_DIRECTORY

    def _baseline_search_path(self, port, baseline_name):
        """Returns `port`'s baseline search path (virtual-aware) for `baseline_name`."""
        virtual_suite = self._virtual_suite(baseline_name)
        if virtual_suite:
            return port.virtual_baseline_search_path(baseline_name)
        return port.baseline_search_path()

    def _virtual_suite(self, baseline_name):
        """Returns the default port's virtual suite for `baseline_name`; falsy if there is none."""
        return self._default_port.lookup_virtual_suite(baseline_name)

    def _virtual_base(self, baseline_name):
        """Returns the default port's non-virtual name for `baseline_name`; falsy if there is none."""
        return self._default_port.lookup_virtual_test_base(baseline_name)

    def _relative_baseline_search_paths(self, port, baseline_name):
        """Returns `port`'s fallback path relative to the checkout, ending with the baseline root."""
        baseline_search_path = self._baseline_search_path(port, baseline_name)
        baseline_root = self._baseline_root(baseline_name)
        relative_paths = [self._filesystem.relpath(path, self._webkit_base) for path in baseline_search_path]
        return relative_paths + [baseline_root]

    def _join_directory(self, directory, baseline_name):
        """Returns the absolute path of `baseline_name` inside fallback directory `directory`."""
        # This code is complicated because both the directory name and the baseline_name have the virtual
        # test suite in the name and the virtual baseline name is not a strict superset of the non-virtual name.
        # For example, virtual/gpu/fast/canvas/foo-expected.png corresponds to fast/canvas/foo-expected.png and
        # the baseline directories are like platform/mac/virtual/gpu/fast/canvas. So, to get the path
        # to the baseline in the platform directory, we need to append just foo-expected.png to the directory.
        virtual_suite = self._virtual_suite(baseline_name)
        if virtual_suite:
            # Drop the suite name and the separator that follows it.
            baseline_name_without_virtual = baseline_name[len(virtual_suite.name) + 1:]
        else:
            baseline_name_without_virtual = baseline_name
        return self._filesystem.join(self._scm.checkout_root, directory, baseline_name_without_virtual)

    def read_results_by_directory(self, baseline_name):
        """Returns {relative directory: sha1} for every fallback directory holding `baseline_name`.

        Directories from the fallback paths of all configured ports are
        examined; directories without the file are omitted. Returns an empty
        dict when no ports are configured.
        """
        # Accumulate with set.update rather than reduce(): reduce is not a
        # builtin on Python 3 and fails on an empty sequence.
        directories = set()
        for port in self._ports.values():
            directories.update(self._relative_baseline_search_paths(port, baseline_name))

        results_by_directory = {}
        for directory in directories:
            path = self._join_directory(directory, baseline_name)
            if self._filesystem.exists(path):
                results_by_directory[directory] = self._filesystem.sha1(path)
        return results_by_directory

    def _results_by_port_name(self, results_by_directory, baseline_name):
        """Returns {port name: result} using the first directory on each port's fallback path that has a result.

        Ports for which no directory has a result are omitted.
        """
        results_by_port_name = {}
        for port_name, port in self._ports.items():
            for directory in self._relative_baseline_search_paths(port, baseline_name):
                if directory in results_by_directory:
                    results_by_port_name[port_name] = results_by_directory[directory]
                    # The earliest directory in the fallback order wins.
                    break
        return results_by_port_name

    def _directories_immediately_preceding_root(self, baseline_name):
        """Returns the set of relative directories that are last on each port's search path."""
        directories = set()
        for port in self._ports.values():
            directory = self._filesystem.relpath(self._baseline_search_path(port, baseline_name)[-1], self._webkit_base)
            directories.add(directory)
        return directories

    def _optimize_result_for_root(self, new_results_by_directory, baseline_name):
        """Promotes a shared result to the root, or drops an unused root baseline.

        Mutates `new_results_by_directory` in place; returns nothing.
        """
        # The root directory (i.e. LayoutTests) is the only one that doesn't correspond
        # to a specific platform. As such, it's the only one where the baseline in fallback directories
        # immediately before it can be promoted up, i.e. if win and mac
        # have the same baseline, then it can be promoted up to be the LayoutTests baseline.
        # All other baselines can only be removed if they're redundant with a baseline earlier
        # in the fallback order. They can never be promoted up.
        directories_immediately_preceding_root = self._directories_immediately_preceding_root(baseline_name)

        # With no ports there is nothing to compare; without this guard a
        # None result would be stored for the root below.
        if not directories_immediately_preceding_root:
            return

        shared_result = None
        root_baseline_unused = False
        for directory in directories_immediately_preceding_root:
            this_result = new_results_by_directory.get(directory)

            # If any of these directories don't have a baseline, there's no optimization we can do.
            if not this_result:
                return

            if not shared_result:
                shared_result = this_result
            elif shared_result != this_result:
                root_baseline_unused = True

        baseline_root = self._baseline_root(baseline_name)

        # The root baseline is unused if all the directories immediately preceding the root
        # have a baseline, but have different baselines, so the baselines can't be promoted up.
        if root_baseline_unused:
            if baseline_root in new_results_by_directory:
                del new_results_by_directory[baseline_root]
            return

        new_results_by_directory[baseline_root] = shared_result
        for directory in directories_immediately_preceding_root:
            del new_results_by_directory[directory]

    def _find_optimal_result_placement(self, baseline_name):
        """Returns (current, optimized) {directory: result} dicts for `baseline_name`."""
        results_by_directory = self.read_results_by_directory(baseline_name)
        results_by_port_name = self._results_by_port_name(results_by_directory, baseline_name)
        port_names_by_result = _invert_dictionary(results_by_port_name)

        new_results_by_directory = self._remove_redundant_results(results_by_directory, results_by_port_name, port_names_by_result, baseline_name)
        self._optimize_result_for_root(new_results_by_directory, baseline_name)

        return results_by_directory, new_results_by_directory

    def _remove_redundant_results(self, results_by_directory, results_by_port_name, port_names_by_result, baseline_name):
        """Returns a copy of `results_by_directory` without results shadowing an identical later one.

        `results_by_directory` is not modified. `port_names_by_result` is
        accepted for interface compatibility but is not used here.
        """
        new_results_by_directory = dict(results_by_directory)
        for port_name, port in self._ports.items():
            current_result = results_by_port_name.get(port_name)

            # This happens if we're missing baselines for a port.
            if not current_result:
                continue

            fallback_path = self._relative_baseline_search_paths(port, baseline_name)
            current_index, current_directory = self._find_in_fallbackpath(fallback_path, current_result, new_results_by_directory)
            for index in range(current_index + 1, len(fallback_path)):
                new_directory = fallback_path[index]
                if new_directory not in new_results_by_directory:
                    # No result for this baseline in this directory.
                    continue
                elif new_results_by_directory[new_directory] == current_result:
                    # Result for new_directory are redundant with the result earlier in the fallback order.
                    if current_directory in new_results_by_directory:
                        del new_results_by_directory[current_directory]
                else:
                    # The new_directory contains a different result, so stop trying to push results up.
                    break

        return new_results_by_directory

    def _find_in_fallbackpath(self, fallback_path, current_result, results_by_directory):
        """Returns (index, directory) of the first fallback directory whose result is `current_result`.

        Raises:
            AssertionError: if no directory on `fallback_path` has that result.
        """
        for index, directory in enumerate(fallback_path):
            if directory in results_by_directory and (results_by_directory[directory] == current_result):
                return index, directory
        # Raised explicitly: an `assert` statement is stripped under -O, after
        # which callers would fail confusingly while unpacking None.
        raise AssertionError("result %s not found in fallback_path %s, %s" % (current_result, fallback_path, results_by_directory))

    def _platform(self, filename):
        """Returns the platform directory name that `filename` lives under.

        `filename` may be relative to the checkout or absolute. Paths that are
        not under <root>/platform/ yield the generic label.
        """
        sep = self._filesystem.sep
        relative_prefix = self.ROOT_LAYOUT_TESTS_DIRECTORY + sep + 'platform' + sep
        absolute_prefix = self._filesystem.join(self._scm.checkout_root, relative_prefix)
        for prefix in (relative_prefix, absolute_prefix):
            if filename.startswith(prefix):
                # Slice off exactly the leading prefix; str.replace would also
                # strip any later occurrence of the same text.
                return filename[len(prefix):].split(sep)[0]
        return self._GENERIC_PLATFORM

    def _log_platforms(self, file_names):
        """Logs, in sorted order, the platform label of every path in `file_names`."""
        for platform_dir in sorted(self._platform(filename) for filename in file_names):
            _log.debug(" " + platform_dir)

    def _read_baseline_data(self, baseline_name, results_by_directory):
        """Returns {result: file contents}, reading one file per distinct result."""
        data_for_result = {}
        for directory, result in results_by_directory.items():
            if result not in data_for_result:
                source = self._join_directory(directory, baseline_name)
                data_for_result[result] = self._filesystem.read_binary_file(source)
        return data_for_result

    def _delete_stale_baselines(self, baseline_name, results_by_directory, new_results_by_directory):
        """Deletes every existing baseline whose directory no longer keeps the same result."""
        scm_files = []
        fs_files = []
        for directory, result in results_by_directory.items():
            if new_results_by_directory.get(directory) == result:
                continue
            file_name = self._join_directory(directory, baseline_name)
            # Files known to the SCM are removed through it; others are
            # removed straight from the filesystem.
            if self._scm.exists(file_name):
                scm_files.append(file_name)
            else:
                fs_files.append(file_name)

        if not scm_files and not fs_files:
            _log.debug(" (Nothing to delete)")
            return

        if scm_files:
            _log.debug(" Deleting (SCM):")
            self._log_platforms(scm_files)
            if self._skip_scm_commands:
                self._files_to_delete.extend(scm_files)
            else:
                self._scm.delete_list(scm_files)
        if fs_files:
            _log.debug(" Deleting (file system):")
            self._log_platforms(fs_files)
            for filename in fs_files:
                self._filesystem.remove(filename)

    def _write_new_baselines(self, baseline_name, results_by_directory, new_results_by_directory, data_for_result):
        """Writes every baseline whose directory gains or changes its result, then adds it to the SCM."""
        file_names = []
        for directory, result in new_results_by_directory.items():
            if results_by_directory.get(directory) != result:
                destination = self._join_directory(directory, baseline_name)
                self._filesystem.maybe_make_directory(self._filesystem.split(destination)[0])
                self._filesystem.write_binary_file(destination, data_for_result[result])
                file_names.append(destination)

        if not file_names:
            _log.debug(" (Nothing to add)")
            return

        _log.debug(" Adding:")
        self._log_platforms(file_names)
        if self._skip_scm_commands:
            # Have adds win over deletes.
            self._files_to_delete = list(set(self._files_to_delete) - set(file_names))
            self._files_to_add.extend(file_names)
        else:
            self._scm.add_list(file_names)

    def _move_baselines(self, baseline_name, results_by_directory, new_results_by_directory):
        """Rewrites the files on disk so they match `new_results_by_directory`."""
        # Read all contents before deleting anything: a result being moved may
        # only exist in directories that are about to be removed.
        data_for_result = self._read_baseline_data(baseline_name, results_by_directory)
        self._delete_stale_baselines(baseline_name, results_by_directory, new_results_by_directory)
        self._write_new_baselines(baseline_name, results_by_directory, new_results_by_directory, data_for_result)

    def write_by_directory(self, results_by_directory, writer, indent):
        """Calls `writer` once per directory (sorted) with "<indent><platform>: <first 6 chars of result>"."""
        for path in sorted(results_by_directory):
            writer("%s%s: %s" % (indent, self._platform(path), results_by_directory[path][0:6]))

    def _optimize_subtree(self, baseline_name):
        """Optimizes the baselines of `baseline_name` along one fallback tree.

        Returns:
            False if the computed placement would change the result some port
            sees (nothing is modified in that case); True otherwise.
        """
        basename = self._filesystem.basename(baseline_name)
        results_by_directory, new_results_by_directory = self._find_optimal_result_placement(baseline_name)

        if new_results_by_directory == results_by_directory:
            if new_results_by_directory:
                _log.debug(" %s: (already optimal)", basename)
                self.write_by_directory(results_by_directory, _log.debug, " ")
            else:
                _log.debug(" %s: (no baselines found)", basename)
            # This is just used for unittests. Intentionally set it to the old data if we don't modify anything.
            self.new_results_by_directory.append(results_by_directory)
            return True

        if self._results_by_port_name(results_by_directory, baseline_name) != self._results_by_port_name(new_results_by_directory, baseline_name):
            # This really should never happen. Just a sanity check to make sure the script fails in the case of bugs
            # instead of committing incorrect baselines.
            _log.error(" %s: optimization failed", basename)
            self.write_by_directory(results_by_directory, _log.warning, " ")
            return False

        _log.debug(" %s:", basename)
        _log.debug(" Before: ")
        self.write_by_directory(results_by_directory, _log.debug, " ")
        _log.debug(" After: ")
        self.write_by_directory(new_results_by_directory, _log.debug, " ")

        self._move_baselines(baseline_name, results_by_directory, new_results_by_directory)
        return True

    def _optimize_virtual_root(self, baseline_name, non_virtual_baseline_name):
        """Deletes the virtual root baseline when every port's non-virtual result is identical to it."""
        virtual_root_expected_baseline_path = self._filesystem.join(self._layout_tests_dir, baseline_name)
        if not self._filesystem.exists(virtual_root_expected_baseline_path):
            return
        root_sha1 = self._filesystem.sha1(virtual_root_expected_baseline_path)

        results_by_directory = self.read_results_by_directory(non_virtual_baseline_name)
        # See if all the immediate predecessors of the virtual root have the same expected result.
        for port in self._ports.values():
            directories = self._relative_baseline_search_paths(port, non_virtual_baseline_name)
            for directory in directories:
                if directory not in results_by_directory:
                    continue
                if results_by_directory[directory] != root_sha1:
                    # This port sees a different non-virtual result, so the
                    # virtual root baseline is still needed.
                    return
                # Only the first result found on each port's path matters.
                break

        _log.debug("Deleting redundant virtual root expected result.")
        if self._skip_scm_commands and virtual_root_expected_baseline_path in self._files_to_add:
            self._files_to_add.remove(virtual_root_expected_baseline_path)
        if self._scm.exists(virtual_root_expected_baseline_path):
            _log.debug(" Deleting (SCM): " + virtual_root_expected_baseline_path)
            if self._skip_scm_commands:
                self._files_to_delete.append(virtual_root_expected_baseline_path)
            else:
                self._scm.delete(virtual_root_expected_baseline_path)
        else:
            _log.debug(" Deleting (file system): " + virtual_root_expected_baseline_path)
            self._filesystem.remove(virtual_root_expected_baseline_path)

    def optimize(self, baseline_name):
        """Optimizes all baselines for `baseline_name`.

        Returns:
            A tuple (result, files_to_delete, files_to_add). `result` combines
            the outcomes of the subtree passes; the two lists hold the paths
            recorded instead of executed when SCM commands are skipped.
        """
        # The virtual fallback path is the virtual-specific path with the non-virtual path tacked on to the bottom.
        # See https://docs.google.com/a/chromium.org/drawings/d/1eGdsIKzJ2dxDDBbUaIABrN4aMLD1bqJTfyxNGZsTdmg/edit for
        # a visual representation of this.
        #
        # So, we can optimize the virtual path, then the virtual root and then the regular path.

        self._files_to_delete = []
        self._files_to_add = []
        _log.debug("Optimizing regular fallback path.")
        result = self._optimize_subtree(baseline_name)
        non_virtual_baseline_name = self._virtual_base(baseline_name)
        if not non_virtual_baseline_name:
            return result, self._files_to_delete, self._files_to_add

        self._optimize_virtual_root(baseline_name, non_virtual_baseline_name)

        _log.debug("Optimizing non-virtual fallback path.")
        # NOTE(review): `|=` reports success when either pass succeeded; confirm
        # that a failure in one pass is meant to be masked by the other.
        result |= self._optimize_subtree(non_virtual_baseline_name)
        return result, self._files_to_delete, self._files_to_add