Upstream version 10.39.225.0
[platform/framework/web/crosswalk.git] / src / third_party / WebKit / Tools / Scripts / webkitpy / common / checkout / baselineoptimizer.py
1 # Copyright (C) 2011, Google Inc. All rights reserved.
2 #
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions are
5 # met:
6 #
7 #     * Redistributions of source code must retain the above copyright
8 # notice, this list of conditions and the following disclaimer.
9 #     * Redistributions in binary form must reproduce the above
10 # copyright notice, this list of conditions and the following disclaimer
11 # in the documentation and/or other materials provided with the
12 # distribution.
13 #     * Neither the name of Google Inc. nor the names of its
14 # contributors may be used to endorse or promote products derived from
15 # this software without specific prior written permission.
16 #
17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 import copy
30 import logging
31
32 from webkitpy.common.memoized import memoized
33
34 _log = logging.getLogger(__name__)
35
36
37 # FIXME: Should this function be somewhere more general?
38 def _invert_dictionary(dictionary):
39     inverted_dictionary = {}
40     for key, value in dictionary.items():
41         if inverted_dictionary.get(value):
42             inverted_dictionary[value].append(key)
43         else:
44             inverted_dictionary[value] = [key]
45     return inverted_dictionary
46
47
class BaselineOptimizer(object):
    """Consolidates redundant layout-test baselines across platform directories.

    A baseline (an -expected.* file) may exist in several directories along a
    port's fallback search path. This class computes the minimal set of
    baseline files that produces the same result for every port, then applies
    the delta by deleting/adding files either through the SCM or (when
    skip_scm_commands is set) by recording the paths for the caller.
    """

    ROOT_LAYOUT_TESTS_DIRECTORY = 'LayoutTests'

    def __init__(self, host, port, port_names, skip_scm_commands):
        """
        Args:
            host: supplies the filesystem, the SCM wrapper, and the port factory.
            port: the default port, used for virtual-suite lookups and for
                locating the webkit base / LayoutTests directories.
            port_names: names of every port whose fallback path participates
                in the optimization.
            skip_scm_commands: when True, SCM adds/deletes are accumulated in
                self._files_to_add / self._files_to_delete instead of being
                executed immediately.
        """
        self._filesystem = host.filesystem
        self._skip_scm_commands = skip_scm_commands
        self._files_to_delete = []
        self._files_to_add = []
        self._scm = host.scm()
        self._default_port = port
        self._ports = {}
        for port_name in port_names:
            self._ports[port_name] = host.port_factory.get(port_name)

        self._webkit_base = port.webkit_base()
        self._layout_tests_dir = port.layout_tests_dir()

        # Only used by unittests.
        self.new_results_by_directory = []

    def _baseline_root(self, baseline_name):
        """Return the fallback-root directory for this baseline: LayoutTests
        itself, or LayoutTests/<suite name> for a virtual baseline."""
        virtual_suite = self._virtual_suite(baseline_name)
        if virtual_suite:
            return self._filesystem.join(self.ROOT_LAYOUT_TESTS_DIRECTORY, virtual_suite.name)
        return self.ROOT_LAYOUT_TESTS_DIRECTORY

    def _baseline_search_path(self, port, baseline_name):
        """Return the port's baseline search path, using the virtual variant
        when the baseline belongs to a virtual test suite."""
        virtual_suite = self._virtual_suite(baseline_name)
        if virtual_suite:
            return port.virtual_baseline_search_path(baseline_name)
        return port.baseline_search_path()

    def _virtual_suite(self, baseline_name):
        """Return the virtual suite containing baseline_name, or None."""
        return self._default_port.lookup_virtual_suite(baseline_name)

    def _virtual_base(self, baseline_name):
        """Return the non-virtual name backing a virtual baseline, or None."""
        return self._default_port.lookup_virtual_test_base(baseline_name)

    def _relative_baseline_search_paths(self, port, baseline_name):
        """Return the port's search path as checkout-relative directories,
        with the fallback root appended as the final entry."""
        baseline_search_path = self._baseline_search_path(port, baseline_name)
        baseline_root = self._baseline_root(baseline_name)
        relative_paths = [self._filesystem.relpath(path, self._webkit_base) for path in baseline_search_path]
        return relative_paths + [baseline_root]

    def _join_directory(self, directory, baseline_name):
        """Return the absolute path of baseline_name inside directory.

        This code is complicated because both the directory name and the
        baseline_name have the virtual test suite in the name and the virtual
        baseline name is not a strict superset of the non-virtual name.
        For example, virtual/gpu/fast/canvas/foo-expected.png corresponds to
        fast/canvas/foo-expected.png and the baseline directories are like
        platform/mac/virtual/gpu/fast/canvas. So, to get the path to the
        baseline in the platform directory, we need to append just
        foo-expected.png to the directory.
        """
        virtual_suite = self._virtual_suite(baseline_name)
        if virtual_suite:
            # Strip "<suite name>/" off the front of the baseline name.
            baseline_name_without_virtual = baseline_name[len(virtual_suite.name) + 1:]
        else:
            baseline_name_without_virtual = baseline_name
        return self._filesystem.join(self._scm.checkout_root, directory, baseline_name_without_virtual)

    def read_results_by_directory(self, baseline_name):
        """Return a dict mapping each checkout-relative directory that holds
        this baseline to the SHA-1 of the file's contents."""
        results_by_directory = {}
        # set().union(*iterables) tolerates an empty port list and, unlike the
        # bare `reduce` builtin used previously, is portable to Python 3.
        directories = set().union(
            *(self._relative_baseline_search_paths(port, baseline_name) for port in self._ports.values()))

        for directory in directories:
            path = self._join_directory(directory, baseline_name)
            if self._filesystem.exists(path):
                results_by_directory[directory] = self._filesystem.sha1(path)
        return results_by_directory

    def _results_by_port_name(self, results_by_directory, baseline_name):
        """Return a dict mapping each port name to the SHA-1 of the baseline
        that port would actually use (the first hit along its fallback path)."""
        results_by_port_name = {}
        for port_name, port in self._ports.items():
            for directory in self._relative_baseline_search_paths(port, baseline_name):
                if directory in results_by_directory:
                    results_by_port_name[port_name] = results_by_directory[directory]
                    break
        return results_by_port_name

    @memoized
    def _directories_immediately_preceding_root(self, baseline_name):
        """Return the set of directories that each port consults last before
        falling through to the root (LayoutTests) directory."""
        directories = set()
        for port in self._ports.values():
            directory = self._filesystem.relpath(self._baseline_search_path(port, baseline_name)[-1], self._webkit_base)
            directories.add(directory)
        return directories

    def _optimize_result_for_root(self, new_results_by_directory, baseline_name):
        """Promote a shared baseline up to the root, or drop an unused root.

        The root directory (i.e. LayoutTests) is the only one that doesn't
        correspond to a specific platform. As such, it's the only one where
        the baseline in fallback directories immediately before it can be
        promoted up, i.e. if win and mac have the same baseline, then it can
        be promoted up to be the LayoutTests baseline. All other baselines can
        only be removed if they're redundant with a baseline earlier in the
        fallback order. They can never be promoted up.

        Mutates new_results_by_directory in place.
        """
        directories_immediately_preceding_root = self._directories_immediately_preceding_root(baseline_name)

        shared_result = None
        root_baseline_unused = False
        for directory in directories_immediately_preceding_root:
            this_result = new_results_by_directory.get(directory)

            # If any of these directories don't have a baseline, there's no optimization we can do.
            if not this_result:
                return

            if not shared_result:
                shared_result = this_result
            elif shared_result != this_result:
                root_baseline_unused = True

        baseline_root = self._baseline_root(baseline_name)

        # The root baseline is unused if all the directories immediately preceding the root
        # have a baseline, but have different baselines, so the baselines can't be promoted up.
        if root_baseline_unused:
            if baseline_root in new_results_by_directory:
                del new_results_by_directory[baseline_root]
            return

        # Every immediate predecessor shares the same result: move it to the root.
        new_results_by_directory[baseline_root] = shared_result
        for directory in directories_immediately_preceding_root:
            del new_results_by_directory[directory]

    def _find_optimal_result_placement(self, baseline_name):
        """Return (current placement, optimized placement) for baseline_name,
        both as directory -> SHA-1 dicts."""
        results_by_directory = self.read_results_by_directory(baseline_name)
        results_by_port_name = self._results_by_port_name(results_by_directory, baseline_name)
        port_names_by_result = _invert_dictionary(results_by_port_name)

        new_results_by_directory = self._remove_redundant_results(results_by_directory, results_by_port_name, port_names_by_result, baseline_name)
        self._optimize_result_for_root(new_results_by_directory, baseline_name)

        return results_by_directory, new_results_by_directory

    def _remove_redundant_results(self, results_by_directory, results_by_port_name, port_names_by_result, baseline_name):
        """Return a copy of results_by_directory with baselines removed when
        a later directory in some port's fallback path holds the same result."""
        new_results_by_directory = copy.copy(results_by_directory)
        for port_name, port in self._ports.items():
            current_result = results_by_port_name.get(port_name)

            # This happens if we're missing baselines for a port.
            if not current_result:
                continue

            fallback_path = self._relative_baseline_search_paths(port, baseline_name)
            current_index, current_directory = self._find_in_fallbackpath(fallback_path, current_result, new_results_by_directory)
            for index in range(current_index + 1, len(fallback_path)):
                new_directory = fallback_path[index]
                if new_directory not in new_results_by_directory:
                    # No result for this baseline in this directory.
                    continue
                elif new_results_by_directory[new_directory] == current_result:
                    # Result for new_directory are redundant with the result earlier in the fallback order.
                    if current_directory in new_results_by_directory:
                        del new_results_by_directory[current_directory]
                else:
                    # The new_directory contains a different result, so stop trying to push results up.
                    break

        return new_results_by_directory

    def _find_in_fallbackpath(self, fallback_path, current_result, results_by_directory):
        """Return (index, directory) of the first fallback-path entry whose
        recorded result matches current_result. Asserts if none matches."""
        for index, directory in enumerate(fallback_path):
            if directory in results_by_directory and (results_by_directory[directory] == current_result):
                return index, directory
        assert False, "result %s not found in fallback_path %s, %s" % (current_result, fallback_path, results_by_directory)

    def _platform(self, filename):
        """Return the platform name encoded in a baseline path (the component
        after LayoutTests/platform/), or '(generic)' for non-platform paths.
        Accepts both checkout-relative and absolute paths."""
        platform_dir = self.ROOT_LAYOUT_TESTS_DIRECTORY + self._filesystem.sep + 'platform' + self._filesystem.sep
        if filename.startswith(platform_dir):
            return filename.replace(platform_dir, '').split(self._filesystem.sep)[0]
        platform_dir = self._filesystem.join(self._scm.checkout_root, platform_dir)
        if filename.startswith(platform_dir):
            return filename.replace(platform_dir, '').split(self._filesystem.sep)[0]
        return '(generic)'

    def _move_baselines(self, baseline_name, results_by_directory, new_results_by_directory):
        """Apply the delta between the two placements: delete baselines that
        moved away and write baselines at their new locations, going through
        the SCM (or the pending add/delete lists) as configured."""
        # Snapshot the bytes of each distinct result before deleting anything,
        # so a deleted source can still be rewritten at its new location.
        data_for_result = {}
        for directory, result in results_by_directory.items():
            if result not in data_for_result:
                source = self._join_directory(directory, baseline_name)
                data_for_result[result] = self._filesystem.read_binary_file(source)

        scm_files = []
        fs_files = []
        for directory, result in results_by_directory.items():
            if new_results_by_directory.get(directory) != result:
                file_name = self._join_directory(directory, baseline_name)
                # Files unknown to the SCM must be removed via the filesystem.
                if self._scm.exists(file_name):
                    scm_files.append(file_name)
                else:
                    fs_files.append(file_name)

        if scm_files or fs_files:
            if scm_files:
                _log.debug("    Deleting (SCM):")
                for platform_dir in sorted(self._platform(filename) for filename in scm_files):
                    _log.debug("      " + platform_dir)
                if self._skip_scm_commands:
                    self._files_to_delete.extend(scm_files)
                else:
                    self._scm.delete_list(scm_files)
            if fs_files:
                _log.debug("    Deleting (file system):")
                for platform_dir in sorted(self._platform(filename) for filename in fs_files):
                    _log.debug("      " + platform_dir)
                for filename in fs_files:
                    self._filesystem.remove(filename)
        else:
            _log.debug("    (Nothing to delete)")

        file_names = []
        for directory, result in new_results_by_directory.items():
            if results_by_directory.get(directory) != result:
                destination = self._join_directory(directory, baseline_name)
                self._filesystem.maybe_make_directory(self._filesystem.split(destination)[0])
                self._filesystem.write_binary_file(destination, data_for_result[result])
                file_names.append(destination)

        if file_names:
            _log.debug("    Adding:")
            for platform_dir in sorted(self._platform(filename) for filename in file_names):
                _log.debug("      " + platform_dir)
            if self._skip_scm_commands:
                # Have adds win over deletes.
                self._files_to_delete = list(set(self._files_to_delete) - set(file_names))
                self._files_to_add.extend(file_names)
            else:
                self._scm.add_list(file_names)
        else:
            _log.debug("    (Nothing to add)")

    def write_by_directory(self, results_by_directory, writer, indent):
        """Log each directory's platform name and a short SHA-1 prefix."""
        for path in sorted(results_by_directory):
            writer("%s%s: %s" % (indent, self._platform(path), results_by_directory[path][0:6]))

    def _optimize_subtree(self, baseline_name):
        """Optimize one fallback tree for baseline_name.

        Returns True on success (including the already-optimal and no-baseline
        cases); False if the optimized placement would change some port's
        effective result, in which case nothing is moved.
        """
        basename = self._filesystem.basename(baseline_name)
        results_by_directory, new_results_by_directory = self._find_optimal_result_placement(baseline_name)

        if new_results_by_directory == results_by_directory:
            if new_results_by_directory:
                _log.debug("  %s: (already optimal)" % basename)
                self.write_by_directory(results_by_directory, _log.debug, "    ")
            else:
                _log.debug("  %s: (no baselines found)" % basename)
            # This is just used for unittests. Intentionally set it to the old data if we don't modify anything.
            self.new_results_by_directory.append(results_by_directory)
            return True

        if self._results_by_port_name(results_by_directory, baseline_name) != self._results_by_port_name(new_results_by_directory, baseline_name):
            # This really should never happen. Just a sanity check to make sure the script fails in the case of bugs
            # instead of committing incorrect baselines.
            _log.error("  %s: optimization failed" % basename)
            self.write_by_directory(results_by_directory, _log.warning, "      ")
            return False

        _log.debug("  %s:" % basename)
        _log.debug("    Before: ")
        self.write_by_directory(results_by_directory, _log.debug, "      ")
        _log.debug("    After: ")
        self.write_by_directory(new_results_by_directory, _log.debug, "      ")

        self._move_baselines(baseline_name, results_by_directory, new_results_by_directory)
        return True

    def _optimize_virtual_root(self, baseline_name, non_virtual_baseline_name):
        """Delete the virtual root baseline when it is redundant, i.e. when
        every port already reaches an identical result through the non-virtual
        fallback path."""
        virtual_root_expected_baseline_path = self._filesystem.join(self._layout_tests_dir, baseline_name)
        if not self._filesystem.exists(virtual_root_expected_baseline_path):
            return
        root_sha1 = self._filesystem.sha1(virtual_root_expected_baseline_path)

        results_by_directory = self.read_results_by_directory(non_virtual_baseline_name)
        # See if all the immediate predecessors of the virtual root have the same expected result.
        for port in self._ports.values():
            directories = self._relative_baseline_search_paths(port, non_virtual_baseline_name)
            for directory in directories:
                if directory not in results_by_directory:
                    continue
                if results_by_directory[directory] != root_sha1:
                    return
                # Only the first hit matters: it's what this port would use.
                break

        _log.debug("Deleting redundant virtual root expected result.")
        if self._skip_scm_commands and virtual_root_expected_baseline_path in self._files_to_add:
            self._files_to_add.remove(virtual_root_expected_baseline_path)
        if self._scm.exists(virtual_root_expected_baseline_path):
            _log.debug("    Deleting (SCM): " + virtual_root_expected_baseline_path)
            if self._skip_scm_commands:
                self._files_to_delete.append(virtual_root_expected_baseline_path)
            else:
                self._scm.delete(virtual_root_expected_baseline_path)
        else:
            _log.debug("    Deleting (file system): " + virtual_root_expected_baseline_path)
            self._filesystem.remove(virtual_root_expected_baseline_path)

    def optimize(self, baseline_name):
        """Optimize all fallback paths touching baseline_name.

        The virtual fallback path is the same as the non-virtual one tacked on
        to the bottom of the non-virtual path. See
        https://docs.google.com/a/chromium.org/drawings/d/1eGdsIKzJ2dxDDBbUaIABrN4aMLD1bqJTfyxNGZsTdmg/edit
        for a visual representation of this.

        So, we can optimize the virtual path, then the virtual root and then
        the regular path.

        Returns (success, files_to_delete, files_to_add); the file lists are
        non-empty only when skip_scm_commands is set.
        """
        self._files_to_delete = []
        self._files_to_add = []
        _log.debug("Optimizing regular fallback path.")
        result = self._optimize_subtree(baseline_name)
        non_virtual_baseline_name = self._virtual_base(baseline_name)
        if not non_virtual_baseline_name:
            return result, self._files_to_delete, self._files_to_add

        self._optimize_virtual_root(baseline_name, non_virtual_baseline_name)

        _log.debug("Optimizing non-virtual fallback path.")
        result |= self._optimize_subtree(non_virtual_baseline_name)
        return result, self._files_to_delete, self._files_to_add