2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Wrapper script to help run clang tools across Chromium code.
9 If you want to run the tool across all Chromium code:
10 run_tool.py <tool> <path/to/compiledb>
12 If you only want to run the tool across just chrome/browser and content/browser:
13 run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser
15 Please see https://code.google.com/p/chromium/wiki/ClangToolRefactoring for more
16 information, which documents the entire automated refactoring flow in Chromium.
The clang tool implementation doesn't take advantage of multiple cores, and if
it fails mysteriously in the middle, all the generated replacements will be
lost.
23 Unfortunately, if the work is simply sharded across multiple cores by running
24 multiple RefactoringTools, problems arise when they attempt to rewrite a file at
25 the same time. To work around that, clang tools that are run using this tool
should output edits to stdout in the following format:

==== BEGIN EDITS ====
r:<file path>:<offset>:<length>:<replacement text>
r:<file path>:<offset>:<length>:<replacement text>
==== END EDITS ====
34 Any generated edits are applied once the clang tool has finished running
35 across Chromium, regardless of whether some instances failed or not.
40 import multiprocessing
# One replacement emitted by the clang tool. |edit_type| is the leading tag of
# the edit line (e.g. 'r'), |offset| and |length| delimit the range of the file
# that is replaced, and |replacement| is the text that takes its place.
Edit = collections.namedtuple(
    'Edit', 'edit_type offset length replacement')
def _GetFilesFromGit(paths=None):
  """Gets the list of files in the git repository.

  Args:
    paths: Prefix filter for the returned paths. May contain multiple entries.
        When None or empty, the listing is not filtered.

  Returns:
    A list with one entry per line printed by `git ls-files`.
  """
  args = ['git', 'ls-files']
  if paths:
    # '--' ends option parsing, so a filter that happens to start with '-' is
    # still treated as a path.
    args.append('--')
    args.extend(paths)
  command = subprocess.Popen(args, stdout=subprocess.PIPE)
  output, _ = command.communicate()
  return output.splitlines()
def _ExtractEditsFromStdout(build_directory, stdout):
  """Extracts generated list of edits from the tool's stdout.

  The expected format is documented at the top of this file.

  Args:
    build_directory: Directory that contains the compile database. Used to
        normalize the filenames.
    stdout: The stdout from running the clang tool.

  Returns:
    A dictionary mapping filenames to the associated edits. It is empty when
    stdout does not contain both the BEGIN and the END marker line.
  """
  edits = collections.defaultdict(list)
  lines = stdout.splitlines()
  try:
    start_index = lines.index('==== BEGIN EDITS ====')
    end_index = lines.index('==== END EDITS ====')
  except ValueError:
    # list.index() raises when a marker is missing. Output without a complete
    # edit section contributes no edits rather than aborting the whole run.
    return edits
  for line in lines[start_index + 1:end_index]:
    try:
      # The replacement text is the last field and may itself contain ':', so
      # split at most four times.
      edit_type, path, offset, length, replacement = line.split(':', 4)
      # NUL bytes in the replacement text are turned into newlines.
      replacement = replacement.replace('\0', '\n')
      # Normalize the file path emitted by the clang tool to be relative to the
      # current working directory.
      path = os.path.relpath(os.path.join(build_directory, path))
      edits[path].append(Edit(edit_type, int(offset), int(length), replacement))
    except ValueError:
      # Too few fields or a non-numeric offset/length: report and move on.
      print('Unable to parse edit: %s' % line)
  return edits
def _ExecuteTool(toolname, build_directory, filename):
  """Runs the clang tool over a single file and reports the outcome.

  Kept at module level, rather than on the dispatcher class, so that it can be
  pickled for the multiprocessing pool.

  Args:
    toolname: Path to the tool to execute.
    build_directory: Directory that contains the compile database.
    filename: The file to run the tool over.

  Returns:
    A dictionary that always contains the key "status" with a boolean value.

    When status is True, the edits parsed from the tool's stdout are stored
    under the key "edits".

    When status is False, the keys "filename" and "stderr" hold the input file
    and the tool's stderr output respectively.
  """
  tool_argv = (toolname, '-p', build_directory, filename)
  tool = subprocess.Popen(
      tool_argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  tool_stdout, tool_stderr = tool.communicate()
  if tool.returncode == 0:
    parsed_edits = _ExtractEditsFromStdout(build_directory, tool_stdout)
    return {'status': True, 'edits': parsed_edits}
  return {'status': False, 'filename': filename, 'stderr': tool_stderr}
class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel."""

  def __init__(self, toolname, build_directory, filenames):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      build_directory: Directory that contains the compile database.
      filenames: The files to run the tool over.
    """
    self.__toolname = toolname
    self.__build_directory = build_directory
    self.__filenames = filenames
    self.__success_count = 0
    self.__failed_count = 0
    self.__edits = collections.defaultdict(list)

  @property
  def edits(self):
    """Dictionary mapping each filename to the edits collected for it."""
    return self.__edits

  @property
  def failed_count(self):
    """Number of files for which the tool reported a failure."""
    return self.__failed_count

  def Run(self):
    """Does the grunt work."""
    pool = multiprocessing.Pool()
    try:
      result_iterator = pool.imap_unordered(
          functools.partial(_ExecuteTool, self.__toolname,
                            self.__build_directory),
          self.__filenames)
      for result in result_iterator:
        self.__ProcessResult(result)
    finally:
      # Always release the worker processes. On the normal path every result
      # has already been consumed, so terminating the pool discards nothing.
      pool.terminate()
      pool.join()
    sys.stdout.write('\n')
    sys.stdout.flush()

  def __ProcessResult(self, result):
    """Handles result processing.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      for k, v in result['edits'].items():
        self.__edits[k].extend(v)
    else:
      self.__failed_count += 1
      sys.stdout.write('\nFailed to process %s\n' % result['filename'])
      sys.stdout.write(result['stderr'])
      sys.stdout.write('\n')
    percentage = (
        float(self.__success_count + self.__failed_count) /
        len(self.__filenames)) * 100
    # The trailing '\r' makes the next progress line overwrite this one.
    sys.stdout.write('Succeeded: %d, Failed: %d [%.2f%%]\r' % (
        self.__success_count, self.__failed_count, percentage))
    sys.stdout.flush()
def _FormatEditedLines(path, clang_format_diff_path):
  """Runs clang-format-diff.py over the lines `git diff` reports for |path|.

  Does the work of
    git diff -U0 <path> | python <clang-format-diff.py> -i -p1 -style=file
  but connects the two processes directly instead of going through a shell, so
  neither path has to survive shell quoting.

  Args:
    path: The file whose changed lines should be reformatted.
    clang_format_diff_path: Path to the clang-format-diff.py helper.

  Returns:
    The exit code of the clang-format-diff.py process.
  """
  diff = subprocess.Popen(['git', 'diff', '-U0', '--', path],
                          stdout=subprocess.PIPE)
  formatter = subprocess.Popen(
      ['python', clang_format_diff_path, '-i', '-p1', '-style=file'],
      stdin=diff.stdout)
  # Close this process's copy of the pipe so the formatter is the only reader.
  diff.stdout.close()
  formatter_status = formatter.wait()
  diff.wait()
  return formatter_status


def _ApplyEdits(edits, clang_format_diff_path):
  """Apply the generated edits.

  Args:
    edits: A dict mapping filenames to Edit instances that apply to that file.
        The lists are read but not reordered.
    clang_format_diff_path: Path to the clang-format-diff.py helper to help
        automatically reformat diffs to avoid style violations. Pass None if the
        clang-format step should be skipped.
  """
  edit_count = 0
  for k, v in edits.items():
    with open(k, 'rb+') as f:
      contents = bytearray(f.read())
      last_edit = None
      # Walk the edits in descending sorted order. Sorting puts duplicate edits
      # next to each other so they can be skipped, and going back to front
      # means earlier offsets stay valid as later ranges change size.
      for edit in sorted(v, reverse=True):
        if edit == last_edit:
          continue
        last_edit = edit
        contents[edit.offset:edit.offset + edit.length] = edit.replacement
        if not edit.replacement:
          # A pure deletion may leave a dangling list separator behind.
          _ExtendDeletionIfElementIsInList(contents, edit.offset)
        edit_count += 1
      # Overwrite the file in place with the edited contents.
      f.seek(0)
      f.truncate()
      f.write(contents)
    if clang_format_diff_path:
      if _FormatEditedLines(k, clang_format_diff_path) != 0:
        print('clang-format failed for %s' % k)
  print('Applied %d edits to %d files' % (edit_count, len(edits)))
# Byte values that are skipped when looking for the neighbours of a deletion.
_WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' ')))


def _ExtendDeletionIfElementIsInList(contents, offset):
  """Extends the range of a deletion if the deleted element was part of a list.

  This rewriter helper makes it easy for refactoring tools to remove elements
  from a list. Even if a matcher callback knows that it is removing an element
  from a list, it may not have enough information to accurately remove the list
  element; for example, another matcher callback may end up removing an adjacent
  list element, or all the list elements may end up being removed.

  With this helper, refactoring tools can simply remove the list element and not
  worry about having to include the comma in the replacement.

  The first non-whitespace byte on each side of the deletion decides what
  happens:
    * preceded by one of , : ( { and followed by a comma: the following comma
      and the whitespace in front of it are removed;
    * otherwise preceded by , or : the preceding separator and the whitespace
      after it are removed;
    * anything else: |contents| is left untouched.

  Args:
    contents: A bytearray with the deletion already applied. Modified in place.
    offset: The offset in the bytearray where the deleted range used to be.
  """
  # Scan by index instead of slicing, so no copy of the file is made.
  left = offset
  while left > 0 and contents[left - 1] in _WHITESPACE_BYTES:
    left -= 1
  char_before = None
  if left > 0 and contents[left - 1] in (
      ord(','), ord(':'), ord('('), ord('{')):
    left -= 1
    char_before = chr(contents[left])
  if not char_before:
    return

  right = offset
  end = len(contents)
  while right < end and contents[right] in _WHITESPACE_BYTES:
    right += 1
  if right < end and contents[right] == ord(','):
    # Drop the whitespace after the deletion together with the comma.
    del contents[offset:right + 1]
  elif char_before in (',', ':'):
    # Nothing follows in the list: drop the separator that introduced the
    # deleted element, plus the whitespace between it and the deletion.
    del contents[left:offset]
def main(argv):
  """Runs a clang tool over the repository's sources and applies its edits.

  Args:
    argv: The command-line arguments without the program name: the clang tool,
        the compile database directory, then any number of path filters.

  Returns:
    1 if too few arguments were given, 2 if the tool failed on at least one
    file, and 0 otherwise.
  """
  if len(argv) < 2:
    print('Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...')
    print(' <clang tool> is the clang tool that should be run.')
    print(' <compile db> is the directory that contains the compile database')
    print(' <path 1> <path2> ... can be used to filter what files are edited')
    return 1

  clang_format_diff_path = os.path.join(
      os.path.dirname(os.path.realpath(__file__)),
      '../../../third_party/llvm/tools/clang/tools/clang-format',
      'clang-format-diff.py')
  # TODO(dcheng): Allow this to be controlled with a flag as well.
  if not os.path.isfile(clang_format_diff_path):
    clang_format_diff_path = None

  filenames = frozenset(_GetFilesFromGit(argv[2:]))
  # Filter out files that aren't C/C++/Obj-C/Obj-C++.
  extensions = frozenset(('.c', '.cc', '.cpp', '.m', '.mm'))
  dispatcher = _CompilerDispatcher(argv[0], argv[1],
                                   [f for f in filenames
                                    if os.path.splitext(f)[1] in extensions])
  dispatcher.Run()
  # Filter out edits to files that aren't in the git repository, since it's not
  # useful to modify files that aren't under source control--typically, these
  # are generated files or files in a git submodule that's not part of Chromium.
  _ApplyEdits({k: v for k, v in dispatcher.edits.items()
               if k in filenames},
              clang_format_diff_path)
  if dispatcher.failed_count != 0:
    return 2
  return 0
if __name__ == '__main__':
  # main() receives the arguments without the program name; its return value
  # becomes the process exit status.
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)