src/tools/clang/scripts/run_tool.py

   1 #!/usr/bin/env python
   2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
   3 # Use of this source code is governed by a BSD-style license that can be
   4 # found in the LICENSE file.
   5
   6 """Wrapper script to help run clang tools across Chromium code.
   7
   8 How to use this tool:
   9 If you want to run the tool across all Chromium code:
  10 run_tool.py <tool> <path/to/compiledb>
  11
  12 If you only want to run the tool across just chrome/browser and content/browser:
  13 run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser
  14
  15 Please see https://code.google.com/p/chromium/wiki/ClangToolRefactoring for more
  16 information, which documents the entire automated refactoring flow in Chromium.
  17
  18 Why use this tool:
  19 The clang tool implementation doesn't take advantage of multiple cores, and if
  20 it fails mysteriously in the middle, all the generated replacements will be
  21 lost.
  22
  23 Unfortunately, if the work is simply sharded across multiple cores by running
  24 multiple RefactoringTools, problems arise when they attempt to rewrite a file at
  25 the same time. To work around that, clang tools that are run using this tool
  26 should output edits to stdout in the following format:
  27
  28 ==== BEGIN EDITS ====
  29 r:<file path>:<offset>:<length>:<replacement text>
  30 r:<file path>:<offset>:<length>:<replacement text>
  31 ...etc...
  32 ==== END EDITS ====
  33
  34 Any generated edits are applied once the clang tool has finished running
  35 across Chromium, regardless of whether some instances failed or not.
  36 """
  37
  38 import collections
  39 import functools
  40 import multiprocessing
  41 import os.path
  42 import pipes
  43 import subprocess
  44 import sys
  45
  46
  47 Edit = collections.namedtuple(
  48     'Edit', ('edit_type', 'offset', 'length', 'replacement'))
  49
  50
  51 def _GetFilesFromGit(paths = None):
  52   """Gets the list of files in the git repository.
  53
  54   Args:
  55     paths: Prefix filter for the returned paths. May contain multiple entries.
  56   """
  57   args = ['git', 'ls-files']
  58   if paths:
  59     args.extend(paths)
  60   command = subprocess.Popen(args, stdout=subprocess.PIPE)
  61   output, _ = command.communicate()
  62   return output.splitlines()
  63
  64
  65 def _ExtractEditsFromStdout(build_directory, stdout):
  66   """Extracts generated list of edits from the tool's stdout.
  67
  68   The expected format is documented at the top of this file.
  69
  70   Args:
  71     build_directory: Directory that contains the compile database. Used to
  72       normalize the filenames.
  73     stdout: The stdout from running the clang tool.
  74
  75   Returns:
  76     A dictionary mapping filenames to the associated edits.
  77   """
  78   lines = stdout.splitlines()
  79   start_index = lines.index('==== BEGIN EDITS ====')
  80   end_index = lines.index('==== END EDITS ====')
  81   edits = collections.defaultdict(list)
  82   for line in lines[start_index + 1:end_index]:
  83     try:
  84       edit_type, path, offset, length, replacement = line.split(':', 4)
  85       replacement = replacement.replace("\0", "\n");
  86       # Normalize the file path emitted by the clang tool to be relative to the
  87       # current working directory.
  88       path = os.path.relpath(os.path.join(build_directory, path))
  89       edits[path].append(Edit(edit_type, int(offset), int(length), replacement))
  90     except ValueError:
  91       print 'Unable to parse edit: %s' % line
  92   return edits
  93
  94
  95 def _ExecuteTool(toolname, build_directory, filename):
  96   """Executes the tool.
  97
  98   This is defined outside the class so it can be pickled for the multiprocessing
  99   module.
 100
 101   Args:
 102     toolname: Path to the tool to execute.
 103     build_directory: Directory that contains the compile database.
 104     filename: The file to run the tool over.
 105
 106   Returns:
 107     A dictionary that must contain the key "status" and a boolean value
 108     associated with it.
 109
 110     If status is True, then the generated edits are stored with the key "edits"
 111     in the dictionary.
 112
 113     Otherwise, the filename and the output from stderr are associated with the
 114     keys "filename" and "stderr" respectively.
 115   """
 116   command = subprocess.Popen((toolname, '-p', build_directory, filename),
 117                              stdout=subprocess.PIPE,
 118                              stderr=subprocess.PIPE)
 119   stdout, stderr = command.communicate()
 120   if command.returncode != 0:
 121     return {'status': False, 'filename': filename, 'stderr': stderr}
 122   else:
 123     return {'status': True,
 124             'edits': _ExtractEditsFromStdout(build_directory, stdout)}
 125
 126
 127 class _CompilerDispatcher(object):
 128   """Multiprocessing controller for running clang tools in parallel."""
 129
 130   def __init__(self, toolname, build_directory, filenames):
 131     """Initializer method.
 132
 133     Args:
 134       toolname: Path to the tool to execute.
 135       build_directory: Directory that contains the compile database.
 136       filenames: The files to run the tool over.
 137     """
 138     self.__toolname = toolname
 139     self.__build_directory = build_directory
 140     self.__filenames = filenames
 141     self.__success_count = 0
 142     self.__failed_count = 0
 143     self.__edits = collections.defaultdict(list)
 144
 145   @property
 146   def edits(self):
 147     return self.__edits
 148
 149   @property
 150   def failed_count(self):
 151     return self.__failed_count
 152
 153   def Run(self):
 154     """Does the grunt work."""
 155     pool = multiprocessing.Pool()
 156     result_iterator = pool.imap_unordered(
 157         functools.partial(_ExecuteTool, self.__toolname,
 158                           self.__build_directory),
 159         self.__filenames)
 160     for result in result_iterator:
 161       self.__ProcessResult(result)
 162     sys.stdout.write('\n')
 163     sys.stdout.flush()
 164
 165   def __ProcessResult(self, result):
 166     """Handles result processing.
 167
 168     Args:
 169       result: The result dictionary returned by _ExecuteTool.
 170     """
 171     if result['status']:
 172       self.__success_count += 1
 173       for k, v in result['edits'].iteritems():
 174         self.__edits[k].extend(v)
 175     else:
 176       self.__failed_count += 1
 177       sys.stdout.write('\nFailed to process %s\n' % result['filename'])
 178       sys.stdout.write(result['stderr'])
 179       sys.stdout.write('\n')
 180     percentage = (
 181         float(self.__success_count + self.__failed_count) /
 182         len(self.__filenames)) * 100
 183     sys.stdout.write('Succeeded: %d, Failed: %d [%.2f%%]\r' % (
 184         self.__success_count, self.__failed_count, percentage))
 185     sys.stdout.flush()
 186
 187
 188 def _ApplyEdits(edits, clang_format_diff_path):
 189   """Apply the generated edits.
 190
 191   Args:
 192     edits: A dict mapping filenames to Edit instances that apply to that file.
 193     clang_format_diff_path: Path to the clang-format-diff.py helper to help
 194       automatically reformat diffs to avoid style violations. Pass None if the
 195       clang-format step should be skipped.
 196   """
 197   edit_count = 0
 198   for k, v in edits.iteritems():
 199     # Sort the edits and iterate through them in reverse order. Sorting allows
 200     # duplicate edits to be quickly skipped, while reversing means that
 201     # subsequent edits don't need to have their offsets updated with each edit
 202     # applied.
 203     v.sort()
 204     last_edit = None
 205     with open(k, 'rb+') as f:
 206       contents = bytearray(f.read())
 207       for edit in reversed(v):
 208         if edit == last_edit:
 209           continue
 210         last_edit = edit
 211         contents[edit.offset:edit.offset + edit.length] = edit.replacement
 212         if not edit.replacement:
 213           _ExtendDeletionIfElementIsInList(contents, edit.offset)
 214         edit_count += 1
 215       f.seek(0)
 216       f.truncate()
 217       f.write(contents)
 218     if clang_format_diff_path:
 219       # TODO(dcheng): python3.3 exposes this publicly as shlex.quote, but Chrome
 220       # uses python2.7. Use the deprecated interface until Chrome uses a newer
 221       # Python.
 222       if subprocess.call('git diff -U0 %s | python %s -i -p1 -style=file ' % (
 223           pipes.quote(k), clang_format_diff_path), shell=True) != 0:
 224         print 'clang-format failed for %s' % k
 225   print 'Applied %d edits to %d files' % (edit_count, len(edits))
 226
 227
 228 _WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' ')))
 229
 230
 231 def _ExtendDeletionIfElementIsInList(contents, offset):
 232   """Extends the range of a deletion if the deleted element was part of a list.
 233
 234   This rewriter helper makes it easy for refactoring tools to remove elements
 235   from a list. Even if a matcher callback knows that it is removing an element
 236   from a list, it may not have enough information to accurately remove the list
 237   element; for example, another matcher callback may end up removing an adjacent
 238   list element, or all the list elements may end up being removed.
 239
 240   With this helper, refactoring tools can simply remove the list element and not
 241   worry about having to include the comma in the replacement.
 242
 243   Args:
 244     contents: A bytearray with the deletion already applied.
 245     offset: The offset in the bytearray where the deleted range used to be.
 246   """
 247   char_before = char_after = None
 248   left_trim_count = 0
 249   for byte in reversed(contents[:offset]):
 250     left_trim_count += 1
 251     if byte in _WHITESPACE_BYTES:
 252       continue
 253     if byte in (ord(','), ord(':'), ord('('), ord('{')):
 254       char_before = chr(byte)
 255     break
 256
 257   right_trim_count = 0
 258   for byte in contents[offset:]:
 259     right_trim_count += 1
 260     if byte in _WHITESPACE_BYTES:
 261       continue
 262     if byte == ord(','):
 263       char_after = chr(byte)
 264     break
 265
 266   if char_before:
 267     if char_after:
 268       del contents[offset:offset + right_trim_count]
 269     elif char_before in (',', ':'):
 270       del contents[offset - left_trim_count:offset]
 271
 272
 273 def main(argv):
 274   if len(argv) < 2:
 275     print 'Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...'
 276     print '  <clang tool> is the clang tool that should be run.'
 277     print '  <compile db> is the directory that contains the compile database'
 278     print '  <path 1> <path2> ... can be used to filter what files are edited'
 279     return 1
 280
 281   clang_format_diff_path = os.path.join(
 282       os.path.dirname(os.path.realpath(__file__)),
 283       '../../../third_party/llvm/tools/clang/tools/clang-format',
 284       'clang-format-diff.py')
 285   # TODO(dcheng): Allow this to be controlled with a flag as well.
 286   if not os.path.isfile(clang_format_diff_path):
 287     clang_format_diff_path = None
 288
 289   filenames = frozenset(_GetFilesFromGit(argv[2:]))
 290   # Filter out files that aren't C/C++/Obj-C/Obj-C++.
 291   extensions = frozenset(('.c', '.cc', '.m', '.mm'))
 292   dispatcher = _CompilerDispatcher(argv[0], argv[1],
 293                                    [f for f in filenames
 294                                     if os.path.splitext(f)[1] in extensions])
 295   dispatcher.Run()
 296   # Filter out edits to files that aren't in the git repository, since it's not
 297   # useful to modify files that aren't under source control--typically, these
 298   # are generated files or files in a git submodule that's not part of Chromium.
 299   _ApplyEdits({k : v for k, v in dispatcher.edits.iteritems()
 300                     if k in filenames},
 301               clang_format_diff_path)
 302   if dispatcher.failed_count != 0:
 303     return 2
 304   return 0
 305
 306
 307 if __name__ == '__main__':
 308   sys.exit(main(sys.argv[1:]))