3 #===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
5 # The LLVM Compiler Infrastructure
7 # This file is distributed under the University of Illinois Open Source
8 # License. See LICENSE.TXT for details.
10 #===------------------------------------------------------------------------===#
13 clang-format git integration
14 ============================
16 This file provides a clang-format integration for git. Put it somewhere in your
17 path and ensure that it is executable. Then, "git clang-format" will invoke
18 clang-format on the changes in current files or a specific commit.
20 For further details, run:
35 usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'
38 Run clang-format on all lines that differ between the working directory
39 and <commit>, which defaults to HEAD. Changes are only applied to the working
42 The following git-config settings set the default of the corresponding option:
49 # Name of the temporary index file in which to save the output of clang-format.
50 # This file is created within the .git directory.
51 temp_index_basename = 'clang-format-index'
54 Range = collections.namedtuple('Range', 'start, count')
58 config = load_git_config()
60 # In order to keep '--' yet allow options after positionals, we need to
61 # check for '--' ourselves. (Setting nargs='*' throws away the '--', while
62 # nargs=argparse.REMAINDER disallows options after positionals.)
65 idx = argv.index('--')
69 dash_dash = argv[idx:]
72 default_extensions = ','.join([
73 # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
77 'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp', # C++
80 p = argparse.ArgumentParser(
81 usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
83 p.add_argument('--binary',
84 default=config.get('clangformat.binary', 'clang-format'),
85 help='path to clang-format'),
86 p.add_argument('--commit',
87 default=config.get('clangformat.commit', 'HEAD'),
88 help='default commit to use if none is specified'),
89 p.add_argument('--diff', action='store_true',
90 help='print a diff instead of applying the changes')
91 p.add_argument('--extensions',
92 default=config.get('clangformat.extensions',
94 help=('comma-separated list of file extensions to format, '
95 'excluding the period and case-insensitive')),
96 p.add_argument('-f', '--force', action='store_true',
97 help='allow changes to unstaged files')
98 p.add_argument('-p', '--patch', action='store_true',
99 help='select hunks interactively')
100 p.add_argument('-q', '--quiet', action='count', default=0,
101 help='print less information')
102 p.add_argument('--style',
103 default=config.get('clangformat.style', None),
104 help='passed to clang-format'),
105 p.add_argument('-v', '--verbose', action='count', default=0,
106 help='print extra information')
107 # We gather all the remaining positional arguments into 'args' since we need
108 # to use some heuristics to determine whether or not <commit> was present.
109 # However, to print pretty messages, we make use of metavar and help.
110 p.add_argument('args', nargs='*', metavar='<commit>',
111 help='revision from which to compute the diff')
112 p.add_argument('ignored', nargs='*', metavar='<file>...',
113 help='if specified, only consider differences in these files')
114 opts = p.parse_args(argv)
116 opts.verbose -= opts.quiet
119 commit, files = interpret_args(opts.args, dash_dash, opts.commit)
120 changed_lines = compute_diff_and_extract_lines(commit, files)
121 if opts.verbose >= 1:
122 ignored_files = set(changed_lines)
123 filter_by_extension(changed_lines, opts.extensions.lower().split(','))
124 if opts.verbose >= 1:
125 ignored_files.difference_update(changed_lines)
127 print 'Ignoring changes in the following files (wrong extension):'
128 for filename in ignored_files:
131 print 'Running clang-format on the following files:'
132 for filename in changed_lines:
134 if not changed_lines:
135 print 'no modified files to format'
137 # The computed diff outputs absolute paths, so we must cd before accessing
140 old_tree = create_tree_from_workdir(changed_lines)
141 new_tree = run_clang_format_and_save_to_tree(changed_lines,
144 if opts.verbose >= 1:
145 print 'old tree:', old_tree
146 print 'new tree:', new_tree
147 if old_tree == new_tree:
148 if opts.verbose >= 0:
149 print 'clang-format did not modify any files'
151 print_diff(old_tree, new_tree)
153 changed_files = apply_changes(old_tree, new_tree, force=opts.force,
154 patch_mode=opts.patch)
155 if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
156 print 'changed files:'
157 for filename in changed_files:
161 def load_git_config(non_string_options=None):
162 """Return the git configuration as a dictionary.
164 All options are assumed to be strings unless in `non_string_options`, which
165 is a dictionary mapping option name (in lower case) to either "--bool" or
167 if non_string_options is None:
168 non_string_options = {}
170 for entry in run('git', 'config', '--list', '--null').split('\0'):
172 name, value = entry.split('\n', 1)
173 if name in non_string_options:
174 value = run('git', 'config', non_string_options[name], name)
179 def interpret_args(args, dash_dash, default_commit):
180 """Interpret `args` as "[commit] [--] [files...]" and return (commit, files).
182 It is assumed that "--" and everything that follows has been removed from
183 args and placed in `dash_dash`.
185 If "--" is present (i.e., `dash_dash` is non-empty), the argument to its
186 left (if present) is taken as commit. Otherwise, the first argument is
187 checked if it is a commit or a file. If commit is not given,
188 `default_commit` is used."""
191 commit = default_commit
193 die('at most one commit allowed; %d given' % len(args))
196 object_type = get_object_type(commit)
197 if object_type not in ('commit', 'tag'):
198 if object_type is None:
199 die("'%s' is not a commit" % commit)
201 die("'%s' is a %s, but a commit was expected" % (commit, object_type))
202 files = dash_dash[1:]
204 if disambiguate_revision(args[0]):
208 commit = default_commit
211 commit = default_commit
216 def disambiguate_revision(value):
217 """Returns True if `value` is a revision, False if it is a file, or dies."""
218 # If `value` is ambiguous (neither a commit nor a file), the following
219 # command will die with an appropriate error message.
220 run('git', 'rev-parse', value, verbose=False)
221 object_type = get_object_type(value)
222 if object_type is None:
224 if object_type in ('commit', 'tag'):
226 die('`%s` is a %s, but a commit or filename was expected' %
227 (value, object_type))
230 def get_object_type(value):
231 """Returns a string description of an object's type, or None if it is not
232 a valid git object."""
233 cmd = ['git', 'cat-file', '-t', value]
234 p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
235 stdout, stderr = p.communicate()
236 if p.returncode != 0:
238 return stdout.strip()
241 def compute_diff_and_extract_lines(commit, files):
242 """Calls compute_diff() followed by extract_lines()."""
243 diff_process = compute_diff(commit, files)
244 changed_lines = extract_lines(diff_process.stdout)
245 diff_process.stdout.close()
247 if diff_process.returncode != 0:
248 # Assume error was already printed to stderr.
253 def compute_diff(commit, files):
254 """Return a subprocess object producing the diff from `commit`.
256 The return value's `stdout` file object will produce a patch with the
257 differences between the working directory and `commit`, filtered on `files`
258 (if non-empty). Zero context lines are used in the patch."""
259 cmd = ['git', 'diff-index', '-p', '-U0', commit, '--']
261 p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
266 def extract_lines(patch_file):
267 """Extract the changed lines in `patch_file`.
269 The return value is a dictionary mapping filename to a list of (start_line,
272 The input must have been produced with ``-U0``, meaning unidiff format with
273 zero lines of context. The return value is a dict mapping filename to a
274 list of line `Range`s."""
276 for line in patch_file:
277 match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
279 filename = match.group(1).rstrip('\r\n')
280 match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
282 start_line = int(match.group(1))
285 line_count = int(match.group(3))
287 matches.setdefault(filename, []).append(Range(start_line, line_count))
def filter_by_extension(dictionary, allowed_extensions):
    """Delete every key in `dictionary` that doesn't have an allowed extension.

    `dictionary` is modified in place; nothing is returned.  A key is kept only
    if the text after its last '.' (compared case-insensitively) is in
    `allowed_extensions`; keys that contain no '.' are always removed.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period."""
    allowed_extensions = frozenset(allowed_extensions)
    # Iterate over a snapshot of the keys: deleting entries while iterating the
    # live key view raises RuntimeError on Python 3.
    for filename in list(dictionary):
        base_ext = filename.rsplit('.', 1)
        if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
            del dictionary[filename]
303 def cd_to_toplevel():
304 """Change to the top level of the git repository."""
305 toplevel = run('git', 'rev-parse', '--show-toplevel')
def create_tree_from_workdir(filenames):
    """Build a new git tree out of the working-directory copies of `filenames`.

    The work is delegated to create_tree() in '--stdin' mode; the object ID
    (SHA-1) of the resulting tree is returned."""
    tree_id = create_tree(filenames, '--stdin')
    return tree_id
316 def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format',
318 """Run clang-format on each file and save the result to a git tree.
320 Returns the object ID (SHA-1) of the created tree."""
321 def index_info_generator():
322 for filename, line_ranges in changed_lines.iteritems():
323 mode = oct(os.stat(filename).st_mode)
324 blob_id = clang_format_to_blob(filename, line_ranges, binary=binary,
326 yield '%s %s\t%s' % (mode, blob_id, filename)
327 return create_tree(index_info_generator(), '--index-info')
330 def create_tree(input_lines, mode):
331 """Create a tree object from the given input.
333 If mode is '--stdin', it must be a list of filenames. If mode is
334 '--index-info' it must be a list of values suitable for "git update-index
335 --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>". Any other mode
337 assert mode in ('--stdin', '--index-info')
338 cmd = ['git', 'update-index', '--add', '-z', mode]
339 with temporary_index_file():
340 p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
341 for line in input_lines:
342 p.stdin.write('%s\0' % line)
345 die('`%s` failed' % ' '.join(cmd))
346 tree_id = run('git', 'write-tree')
350 def clang_format_to_blob(filename, line_ranges, binary='clang-format',
352 """Run clang-format on the given file and save the result to a git blob.
354 Returns the object ID (SHA-1) of the created blob."""
355 clang_format_cmd = [binary, filename]
357 clang_format_cmd.extend(['-style='+style])
358 clang_format_cmd.extend([
359 '-lines=%s:%s' % (start_line, start_line+line_count-1)
360 for start_line, line_count in line_ranges])
362 clang_format = subprocess.Popen(clang_format_cmd, stdin=subprocess.PIPE,
363 stdout=subprocess.PIPE)
365 if e.errno == errno.ENOENT:
366 die('cannot find executable "%s"' % binary)
369 clang_format.stdin.close()
370 hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
371 hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
372 stdout=subprocess.PIPE)
373 clang_format.stdout.close()
374 stdout = hash_object.communicate()[0]
375 if hash_object.returncode != 0:
376 die('`%s` failed' % ' '.join(hash_object_cmd))
377 if clang_format.wait() != 0:
378 die('`%s` failed' % ' '.join(clang_format_cmd))
379 return stdout.rstrip('\r\n')
382 @contextlib.contextmanager
383 def temporary_index_file(tree=None):
384 """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
385 the file afterward."""
386 index_path = create_temporary_index(tree)
387 old_index_path = os.environ.get('GIT_INDEX_FILE')
388 os.environ['GIT_INDEX_FILE'] = index_path
392 if old_index_path is None:
393 del os.environ['GIT_INDEX_FILE']
395 os.environ['GIT_INDEX_FILE'] = old_index_path
396 os.remove(index_path)
399 def create_temporary_index(tree=None):
400 """Create a temporary index file and return the created file's path.
402 If `tree` is not None, use that as the tree to read in. Otherwise, an
403 empty index is created."""
404 gitdir = run('git', 'rev-parse', '--git-dir')
405 path = os.path.join(gitdir, temp_index_basename)
408 run('git', 'read-tree', '--index-output='+path, tree)
def print_diff(old_tree, new_tree):
    """Write the differences between `old_tree` and `new_tree` to stdout."""
    # The output is meant to be read by the user, so the porcelain 'diff'
    # command is used rather than the plumbing 'diff-tree': only the former
    # does nice things like color and pagination.
    diff_cmd = ['git', 'diff', old_tree, new_tree, '--']
    subprocess.check_call(diff_cmd)
420 def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
421 """Apply the changes in `new_tree` to the working directory.
423 Bails if there are local changes in those files and not `force`. If
424 `patch_mode`, runs `git checkout --patch` to select hunks interactively."""
425 changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only', old_tree,
426 new_tree).rstrip('\0').split('\0')
428 unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
430 print >>sys.stderr, ('The following files would be modified but '
431 'have unstaged changes:')
432 print >>sys.stderr, unstaged_files
433 print >>sys.stderr, 'Please commit, stage, or stash them first.'
436 # In patch mode, we could just as well create an index from the new tree
437 # and checkout from that, but then the user will be presented with a
438 # message saying "Discard ... from worktree". Instead, we use the old
439 # tree as the index and checkout from new_tree, which gives the slightly
440 # better message, "Apply ... to index and worktree". This is not quite
441 # right, since it won't be applied to the user's index, but oh well.
442 with temporary_index_file(old_tree):
443 subprocess.check_call(['git', 'checkout', '--patch', new_tree])
444 index_tree = old_tree
446 with temporary_index_file(new_tree):
447 run('git', 'checkout-index', '-a', '-f')
451 def run(*args, **kwargs):
452 stdin = kwargs.pop('stdin', '')
453 verbose = kwargs.pop('verbose', True)
454 strip = kwargs.pop('strip', True)
456 raise TypeError("run() got an unexpected keyword argument '%s'" % name)
457 p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
458 stdin=subprocess.PIPE)
459 stdout, stderr = p.communicate(input=stdin)
460 if p.returncode == 0:
463 print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args)
464 print >>sys.stderr, stderr.rstrip()
466 stdout = stdout.rstrip('\r\n')
469 print >>sys.stderr, '`%s` returned %s' % (' '.join(args), p.returncode)
471 print >>sys.stderr, stderr.rstrip()
476 print >>sys.stderr, 'error:', message
480 if __name__ == '__main__':