2 # Copyright 2021 The Chromium Authors
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
"""Helper for adding or removing an include to/from source file(s).

clang-format already provides header sorting functionality; however, the
functionality is limited to sorting headers within a block of headers surrounded
by blank lines (these are a heuristic to avoid clang-format breaking the
ordering for headers sensitive to inclusion order, e.g. <windows.h>).

As a result, inserting a new header is a bit more complex than simply inserting
the new header at the top and running clang-format.

This script implements additional logic to:
- classify different blocks of headers by type (C system, C++ system, user)
- find the appropriate insertion point for the new header
- create a new header block if necessary

As a bonus, it does *also* sort the includes, though any sorting disagreements
with clang-format should be resolved in favor of clang-format.

It also supports removing a header with option `--remove`.

Usage:
tools/add_header.py --header '<utility>' foo/bar.cc foo/baz.cc foo/baz.h
tools/add_header.py --header '<vector>' --remove foo/bar.cc foo/baz.cc foo/baz.h
"""
# The specific values of these constants are also used as a sort key for
# ordering different header types in the correct relative order.
_HEADER_TYPE_C_SYSTEM = 0
_HEADER_TYPE_CXX_SYSTEM = 1
# User headers (decorated with "") sort after both kinds of system header.
_HEADER_TYPE_USER = 2
# Returned by ClassifyHeader() for names that are not recognizably decorated.
_HEADER_TYPE_INVALID = -1
def ClassifyHeader(decorated_name):
  """Classifies a decorated header name as one of the _HEADER_TYPE_* values.

  Args:
    decorated_name: The header name, including its surrounding <> or "".

  Returns:
    _HEADER_TYPE_C_SYSTEM, _HEADER_TYPE_CXX_SYSTEM or _HEADER_TYPE_USER for the
    first predicate that accepts the name, or _HEADER_TYPE_INVALID if none
    does.
  """
  # Checked in order; the first predicate that accepts the name wins.
  classifiers = (
      (IsCSystemHeader, _HEADER_TYPE_C_SYSTEM),
      (IsCXXSystemHeader, _HEADER_TYPE_CXX_SYSTEM),
      (IsUserHeader, _HEADER_TYPE_USER),
  )
  for accepts, header_type in classifiers:
    if accepts(decorated_name):
      return header_type
  return _HEADER_TYPE_INVALID
def UndecoratedName(decorated_name):
  """Returns decorated_name with its surrounding "" or <> stripped.

  The name must already be recognizable as a system or user header; this is
  enforced with an assertion.
  """
  is_decorated = IsSystemHeader(decorated_name) or IsUserHeader(decorated_name)
  assert is_decorated
  return decorated_name[1:-1]
def IsSystemHeader(decorated_name):
  """Returns true if decorated_name looks like a system header.

  A system header is a name wrapped in angle brackets, e.g. <vector>. The
  empty string is rejected rather than raising IndexError, so that arbitrary
  user input can be classified safely.
  """
  return decorated_name.startswith('<') and decorated_name.endswith('>')
def IsCSystemHeader(decorated_name):
  """Returns true if decorated_name looks like a C system header.

  That is, a system header whose undecorated name ends in '.h'.
  """
  if not IsSystemHeader(decorated_name):
    return False
  return UndecoratedName(decorated_name).endswith('.h')
def IsCXXSystemHeader(decorated_name):
  """Returns true if decorated_name looks like a C++ system header.

  That is, a system header whose undecorated name does not end in '.h'.
  """
  if not IsSystemHeader(decorated_name):
    return False
  has_c_suffix = UndecoratedName(decorated_name).endswith('.h')
  return not has_c_suffix
def IsUserHeader(decorated_name):
  """Returns true if decorated_name looks like a user header.

  A user header is a name wrapped in double quotes, e.g. "base/foo.h". The
  length check rejects the empty string (which would otherwise raise
  IndexError) and a lone '"' (where a single character would otherwise count
  as both the opening and the closing quote).
  """
  return (len(decorated_name) >= 2 and decorated_name.startswith('"')
          and decorated_name.endswith('"'))
# Matches a line that is empty or contains only whitespace.
_EMPTY_LINE_RE = re.compile(r'\s*$')
# Matches a line holding only a // comment; group 1 is the comment text.
_COMMENT_RE = re.compile(r'\s*//(.*)$')
# Matches an #include or #import line. The fragments below concatenate into a
# single pattern.
_INCLUDE_RE = re.compile(
    r'\s*#(import|include)'  # Group 1: the directive keyword.
    r'\s+([<"].+?[">])'  # Group 2: the decorated header name.
    r'\s*?(?://(.*))?$')  # Group 3: text of an optional trailing // comment.
def FindIncludes(lines):
  """Finds the block of #includes, assuming Google+Chrome C++ style source.

  Note that this doesn't simply return a slice of the input lines, because
  having the actual indices simplifies things when generating the updated
  source text.

  Args:
    lines: The source text split into lines.

  Returns:
    A tuple of begin, end indices that can be used to slice the input lines to
    contain the includes to process. Returns -1, -1 if no such block of
    input lines could be found.
  """
  begin = -1
  end = -1
  for idx, line in enumerate(lines):
    # Skip over any initial comments (e.g. the copyright boilerplate) or empty
    # lines.
    # TODO(dcheng): This means that any preamble comment associated with the
    # first header will be dropped. So far, this hasn't broken anything, but
    # maybe this needs to be more clever.
    # TODO(dcheng): #define and #undef should probably also be allowed.
    if _EMPTY_LINE_RE.match(line) or _COMMENT_RE.match(line):
      continue

    if _INCLUDE_RE.match(line):
      if begin < 0:
        begin = idx
      # end is exclusive, so it always points just past the last #include seen.
      end = idx + 1
    elif begin >= 0:
      # Give up, it's something weird that probably requires manual
      # intervention.
      break
    # Otherwise: no match, but no #includes have been seen yet. Keep scanning
    # for the first #include.
  return begin, end
class Include:
  """Represents an #include/#import and any interesting metadata for it.

  Attributes:
    decorated_name: The name of the header file, decorated with <> for system
      headers or "" for user headers.

    directive: 'include' or 'import'
      TODO(dcheng): In the future, this may need to support C++ modules.

    preamble: Any comment lines that precede this include line, e.g.:

        // This is a preamble comment
        // for a header file.
        #include <windows.h>

      would have a preamble of

        ['// This is a preamble comment', '// for a header file.'].

    inline_comment: Any comment that comes after the #include on the same line,
      e.g.

        #include <windows.h>  // For CreateWindowExW()

      would be parsed with an inline comment of ' For CreateWindowExW()'.

    header_type: The header type corresponding to decorated_name as determined
      by ClassifyHeader().

    is_primary_header: True if this is the primary related header of a C++
      implementation file. Any primary header will be sorted to the top in its
      own separate block.
  """

  def __init__(self, decorated_name, directive, preamble, inline_comment):
    self.decorated_name = decorated_name
    assert directive in ('include', 'import')
    self.directive = directive
    self.preamble = preamble
    self.inline_comment = inline_comment
    self.header_type = ClassifyHeader(decorated_name)
    assert self.header_type != _HEADER_TYPE_INVALID
    # Set later by MarkPrimaryInclude(), once the source filename is known.
    self.is_primary_header = False

  def __repr__(self):
    return str((self.decorated_name, self.directive, self.preamble,
                self.inline_comment, self.header_type, self.is_primary_header))

  def ShouldInsertNewline(self, previous_include):
    """Returns true if a blank line belongs between previous_include and self."""
    # Per the Google C++ style guide, different blocks of headers should be
    # separated by an empty line.
    return (self.is_primary_header != previous_include.is_primary_header
            or self.header_type != previous_include.header_type)

  def ToSource(self):
    """Generates a C++ source representation of this include.

    Returns:
      A list of lines: the preamble lines followed by the directive itself,
      each with trailing whitespace removed.
    """
    include_line = '#%s %s' % (self.directive, self.decorated_name)
    if self.inline_comment:
      # Two spaces before a trailing comment, per the Google C++ style guide;
      # this also keeps already-conforming input unchanged on a round trip.
      include_line = include_line + '  //' + self.inline_comment
    source = list(self.preamble)
    source.append(include_line)
    return [line.rstrip() for line in source]
def ParseIncludes(lines):
  """Parses lines into a list of Include objects. Returns None on failure.

  Args:
    lines: A list of strings representing C++ source text.

  Returns:
    A list of Include objects representing the parsed input lines, or None if
    the input lines could not be parsed.
  """
  includes = []
  # Lines seen since the last #include that are neither empty nor an #include;
  # they become the preamble of the next #include.
  preamble = []
  for line in lines:
    if _EMPTY_LINE_RE.match(line):
      if preamble:
        # preamble contents are flushed when an #include directive is matched.
        # If preamble is non-empty, that means there is a preamble separated
        # from its #include directive by at least one newline. Just give up,
        # since the sorter has no idea how to preserve structure in this case.
        return None
      continue

    m = _INCLUDE_RE.match(line)
    if m:
      directive, decorated_name, inline_comment = m.group(1, 2, 3)
      includes.append(
          Include(decorated_name, directive, preamble, inline_comment))
      preamble = []
    else:
      preamble.append(line)

  # In theory, the caller should never pass a list of lines with a dangling
  # preamble. But there's a test case that exercises this, and just in case it
  # actually happens, fail a bit more gracefully.
  if preamble:
    return None
  return includes
def _DecomposePath(filename):
  """Decomposes a filename into a list of directories and the basename.

  Args:
    filename: The path to decompose; may be relative or absolute.

  Returns:
    A tuple of a list of directories (outermost first) and a string basename
    with its extension removed. For an absolute path, the filesystem root is
    the first entry of the directory list.
  """
  dirname, basename = os.path.split(filename)
  dirs = []
  while dirname:
    parent, last = os.path.split(dirname)
    if parent == dirname:
      # os.path.split() cannot shrink a filesystem root any further (e.g.
      # split('/') returns ('/', '')). Record the root and stop, instead of
      # looping forever.
      dirs.append(dirname)
      break
    dirs.append(last)
    dirname = parent
  # Components were collected innermost-first.
  dirs.reverse()
  # Remove the extension from the basename.
  return dirs, os.path.splitext(basename)[0]
# Optional platform suffix on a source file's basename, e.g. the '_posix' in
# moo_posix.cc.
_PLATFORM_SUFFIX = (
    r'(?:_(?:android|aura|chromeos|fuchsia|ios|linux|mac|ozone|posix|win|x11))?'
)

# Optional test suffix on a source file's basename, e.g. the '_unittest' in
# moo_unittest.cc.
_TEST_SUFFIX = r'(?:_(?:browser|interactive_ui|perf|ui|unit)?test)?'
def MarkPrimaryInclude(includes, filename):
  """Finds the primary header in includes and marks it as such.

  Per the style guide, if moo.cc's main purpose is to implement or test the
  functionality in moo.h, moo.h should be ordered first in the includes.

  At most one include is marked. As a side effect, includes may be re-sorted
  in place by descending decorated-name length.

  Args:
    includes: A list of Include objects.
    filename: The filename to use as the basis for finding the primary header.
  """
  # Header files never have a primary include.
  if filename.endswith('.h'):
    return

  # First pass. Looking for exact match primary header.
  exact_match = f'{os.path.splitext(filename)[0]}.h'
  for include in includes:
    name = include.decorated_name
    if IsUserHeader(name) and UndecoratedName(name) == exact_match:
      include.is_primary_header = True
      return

  basis_dirs, basis_name = _DecomposePath(filename)

  # Second pass. The list of includes is searched in reverse order of length.
  # Even though matching is fuzzy, moo_posix.h should take precedence over moo.h
  # when considering moo_posix.cc.
  includes.sort(key=lambda i: -len(i.decorated_name))
  for include in includes:
    if include.header_type != _HEADER_TYPE_USER:
      continue
    test_dirs, test_name = _DecomposePath(
        UndecoratedName(include.decorated_name))

    # If the basename to test is longer than the basis, just skip it and
    # continue. moo.cc should never match against moo_posix.h.
    if len(test_name) > len(basis_name):
      continue

    # The basename in the two paths being compared need to fuzzily match.
    # This allows for situations where moo_posix.cc implements the interfaces
    # defined in moo.h. The two suffixes are accepted in either order.
    escaped_basename = re.escape(test_name)
    suffix_orders = (_PLATFORM_SUFFIX + _TEST_SUFFIX,
                     _TEST_SUFFIX + _PLATFORM_SUFFIX)
    if not any(
        re.match(escaped_basename + suffixes + '$', basis_name)
        for suffixes in suffix_orders):
      continue

    # The topmost directory name must match, and the rest of the directory path
    # should be 'substantially similar'.
    matcher = difflib.SequenceMatcher(None, test_dirs, basis_dirs)
    first_matched = False
    total_matched = 0
    for match in matcher.get_matching_blocks():
      # Only the first matching block examined can start at the top of both
      # directory lists.
      if total_matched == 0 and match.a == 0 and match.b == 0:
        first_matched = True
      total_matched += match.size

    if not first_matched:
      continue

    # 'Substantially similar' is defined to be:
    # - no more than two differences
    # - at least one match besides the topmost directory
    total_differences = (abs(total_matched - len(test_dirs)) +
                         abs(total_matched - len(basis_dirs)))
    # Note: total_differences != 0 is mainly intended to allow more succinct
    # tests (otherwise tests with just a basename would always trip the
    # total_matched < 2 check).
    if total_differences != 0 and (total_differences > 2 or total_matched < 2):
      continue

    include.is_primary_header = True
    return
def SerializeIncludes(includes):
  """Turns includes back into the corresponding C++ source text.

  Note that includes is sorted in place as a side effect.

  Args:
    includes: a list of Include objects.

  Returns:
    A list of strings representing C++ source text.
  """
  # Headers with ordering requirements, listed in the order they must appear.
  # Entries are lowercase because they are compared against lowercased names.
  # NOTE(review): the header names below were reconstructed from the ordering
  # comments; confirm them against the canonical list before relying on them.
  special_headers = [
      # Must be included before ws2tcpip.h.
      # Doesn't need to be included before <windows.h> with
      # WIN32_LEAN_AND_MEAN but why chance it?
      '<winsock2.h>',
      # Must be before lots of things, e.g. shellapi.h, winbase.h,
      # versionhelpers.h, memoryapi.h, hidclass.h, ncrypt.h., ...
      '<windows.h>',
      # Must be before iphlpapi.h.
      '<ws2tcpip.h>',
      # Must be before propkey.h.
      '<shobjidl.h>',
      # Must be before atlapp.h.
      '<atlbase.h>',
      # Must be before intshcut.h.
      '<ole2.h>',
      # Must be before intshcut.h.
      '<unknwn.h>',
      # Must be before uiautomation.h.
      '<objbase.h>',
      # Must be before tpcshrd.h.
      '<tchar.h>',
  ]
  special_rank = {name: rank for rank, name in enumerate(special_headers)}
  # Headers without special rules all share one rank after every special one.
  default_rank = len(special_headers)

  # Ensure that headers are sorted as follows:
  #
  # 1. The primary header, if any, appears first.
  # 2. All headers of the same type (e.g. C system, C++ system headers, et
  #    cetera) are grouped contiguously.
  # 3. Any special sorting rules needed within each group for satisfying
  #    platform header idiosyncrasies. In practice, this only applies to C
  #    system headers.
  # 4. The remaining headers without special sorting rules are sorted
  #    lexicographically.
  #
  # The for loop below that outputs the actual source text depends on #2 above
  # to insert newlines between different groups of headers.
  def SortKey(include):
    return (not include.is_primary_header, include.header_type,
            special_rank.get(include.decorated_name.lower(), default_rank),
            include.decorated_name)

  includes.sort(key=SortKey)

  source = []
  previous_include = None
  for include in includes:
    if previous_include and include.ShouldInsertNewline(previous_include):
      source.append('')
    source.extend(include.ToSource())
    previous_include = include
  return source
def AddHeaderToSource(filename, source, decorated_name, remove=False):
  """Adds or removes the specified header into/from the source text, if needed.

  A message is printed whenever the file is skipped.

  Args:
    filename: The name of the source file.
    source: A string containing the contents of the source file.
    decorated_name: The decorated name of the header to add or remove.
    remove: If true, remove instead of adding.

  Returns:
    None if no changes are needed or the modified source text otherwise.
  """
  lines = source.splitlines()
  begin, end = FindIncludes(lines)

  # No #includes in this file. Just give up.
  # TODO(dcheng): Be more clever and insert it after the file-level comment or
  # include guard as appropriate.
  if begin < 0:
    print(f'Skipping {filename}: unable to find includes!')
    return None

  includes = ParseIncludes(lines[begin:end])
  if not includes:
    print(f'Skipping {filename}: unable to parse includes!')
    return None

  if remove:
    # Remove only the first include whose name matches.
    for i in includes:
      if decorated_name == i.decorated_name:
        includes.remove(i)
        break
    else:
      print(f'Skipping {filename}: unable to find {decorated_name}')
      return None
  elif decorated_name in [i.decorated_name for i in includes]:
    # Nothing to do.
    print(f'Skipping {filename}: no changes required!')
    return None
  else:
    includes.append(Include(decorated_name, 'include', [], None))

  MarkPrimaryInclude(includes, filename)

  lines[begin:end] = SerializeIncludes(includes)
  lines.append('')  # To avoid eating the newline at the end of the file.
  return '\n'.join(lines)
def main():
  """Command-line entry point: adds or removes a header in each given file.

  Returns:
    1 if --header is not a decorated header name, 0 otherwise.
  """
  parser = argparse.ArgumentParser(
      description='Mass add (or remove) a new header into a bunch of files.')
  parser.add_argument(
      '--header',
      help='The decorated filename of the header to insert (e.g. "a" or <a>)',
      required=True)
  parser.add_argument('--remove',
                      help='Remove the header file instead of adding it',
                      action='store_true')
  parser.add_argument('files', nargs='+')
  args = parser.parse_args()
  if ClassifyHeader(args.header) == _HEADER_TYPE_INVALID:
    print('--header argument must be a decorated filename, e.g.')
    print(' --header "<utility>"')
    print('or')
    print(' --header \'"moo.h"\'')
    return 1
  operation = 'Removing' if args.remove else 'Inserting'
  print(f'{operation} #include {args.header}...')
  for filename in args.files:
    # Read and write with an explicit encoding so the result does not depend on
    # the platform's default locale.
    with open(filename, 'r', encoding='utf-8') as f:
      new_source = AddHeaderToSource(os.path.normpath(filename), f.read(),
                                     args.header, args.remove)
    # AddHeaderToSource() returns None when the file needs no changes.
    if not new_source:
      continue
    with open(filename, 'w', encoding='utf-8', newline='\n') as f:
      f.write(new_source)
  return 0


if __name__ == '__main__':
  sys.exit(main())