1 # Copyright (C) 2012 by the Massachusetts Institute of Technology.
4 # Export of this software from the United States of America may
5 # require a specific license from the United States Government.
6 # It is the responsibility of any person or organization contemplating
7 # export to obtain such a license before exporting.
9 # WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
10 # distribute this software and its documentation for any purpose and
11 # without fee is hereby granted, provided that the above copyright
12 # notice appear in all copies and that both that copyright notice and
13 # this permission notice appear in supporting documentation, and that
14 # the name of M.I.T. not be used in advertising or publicity pertaining
15 # to distribution of the software without specific, written prior
16 # permission. Furthermore if you modify this software you must label
17 # your software as modified software and not distribute it in such a
18 # fashion that it might be confused with the original M.I.T. software.
19 # M.I.T. makes no representations about the suitability of
20 # this software for any purpose. It is provided "as is" without express
21 # or implied warranty.
23 # This program checks for some kinds of MIT krb5 coding style
24 # violations in a single file. Checked violations include:
28 # Trailing whitespace and final blank lines
29 # Comment formatting errors
30 # Preprocessor statements in function bodies
32 # Space before paren in function call, or no space after if/for/while
33 # Parenthesized return expression
34 # Space after cast operator, or no space before * in cast operator
35 # Line broken before binary operator
36 # Lack of spaces around binary operator (sometimes)
37 # Assignment at the beginning of an if conditional
38 # Use of prohibited string functions
39 # Lack of braces around 2+ line flow control body
40 # Incorrect indentation as determined by emacs c-mode (if possible)
42 # This program does not check for the following:
44 # Anything outside of a function body except line length/whitespace
45 # Anything non-syntactic (proper cleanup flow control, naming, etc.)
47 # Implicit tests against NULL or '\0'
48 # Inner-scope variable declarations
49 # Over- or under-parenthesization
50 # Long or deeply nested function bodies
51 # Syntax of function calls through pointers
56 from subprocess import call
57 from tempfile import NamedTemporaryFile
60 print '%5d %s' % (ln, msg)
63 # If lines[0] indicates the krb5 C style, try to use emacs to reindent
64 # a copy of lines. Return None if the file does not use the krb5 C
65 # style or if the emacs batch reindent is unsuccessful.
66 def emacs_reindent(lines):
67 if 'c-basic-offset: 4; indent-tabs-mode: nil' not in lines[0]:
70 util_dir = os.path.dirname(sys.argv[0])
71 cstyle_el = os.path.join(util_dir, 'krb5-c-style.el')
72 reindent_el = os.path.join(util_dir, 'krb5-batch-reindent.el')
73 with NamedTemporaryFile(suffix='.c') as f:
74 f.write(''.join(lines))
76 args = ['emacs', '-q', '-batch', '-l', cstyle_el, '-l', reindent_el,
78 with open(os.devnull, 'w') as devnull:
80 st = call(args, stdin=devnull, stdout=devnull, stderr=devnull)
84 # Fail gracefully if emacs isn't installed.
87 ilines = f.readlines()
def check_length(line, ln):
    # Warn when line is longer than 79 characters. Lines which begin
    # with ' * Copyright' are exempt from the limit.
    is_copyright = line.startswith(' * Copyright')
    if not is_copyright and len(line) > 79:
        warn(ln, 'Length exceeds 79 characters')
97 def check_tabs(line, ln, allow_tabs, seen_tab):
100 warn(ln, 'Tab character in file which does not allow tabs')
103 warn(ln, 'Tab character immediately following space')
104 if ' ' in line and seen_tab:
105 warn(ln, '8+ spaces in file which uses tabs')
def check_trailing_whitespace(line, ln):
    # Warn when the final character of line is a space or a tab. An
    # empty line has no final character and is never reported.
    last = line[-1:]
    if last and last in ' \t':
        warn(ln, 'Trailing whitespace')
113 def check_comment(lines, ln):
114 align = lines[0].index('/*') + 1
115 if not lines[0].lstrip().startswith('/*'):
116 warn(ln, 'Multi-line comment begins after code')
117 for line in lines[1:]:
119 if len(line) <= align or line[align] != '*':
120 warn(ln, 'Comment line does not have * aligned with top')
121 elif line[:align].lstrip() != '':
122 warn(ln, 'Garbage before * in comment line')
123 if not lines[-1].rstrip().endswith('*/'):
124 warn(ln, 'Code after end of multi-line comment')
125 if len(lines) > 2 and (lines[0].strip() not in ('/*', '/**') or
126 lines[-1].strip() != '*/'):
127 warn(ln, 'Comment is 3+ lines but is not formatted as block comment')
def check_preprocessor(line, ln):
    # Warn when the very first character of line is '#'. Leading
    # whitespace is not skipped, so only a '#' in the first column
    # counts.
    if line[:1] == '#':
        warn(ln, 'Preprocessor statement in function body')
def check_braces(line, ln):
    # Report brace placement problems found on a single line: an open
    # brace which begins an indented line, text following an open
    # brace, and text preceding a close brace.

    # A one-line initializer ("= { ... }") puts braces and code on the
    # same line, so remove it before looking at the rest.
    remainder = re.sub(r'=\s*{.*}', '', line)

    # The brace is "un-cuddled" when it is the first non-blank character
    # but something (whitespace) precedes it on the line.
    unindented = remainder.lstrip()
    if unindented.startswith('{') and unindented != remainder:
        warn(ln, 'Un-cuddled open brace')

    if re.search(r'{\s*\S', remainder) is not None:
        warn(ln, 'Code on line after open brace')

    if re.search(r'\S.*}', remainder) is not None:
        warn(ln, 'Code on line before close brace')
146 # This test gives false positives on some function pointer type
147 # declarations or casts. Avoid this by using typedefs.
148 def check_space_before_paren(line, ln):
149 for m in re.finditer(r'([\w]+)(\s*)\(', line):
150 ident, ws = m.groups()
151 if ident in ('void', 'char', 'int', 'long', 'unsigned'):
153 elif ident in ('if', 'for', 'while', 'switch'):
155 warn(ln, 'No space after flow control keyword')
156 elif ident != 'return':
158 warn(ln, 'Space before parenthesis in function call')
160 if re.search(r' \)', line):
161 warn(ln, 'Space before close parenthesis')
def check_parenthesized_return(line, ln):
    # Warn about "return (expr);", i.e. a return statement whose
    # expression appears to be wrapped in parentheses.
    #
    # The leading \b keeps identifiers which merely end in "return"
    # (for instance a call such as "do_return(x);") from being mistaken
    # for the keyword. This is still a heuristic: anything of the form
    # "return (...);" matches, so "return (a) + (b);" is also reported.
    if re.search(r'\breturn\s*\(.*\);', line):
        warn(ln, 'Parenthesized return expression')
169 def check_cast(line, ln):
170 # We can't reliably distinguish cast operators from parenthesized
171 # expressions or function call parameters without a real C parser,
172 # so we use some heuristics. A cast operator is followed by an
173 # expression, which usually begins with an identifier or an open
174 # paren. A function call or parenthesized expression is never
175 # followed by an identifier and only rarely by an open paren. We
176 # won't detect a cast operator when it's followed by an expression
177 # beginning with '*', since it's hard to distinguish that from a
178 # multiplication operator. We will get false positives from
179 # "(*fp) (args)" and "if (condition) statement", but both of those
180 # are erroneous anyway.
181 for m in re.finditer(r'\(([^(]+)\)(\s*)[a-zA-Z_(]', line):
183 warn(ln, 'Space after cast operator (or inline if/while body)')
184 # Check for casts like (char*) which should have a space.
185 if re.search(r'[^\s\*]\*+$', m.group(1)):
186 warn(ln, 'No space before * in cast operator')
def check_binary_operator(line, ln):
    # Report two kinds of binary operator problems: a line which opens
    # with an operator (the break belongs after the operator, so the
    # warning is attributed to the previous line), and an operator which
    # is not surrounded by whitespace.
    operators = r'(\+|-|\*|/|%|\^|==|=|!=|<=|<|>=|>|&&|&|\|\||\|)'

    if re.match(r'\s*' + operators + r'\s', line):
        warn(ln - 1, 'Line broken before binary operator')

    for found in re.finditer(r'(\s|\w)' + operators + r'(\s|\w)', line):
        lhs, oper, rhs = found.groups()
        space_before = lhs.isspace()
        space_after = rhs.isspace()
        if not space_before:
            if space_after:
                warn(ln, 'No space before binary operator')
            else:
                warn(ln, 'No space before or after binary operator')
        elif not space_after and oper not in ('-', '*', '&'):
            # '-', '*' and '&' are skipped here because, with a space
            # before and none after, they may be unary operators.
            warn(ln, 'No space after binary operator')
def check_assignment_in_conditional(line, ln):
    # Only "if" statements are examined. The pattern looks for "if",
    # one or more open parens, an identifier, and then a single '='
    # which is not the start of '=='.
    found = re.search(r'if\s*\(+\w+\s*=[^=]', line)
    if found is not None:
        warn(ln, 'Assignment in if conditional')
211 return len(re.match('\s*', line).group(0).expandtabs())
214 def check_unbraced_flow_body(line, ln, lines):
215 if re.match(r'\s*do$', line):
216 warn(ln, 'do statement without braces')
219 m = re.match(r'\s*(})?\s*else(\s*if\s*\(.*\))?\s*({)?\s*$', line)
220 if m and (m.group(1) is None) != (m.group(3) is None):
221 warn(ln, 'One arm of if/else statement braced but not the other')
223 if (re.match('\s*(if|else if|for|while)\s*\(.*\)$', line) or
224 re.match('\s*else$', line)):
226 # Look at the next two lines (ln is 1-based so lines[ln] is next).
227 if indent(lines[ln]) > base and indent(lines[ln + 1]) > base:
228 warn(ln, 'Body is 2+ lines but has no braces')
def check_bad_string_fn(line, ln):
    # The match is deliberately loose so that every member of the scanf
    # family (any identifier ending in "scanf") is caught along with
    # strcpy, strcat and sprintf. A non-word character is required on
    # both sides of the function name.
    prohibited = r'\W(strcpy|strcat|sprintf|\w*scanf)\W'
    if re.search(prohibited, line) is not None:
        warn(ln, 'Prohibited string function')
237 def check_indentation(line, indented_lines, ln):
238 if not indented_lines:
241 if ln - 1 >= len(indented_lines):
242 # This should only happen when the emacs reindent removed
243 # blank lines from the input file, but check.
244 if line.strip() == '':
245 warn(ln, 'Trailing blank line')
248 if line != indented_lines[ln - 1].rstrip('\r\n'):
249 warn(ln, 'Indentation may be incorrect')
252 def check_file(lines):
253 # Check if this file allows tabs.
256 allow_tabs = 'indent-tabs-mode: nil' not in lines[0]
258 indented_lines = emacs_reindent(lines)
265 line = line.rstrip('\r\n')
266 seen_tab = seen_tab or ('\t' in line)
268 # Check line structure issues before altering the line.
269 check_indentation(line, indented_lines, ln)
270 check_length(line, ln)
271 check_tabs(line, ln, allow_tabs, seen_tab)
272 check_trailing_whitespace(line, ln)
# Strip out single-line comments and the contents of string literals.
276 line = re.sub(r'/\*.*?\*/', '', line)
277 line = re.sub(r'"(\\.|[^"])*"', '""', line)
279 # Parse out and check multi-line comments. (Ignore code on
280 # the first or last line; check_comment will warn about it.)
281 if comment or '/*' in line:
284 check_comment(comment, ln - len(comment) + 1)
288 # Warn if we see a // comment and ignore anything following.
290 warn(ln, '// comment')
291 line = re.sub(r'//.*/', '', line)
293 if line.startswith('{'):
295 elif line.startswith('}'):
299 check_preprocessor(line, ln)
300 check_braces(line, ln)
301 check_space_before_paren(line, ln)
302 check_parenthesized_return(line, ln)
304 check_binary_operator(line, ln)
305 check_assignment_in_conditional(line, ln)
306 check_unbraced_flow_body(line, ln, lines)
307 check_bad_string_fn(line, ln)
310 warn(ln, 'Blank line at end of file')
313 if len(sys.argv) == 1:
314 lines = sys.stdin.readlines()
315 elif len(sys.argv) == 2:
316 f = open(sys.argv[1])
317 lines = f.readlines()
320 sys.stderr.write('Usage: cstyle-file [filename]\n')