# pep8.py - Check Python source code formatting, according to PEP 8
# Copyright (C) 2006 Johann C. Rocholl <johann@browsershots.org>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
26 Check Python source code formatting, according to PEP 8:
27 http://www.python.org/dev/peps/pep-0008/
29 For usage and a list of options, try this:
32 This program and its regression test suite live here:
33 http://svn.browsershots.org/trunk/devtools/pep8/
34 http://trac.browsershots.org/browser/trunk/devtools/pep8/
36 Groups of errors and warnings:
47 You can add checks to this program by writing plugins. Each plugin is
48 a simple function that is called for each line of source code, either
52 - Raw line of text from the input file.
55 - Multi-line statements converted to a single line.
56 - Stripped left and right.
57 - Contents of strings replaced with 'xxx' of same length.
60 The check function requests physical or logical lines by the name of
63 def maximum_line_length(physical_line)
64 def extraneous_whitespace(logical_line)
65 def blank_lines(logical_line, blank_lines, indent_level, line_number)
67 The last example above demonstrates how check plugins can request
68 additional information with extra arguments. All attributes of the
69 Checker object are available. Some examples:
71 lines: a list of the raw lines from the input file
72 tokens: the tokens that contribute to this logical line
73 line_number: line number in the input file
74 blank_lines: blank lines before this one
75 indent_char: first indentation character in this file (' ' or '\t')
76 indent_level: indentation (with tabs expanded to multiples of 8)
77 previous_indent_level: indentation on previous line
78 previous_logical: previous logical line
80 The docstring of each check function shall be the relevant part of
81 text from PEP 8. It is printed if the user enables --show-pep8.
91 from optparse import OptionParser
92 from keyword import iskeyword
93 from fnmatch import fnmatch
96 __revision__ = '$Rev$'
98 default_exclude = '.svn,CVS,*.pyc,*.pyo'
100 indent_match = re.compile(r'([ \t]*)').match
101 raise_comma_match = re.compile(r'raise\s+\w+\s*(,)').match
104 + - * / % ^ & | = < > >> <<
105 += -= *= /= %= ^= &= |= == <= >= >>= <<=
114 ##############################################################################
115 # Plugins (check functions) for physical lines
116 ##############################################################################
def tabs_or_spaces(physical_line, indent_char):
    """
    Never mix tabs and spaces.

    The most popular way of indenting Python is with spaces only. The
    second-most popular way is with tabs only. Code indented with a mixture
    of tabs and spaces should be converted to using spaces exclusively. When
    invoking the Python command line interpreter with the -t option, it issues
    warnings about code that illegally mixes tabs and spaces. When using -tt
    these warnings become errors. These options are highly recommended!
    """
    # NOTE: the docstring above is PEP 8 text shown to the user by
    # --show-pep8, so implementation notes live in comments instead.
    #
    # physical_line: raw line of text from the input file.
    # indent_char:   the indentation character this file is expected to use
    #                (' ' or '\t').
    # Returns (offset, message) for the first indentation character that
    # differs from indent_char, or None when the indentation is consistent.
    indent_width = len(physical_line) - len(physical_line.lstrip(' \t'))
    for offset, char in enumerate(physical_line[:indent_width]):
        if char != indent_char:
            return offset, "E101 indentation contains mixed spaces and tabs"
def tabs_obsolete(physical_line):
    """
    For new projects, spaces-only are strongly recommended over tabs. Most
    editors have features that make this easy to do.
    """
    # physical_line: raw line of text from the input file.
    # Returns (offset, message) for the first tab inside the leading
    # indentation, or None when the indentation contains no tab.
    indent_width = len(physical_line) - len(physical_line.lstrip(' \t'))
    # A single bounded find() locates the tab; tabs after the indentation
    # (e.g. inside the statement) are deliberately not considered.
    tab_offset = physical_line.find('\t', 0, indent_width)
    if tab_offset > -1:
        return tab_offset, "W191 indentation contains tabs"
def trailing_whitespace(physical_line):
    """
    JCR: Trailing whitespace is superfluous.
    """
    # physical_line: raw line of text from the input file.
    # Returns (offset, message) pointing at the first trailing whitespace
    # character, or None when there is none.
    #
    # Line terminators are removed first, in this exact order, so that they
    # are not themselves reported: newline, carriage return, form feed (^L).
    for terminator in ('\n', '\r', '\x0c'):
        physical_line = physical_line.rstrip(terminator)
    stripped = physical_line.rstrip()
    if physical_line != stripped:
        return len(stripped), "W291 trailing whitespace"
def trailing_blank_lines(physical_line, lines, line_number):
    """
    JCR: Trailing blank lines are superfluous.
    """
    # physical_line: raw line of text from the input file.
    # lines:         list of all raw lines of the input file.
    # line_number:   1-based number of physical_line within lines.
    # Returns (0, message) when the last line of the file is blank,
    # otherwise None.
    is_last_line = line_number == len(lines)
    if is_last_line and not physical_line.strip():
        return 0, "W391 blank line at end of file"
def missing_newline(physical_line):
    """
    JCR: The last line should have a newline.
    """
    # physical_line: raw line of text from the input file.
    # Returns (offset, message) pointing just past the end of the line when
    # it has no line terminator, otherwise None.
    #
    # The terminator is tested directly. Comparing the line with its
    # rstrip()ped form would miss a final line that ends in spaces or tabs
    # but has no newline, because stripping changes such a line as well.
    if physical_line[-1:] not in ('\n', '\r'):
        return len(physical_line), "W292 no newline at end of file"
174 def maximum_line_length(physical_line):
176 Limit all lines to a maximum of 79 characters.
178 There are still many devices around that are limited to 80 character
179 lines; plus, limiting windows to 80 characters makes it possible to have
180 several windows side-by-side. The default wrapping on such devices looks
181 ugly. Therefore, please limit all lines to a maximum of 79 characters.
182 For flowing long blocks of text (docstrings or comments), limiting the
183 length to 72 characters is recommended.
185 length = len(physical_line.rstrip())
187 return 79, "E501 line too long (%d characters)" % length
190 ##############################################################################
191 # Plugins (check functions) for logical lines
192 ##############################################################################
195 def blank_lines(logical_line, blank_lines, indent_level, line_number,
198 Separate top-level function and class definitions with two blank lines.
200 Method definitions inside a class are separated by a single blank line.
202 Extra blank lines may be used (sparingly) to separate groups of related
203 functions. Blank lines may be omitted between a bunch of related
204 one-liners (e.g. a set of dummy implementations).
206 Use blank lines in functions, sparingly, to indicate logical sections.
209 return # Don't expect blank lines before the first line
210 if previous_logical.startswith('@'):
211 return # Don't expect blank lines after function decorator
212 if (logical_line.startswith('def ') or
213 logical_line.startswith('class ') or
214 logical_line.startswith('@')):
215 if indent_level > 0 and blank_lines != 1:
216 return 0, "E301 expected 1 blank line, found %d" % blank_lines
217 if indent_level == 0 and blank_lines != 2:
218 return 0, "E302 expected 2 blank lines, found %d" % blank_lines
220 return 0, "E303 too many blank lines (%d)" % blank_lines
223 def extraneous_whitespace(logical_line):
225 Avoid extraneous whitespace in the following situations:
227 - Immediately inside parentheses, brackets or braces.
229 - Immediately before a comma, semicolon, or colon.
233 found = line.find(char + ' ')
235 return found + 1, "E201 whitespace after '%s'" % char
237 found = line.find(' ' + char)
238 if found > -1 and line[found - 1] != ',':
239 return found, "E202 whitespace before '%s'" % char
241 found = line.find(' ' + char)
243 return found, "E203 whitespace before '%s'" % char
246 def missing_whitespace(logical_line):
248 JCR: Each comma, semicolon or colon should be followed by whitespace.
251 for index in range(len(line) - 1):
253 if char in ',;:' and line[index + 1] != ' ':
254 before = line[:index]
255 if char == ':' and before.count('[') > before.count(']'):
256 continue # Slice syntax, no space required
257 return index, "E231 missing whitespace after '%s'" % char
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level):
    """
    Use 4 spaces per indentation level.

    For really old code that you don't want to mess up, you can continue to
    use 8-space tabs.
    """
    # logical_line:          unused here, but it must stay the first
    #                        parameter so that the function is recognised as
    #                        a logical-line check.
    # previous_logical:      previous logical line.
    # indent_char:           first indentation character in the file.
    # indent_level:          indentation of this line, tabs expanded.
    # previous_indent_level: indentation of the previous logical line.
    # Returns (0, message) for the first violated rule, else None.
    if indent_char == ' ' and indent_level % 4 != 0:
        return 0, "E111 indentation is not a multiple of four"
    opens_block = previous_logical.endswith(':')
    is_deeper = indent_level > previous_indent_level
    if opens_block and not is_deeper:
        return 0, "E112 expected an indented block"
    if is_deeper and not opens_block:
        return 0, "E113 unexpected indentation"
277 def whitespace_before_parameters(logical_line, tokens):
279 Avoid extraneous whitespace in the following situations:
281 - Immediately before the open parenthesis that starts the argument
282 list of a function call.
284 - Immediately before the open parenthesis that starts an indexing or
287 prev_type = tokens[0][0]
288 prev_text = tokens[0][1]
289 prev_end = tokens[0][3]
290 for index in range(1, len(tokens)):
291 token_type, text, start, end, line = tokens[index]
292 if (token_type == tokenize.OP and
294 start != prev_end and
295 prev_type == tokenize.NAME and
296 (index < 2 or tokens[index - 2][1] != 'class') and
297 (not iskeyword(prev_text))):
298 return prev_end, "E211 whitespace before '%s'" % text
299 prev_type = token_type
304 def whitespace_around_operator(logical_line):
306 Avoid extraneous whitespace in the following situations:
308 - More than one space around an assignment (or other) operator to
309 align it with another.
312 for operator in operators:
313 found = line.find(' ' + operator)
315 return found, "E221 multiple spaces before operator"
316 found = line.find(operator + ' ')
318 return found, "E222 multiple spaces after operator"
319 found = line.find('\t' + operator)
321 return found, "E223 tab before operator"
322 found = line.find(operator + '\t')
324 return found, "E224 tab after operator"
327 def whitespace_around_comma(logical_line):
329 Avoid extraneous whitespace in the following situations:
331 - More than one space around an assignment (or other) operator to
332 align it with another.
334 JCR: This should also be applied around comma etc.
337 for separator in ',;:':
338 found = line.find(separator + ' ')
340 return found + 1, "E241 multiple spaces after '%s'" % separator
341 found = line.find(separator + '\t')
343 return found + 1, "E242 tab after '%s'" % separator
346 def imports_on_separate_lines(logical_line):
348 Imports should usually be on separate lines.
351 if line.startswith('import '):
352 found = line.find(',')
354 return found, "E401 multiple imports on one line"
357 def compound_statements(logical_line):
359 Compound statements (multiple statements on the same line) are
360 generally discouraged.
363 found = line.find(':')
364 if -1 < found < len(line) - 1:
365 before = line[:found]
366 if (before.count('{') <= before.count('}') and # {'a': 1} (dict)
367 before.count('[') <= before.count(']') and # [1:2] (slice)
368 not re.search(r'\blambda\b', before)): # lambda x: x
369 return found, "E701 multiple statements on one line (colon)"
370 found = line.find(';')
372 return found, "E702 multiple statements on one line (semicolon)"
375 def python_3000_has_key(logical_line):
377 The {}.has_key() method will be removed in the future version of
378 Python. Use the 'in' operation instead, like:
383 pos = logical_line.find('.has_key(')
385 return pos, "W601 .has_key() is deprecated, use 'in'"
388 def python_3000_raise_comma(logical_line):
390 When raising an exception, use "raise ValueError('message')"
391 instead of the older form "raise ValueError, 'message'".
393 The paren-using form is preferred because when the exception arguments
394 are long or include string formatting, you don't need to use line
395 continuation characters thanks to the containing parentheses. The older
396 form will be removed in Python 3000.
398 match = raise_comma_match(logical_line)
400 return match.start(1), "W602 deprecated form of raising exception"
403 ##############################################################################
405 ##############################################################################
def expand_indent(line):
    """
    Return the amount of indentation.
    Tabs are expanded to the next multiple of 8.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\\t')
    8
    >>> expand_indent('    \\t')
    8
    >>> expand_indent('       \\t')
    8
    >>> expand_indent('        \\t')
    16
    """
    # line: text whose leading spaces and tabs are measured; scanning stops
    #       at the first character that is neither.
    # Returns the indentation width as an int.
    result = 0
    for char in line:
        if char == '\t':
            # Floor division keeps the result an integer on every Python
            # version; true division would yield floats such as 11.0.
            result = result // 8 * 8 + 8
        elif char == ' ':
            result += 1
        else:
            break
    return result
435 ##############################################################################
436 # Framework to run all checks
437 ##############################################################################
441 """Print a message."""
442 # print >> sys.stderr, options.prog + ': ' + text
443 # print >> sys.stderr, text
447 def find_checks(argument_name):
449 Find all globally visible functions where the first argument name
450 starts with argument_name.
453 function_type = type(find_checks)
454 for name, function in globals().iteritems():
455 if type(function) is function_type:
456 args = inspect.getargspec(function)[0]
457 if len(args) >= 1 and args[0].startswith(argument_name):
458 checks.append((name, function, args))
463 def mute_string(text):
465 Replace contents with 'xxx' to prevent syntax matching.
467 >>> mute_string('"abc"')
469 >>> mute_string("'''abc'''")
471 >>> mute_string("r'abc'")
476 # String modifiers (e.g. u or r)
477 if text.endswith('"'):
478 start += text.index('"')
479 elif text.endswith("'"):
480 start += text.index("'")
482 if text.endswith('"""') or text.endswith("'''"):
485 return text[:start] + 'x' * (end - start) + text[end:]
490 Load a Python source file, tokenize it, check coding style.
def __init__(self, filename):
    """
    Read the source file and collect the check functions to run on it.

    filename: path of the Python source file to check.

    Side effects: stores the file's raw lines in self.lines and adds their
    count to options.counters['physical lines'].
    """
    self.filename = filename
    # open() replaces the Python-2-only file() builtin, and the handle is
    # closed explicitly instead of being left to the garbage collector.
    source = open(filename)
    try:
        self.lines = source.readlines()
    finally:
        source.close()
    self.physical_checks = find_checks('physical_line')
    self.logical_checks = find_checks('logical_line')
    options.counters['physical lines'] = \
        options.counters.get('physical lines', 0) + len(self.lines)
503 Get the next line from the input buffer.
505 self.line_number += 1
506 if self.line_number > len(self.lines):
508 return self.lines[self.line_number - 1]
510 def readline_check_physical(self):
512 Check and return the next physical line. This method can be
513 used to feed tokenize.generate_tokens.
515 line = self.readline()
517 self.check_physical(line)
520 def run_check(self, check, argument_names):
525 for name in argument_names:
526 arguments.append(getattr(self, name))
527 return check(*arguments)
def check_physical(self, line):
    """
    Run all physical checks on a raw input line.

    line: the raw physical line, stored as self.physical_line so that
          check functions can request it by argument name.

    The first line that starts with a space or tab fixes self.indent_char
    for the rest of the file. Every check that returns a result is passed
    to self.report_error together with the current self.line_number.
    """
    self.physical_line = line
    # line[:1] is '' for an empty line, so no separate length test is needed.
    if self.indent_char is None and line[:1] in (' ', '\t'):
        self.indent_char = line[0]
    for name, check, argument_names in self.physical_checks:
        result = self.run_check(check, argument_names)
        if result is None:
            continue
        offset, text = result
        self.report_error(self.line_number, offset, text, check)
542 def build_tokens_line(self):
544 Build a logical line from tokens.
550 for token in self.tokens:
551 token_type, text = token[0:2]
552 if token_type in (tokenize.COMMENT, tokenize.NL,
553 tokenize.INDENT, tokenize.DEDENT,
556 if token_type == tokenize.STRING:
557 text = mute_string(text)
559 end_line, end = previous[3]
560 start_line, start = token[2]
561 if end_line != start_line: # different row
562 if self.lines[end_line - 1][end - 1] not in '{[(':
565 elif end != start: # different column
566 fill = self.lines[end_line - 1][end:start]
569 self.mapping.append((length, token))
573 self.logical_line = ''.join(logical)
574 assert self.logical_line.lstrip() == self.logical_line
575 assert self.logical_line.rstrip() == self.logical_line
577 def check_logical(self):
579 Build a line from tokens and run all logical checks on it.
581 options.counters['logical lines'] = \
582 options.counters.get('logical lines', 0) + 1
583 self.build_tokens_line()
584 first_line = self.lines[self.mapping[0][1][2][0] - 1]
585 indent = first_line[:self.mapping[0][1][2][1]]
586 self.previous_indent_level = self.indent_level
587 self.indent_level = expand_indent(indent)
588 if options.verbose >= 2:
589 print self.logical_line[:80].rstrip()
590 for name, check, argument_names in self.logical_checks:
591 if options.verbose >= 3:
593 result = self.run_check(check, argument_names)
594 if result is not None:
595 offset, text = result
596 if type(offset) is tuple:
597 original_number, original_offset = offset
599 for token_offset, token in self.mapping:
600 if offset >= token_offset:
601 original_number = token[2][0]
602 original_offset = (token[2][1]
603 + offset - token_offset)
604 self.report_error(original_number, original_offset,
606 self.previous_logical = self.logical_line
610 Run all checks on the input file.
614 self.indent_char = None
615 self.indent_level = 0
616 self.previous_logical = ''
620 for token in tokenize.generate_tokens(self.readline_check_physical):
621 # print tokenize.tok_name[token[0]], repr(token)
622 self.tokens.append(token)
623 token_type, text = token[0:2]
624 if token_type == tokenize.OP and text in '([{':
626 if token_type == tokenize.OP and text in '}])':
628 if token_type == tokenize.NEWLINE and not parens:
632 if token_type == tokenize.NL and not parens:
633 self.blank_lines += 1
635 if token_type == tokenize.COMMENT:
636 source_line = token[4]
637 token_start = token[2][1]
638 if source_line[:token_start].strip() == '':
640 return self.file_errors
642 def report_error(self, line_number, offset, text, check):
644 Report an error, according to options.
646 if options.quiet == 1 and not self.file_errors:
647 message(self.filename)
648 self.file_errors += 1
650 options.counters[code] = options.counters.get(code, 0) + 1
651 options.messages[code] = text[5:]
654 if options.testsuite:
655 base = os.path.basename(self.filename)[:4]
658 if base[0] == 'E' and code[0] == 'W':
660 if ignore_code(code):
662 if options.counters[code] == 1 or options.repeat:
663 message("%s:%s:%d: %s" %
664 (self.filename, line_number, offset + 1, text))
665 if options.show_source:
666 line = self.lines[line_number - 1]
667 message(line.rstrip())
668 message(' ' * offset + '^')
669 if options.show_pep8:
670 message(check.__doc__.lstrip('\n').rstrip())
673 def input_file(filename):
675 Run all checks on a Python source file.
677 if excluded(filename) or not filename_match(filename):
680 message('checking ' + filename)
681 options.counters['files'] = options.counters.get('files', 0) + 1
682 errors = Checker(filename).check_all()
683 if options.testsuite and not errors:
684 message("%s: %s" % (filename, "no errors found"))
688 def input_dir(dirname):
690 Check all Python source files in this directory and all subdirectories.
692 dirname = dirname.rstrip('/')
693 if excluded(dirname):
696 for root, dirs, files in os.walk(dirname):
698 message('directory ' + root)
699 options.counters['directories'] = \
700 options.counters.get('directories', 0) + 1
706 for filename in files:
707 errors += input_file(os.path.join(root, filename))
711 def excluded(filename):
713 Check if options.exclude contains a pattern that matches filename.
715 basename = os.path.basename(filename)
716 for pattern in options.exclude:
717 if fnmatch(basename, pattern):
718 # print basename, 'excluded because it matches', pattern
722 def filename_match(filename):
724 Check if options.filename contains a pattern that matches filename.
725 If options.filename is unspecified, this always returns True.
727 if not options.filename:
729 for pattern in options.filename:
730 if fnmatch(filename, pattern):
734 def ignore_code(code):
736 Check if options.ignore contains a prefix of the error code.
738 for ignore in options.ignore:
739 if code.startswith(ignore):
def get_error_statistics():
    """Return the statistics lines for all error codes (prefix 'E')."""
    error_prefix = "E"
    return get_statistics(error_prefix)
def get_warning_statistics():
    """Return the statistics lines for all warning codes (prefix 'W')."""
    warning_prefix = "W"
    return get_statistics(warning_prefix)
753 def get_statistics(prefix=''):
755 Get statistics for message codes that start with the prefix.
757 prefix='' matches all errors and warnings
758 prefix='E' matches all errors
759 prefix='W' matches all warnings
760 prefix='E4' matches all errors that have to do with imports
763 keys = options.messages.keys()
766 if key.startswith(prefix):
767 stats.append('%-7s %s %s' %
768 (options.counters[key], key, options.messages[key]))
772 def print_statistics(prefix=''):
773 """Print overall statistics (number of errors and warnings)."""
774 for line in get_statistics(prefix):
778 def print_benchmark(elapsed):
780 Print benchmark numbers.
782 print '%-7.2f %s' % (elapsed, 'seconds elapsed')
783 keys = ['directories', 'files',
784 'logical lines', 'physical lines']
786 if key in options.counters:
787 print '%-7d %s per second (%d total)' % (
788 options.counters[key] / elapsed, key,
789 options.counters[key])
792 def process_options(arglist=None):
794 Process options passed either via arglist or via command line args.
797 usage = "%prog [options] input ..."
798 parser = OptionParser(usage)
799 parser.add_option('-v', '--verbose', default=0, action='count',
800 help="print status messages, or debug with -vv")
801 parser.add_option('-q', '--quiet', default=0, action='count',
802 help="report only file names, or nothing with -qq")
803 parser.add_option('--exclude', metavar='patterns', default=default_exclude,
804 help="skip matches (default %s)" % default_exclude)
805 parser.add_option('--filename', metavar='patterns',
806 help="only check matching files (e.g. *.py)")
807 parser.add_option('--ignore', metavar='errors', default='',
808 help="skip errors and warnings (e.g. E4,W)")
809 parser.add_option('--repeat', action='store_true',
810 help="show all occurrences of the same error")
811 parser.add_option('--show-source', action='store_true',
812 help="show source code for each error")
813 parser.add_option('--show-pep8', action='store_true',
814 help="show text of PEP 8 for each error")
815 parser.add_option('--statistics', action='store_true',
816 help="count errors and warnings")
817 parser.add_option('--benchmark', action='store_true',
818 help="measure processing speed")
819 parser.add_option('--testsuite', metavar='dir',
820 help="run regression tests from dir")
821 parser.add_option('--doctest', action='store_true',
822 help="run doctest on myself")
823 options, args = parser.parse_args(arglist)
824 if options.testsuite:
825 args.append(options.testsuite)
827 parser.error('input not specified')
828 options.prog = os.path.basename(sys.argv[0])
829 options.exclude = options.exclude.split(',')
830 for index in range(len(options.exclude)):
831 options.exclude[index] = options.exclude[index].rstrip('/')
833 options.filename = options.filename.split(',')
835 options.ignore = options.ignore.split(',')
838 options.counters = {}
839 options.messages = {}
846 Parse options and run checks on Python source.
848 options, args = process_options()
851 return doctest.testmod()
852 start_time = time.time()
855 if os.path.isdir(path):
856 errors += input_dir(path)
858 errors += input_file(path)
859 elapsed = time.time() - start_time
860 if options.statistics:
862 if options.benchmark:
863 print_benchmark(elapsed)
866 if __name__ == '__main__':