misc/pep8.py

   1 #!/usr/bin/python
   2 # pep8.py - Check Python source code formatting, according to PEP 8
   3 # Copyright (C) 2006 Johann C. Rocholl <johann@browsershots.org>
   4 #
   5 # Permission is hereby granted, free of charge, to any person
   6 # obtaining a copy of this software and associated documentation files
   7 # (the "Software"), to deal in the Software without restriction,
   8 # including without limitation the rights to use, copy, modify, merge,
   9 # publish, distribute, sublicense, and/or sell copies of the Software,
  10 # and to permit persons to whom the Software is furnished to do so,
  11 # subject to the following conditions:
  12 #
  13 # The above copyright notice and this permission notice shall be
  14 # included in all copies or substantial portions of the Software.
  15 #
  16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  19 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  20 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  21 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24
  25 """
  26 Check Python source code formatting, according to PEP 8:
  27 http://www.python.org/dev/peps/pep-0008/
  28
  29 For usage and a list of options, try this:
  30 $ python pep8.py -h
  31
  32 This program and its regression test suite live here:
  33 http://svn.browsershots.org/trunk/devtools/pep8/
  34 http://trac.browsershots.org/browser/trunk/devtools/pep8/
  35
  36 Groups of errors and warnings:
  37 E errors
  38 W warnings
  39 100 indentation
  40 200 whitespace
  41 300 blank lines
  42 400 imports
  43 500 line length
  44 600 deprecation
  45 700 statements
  46
  47 You can add checks to this program by writing plugins. Each plugin is
  48 a simple function that is called for each line of source code, either
  49 physical or logical.
  50
  51 Physical line:
  52 - Raw line of text from the input file.
  53
  54 Logical line:
  55 - Multi-line statements converted to a single line.
  56 - Stripped left and right.
  57 - Contents of strings replaced with 'xxx' of same length.
  58 - Comments removed.
  59
  60 The check function requests physical or logical lines by the name of
  61 the first argument:
  62
  63 def maximum_line_length(physical_line)
  64 def extraneous_whitespace(logical_line)
  65 def blank_lines(logical_line, blank_lines, indent_level, line_number)
  66
  67 The last example above demonstrates how check plugins can request
  68 additional information with extra arguments. All attributes of the
  69 Checker object are available. Some examples:
  70
  71 lines: a list of the raw lines from the input file
  72 tokens: the tokens that contribute to this logical line
  73 line_number: line number in the input file
  74 blank_lines: blank lines before this one
  75 indent_char: first indentation character in this file (' ' or '\t')
  76 indent_level: indentation (with tabs expanded to multiples of 8)
  77 previous_indent_level: indentation on previous line
  78 previous_logical: previous logical line
  79
  80 The docstring of each check function shall be the relevant part of
  81 text from PEP 8. It is printed if the user enables --show-pep8.
  82
  83 """
  84
  85 import os
  86 import sys
  87 import re
  88 import time
  89 import inspect
  90 import tokenize
  91 from optparse import OptionParser
  92 from keyword import iskeyword
  93 from fnmatch import fnmatch
  94
  95 __version__ = '0.2.0'
  96 __revision__ = '$Rev$'
  97
  98 default_exclude = '.svn,CVS,*.pyc,*.pyo'
  99
 100 indent_match = re.compile(r'([ \t]*)').match
 101 raise_comma_match = re.compile(r'raise\s+\w+\s*(,)').match
 102
 103 operators = """
 104 +  -  *  /  %  ^  &  |  =  <  >  >>  <<
 105 += -= *= /= %= ^= &= |= == <= >= >>= <<=
 106 != <> :
 107 in is or not and
 108 """.split()
 109
 110 options = None
 111 args = None
 112
 113
 114 ##############################################################################
 115 # Plugins (check functions) for physical lines
 116 ##############################################################################
 117
 118
 119 def tabs_or_spaces(physical_line, indent_char):
 120     """
 121     Never mix tabs and spaces.
 122
 123     The most popular way of indenting Python is with spaces only.  The
 124     second-most popular way is with tabs only.  Code indented with a mixture
 125     of tabs and spaces should be converted to using spaces exclusively.  When
 126     invoking the Python command line interpreter with the -t option, it issues
 127     warnings about code that illegally mixes tabs and spaces.  When using -tt
 128     these warnings become errors.  These options are highly recommended!
 129     """
 130     indent = indent_match(physical_line).group(1)
 131     for offset, char in enumerate(indent):
 132         if char != indent_char:
 133             return offset, "E101 indentation contains mixed spaces and tabs"
 134
 135
 136 def tabs_obsolete(physical_line):
 137     """
 138     For new projects, spaces-only are strongly recommended over tabs.  Most
 139     editors have features that make this easy to do.
 140     """
 141     indent = indent_match(physical_line).group(1)
 142     if indent.count('\t'):
 143         return indent.index('\t'), "W191 indentation contains tabs"
 144
 145
 146 def trailing_whitespace(physical_line):
 147     """
 148     JCR: Trailing whitespace is superfluous.
 149     """
 150     physical_line = physical_line.rstrip('\n') # chr(10), newline
 151     physical_line = physical_line.rstrip('\r') # chr(13), carriage return
 152     physical_line = physical_line.rstrip('\x0c') # chr(12), form feed, ^L
 153     stripped = physical_line.rstrip()
 154     if physical_line != stripped:
 155         return len(stripped), "W291 trailing whitespace"
 156
 157
 158 def trailing_blank_lines(physical_line, lines, line_number):
 159     """
 160     JCR: Trailing blank lines are superfluous.
 161     """
 162     if physical_line.strip() == '' and line_number == len(lines):
 163         return 0, "W391 blank line at end of file"
 164
 165
 166 def missing_newline(physical_line):
 167     """
 168     JCR: The last line should have a newline.
 169     """
 170     if physical_line.rstrip() == physical_line:
 171         return len(physical_line), "W292 no newline at end of file"
 172
 173
 174 def maximum_line_length(physical_line):
 175     """
 176     Limit all lines to a maximum of 79 characters.
 177
 178     There are still many devices around that are limited to 80 character
 179     lines; plus, limiting windows to 80 characters makes it possible to have
 180     several windows side-by-side.  The default wrapping on such devices looks
 181     ugly.  Therefore, please limit all lines to a maximum of 79 characters.
 182     For flowing long blocks of text (docstrings or comments), limiting the
 183     length to 72 characters is recommended.
 184     """
 185     length = len(physical_line.rstrip())
 186     if length > 79:
 187         return 79, "E501 line too long (%d characters)" % length
 188
 189
 190 ##############################################################################
 191 # Plugins (check functions) for logical lines
 192 ##############################################################################
 193
 194
 195 def blank_lines(logical_line, blank_lines, indent_level, line_number,
 196                 previous_logical):
 197     """
 198     Separate top-level function and class definitions with two blank lines.
 199
 200     Method definitions inside a class are separated by a single blank line.
 201
 202     Extra blank lines may be used (sparingly) to separate groups of related
 203     functions.  Blank lines may be omitted between a bunch of related
 204     one-liners (e.g. a set of dummy implementations).
 205
 206     Use blank lines in functions, sparingly, to indicate logical sections.
 207     """
 208     if line_number == 1:
 209         return # Don't expect blank lines before the first line
 210     if previous_logical.startswith('@'):
 211         return # Don't expect blank lines after function decorator
 212     if (logical_line.startswith('def ') or
 213         logical_line.startswith('class ') or
 214         logical_line.startswith('@')):
 215         if indent_level > 0 and blank_lines != 1:
 216             return 0, "E301 expected 1 blank line, found %d" % blank_lines
 217         if indent_level == 0 and blank_lines != 2:
 218             return 0, "E302 expected 2 blank lines, found %d" % blank_lines
 219     if blank_lines > 2:
 220         return 0, "E303 too many blank lines (%d)" % blank_lines
 221
 222
 223 def extraneous_whitespace(logical_line):
 224     """
 225     Avoid extraneous whitespace in the following situations:
 226
 227     - Immediately inside parentheses, brackets or braces.
 228
 229     - Immediately before a comma, semicolon, or colon.
 230     """
 231     line = logical_line
 232     for char in '([{':
 233         found = line.find(char + ' ')
 234         if found > -1:
 235             return found + 1, "E201 whitespace after '%s'" % char
 236     for char in '}])':
 237         found = line.find(' ' + char)
 238         if found > -1 and line[found - 1] != ',':
 239             return found, "E202 whitespace before '%s'" % char
 240     for char in ',;:':
 241         found = line.find(' ' + char)
 242         if found > -1:
 243             return found, "E203 whitespace before '%s'" % char
 244
 245
 246 def missing_whitespace(logical_line):
 247     """
 248     JCR: Each comma, semicolon or colon should be followed by whitespace.
 249     """
 250     line = logical_line
 251     for index in range(len(line) - 1):
 252         char = line[index]
 253         if char in ',;:' and line[index + 1] != ' ':
 254             before = line[:index]
 255             if char == ':' and before.count('[') > before.count(']'):
 256                 continue # Slice syntax, no space required
 257             return index, "E231 missing whitespace after '%s'" % char
 258
 259
 260 def indentation(logical_line, previous_logical, indent_char,
 261                 indent_level, previous_indent_level):
 262     """
 263     Use 4 spaces per indentation level.
 264
 265     For really old code that you don't want to mess up, you can continue to
 266     use 8-space tabs.
 267     """
 268     if indent_char == ' ' and indent_level % 4:
 269         return 0, "E111 indentation is not a multiple of four"
 270     indent_expect = previous_logical.endswith(':')
 271     if indent_expect and indent_level <= previous_indent_level:
 272         return 0, "E112 expected an indented block"
 273     if indent_level > previous_indent_level and not indent_expect:
 274         return 0, "E113 unexpected indentation"
 275
 276
 277 def whitespace_before_parameters(logical_line, tokens):
 278     """
 279     Avoid extraneous whitespace in the following situations:
 280
 281     - Immediately before the open parenthesis that starts the argument
 282       list of a function call.
 283
 284     - Immediately before the open parenthesis that starts an indexing or
 285       slicing.
 286     """
 287     prev_type = tokens[0][0]
 288     prev_text = tokens[0][1]
 289     prev_end = tokens[0][3]
 290     for index in range(1, len(tokens)):
 291         token_type, text, start, end, line = tokens[index]
 292         if (token_type == tokenize.OP and
 293             text in '([' and
 294             start != prev_end and
 295             prev_type == tokenize.NAME and
 296             (index < 2 or tokens[index - 2][1] != 'class') and
 297             (not iskeyword(prev_text))):
 298             return prev_end, "E211 whitespace before '%s'" % text
 299         prev_type = token_type
 300         prev_text = text
 301         prev_end = end
 302
 303
 304 def whitespace_around_operator(logical_line):
 305     """
 306     Avoid extraneous whitespace in the following situations:
 307
 308     - More than one space around an assignment (or other) operator to
 309       align it with another.
 310     """
 311     line = logical_line
 312     for operator in operators:
 313         found = line.find('  ' + operator)
 314         if found > -1:
 315             return found, "E221 multiple spaces before operator"
 316         found = line.find(operator + '  ')
 317         if found > -1:
 318             return found, "E222 multiple spaces after operator"
 319         found = line.find('\t' + operator)
 320         if found > -1:
 321             return found, "E223 tab before operator"
 322         found = line.find(operator + '\t')
 323         if found > -1:
 324             return found, "E224 tab after operator"
 325
 326
 327 def whitespace_around_comma(logical_line):
 328     """
 329     Avoid extraneous whitespace in the following situations:
 330
 331     - More than one space around an assignment (or other) operator to
 332       align it with another.
 333
 334     JCR: This should also be applied around comma etc.
 335     """
 336     line = logical_line
 337     for separator in ',;:':
 338         found = line.find(separator + '  ')
 339         if found > -1:
 340             return found + 1, "E241 multiple spaces after '%s'" % separator
 341         found = line.find(separator + '\t')
 342         if found > -1:
 343             return found + 1, "E242 tab after '%s'" % separator
 344
 345
 346 def imports_on_separate_lines(logical_line):
 347     """
 348     Imports should usually be on separate lines.
 349     """
 350     line = logical_line
 351     if line.startswith('import '):
 352         found = line.find(',')
 353         if found > -1:
 354             return found, "E401 multiple imports on one line"
 355
 356
 357 def compound_statements(logical_line):
 358     """
 359     Compound statements (multiple statements on the same line) are
 360     generally discouraged.
 361     """
 362     line = logical_line
 363     found = line.find(':')
 364     if -1 < found < len(line) - 1:
 365         before = line[:found]
 366         if (before.count('{') <= before.count('}') and # {'a': 1} (dict)
 367             before.count('[') <= before.count(']') and # [1:2] (slice)
 368             not re.search(r'\blambda\b', before)):     # lambda x: x
 369             return found, "E701 multiple statements on one line (colon)"
 370     found = line.find(';')
 371     if -1 < found:
 372         return found, "E702 multiple statements on one line (semicolon)"
 373
 374
 375 def python_3000_has_key(logical_line):
 376     """
 377     The {}.has_key() method will be removed in the future version of
 378     Python. Use the 'in' operation instead, like:
 379     d = {"a": 1, "b": 2}
 380     if "b" in d:
 381         print d["b"]
 382     """
 383     pos = logical_line.find('.has_key(')
 384     if pos > -1:
 385         return pos, "W601 .has_key() is deprecated, use 'in'"
 386
 387
 388 def python_3000_raise_comma(logical_line):
 389     """
 390     When raising an exception, use "raise ValueError('message')"
 391     instead of the older form "raise ValueError, 'message'".
 392
 393     The paren-using form is preferred because when the exception arguments
 394     are long or include string formatting, you don't need to use line
 395     continuation characters thanks to the containing parentheses.  The older
 396     form will be removed in Python 3000.
 397     """
 398     match = raise_comma_match(logical_line)
 399     if match:
 400         return match.start(1), "W602 deprecated form of raising exception"
 401
 402
 403 ##############################################################################
 404 # Helper functions
 405 ##############################################################################
 406
 407
 408 def expand_indent(line):
 409     """
 410     Return the amount of indentation.
 411     Tabs are expanded to the next multiple of 8.
 412
 413     >>> expand_indent('    ')
 414     4
 415     >>> expand_indent('\\t')
 416     8
 417     >>> expand_indent('    \\t')
 418     8
 419     >>> expand_indent('       \\t')
 420     8
 421     >>> expand_indent('        \\t')
 422     16
 423     """
 424     result = 0
 425     for char in line:
 426         if char == '\t':
 427             result = result / 8 * 8 + 8
 428         elif char == ' ':
 429             result += 1
 430         else:
 431             break
 432     return result
 433
 434
 435 ##############################################################################
 436 # Framework to run all checks
 437 ##############################################################################
 438
 439
 440 def message(text):
 441     """Print a message."""
 442     # print >> sys.stderr, options.prog + ': ' + text
 443     # print >> sys.stderr, text
 444     print text
 445
 446
 447 def find_checks(argument_name):
 448     """
 449     Find all globally visible functions where the first argument name
 450     starts with argument_name.
 451     """
 452     checks = []
 453     function_type = type(find_checks)
 454     for name, function in globals().iteritems():
 455         if type(function) is function_type:
 456             args = inspect.getargspec(function)[0]
 457             if len(args) >= 1 and args[0].startswith(argument_name):
 458                 checks.append((name, function, args))
 459     checks.sort()
 460     return checks
 461
 462
 463 def mute_string(text):
 464     """
 465     Replace contents with 'xxx' to prevent syntax matching.
 466
 467     >>> mute_string('"abc"')
 468     '"xxx"'
 469     >>> mute_string("'''abc'''")
 470     "'''xxx'''"
 471     >>> mute_string("r'abc'")
 472     "r'xxx'"
 473     """
 474     start = 1
 475     end = len(text) - 1
 476     # String modifiers (e.g. u or r)
 477     if text.endswith('"'):
 478         start += text.index('"')
 479     elif text.endswith("'"):
 480         start += text.index("'")
 481     # Triple quotes
 482     if text.endswith('"""') or text.endswith("'''"):
 483         start += 2
 484         end -= 2
 485     return text[:start] + 'x' * (end - start) + text[end:]
 486
 487
 488 class Checker:
 489     """
 490     Load a Python source file, tokenize it, check coding style.
 491     """
 492
 493     def __init__(self, filename):
 494         self.filename = filename
 495         self.lines = file(filename).readlines()
 496         self.physical_checks = find_checks('physical_line')
 497         self.logical_checks = find_checks('logical_line')
 498         options.counters['physical lines'] = \
 499             options.counters.get('physical lines', 0) + len(self.lines)
 500
 501     def readline(self):
 502         """
 503         Get the next line from the input buffer.
 504         """
 505         self.line_number += 1
 506         if self.line_number > len(self.lines):
 507             return ''
 508         return self.lines[self.line_number - 1]
 509
 510     def readline_check_physical(self):
 511         """
 512         Check and return the next physical line. This method can be
 513         used to feed tokenize.generate_tokens.
 514         """
 515         line = self.readline()
 516         if line:
 517             self.check_physical(line)
 518         return line
 519
 520     def run_check(self, check, argument_names):
 521         """
 522         Run a check plugin.
 523         """
 524         arguments = []
 525         for name in argument_names:
 526             arguments.append(getattr(self, name))
 527         return check(*arguments)
 528
 529     def check_physical(self, line):
 530         """
 531         Run all physical checks on a raw input line.
 532         """
 533         self.physical_line = line
 534         if self.indent_char is None and len(line) and line[0] in ' \t':
 535             self.indent_char = line[0]
 536         for name, check, argument_names in self.physical_checks:
 537             result = self.run_check(check, argument_names)
 538             if result is not None:
 539                 offset, text = result
 540                 self.report_error(self.line_number, offset, text, check)
 541
 542     def build_tokens_line(self):
 543         """
 544         Build a logical line from tokens.
 545         """
 546         self.mapping = []
 547         logical = []
 548         length = 0
 549         previous = None
 550         for token in self.tokens:
 551             token_type, text = token[0:2]
 552             if token_type in (tokenize.COMMENT, tokenize.NL,
 553                               tokenize.INDENT, tokenize.DEDENT,
 554                               tokenize.NEWLINE):
 555                 continue
 556             if token_type == tokenize.STRING:
 557                 text = mute_string(text)
 558             if previous:
 559                 end_line, end = previous[3]
 560                 start_line, start = token[2]
 561                 if end_line != start_line: # different row
 562                     if self.lines[end_line - 1][end - 1] not in '{[(':
 563                         logical.append(' ')
 564                         length += 1
 565                 elif end != start: # different column
 566                     fill = self.lines[end_line - 1][end:start]
 567                     logical.append(fill)
 568                     length += len(fill)
 569             self.mapping.append((length, token))
 570             logical.append(text)
 571             length += len(text)
 572             previous = token
 573         self.logical_line = ''.join(logical)
 574         assert self.logical_line.lstrip() == self.logical_line
 575         assert self.logical_line.rstrip() == self.logical_line
 576
 577     def check_logical(self):
 578         """
 579         Build a line from tokens and run all logical checks on it.
 580         """
 581         options.counters['logical lines'] = \
 582             options.counters.get('logical lines', 0) + 1
 583         self.build_tokens_line()
 584         first_line = self.lines[self.mapping[0][1][2][0] - 1]
 585         indent = first_line[:self.mapping[0][1][2][1]]
 586         self.previous_indent_level = self.indent_level
 587         self.indent_level = expand_indent(indent)
 588         if options.verbose >= 2:
 589             print self.logical_line[:80].rstrip()
 590         for name, check, argument_names in self.logical_checks:
 591             if options.verbose >= 3:
 592                 print '   ', name
 593             result = self.run_check(check, argument_names)
 594             if result is not None:
 595                 offset, text = result
 596                 if type(offset) is tuple:
 597                     original_number, original_offset = offset
 598                 else:
 599                     for token_offset, token in self.mapping:
 600                         if offset >= token_offset:
 601                             original_number = token[2][0]
 602                             original_offset = (token[2][1]
 603                                                + offset - token_offset)
 604                 self.report_error(original_number, original_offset,
 605                                   text, check)
 606         self.previous_logical = self.logical_line
 607
 608     def check_all(self):
 609         """
 610         Run all checks on the input file.
 611         """
 612         self.file_errors = 0
 613         self.line_number = 0
 614         self.indent_char = None
 615         self.indent_level = 0
 616         self.previous_logical = ''
 617         self.blank_lines = 0
 618         self.tokens = []
 619         parens = 0
 620         for token in tokenize.generate_tokens(self.readline_check_physical):
 621             # print tokenize.tok_name[token[0]], repr(token)
 622             self.tokens.append(token)
 623             token_type, text = token[0:2]
 624             if token_type == tokenize.OP and text in '([{':
 625                 parens += 1
 626             if token_type == tokenize.OP and text in '}])':
 627                 parens -= 1
 628             if token_type == tokenize.NEWLINE and not parens:
 629                 self.check_logical()
 630                 self.blank_lines = 0
 631                 self.tokens = []
 632             if token_type == tokenize.NL and not parens:
 633                 self.blank_lines += 1
 634                 self.tokens = []
 635             if token_type == tokenize.COMMENT:
 636                 source_line = token[4]
 637                 token_start = token[2][1]
 638                 if source_line[:token_start].strip() == '':
 639                     self.blank_lines = 0
 640         return self.file_errors
 641
 642     def report_error(self, line_number, offset, text, check):
 643         """
 644         Report an error, according to options.
 645         """
 646         if options.quiet == 1 and not self.file_errors:
 647             message(self.filename)
 648         self.file_errors += 1
 649         code = text[:4]
 650         options.counters[code] = options.counters.get(code, 0) + 1
 651         options.messages[code] = text[5:]
 652         if options.quiet:
 653             return
 654         if options.testsuite:
 655             base = os.path.basename(self.filename)[:4]
 656             if base == code:
 657                 return
 658             if base[0] == 'E' and code[0] == 'W':
 659                 return
 660         if ignore_code(code):
 661             return
 662         if options.counters[code] == 1 or options.repeat:
 663             message("%s:%s:%d: %s" %
 664                     (self.filename, line_number, offset + 1, text))
 665             if options.show_source:
 666                 line = self.lines[line_number - 1]
 667                 message(line.rstrip())
 668                 message(' ' * offset + '^')
 669             if options.show_pep8:
 670                 message(check.__doc__.lstrip('\n').rstrip())
 671
 672
 673 def input_file(filename):
 674     """
 675     Run all checks on a Python source file.
 676     """
 677     if excluded(filename) or not filename_match(filename):
 678         return {}
 679     if options.verbose:
 680         message('checking ' + filename)
 681     options.counters['files'] = options.counters.get('files', 0) + 1
 682     errors = Checker(filename).check_all()
 683     if options.testsuite and not errors:
 684         message("%s: %s" % (filename, "no errors found"))
 685     return errors
 686
 687
 688 def input_dir(dirname):
 689     """
 690     Check all Python source files in this directory and all subdirectories.
 691     """
 692     dirname = dirname.rstrip('/')
 693     if excluded(dirname):
 694         return 0
 695     errors = 0
 696     for root, dirs, files in os.walk(dirname):
 697         if options.verbose:
 698             message('directory ' + root)
 699         options.counters['directories'] = \
 700             options.counters.get('directories', 0) + 1
 701         dirs.sort()
 702         for subdir in dirs:
 703             if excluded(subdir):
 704                 dirs.remove(subdir)
 705         files.sort()
 706         for filename in files:
 707             errors += input_file(os.path.join(root, filename))
 708     return errors
 709
 710
 711 def excluded(filename):
 712     """
 713     Check if options.exclude contains a pattern that matches filename.
 714     """
 715     basename = os.path.basename(filename)
 716     for pattern in options.exclude:
 717         if fnmatch(basename, pattern):
 718             # print basename, 'excluded because it matches', pattern
 719             return True
 720
 721
 722 def filename_match(filename):
 723     """
 724     Check if options.filename contains a pattern that matches filename.
 725     If options.filename is unspecified, this always returns True.
 726     """
 727     if not options.filename:
 728         return True
 729     for pattern in options.filename:
 730         if fnmatch(filename, pattern):
 731             return True
 732
 733
 734 def ignore_code(code):
 735     """
 736     Check if options.ignore contains a prefix of the error code.
 737     """
 738     for ignore in options.ignore:
 739         if code.startswith(ignore):
 740             return True
 741
 742
 743 def get_error_statistics():
 744     """Get error statistics."""
 745     return get_statistics("E")
 746
 747
 748 def get_warning_statistics():
 749     """Get warning statistics."""
 750     return get_statistics("W")
 751
 752
 753 def get_statistics(prefix=''):
 754     """
 755     Get statistics for message codes that start with the prefix.
 756
 757     prefix='' matches all errors and warnings
 758     prefix='E' matches all errors
 759     prefix='W' matches all warnings
 760     prefix='E4' matches all errors that have to do with imports
 761     """
 762     stats = []
 763     keys = options.messages.keys()
 764     keys.sort()
 765     for key in keys:
 766         if key.startswith(prefix):
 767             stats.append('%-7s %s %s' %
 768                          (options.counters[key], key, options.messages[key]))
 769     return stats
 770
 771
 772 def print_statistics(prefix=''):
 773     """Print overall statistics (number of errors and warnings)."""
 774     for line in get_statistics(prefix):
 775         print line
 776
 777
 778 def print_benchmark(elapsed):
 779     """
 780     Print benchmark numbers.
 781     """
 782     print '%-7.2f %s' % (elapsed, 'seconds elapsed')
 783     keys = ['directories', 'files',
 784             'logical lines', 'physical lines']
 785     for key in keys:
 786         if key in options.counters:
 787             print '%-7d %s per second (%d total)' % (
 788                 options.counters[key] / elapsed, key,
 789                 options.counters[key])
 790
 791
 792 def process_options(arglist=None):
 793     """
 794     Process options passed either via arglist or via command line args.
 795     """
 796     global options, args
 797     usage = "%prog [options] input ..."
 798     parser = OptionParser(usage)
 799     parser.add_option('-v', '--verbose', default=0, action='count',
 800                       help="print status messages, or debug with -vv")
 801     parser.add_option('-q', '--quiet', default=0, action='count',
 802                       help="report only file names, or nothing with -qq")
 803     parser.add_option('--exclude', metavar='patterns', default=default_exclude,
 804                       help="skip matches (default %s)" % default_exclude)
 805     parser.add_option('--filename', metavar='patterns',
 806                       help="only check matching files (e.g. *.py)")
 807     parser.add_option('--ignore', metavar='errors', default='',
 808                       help="skip errors and warnings (e.g. E4,W)")
 809     parser.add_option('--repeat', action='store_true',
 810                       help="show all occurrences of the same error")
 811     parser.add_option('--show-source', action='store_true',
 812                       help="show source code for each error")
 813     parser.add_option('--show-pep8', action='store_true',
 814                       help="show text of PEP 8 for each error")
 815     parser.add_option('--statistics', action='store_true',
 816                       help="count errors and warnings")
 817     parser.add_option('--benchmark', action='store_true',
 818                       help="measure processing speed")
 819     parser.add_option('--testsuite', metavar='dir',
 820                       help="run regression tests from dir")
 821     parser.add_option('--doctest', action='store_true',
 822                       help="run doctest on myself")
 823     options, args = parser.parse_args(arglist)
 824     if options.testsuite:
 825         args.append(options.testsuite)
 826     if len(args) == 0:
 827         parser.error('input not specified')
 828     options.prog = os.path.basename(sys.argv[0])
 829     options.exclude = options.exclude.split(',')
 830     for index in range(len(options.exclude)):
 831         options.exclude[index] = options.exclude[index].rstrip('/')
 832     if options.filename:
 833         options.filename = options.filename.split(',')
 834     if options.ignore:
 835         options.ignore = options.ignore.split(',')
 836     else:
 837         options.ignore = []
 838     options.counters = {}
 839     options.messages = {}
 840
 841     return options, args
 842
 843
 844 def _main():
 845     """
 846     Parse options and run checks on Python source.
 847     """
 848     options, args = process_options()
 849     if options.doctest:
 850         import doctest
 851         return doctest.testmod()
 852     start_time = time.time()
 853     errors = 0
 854     for path in args:
 855         if os.path.isdir(path):
 856             errors += input_dir(path)
 857         else:
 858             errors += input_file(path)
 859     elapsed = time.time() - start_time
 860     if options.statistics:
 861         print_statistics()
 862     if options.benchmark:
 863         print_benchmark(elapsed)
 864     return errors > 0
 865
 866 if __name__ == '__main__':
 867     sys.exit(_main())