src/third_party/WebKit/Tools/Scripts/webkitpy/thirdparty/autopep8.py

   1 # Copyright (C) 2010-2011 Hideo Hattori
   2 # Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
   3 # Copyright (C) 2013-2014 Hideo Hattori, Steven Myint, Bill Wendling
   4 #
   5 # Permission is hereby granted, free of charge, to any person obtaining
   6 # a copy of this software and associated documentation files (the
   7 # "Software"), to deal in the Software without restriction, including
   8 # without limitation the rights to use, copy, modify, merge, publish,
   9 # distribute, sublicense, and/or sell copies of the Software, and to
  10 # permit persons to whom the Software is furnished to do so, subject to
  11 # the following conditions:
  12 #
  13 # The above copyright notice and this permission notice shall be
  14 # included in all copies or substantial portions of the Software.
  15 #
  16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  19 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  20 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  21 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24
  25 """Automatically formats Python code to conform to the PEP 8 style guide.
  26
  27 Fixes that only need be done once can be added by adding a function of the form
  28 "fix_<code>(source)" to this module. They should return the fixed source code.
  29 These fixes are picked up by apply_global_fixes().
  30
  31 Fixes that depend on pep8 should be added as methods to FixPEP8. See the class
  32 documentation for more information.
  33
  34 """
  35
  36 from __future__ import absolute_import
  37 from __future__ import division
  38 from __future__ import print_function
  39 from __future__ import unicode_literals
  40
  41 import bisect
  42 import codecs
  43 import collections
  44 import copy
  45 import difflib
  46 import fnmatch
  47 import inspect
  48 import io
  49 import itertools
  50 import keyword
  51 import locale
  52 import os
  53 import re
  54 import signal
  55 import sys
  56 import token
  57 import tokenize
  58
  59 import pep8
  60
  61
# The "unicode" builtin is not defined on every interpreter; fall back to
# "str" so the rest of the module can refer to "unicode" unconditionally.
try:
    unicode
except NameError:
    unicode = str


__version__ = '1.0.3'


# Line-ending sequences.
CR = '\r'
LF = '\n'
CRLF = '\r\n'


# Matches a "#!" line that ends (ignoring trailing whitespace) with the whole
# word python, python2 or python3.
PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')


# For generating line shortening candidates.
SHORTEN_OPERATOR_GROUPS = frozenset([
    frozenset([',']),
    frozenset(['%']),
    frozenset([',', '(', '[', '{']),
    frozenset(['%', '(', '[', '{']),
    frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
    frozenset(['%', '+', '-', '*', '/', '//']),
])


# NOTE(review): presumably the pep8 code prefix ignored by default -- confirm
# against the option parsing code.
DEFAULT_IGNORE = 'E24'
# Indent width, in spaces, assumed by the indentation check in this module.
DEFAULT_INDENT_SIZE = 4


# W602 is handled separately due to the need to avoid "with_traceback".
# Maps a pep8 code to a list of names; presumably these are lib2to3 fixer
# names -- verify against the code that consumes this table.
CODE_TO_2TO3 = {
    'E721': ['idioms'],
    'W601': ['has_key'],
    'W603': ['ne'],
    'W604': ['repr'],
    'W690': ['apply',
             'except',
             'exitfunc',
             'import',
             'numliterals',
             'operator',
             'paren',
             'reduce',
             'renames',
             'standarderror',
             'sys_exc',
             'throw',
             'tuple_params',
             'xreadlines']}
 114
 115
 116 def open_with_encoding(filename, encoding=None, mode='r'):
 117     """Return opened file with a specific encoding."""
 118     if not encoding:
 119         encoding = detect_encoding(filename)
 120
 121     return io.open(filename, mode=mode, encoding=encoding,
 122                    newline='')  # Preserve line endings
 123
 124
 125 def detect_encoding(filename):
 126     """Return file encoding."""
 127     try:
 128         with open(filename, 'rb') as input_file:
 129             from lib2to3.pgen2 import tokenize as lib2to3_tokenize
 130             encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]
 131
 132         # Check for correctness of encoding
 133         with open_with_encoding(filename, encoding) as test_file:
 134             test_file.read()
 135
 136         return encoding
 137     except (LookupError, SyntaxError, UnicodeDecodeError):
 138         return 'latin-1'
 139
 140
 141 def readlines_from_file(filename):
 142     """Return contents of file."""
 143     with open_with_encoding(filename) as input_file:
 144         return input_file.readlines()
 145
 146
 147 def extended_blank_lines(logical_line,
 148                          blank_lines,
 149                          indent_level,
 150                          previous_logical):
 151     """Check for missing blank lines after class declaration."""
 152     if previous_logical.startswith('class '):
 153         if (
 154             logical_line.startswith(('def ', 'class ', '@')) or
 155             pep8.DOCSTRING_REGEX.match(logical_line)
 156         ):
 157             if indent_level and not blank_lines:
 158                 yield (0, 'E309 expected 1 blank line after class declaration')
 159     elif previous_logical.startswith('def '):
 160         if blank_lines and pep8.DOCSTRING_REGEX.match(logical_line):
 161             yield (0, 'E303 too many blank lines ({0})'.format(blank_lines))
 162     elif pep8.DOCSTRING_REGEX.match(previous_logical):
 163         # Missing blank line between class docstring and method declaration.
 164         if (
 165             indent_level and
 166             not blank_lines and
 167             logical_line.startswith(('def ')) and
 168             '(self' in logical_line
 169         ):
 170             yield (0, 'E301 expected 1 blank line, found 0')
# Make pep8 run the additional blank-line check defined above.
pep8.register_check(extended_blank_lines)
 172
 173
def continued_indentation(logical_line, tokens, indent_level, indent_char,
                          noqa):
    """Override pep8's function to provide indentation information."""
    # NOTE(review): pep8.register_check() appears to collect a check's error
    # codes from its docstring, so codes are only mentioned in comments
    # here -- confirm against the bundled pep8 before editing the docstring.
    #
    # "tokens" is a sequence of (token_type, text, start, end, line) tuples
    # for one logical line. Every yielded item is (position, 'Exxx N'),
    # where N is an indent width or column; the reindenting fixers in this
    # module parse N back out of the message.
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    # Nothing to check for single-row logical lines or when noqa is set.
    if noqa or nrows == 1:
        return

    # indent_next tells us whether the next block is indented. Assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line. In turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    # Accepted hanging indents; a second, doubled value is accepted as well
    # when the indent character is a tab.
    valid_hangs = (
        (DEFAULT_INDENT_SIZE,)
        if indent_char != '\t' else (DEFAULT_INDENT_SIZE,
                                     2 * DEFAULT_INDENT_SIZE)
    )

    # Remember how many brackets were opened on each line.
    parens = [0] * nrows

    # Relative indents of physical lines.
    rel_indent = [0] * nrows

    # For each depth, collect a list of opening rows.
    open_rows = [[0]]
    # For each depth, memorize the hanging indentation.
    hangs = [None]

    # Visual indents.
    # Maps a column to True, to the "unicode" type (strings and comments) or
    # to the text of an operator token seen at that column.
    indent_chances = {}
    last_indent = tokens[0][2]
    # Visual indent column for each bracket depth (0 means not set yet).
    indent = [last_indent[1]]

    last_token_multiline = None
    line = None
    last_line = ''
    last_line_begins_with_multiline = False
    for token_type, text, start, end, line in tokens:

        # Rows are counted relative to the first row of the logical line.
        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            # A row that continues a multiline token, or that starts with a
            # newline token, is not treated as a continuation line.
            newline = (not last_token_multiline and
                       token_type not in (tokenize.NL, tokenize.NEWLINE))
            last_line_begins_with_multiline = last_token_multiline

        if newline:
            # This is the beginning of a continuation line.
            last_indent = start

            # Record the initial indent.
            rel_indent[row] = pep8.expand_indent(line) - indent_level

            # Identify closing bracket.
            close_bracket = (token_type == tokenize.OP and text in ']})')

            # Is the indent relative to an opening bracket line?
            for open_row in reversed(open_rows[depth]):
                hang = rel_indent[row] - rel_indent[open_row]
                hanging_indent = hang in valid_hangs
                if hanging_indent:
                    break
            # Once a hang has been recorded for this depth, later lines must
            # use that same hang.
            if hangs[depth]:
                hanging_indent = (hang == hangs[depth])

            visual_indent = (not close_bracket and hang > 0 and
                             indent_chances.get(start[1]))

            if close_bracket and indent[depth]:
                # Closing bracket for visual indent.
                if start[1] != indent[depth]:
                    yield (start, 'E124 {0}'.format(indent[depth]))
            elif close_bracket and not hang:
                pass
            elif indent[depth] and start[1] < indent[depth]:
                # Visual indent is broken.
                yield (start, 'E128 {0}'.format(indent[depth]))
            elif (hanging_indent or
                  (indent_next and
                   rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)):
                # Hanging indent is verified.
                if close_bracket:
                    yield (start, 'E123 {0}'.format(indent_level +
                                                    rel_indent[open_row]))
                hangs[depth] = hang
            elif visual_indent is True:
                # Visual indent is verified.
                indent[depth] = start[1]
            elif visual_indent in (text, unicode):
                # Ignore token lined up with matching one from a previous line.
                pass
            else:
                # Indent column one level deeper than the opening row.
                one_indented = (indent_level + rel_indent[open_row] +
                                DEFAULT_INDENT_SIZE)
                # Indent is broken.
                if hang <= 0:
                    error = ('E122', one_indented)
                elif indent[depth]:
                    error = ('E127', indent[depth])
                elif hang > DEFAULT_INDENT_SIZE:
                    error = ('E126', one_indented)
                else:
                    hangs[depth] = hang
                    error = ('E121', one_indented)

                yield (start, '{0} {1}'.format(*error))

        # Look for visual indenting.
        if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
                and not indent[depth]):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
        # Deal with implicit string concatenation.
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = unicode
        # Special case for the "if" statement because len("if (") is equal to
        # 4.
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        # A colon followed only by whitespace makes this row an opening row
        # for the current depth.
        elif text == ':' and line[end[1]:].isspace():
            open_rows[depth].append(row)

        # Keep track of bracket depth.
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                hangs.append(None)
                if len(open_rows) == depth:
                    open_rows.append([])
                open_rows[depth].append(row)
                parens[row] += 1
            elif text in ')]}' and depth > 0:
                # Parent indents should not be more than this one.
                prev_indent = indent.pop() or last_indent[1]
                hangs.pop()
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                # Forget alignment columns at or beyond the closed bracket's
                # indent.
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                del open_rows[depth + 1:]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                # Decrement the bracket count of the nearest row (searching
                # upwards) that still has an open bracket recorded.
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        break
            assert len(indent) == depth + 1
            if (
                start[1] not in indent_chances and
                # This is for purposes of speeding up E121 (GitHub #90).
                not last_line.rstrip().endswith(',')
            ):
                # Allow to line up tokens.
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])
        if last_token_multiline:
            # The row on which a multiline token ends inherits the relative
            # indent of the row on which it started.
            rel_indent[end[0] - first_row] = rel_indent[row]

        last_line = line

    # The last physical line is indented exactly one level deeper than the
    # logical line, and the logical line ends with a colon.
    if (
        indent_next and
        not last_line_begins_with_multiline and
        pep8.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE
    ):
        pos = (start[0], indent[0] + 4)
        yield (pos, 'E125 {0}'.format(indent_level +
                                      2 * DEFAULT_INDENT_SIZE))
# Swap pep8's own continued_indentation check for the override defined above.
del pep8._checks['logical_line'][pep8.continued_indentation]
pep8.register_check(continued_indentation)
 354
 355
 356 class FixPEP8(object):
 357
 358     """Fix invalid code.
 359
 360     Fixer methods are prefixed "fix_". The _fix_source() method looks for these
 361     automatically.
 362
 363     The fixer method can take either one or two arguments (in addition to
 364     self). The first argument is "result", which is the error information from
 365     pep8. The second argument, "logical", is required only for logical-line
 366     fixes.
 367
 368     The fixer method can return the list of modified lines or None. An empty
 369     list would mean that no changes were made. None would mean that only the
 370     line reported in the pep8 error was modified. Note that the modified line
 371     numbers that are returned are indexed at 1. This typically would correspond
 372     with the line number reported in the pep8 error information.
 373
 374     [fixed method list]
 375         - e121,e122,e123,e124,e125,e126,e127,e128,e129
 376         - e201,e202,e203
 377         - e211
 378         - e221,e222,e223,e224,e225
 379         - e231
 380         - e251
 381         - e261,e262
 382         - e271,e272,e273,e274
 383         - e301,e302,e303
 384         - e401
 385         - e502
 386         - e701,e702
 387         - e711
 388         - w291
 389
 390     """
 391
 392     def __init__(self, filename,
 393                  options,
 394                  contents=None,
 395                  long_line_ignore_cache=None):
 396         self.filename = filename
 397         if contents is None:
 398             self.source = readlines_from_file(filename)
 399         else:
 400             sio = io.StringIO(contents)
 401             self.source = sio.readlines()
 402         self.options = options
 403         self.indent_word = _get_indentword(''.join(self.source))
 404
 405         self.long_line_ignore_cache = (
 406             set() if long_line_ignore_cache is None
 407             else long_line_ignore_cache)
 408
 409         # Many fixers are the same even though pep8 categorizes them
 410         # differently.
 411         self.fix_e115 = self.fix_e112
 412         self.fix_e116 = self.fix_e113
 413         self.fix_e121 = self._fix_reindent
 414         self.fix_e122 = self._fix_reindent
 415         self.fix_e123 = self._fix_reindent
 416         self.fix_e124 = self._fix_reindent
 417         self.fix_e126 = self._fix_reindent
 418         self.fix_e127 = self._fix_reindent
 419         self.fix_e128 = self._fix_reindent
 420         self.fix_e129 = self._fix_reindent
 421         self.fix_e202 = self.fix_e201
 422         self.fix_e203 = self.fix_e201
 423         self.fix_e211 = self.fix_e201
 424         self.fix_e221 = self.fix_e271
 425         self.fix_e222 = self.fix_e271
 426         self.fix_e223 = self.fix_e271
 427         self.fix_e226 = self.fix_e225
 428         self.fix_e227 = self.fix_e225
 429         self.fix_e228 = self.fix_e225
 430         self.fix_e241 = self.fix_e271
 431         self.fix_e242 = self.fix_e224
 432         self.fix_e261 = self.fix_e262
 433         self.fix_e272 = self.fix_e271
 434         self.fix_e273 = self.fix_e271
 435         self.fix_e274 = self.fix_e271
 436         self.fix_e309 = self.fix_e301
 437         self.fix_e501 = (
 438             self.fix_long_line_logically if
 439             options and (options.aggressive >= 2 or options.experimental) else
 440             self.fix_long_line_physically)
 441         self.fix_e703 = self.fix_e702
 442
 443         self._ws_comma_done = False
 444
 445     def _fix_source(self, results):
 446         try:
 447             (logical_start, logical_end) = _find_logical(self.source)
 448             logical_support = True
 449         except (SyntaxError, tokenize.TokenError):  # pragma: no cover
 450             logical_support = False
 451
 452         completed_lines = set()
 453         for result in sorted(results, key=_priority_key):
 454             if result['line'] in completed_lines:
 455                 continue
 456
 457             fixed_methodname = 'fix_' + result['id'].lower()
 458             if hasattr(self, fixed_methodname):
 459                 fix = getattr(self, fixed_methodname)
 460
 461                 line_index = result['line'] - 1
 462                 original_line = self.source[line_index]
 463
 464                 is_logical_fix = len(inspect.getargspec(fix).args) > 2
 465                 if is_logical_fix:
 466                     logical = None
 467                     if logical_support:
 468                         logical = _get_logical(self.source,
 469                                                result,
 470                                                logical_start,
 471                                                logical_end)
 472                         if logical and set(range(
 473                             logical[0][0] + 1,
 474                             logical[1][0] + 1)).intersection(
 475                                 completed_lines):
 476                             continue
 477
 478                     modified_lines = fix(result, logical)
 479                 else:
 480                     modified_lines = fix(result)
 481
 482                 if modified_lines is None:
 483                     # Force logical fixes to report what they modified.
 484                     assert not is_logical_fix
 485
 486                     if self.source[line_index] == original_line:
 487                         modified_lines = []
 488
 489                 if modified_lines:
 490                     completed_lines.update(modified_lines)
 491                 elif modified_lines == []:  # Empty list means no fix
 492                     if self.options.verbose >= 2:
 493                         print(
 494                             '--->  Not fixing {f} on line {l}'.format(
 495                                 f=result['id'], l=result['line']),
 496                             file=sys.stderr)
 497                 else:  # We assume one-line fix when None.
 498                     completed_lines.add(result['line'])
 499             else:
 500                 if self.options.verbose >= 3:
 501                     print(
 502                         "--->  '{0}' is not defined.".format(fixed_methodname),
 503                         file=sys.stderr)
 504
 505                     info = result['info'].strip()
 506                     print('--->  {0}:{1}:{2}:{3}'.format(self.filename,
 507                                                          result['line'],
 508                                                          result['column'],
 509                                                          info),
 510                           file=sys.stderr)
 511
 512     def fix(self):
 513         """Return a version of the source code with PEP 8 violations fixed."""
 514         pep8_options = {
 515             'ignore': self.options.ignore,
 516             'select': self.options.select,
 517             'max_line_length': self.options.max_line_length,
 518         }
 519         results = _execute_pep8(pep8_options, self.source)
 520
 521         if self.options.verbose:
 522             progress = {}
 523             for r in results:
 524                 if r['id'] not in progress:
 525                     progress[r['id']] = set()
 526                 progress[r['id']].add(r['line'])
 527             print('--->  {n} issue(s) to fix {progress}'.format(
 528                 n=len(results), progress=progress), file=sys.stderr)
 529
 530         if self.options.line_range:
 531             start, end = self.options.line_range
 532             results = [r for r in results
 533                        if start <= r['line'] <= end]
 534
 535         self._fix_source(filter_results(source=''.join(self.source),
 536                                         results=results,
 537                                         aggressive=self.options.aggressive))
 538
 539         if self.options.line_range:
 540             # If number of lines has changed then change line_range.
 541             count = sum(sline.count('\n')
 542                         for sline in self.source[start - 1:end])
 543             self.options.line_range[1] = start + count - 1
 544
 545         return ''.join(self.source)
 546
 547     def _fix_reindent(self, result):
 548         """Fix a badly indented line.
 549
 550         This is done by adding or removing from its initial indent only.
 551
 552         """
 553         num_indent_spaces = int(result['info'].split()[1])
 554         line_index = result['line'] - 1
 555         target = self.source[line_index]
 556
 557         self.source[line_index] = ' ' * num_indent_spaces + target.lstrip()
 558
 559     def fix_e112(self, result):
 560         """Fix under-indented comments."""
 561         line_index = result['line'] - 1
 562         target = self.source[line_index]
 563
 564         if not target.lstrip().startswith('#'):
 565             # Don't screw with invalid syntax.
 566             return []
 567
 568         self.source[line_index] = self.indent_word + target
 569
 570     def fix_e113(self, result):
 571         """Fix over-indented comments."""
 572         line_index = result['line'] - 1
 573         target = self.source[line_index]
 574
 575         indent = _get_indentation(target)
 576         stripped = target.lstrip()
 577
 578         if not stripped.startswith('#'):
 579             # Don't screw with invalid syntax.
 580             return []
 581
 582         self.source[line_index] = indent[1:] + stripped
 583
 584     def fix_e125(self, result):
 585         """Fix indentation undistinguish from the next logical line."""
 586         num_indent_spaces = int(result['info'].split()[1])
 587         line_index = result['line'] - 1
 588         target = self.source[line_index]
 589
 590         spaces_to_add = num_indent_spaces - len(_get_indentation(target))
 591         indent = len(_get_indentation(target))
 592         modified_lines = []
 593
 594         while len(_get_indentation(self.source[line_index])) >= indent:
 595             self.source[line_index] = (' ' * spaces_to_add +
 596                                        self.source[line_index])
 597             modified_lines.append(1 + line_index)  # Line indexed at 1.
 598             line_index -= 1
 599
 600         return modified_lines
 601
 602     def fix_e201(self, result):
 603         """Remove extraneous whitespace."""
 604         line_index = result['line'] - 1
 605         target = self.source[line_index]
 606         offset = result['column'] - 1
 607
 608         if is_probably_part_of_multiline(target):
 609             return []
 610
 611         fixed = fix_whitespace(target,
 612                                offset=offset,
 613                                replacement='')
 614
 615         self.source[line_index] = fixed
 616
 617     def fix_e224(self, result):
 618         """Remove extraneous whitespace around operator."""
 619         target = self.source[result['line'] - 1]
 620         offset = result['column'] - 1
 621         fixed = target[:offset] + target[offset:].replace('\t', ' ')
 622         self.source[result['line'] - 1] = fixed
 623
 624     def fix_e225(self, result):
 625         """Fix missing whitespace around operator."""
 626         target = self.source[result['line'] - 1]
 627         offset = result['column'] - 1
 628         fixed = target[:offset] + ' ' + target[offset:]
 629
 630         # Only proceed if non-whitespace characters match.
 631         # And make sure we don't break the indentation.
 632         if (
 633             fixed.replace(' ', '') == target.replace(' ', '') and
 634             _get_indentation(fixed) == _get_indentation(target)
 635         ):
 636             self.source[result['line'] - 1] = fixed
 637         else:
 638             return []
 639
 640     def fix_e231(self, result):
 641         """Add missing whitespace."""
 642         # Optimize for comma case. This will fix all commas in the full source
 643         # code in one pass. Don't do this more than once. If it fails the first
 644         # time, there is no point in trying again.
 645         if ',' in result['info'] and not self._ws_comma_done:
 646             self._ws_comma_done = True
 647             original = ''.join(self.source)
 648             new = refactor(original, ['ws_comma'])
 649             if original.strip() != new.strip():
 650                 self.source = [new]
 651                 return range(1, 1 + len(original))
 652
 653         line_index = result['line'] - 1
 654         target = self.source[line_index]
 655         offset = result['column']
 656         fixed = target[:offset] + ' ' + target[offset:]
 657         self.source[line_index] = fixed
 658
 659     def fix_e251(self, result):
 660         """Remove whitespace around parameter '=' sign."""
 661         line_index = result['line'] - 1
 662         target = self.source[line_index]
 663
 664         # This is necessary since pep8 sometimes reports columns that goes
 665         # past the end of the physical line. This happens in cases like,
 666         # foo(bar\n=None)
 667         c = min(result['column'] - 1,
 668                 len(target) - 1)
 669
 670         if target[c].strip():
 671             fixed = target
 672         else:
 673             fixed = target[:c].rstrip() + target[c:].lstrip()
 674
 675         # There could be an escaped newline
 676         #
 677         #     def foo(a=\
 678         #             1)
 679         if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')):
 680             self.source[line_index] = fixed.rstrip('\n\r \t\\')
 681             self.source[line_index + 1] = self.source[line_index + 1].lstrip()
 682             return [line_index + 1, line_index + 2]  # Line indexed at 1
 683
 684         self.source[result['line'] - 1] = fixed
 685
 686     def fix_e262(self, result):
 687         """Fix spacing after comment hash."""
 688         target = self.source[result['line'] - 1]
 689         offset = result['column']
 690
 691         code = target[:offset].rstrip(' \t#')
 692         comment = target[offset:].lstrip(' \t#')
 693
 694         fixed = code + ('  # ' + comment if comment.strip() else '\n')
 695
 696         self.source[result['line'] - 1] = fixed
 697
 698     def fix_e271(self, result):
 699         """Fix extraneous whitespace around keywords."""
 700         line_index = result['line'] - 1
 701         target = self.source[line_index]
 702         offset = result['column'] - 1
 703
 704         if is_probably_part_of_multiline(target):
 705             return []
 706
 707         fixed = fix_whitespace(target,
 708                                offset=offset,
 709                                replacement=' ')
 710
 711         if fixed == target:
 712             return []
 713         else:
 714             self.source[line_index] = fixed
 715
 716     def fix_e301(self, result):
 717         """Add missing blank line."""
 718         cr = '\n'
 719         self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]
 720
 721     def fix_e302(self, result):
 722         """Add missing 2 blank lines."""
 723         add_linenum = 2 - int(result['info'].split()[-1])
 724         cr = '\n' * add_linenum
 725         self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]
 726
 727     def fix_e303(self, result):
 728         """Remove extra blank lines."""
 729         delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2
 730         delete_linenum = max(1, delete_linenum)
 731
 732         # We need to count because pep8 reports an offset line number if there
 733         # are comments.
 734         cnt = 0
 735         line = result['line'] - 2
 736         modified_lines = []
 737         while cnt < delete_linenum and line >= 0:
 738             if not self.source[line].strip():
 739                 self.source[line] = ''
 740                 modified_lines.append(1 + line)  # Line indexed at 1
 741                 cnt += 1
 742             line -= 1
 743
 744         return modified_lines
 745
 746     def fix_e304(self, result):
 747         """Remove blank line following function decorator."""
 748         line = result['line'] - 2
 749         if not self.source[line].strip():
 750             self.source[line] = ''
 751
 752     def fix_e401(self, result):
 753         """Put imports on separate lines."""
 754         line_index = result['line'] - 1
 755         target = self.source[line_index]
 756         offset = result['column'] - 1
 757
 758         if not target.lstrip().startswith('import'):
 759             return []
 760
 761         indentation = re.split(pattern=r'\bimport\b',
 762                                string=target, maxsplit=1)[0]
 763         fixed = (target[:offset].rstrip('\t ,') + '\n' +
 764                  indentation + 'import ' + target[offset:].lstrip('\t ,'))
 765         self.source[line_index] = fixed
 766
 767     def fix_long_line_logically(self, result, logical):
 768         """Try to make lines fit within --max-line-length characters."""
 769         if (
 770             not logical or
 771             len(logical[2]) == 1 or
 772             self.source[result['line'] - 1].lstrip().startswith('#')
 773         ):
 774             return self.fix_long_line_physically(result)
 775
 776         start_line_index = logical[0][0]
 777         end_line_index = logical[1][0]
 778         logical_lines = logical[2]
 779
 780         previous_line = get_item(self.source, start_line_index - 1, default='')
 781         next_line = get_item(self.source, end_line_index + 1, default='')
 782
 783         single_line = join_logical_line(''.join(logical_lines))
 784
 785         try:
 786             fixed = self.fix_long_line(
 787                 target=single_line,
 788                 previous_line=previous_line,
 789                 next_line=next_line,
 790                 original=''.join(logical_lines))
 791         except (SyntaxError, tokenize.TokenError):
 792             return self.fix_long_line_physically(result)
 793
 794         if fixed:
 795             for line_index in range(start_line_index, end_line_index + 1):
 796                 self.source[line_index] = ''
 797             self.source[start_line_index] = fixed
 798             return range(start_line_index + 1, end_line_index + 1)
 799         else:
 800             return []
 801
 802     def fix_long_line_physically(self, result):
 803         """Try to make lines fit within --max-line-length characters."""
 804         line_index = result['line'] - 1
 805         target = self.source[line_index]
 806
 807         previous_line = get_item(self.source, line_index - 1, default='')
 808         next_line = get_item(self.source, line_index + 1, default='')
 809
 810         try:
 811             fixed = self.fix_long_line(
 812                 target=target,
 813                 previous_line=previous_line,
 814                 next_line=next_line,
 815                 original=target)
 816         except (SyntaxError, tokenize.TokenError):
 817             return []
 818
 819         if fixed:
 820             self.source[line_index] = fixed
 821             return [line_index + 1]
 822         else:
 823             return []
 824
 825     def fix_long_line(self, target, previous_line,
 826                       next_line, original):
 827         cache_entry = (target, previous_line, next_line)
 828         if cache_entry in self.long_line_ignore_cache:
 829             return []
 830
 831         if target.lstrip().startswith('#'):
 832             # Wrap commented lines.
 833             return shorten_comment(
 834                 line=target,
 835                 max_line_length=self.options.max_line_length,
 836                 last_comment=not next_line.lstrip().startswith('#'))
 837
 838         fixed = get_fixed_long_line(
 839             target=target,
 840             previous_line=previous_line,
 841             original=original,
 842             indent_word=self.indent_word,
 843             max_line_length=self.options.max_line_length,
 844             aggressive=self.options.aggressive,
 845             experimental=self.options.experimental,
 846             verbose=self.options.verbose)
 847         if fixed and not code_almost_equal(original, fixed):
 848             return fixed
 849         else:
 850             self.long_line_ignore_cache.add(cache_entry)
 851             return None
 852
 853     def fix_e502(self, result):
 854         """Remove extraneous escape of newline."""
 855         line_index = result['line'] - 1
 856         target = self.source[line_index]
 857         self.source[line_index] = target.rstrip('\n\r \t\\') + '\n'
 858
 859     def fix_e701(self, result):
 860         """Put colon-separated compound statement on separate lines."""
 861         line_index = result['line'] - 1
 862         target = self.source[line_index]
 863         c = result['column']
 864
 865         fixed_source = (target[:c] + '\n' +
 866                         _get_indentation(target) + self.indent_word +
 867                         target[c:].lstrip('\n\r \t\\'))
 868         self.source[result['line'] - 1] = fixed_source
 869         return [result['line'], result['line'] + 1]
 870
 871     def fix_e702(self, result, logical):
 872         """Put semicolon-separated compound statement on separate lines."""
 873         if not logical:
 874             return []  # pragma: no cover
 875         logical_lines = logical[2]
 876
 877         line_index = result['line'] - 1
 878         target = self.source[line_index]
 879
 880         if target.rstrip().endswith('\\'):
 881             # Normalize '1; \\\n2' into '1; 2'.
 882             self.source[line_index] = target.rstrip('\n \r\t\\')
 883             self.source[line_index + 1] = self.source[line_index + 1].lstrip()
 884             return [line_index + 1, line_index + 2]
 885
 886         if target.rstrip().endswith(';'):
 887             self.source[line_index] = target.rstrip('\n \r\t;') + '\n'
 888             return [line_index + 1]
 889
 890         offset = result['column'] - 1
 891         first = target[:offset].rstrip(';').rstrip()
 892         second = (_get_indentation(logical_lines[0]) +
 893                   target[offset:].lstrip(';').lstrip())
 894
 895         self.source[line_index] = first + '\n' + second
 896         return [line_index + 1]
 897
 898     def fix_e711(self, result):
 899         """Fix comparison with None."""
 900         line_index = result['line'] - 1
 901         target = self.source[line_index]
 902         offset = result['column'] - 1
 903
 904         right_offset = offset + 2
 905         if right_offset >= len(target):
 906             return []
 907
 908         left = target[:offset].rstrip()
 909         center = target[offset:right_offset]
 910         right = target[right_offset:].lstrip()
 911
 912         if not right.startswith('None'):
 913             return []
 914
 915         if center.strip() == '==':
 916             new_center = 'is'
 917         elif center.strip() == '!=':
 918             new_center = 'is not'
 919         else:
 920             return []
 921
 922         self.source[line_index] = ' '.join([left, new_center, right])
 923
 924     def fix_e712(self, result):
 925         """Fix comparison with boolean."""
 926         line_index = result['line'] - 1
 927         target = self.source[line_index]
 928         offset = result['column'] - 1
 929
 930         # Handle very easy "not" special cases.
 931         if re.match(r'^\s*if \w+ == False:$', target):
 932             self.source[line_index] = re.sub(r'if (\w+) == False:',
 933                                              r'if not \1:', target, count=1)
 934         elif re.match(r'^\s*if \w+ != True:$', target):
 935             self.source[line_index] = re.sub(r'if (\w+) != True:',
 936                                              r'if not \1:', target, count=1)
 937         else:
 938             right_offset = offset + 2
 939             if right_offset >= len(target):
 940                 return []
 941
 942             left = target[:offset].rstrip()
 943             center = target[offset:right_offset]
 944             right = target[right_offset:].lstrip()
 945
 946             # Handle simple cases only.
 947             new_right = None
 948             if center.strip() == '==':
 949                 if re.match(r'\bTrue\b', right):
 950                     new_right = re.sub(r'\bTrue\b *', '', right, count=1)
 951             elif center.strip() == '!=':
 952                 if re.match(r'\bFalse\b', right):
 953                     new_right = re.sub(r'\bFalse\b *', '', right, count=1)
 954
 955             if new_right is None:
 956                 return []
 957
 958             if new_right[0].isalnum():
 959                 new_right = ' ' + new_right
 960
 961             self.source[line_index] = left + new_right
 962
 963     def fix_e713(self, result):
 964         """Fix non-membership check."""
 965         line_index = result['line'] - 1
 966         target = self.source[line_index]
 967
 968         # Handle very easy case only.
 969         if re.match(r'^\s*if not \w+ in \w+:$', target):
 970             self.source[line_index] = re.sub(r'if not (\w+) in (\w+):',
 971                                              r'if \1 not in \2:',
 972                                              target,
 973                                              count=1)
 974
 975     def fix_w291(self, result):
 976         """Remove trailing whitespace."""
 977         fixed_line = self.source[result['line'] - 1].rstrip()
 978         self.source[result['line'] - 1] = fixed_line + '\n'
 979
 980
 981 def get_fixed_long_line(target, previous_line, original,
 982                         indent_word='    ', max_line_length=79,
 983                         aggressive=False, experimental=False, verbose=False):
 984     """Break up long line and return result.
 985
 986     Do this by generating multiple reformatted candidates and then
 987     ranking the candidates to heuristically select the best option.
 988
 989     """
 990     indent = _get_indentation(target)
 991     source = target[len(indent):]
 992     assert source.lstrip() == source
 993
 994     # Check for partial multiline.
 995     tokens = list(generate_tokens(source))
 996
 997     candidates = shorten_line(
 998         tokens, source, indent,
 999         indent_word,
1000         max_line_length,
1001         aggressive=aggressive,
1002         experimental=experimental,
1003         previous_line=previous_line)
1004
1005     # Also sort alphabetically as a tie breaker (for determinism).
1006     candidates = sorted(
1007         sorted(set(candidates).union([target, original])),
1008         key=lambda x: line_shortening_rank(x,
1009                                            indent_word,
1010                                            max_line_length,
1011                                            experimental))
1012
1013     if verbose >= 4:
1014         print(('-' * 79 + '\n').join([''] + candidates + ['']),
1015               file=codecs.getwriter('utf-8')(sys.stderr.buffer
1016                                              if hasattr(sys.stderr,
1017                                                         'buffer')
1018                                              else sys.stderr))
1019
1020     if candidates:
1021         return candidates[0]
1022
1023
def join_logical_line(logical_line):
    """Collapse a logical line into one physical line.

    The original leading indentation is kept, the remainder is
    re-tokenized and joined without line breaks, and a newline is appended.

    """
    prefix = _get_indentation(logical_line)
    tokens = generate_tokens(logical_line.lstrip())
    return prefix + untokenize_without_newlines(tokens) + '\n'
1030
1031
def untokenize_without_newlines(tokens):
    """Rebuild source text from tokens, replacing line breaks by spaces.

    Each token is a sequence whose items 1, 2 and 3 are the token string,
    the ``(row, column)`` start and the ``(row, column)`` end. A single
    space is inserted wherever the original had a gap or a newline; the
    newline strings themselves are dropped.

    """
    output = ''
    prev_end_row, prev_end_column = 0, -1

    for tok in tokens:
        string = tok[1]
        row, column = tok[2]
        end_row, end_column = tok[3]
        is_newline = string == '\n'

        # A token on a new row is measured from column zero.
        if row > prev_end_row:
            prev_end_column = 0

        needs_gap = column > prev_end_column or is_newline
        if needs_gap and not output.endswith(' '):
            output += ' '

        if not is_newline:
            output += string

        prev_end_row, prev_end_column = end_row, end_column

    return output
1058
1059
def _find_logical(source_lines):
    """Locate the start and end of every logical line.

    Returns ``(starts, ends)``: two lists of ``(row, column)`` pairs with
    zero-based rows. A start is recorded at the first significant token
    after a terminator; an end is recorded at each NEWLINE (or SEMI) token
    seen outside of brackets.

    """
    starts = []
    ends = []
    skipped_types = (tokenize.COMMENT, tokenize.DEDENT,
                     tokenize.INDENT, tokenize.NL,
                     tokenize.ENDMARKER)
    terminator_types = (tokenize.NEWLINE, tokenize.SEMI)

    awaiting_start = True
    depth = 0
    for tok in generate_tokens(''.join(source_lines)):
        kind = tok[0]
        if kind in skipped_types:
            continue

        if not depth and kind in terminator_types:
            awaiting_start = True
            ends.append((tok[3][0] - 1, tok[2][1]))
            continue

        if awaiting_start and not depth:
            starts.append((tok[2][0] - 1, tok[2][1]))
            awaiting_start = False

        # Track bracket nesting so newlines inside brackets are ignored.
        if kind == tokenize.OP:
            if tok[1] in '([{':
                depth += 1
            elif tok[1] in '}])':
                depth -= 1

    return (starts, ends)
1084
1085
def _get_logical(source_lines, result, logical_start, logical_end):
    """Return the logical line corresponding to the result.

    The answer is the first logical line whose end lies strictly after the
    reported (row, column) position, as a ``(start, end, lines)`` triple,
    or ``None`` when there is no such line.

    Assumes input is already E702-clean.

    """
    position = (result['line'] - 1, result['column'] - 1)

    for index, start in enumerate(logical_start):
        assert logical_end
        end = logical_end[index]
        if (end[0], end[1]) > position:
            return start, end, source_lines[start[0]:end[0] + 1]

    return None
1107
1108
def get_item(items, index, default=None):
    """Return ``items[index]``, or ``default`` when the index is out of range.

    Negative indexes count as out of range.

    """
    in_range = 0 <= index < len(items)
    return items[index] if in_range else default
1114
1115
def reindent(source, indent_size):
    """Reindent all lines by running a ``Reindenter`` over ``source``."""
    return Reindenter(source).run(indent_size)
1120
1121
def code_almost_equal(a, b):
    """Return True if code is similar.

    Blank lines are dropped from both inputs; the remaining lines must
    match pairwise once every whitespace character is removed.

    """
    lines_a = split_and_strip_non_empty_lines(a)
    lines_b = split_and_strip_non_empty_lines(b)

    if len(lines_a) != len(lines_b):
        return False

    return all(
        ''.join(left.split()) == ''.join(right.split())
        for left, right in zip(lines_a, lines_b))
1139
1140
def split_and_strip_non_empty_lines(text):
    """Return the stripped lines of ``text``.

    Lines that are empty after stripping are left out.

    """
    stripped_lines = (line.strip() for line in text.splitlines())
    return [line for line in stripped_lines if line]
1148
1149
def fix_e265(source, aggressive=False):  # pylint: disable=unused-argument
    """Format block comments.

    Comment lines get their leading marker normalized to ``'# '``. Lines
    inside multiline strings or docstrings, and lines reported by
    ``commented_out_code_lines``, are left untouched.

    """
    if '#' not in source:
        # Optimization.
        return source

    ignored_line_numbers = multiline_string_lines(
        source,
        include_docstrings=True) | set(commented_out_code_lines(source))

    output = []
    numbered_lines = enumerate(io.StringIO(source).readlines(), start=1)
    for line_number, raw in numbered_lines:
        body = raw.lstrip()
        if not body.startswith('#') or line_number in ignored_line_numbers:
            output.append(raw)
            continue

        # Normalize beginning if not a shebang.
        needs_normalizing = (
            len(body) > 1 and
            # Leave multiple spaces like '#    ' alone.
            (body.count('#') > 1 or body[1].isalnum()) and
            # Leave stylistic outlined blocks alone.
            not body.rstrip().endswith('#')
        )
        if needs_normalizing:
            body = '# ' + body.lstrip('# \t')

        output.append(_get_indentation(raw) + body)

    return ''.join(output)
1185
1186
def refactor(source, fixer_names, ignore=None):
    """Return refactored code using lib2to3.

    The original ``source`` is returned unchanged when refactoring fails
    with a parse, syntax or Unicode error, or when the refactored text
    contains the ``ignore`` string although ``source`` did not.

    """
    from lib2to3 import pgen2
    try:
        refactored = refactor_with_2to3(source,
                                        fixer_names=fixer_names)
    except (pgen2.parse.ParseError,
            SyntaxError,
            UnicodeDecodeError,
            UnicodeEncodeError):
        return source

    introduced_ignored = (
        bool(ignore) and ignore in refactored and ignore not in source)
    return source if introduced_ignored else refactored
1208
1209
def code_to_2to3(select, ignore):
    """Return the set of lib2to3 fixer names enabled by ``select``/``ignore``.

    Every entry of ``CODE_TO_2TO3`` whose code passes ``code_match``
    contributes its fixers.

    """
    enabled = set()
    for code, fixers in CODE_TO_2TO3.items():
        if code_match(code, select=select, ignore=ignore):
            enabled.update(fixers)
    return enabled
1216
1217
def fix_2to3(source, aggressive=True, select=None, ignore=None):
    """Fix various deprecated code (via lib2to3).

    Does nothing unless ``aggressive`` is true. Missing ``select`` and
    ``ignore`` values are treated as empty lists.

    """
    if not aggressive:
        return source

    fixer_names = code_to_2to3(select=select or [],
                               ignore=ignore or [])
    return refactor(source, fixer_names)
1229
1230
def fix_w602(source, aggressive=True):
    """Fix deprecated form of raising exception.

    Does nothing unless ``aggressive`` is true. The ``raise`` fixer's
    output is discarded if it introduces ``with_traceback``.

    """
    if aggressive:
        return refactor(source, ['raise'],
                        ignore='with_traceback')

    return source
1238
1239
def find_newline(source):
    """Return type of newline used in source.

    Input is a list of lines. The most frequent line ending wins, and
    ``LF`` is returned when no line has a recognized ending.

    """
    assert not isinstance(source, unicode)

    tally = collections.defaultdict(int)
    for line in source:
        # CRLF is tested first so it is not mistaken for a bare LF.
        for ending in (CRLF, CR, LF):
            if line.endswith(ending):
                tally[ending] += 1
                break

    if not tally:
        return LF

    return max(tally, key=tally.get)
1258
1259
def _get_indentword(source):
    """Return indentation type.

    This is the text of the first INDENT token in ``source``. Four spaces
    are returned when there is no such token or tokenizing fails.

    """
    try:
        for tok in generate_tokens(source):
            if tok[0] == token.INDENT:
                return tok[1]
    except (SyntaxError, tokenize.TokenError):
        pass

    # Default in case source has no indentation
    return '    '
1271
1272
def _get_indentation(line):
    """Return leading whitespace.

    A line made only of whitespace has no indentation: ``''`` is returned.

    """
    content = line.lstrip()
    if not content:
        return ''

    return line[:len(line) - len(content)]
1280
1281
def get_diff_text(old, new, filename):
    """Return text of unified diff between old and new.

    ``old`` and ``new`` are sequences of lines; the diff headers label
    them ``original/<filename>`` and ``fixed/<filename>``.

    """
    newline = '\n'
    chunks = []
    produced_text = False

    for line in difflib.unified_diff(old, new,
                                     'original/' + filename,
                                     'fixed/' + filename,
                                     lineterm=newline):
        chunks.append(line)
        produced_text = produced_text or bool(line)

        # Work around missing newline (http://bugs.python.org/issue2142).
        if produced_text and not line.endswith(newline):
            chunks.append(
                newline + r'\ No newline at end of file' + newline)

    return ''.join(chunks)
1300
1301
def _priority_key(pep8_result):
    """Key for sorting PEP8 results.

    Global fixes should be done first. This is important for things like
    indentation. Codes listed in ``first`` sort by their position there,
    codes in ``last`` sort after everything else, and all remaining codes
    share one rank in between.

    """
    first = (
        # Fix multiline colon-based before semicolon based.
        'e701',
        # Break multiline statements early.
        'e702',
        # Things that make lines longer.
        'e225', 'e231',
        # Remove extraneous whitespace before breaking lines.
        'e201',
        # Shorten whitespace in comment before resorting to wrapping.
        'e262',
    )
    last = (
        # We need to shorten lines last since the logical fixer can get in a
        # loop, which causes us to exit early.
        'e501',
    )
    middle_rank = 10000

    code = pep8_result['id'].lower()
    if code in first:
        return first.index(code)
    if code in last:
        return middle_rank + last.index(code) + 1
    return middle_rank
1335
1336
def shorten_line(tokens, source, indentation, indent_word, max_line_length,
                 aggressive=False, experimental=False, previous_line=''):
    """Separate line at OPERATOR.

    Multiple candidates will be yielded, from up to three sources in this
    order: ``_shorten_line`` (always), ``_shorten_line_at_tokens`` once per
    entry of ``SHORTEN_OPERATOR_GROUPS`` (only when ``aggressive``), and
    ``_shorten_line_at_tokens_new`` (only when ``experimental``).

    """
    basic_candidates = _shorten_line(tokens=tokens,
                                     source=source,
                                     indentation=indentation,
                                     indent_word=indent_word,
                                     aggressive=aggressive,
                                     previous_line=previous_line)
    for candidate in basic_candidates:
        yield candidate

    if aggressive:
        for group in SHORTEN_OPERATOR_GROUPS:
            candidate = _shorten_line_at_tokens(
                tokens=tokens,
                source=source,
                indentation=indentation,
                indent_word=indent_word,
                key_token_strings=group,
                aggressive=aggressive)

            # Skip groups that produced nothing or left the line unchanged.
            if candidate is None or candidate == source:
                continue

            yield candidate

    if experimental:
        new_candidates = _shorten_line_at_tokens_new(
            tokens=tokens,
            source=source,
            indentation=indentation,
            max_line_length=max_line_length)
        for candidate in new_candidates:
            yield candidate
1373
1374
def _shorten_line(tokens, source, indentation, indent_word,
                  aggressive=False, previous_line=''):
    """Separate line at OPERATOR.

    The input is expected to be free of newlines except for inside multiline
    strings and at the end.

    Multiple candidates will be yielded. Two kinds are produced while
    walking the tokens:

    * for an inline comment (that is not a noqa/pragma/pylint marker and is
      not part of a probable multiline construct), the comment moved onto
      its own line above the code;
    * for an operator other than '=', the line broken right after that
      operator, provided the result passes ``check_syntax``.

    """
    for (token_type,
         token_string,
         start_offset,
         end_offset) in token_offsets(tokens):

        if (
            token_type == tokenize.COMMENT and
            not is_probably_part_of_multiline(previous_line) and
            not is_probably_part_of_multiline(source) and
            not source[start_offset + 1:].strip().lower().startswith(
                ('noqa', 'pragma:', 'pylint:'))
        ):
            # Move inline comments to previous line.
            first = source[:start_offset]
            second = source[start_offset:]
            yield (indentation + second.strip() + '\n' +
                   indentation + first.strip() + '\n')
        elif token_type == token.OP and token_string != '=':
            # Don't break on '=' after keyword as this violates PEP 8.

            assert token_type != token.INDENT

            # ``first`` keeps everything up to and including the operator.
            first = source[:end_offset]

            # Pick the continuation indent: one indent_word after a trailing
            # '(' or when there is no '(' at all; otherwise align just past
            # the first '(' on the line.
            second_indent = indentation
            if first.rstrip().endswith('('):
                second_indent += indent_word
            elif '(' in first:
                second_indent += ' ' * (1 + first.find('('))
            else:
                second_indent += indent_word

            second = (second_indent + source[end_offset:].lstrip())
            # Nothing (or only a comment) would be left for the second line.
            if (
                not second.strip() or
                second.lstrip().startswith('#')
            ):
                continue

            # Do not begin a line with a comma
            if second.lstrip().startswith(','):
                continue
            # Do not end a line with a dot
            if first.rstrip().endswith('.'):
                continue
            # After an arithmetic operator the break is written with an
            # explicit backslash continuation.
            if token_string in '+-*/':
                fixed = first + ' \\' + '\n' + second
            else:
                fixed = first + '\n' + second

            # Only fix if syntax is okay.
            if check_syntax(normalize_multiline(fixed)
                            if aggressive else fixed):
                yield indentation + fixed
1439
1440
# A convenient way to handle tokens: a named record with five fields
# (presumably spos/epos are the token's start and end positions).
Token = collections.namedtuple(
    'Token', 'token_type token_string spos epos line')
1444
1445
1446 class ReformattedLines(object):
1447
1448     """The reflowed lines of atoms.
1449
1450     Each part of the line is represented as an "atom." They can be moved
1451     around when need be to get the optimal formatting.
1452
1453     """
1454
1455     ###########################################################################
1456     # Private Classes
1457
1458     class _Indent(object):
1459
1460         """Represent an indentation in the atom stream."""
1461
1462         def __init__(self, indent_amt):
1463             self._indent_amt = indent_amt
1464
1465         def emit(self):
1466             return ' ' * self._indent_amt
1467
1468         @property
1469         def size(self):
1470             return self._indent_amt
1471
1472     class _Space(object):
1473
1474         """Represent a space in the atom stream."""
1475
1476         def emit(self):
1477             return ' '
1478
1479         @property
1480         def size(self):
1481             return 1
1482
1483     class _LineBreak(object):
1484
1485         """Represent a line break in the atom stream."""
1486
1487         def emit(self):
1488             return '\n'
1489
1490         @property
1491         def size(self):
1492             return 0
1493
1494     def __init__(self, max_line_length):
1495         self._max_line_length = max_line_length
1496         self._lines = []
1497         self._bracket_depth = 0
1498         self._prev_item = None
1499         self._prev_prev_item = None
1500
1501     def __repr__(self):
1502         return self.emit()
1503
1504     ###########################################################################
1505     # Public Methods
1506
1507     def add(self, obj, indent_amt, break_after_open_bracket):
1508         if isinstance(obj, Atom):
1509             self._add_item(obj, indent_amt)
1510             return
1511
1512         self._add_container(obj, indent_amt, break_after_open_bracket)
1513
1514     def add_comment(self, item):
1515         num_spaces = 2
1516         if len(self._lines) > 1:
1517             if isinstance(self._lines[-1], self._Space):
1518                 num_spaces -= 1
1519             if len(self._lines) > 2:
1520                 if isinstance(self._lines[-2], self._Space):
1521                     num_spaces -= 1
1522
1523         while num_spaces > 0:
1524             self._lines.append(self._Space())
1525             num_spaces -= 1
1526         self._lines.append(item)
1527
1528     def add_indent(self, indent_amt):
1529         self._lines.append(self._Indent(indent_amt))
1530
1531     def add_line_break(self, indent):
1532         self._lines.append(self._LineBreak())
1533         self.add_indent(len(indent))
1534
1535     def add_line_break_at(self, index, indent_amt):
1536         self._lines.insert(index, self._LineBreak())
1537         self._lines.insert(index + 1, self._Indent(indent_amt))
1538
    def add_space_if_needed(self, curr_text, equal=False):
        """Append a space before ``curr_text`` when spacing rules call for one.

        Nothing is added when the stream is empty or already ends in a line
        break, an indentation or a space. Otherwise the decision is based on
        the text of the two previously added items and on the first
        character of ``curr_text``. ``equal`` enables the clauses that put
        a space around '='.

        """
        if (
            not self._lines or isinstance(
                self._lines[-1], (self._LineBreak, self._Indent, self._Space))
        ):
            return

        prev_text = unicode(self._prev_item)
        # Empty string stands in when there is no item before the previous.
        prev_prev_text = (
            unicode(self._prev_prev_item) if self._prev_prev_item else '')

        if (
            # The previous item was a keyword or identifier and the current
            # item isn't an operator that doesn't require a space.
            ((self._prev_item.is_keyword or self._prev_item.is_string or
              self._prev_item.is_name or self._prev_item.is_number) and
             (curr_text[0] not in '([{.,:}])' or
              (curr_text[0] == '=' and equal))) or

            # Don't place spaces around a '.', unless it's in an 'import'
            # statement.
            ((prev_prev_text != 'from' and prev_text[-1] != '.' and
              curr_text != 'import') and

             # Don't place a space before a colon.
             curr_text[0] != ':' and

             # Don't split up ending brackets by spaces.
             ((prev_text[-1] in '}])' and curr_text[0] not in '.,}])') or

              # Put a space after a colon or comma.
              prev_text[-1] in ':,' or

              # Put space around '=' if asked to.
              (equal and prev_text == '=') or

              # Put spaces around non-unary arithmetic operators.
              ((self._prev_prev_item and
                (prev_text not in '+-' and
                 (self._prev_prev_item.is_name or
                  self._prev_prev_item.is_number or
                  self._prev_prev_item.is_string)) and
                prev_text in ('+', '-', '%', '*', '/', '//', '**')))))
        ):
            self._lines.append(self._Space())
1584
1585     def previous_item(self):
1586         """Return the previous non-whitespace item."""
1587         return self._prev_item
1588
1589     def fits_on_current_line(self, item_extent):
1590         return self.current_size() + item_extent <= self._max_line_length
1591
1592     def current_size(self):
1593         """The size of the current line minus the indentation."""
1594         size = 0
1595         for item in reversed(self._lines):
1596             size += item.size
1597             if isinstance(item, self._LineBreak):
1598                 break
1599
1600         return size
1601
1602     def line_empty(self):
1603         return (self._lines and
1604                 isinstance(self._lines[-1],
1605                            (self._LineBreak, self._Indent)))
1606
1607     def emit(self):
1608         string = ''
1609         for item in self._lines:
1610             if isinstance(item, self._LineBreak):
1611                 string = string.rstrip()
1612             string += item.emit()
1613
1614         return string.rstrip() + '\n'
1615
1616     ###########################################################################
1617     # Private Methods
1618
    def _add_item(self, item, indent_amt):
        """Add an item to the line.

        Reflow the line to get the best formatting after the item is
        inserted. The bracket depth indicates if the item is being
        inserted inside of a container or not.

        After the item is appended, the previous/previous-previous item
        bookkeeping is shifted and the bracket depth is adjusted when the
        item's text is an opening or closing bracket.

        """
        if self._prev_item and self._prev_item.is_string and item.is_string:
            # Place consecutive string literals on separate lines.
            self._lines.append(self._LineBreak())
            self._lines.append(self._Indent(indent_amt))

        item_text = unicode(item)
        if self._lines and self._bracket_depth:
            # Adding the item into a container.
            self._prevent_default_initializer_splitting(item, indent_amt)

            # NOTE: ``in`` on a string is a substring test here.
            if item_text in '.,)]}':
                self._split_after_delimiter(item, indent_amt)

        elif self._lines and not self.line_empty():
            # Adding the item outside of a container.
            if self.fits_on_current_line(len(item_text)):
                self._enforce_space(item)

            else:
                # Line break for the new item.
                self._lines.append(self._LineBreak())
                self._lines.append(self._Indent(indent_amt))

        self._lines.append(item)
        # Shift the history: the new item becomes "previous", the old
        # previous becomes "previous-previous".
        self._prev_item, self._prev_prev_item = item, self._prev_item

        if item_text in '([{':
            self._bracket_depth += 1

        elif item_text in '}])':
            self._bracket_depth -= 1
            # A closing bracket must never outnumber the opening ones.
            assert self._bracket_depth >= 0
1659
    def _add_container(self, container, indent_amt, break_after_open_bracket):
        """Add a container by letting it reflow itself into this object.

        Chooses the indentation handed to ``container.reflow`` and decides
        whether a line break is inserted first, depending on whether the
        container fits on the current line and on what precedes it.

        """
        actual_indent = indent_amt + 1

        # The container does not follow '=', the current line already has
        # content, and the container would not fit on it.
        if (
            unicode(self._prev_item) != '=' and
            not self.line_empty() and
            not self.fits_on_current_line(
                container.size + self._bracket_depth + 2)
        ):

            if unicode(container)[0] == '(' and self._prev_item.is_name:
                # Don't split before the opening bracket of a call.
                break_after_open_bracket = True
                actual_indent = indent_amt + 4
            elif (
                break_after_open_bracket or
                unicode(self._prev_item) not in '([{'
            ):
                # If the container doesn't fit on the current line and the
                # current line isn't empty, place the container on the next
                # line.
                self._lines.append(self._LineBreak())
                self._lines.append(self._Indent(indent_amt))
                break_after_open_bracket = False
        else:
            # Continue on the current line; indent relative to its size.
            actual_indent = self.current_size() + 1
            break_after_open_bracket = False

        # These two container types always use the caller's indentation.
        if isinstance(container, (ListComprehension, IfExpression)):
            actual_indent = indent_amt

        # Increase the continued indentation only if recursing on a
        # container.
        container.reflow(self, ' ' * actual_indent,
                         break_after_open_bracket=break_after_open_bracket)
1695
1696     def _prevent_default_initializer_splitting(self, item, indent_amt):
1697         """Prevent splitting between a default initializer.
1698
1699         When there is a default initializer, it's best to keep it all on
1700         the same line. It's nicer and more readable, even if it goes
1701         over the maximum allowable line length. This goes back along the
1702         current line to determine if we have a default initializer, and,
1703         if so, to remove extraneous whitespaces and add a line
1704         break/indent before it if needed.
1705
1706         """
1707         if unicode(item) == '=':
1708             # This is the assignment in the initializer. Just remove spaces for
1709             # now.
1710             self._delete_whitespace()
1711             return
1712
1713         if (not self._prev_item or not self._prev_prev_item or
1714                 unicode(self._prev_item) != '='):
1715             return
1716
1717         self._delete_whitespace()
1718         prev_prev_index = self._lines.index(self._prev_prev_item)
1719
1720         if (
1721             isinstance(self._lines[prev_prev_index - 1], self._Indent) or
1722             self.fits_on_current_line(item.size + 1)
1723         ):
1724             # The default initializer is already the only item on this line.
1725             # Don't insert a newline here.
1726             return
1727
1728         # Replace the space with a newline/indent combo.
1729         if isinstance(self._lines[prev_prev_index - 1], self._Space):
1730             del self._lines[prev_prev_index - 1]
1731
1732         self.add_line_break_at(self._lines.index(self._prev_prev_item),
1733                                indent_amt)
1734
1735     def _split_after_delimiter(self, item, indent_amt):
1736         """Split the line only after a delimiter."""
1737         self._delete_whitespace()
1738
1739         if self.fits_on_current_line(item.size):
1740             return
1741
1742         last_space = None
1743         for item in reversed(self._lines):
1744             if (
1745                 last_space and
1746                 (not isinstance(item, Atom) or not item.is_colon)
1747             ):
1748                 break
1749             else:
1750                 last_space = None
1751             if isinstance(item, self._Space):
1752                 last_space = item
1753             if isinstance(item, (self._LineBreak, self._Indent)):
1754                 return
1755
1756         if not last_space:
1757             return
1758
1759         self.add_line_break_at(self._lines.index(last_space), indent_amt)
1760
1761     def _enforce_space(self, item):
1762         """Enforce a space in certain situations.
1763
1764         There are cases where we will want a space where normally we
1765         wouldn't put one. This just enforces the addition of a space.
1766
1767         """
1768         if isinstance(self._lines[-1],
1769                       (self._Space, self._LineBreak, self._Indent)):
1770             return
1771
1772         if not self._prev_item:
1773             return
1774
1775         item_text = unicode(item)
1776         prev_text = unicode(self._prev_item)
1777
1778         # Prefer a space around a '.' in an import statement, and between the
1779         # 'import' and '('.
1780         if (
1781             (item_text == '.' and prev_text == 'from') or
1782             (item_text == 'import' and prev_text == '.') or
1783             (item_text == '(' and prev_text == 'import')
1784         ):
1785             self._lines.append(self._Space())
1786
1787     def _delete_whitespace(self):
1788         """Delete all whitespace from the end of the line."""
1789         while isinstance(self._lines[-1], (self._Space, self._LineBreak,
1790                                            self._Indent)):
1791             del self._lines[-1]
1792
1793
class Atom(object):

    """A single token: the smallest unit, never split when reflowing."""

    def __init__(self, atom):
        # ``atom`` is the wrapped token; its ``token_type`` and
        # ``token_string`` attributes are the only ones read here.
        self._atom = atom

    def __repr__(self):
        return self._atom.token_string

    def __len__(self):
        return self.size

    def reflow(self, reflowed_lines, continued_indent, extent,
               break_after_open_bracket=False,
               is_list_comp_or_if_expr=False,
               next_is_dot=False):
        """Add this atom to ``reflowed_lines``.

        Comments are handed to ``add_comment``. Any other atom is preceded
        by either a line break (using ``continued_indent``) or an optional
        space, and is then added. ``extent``, when truthy, replaces the
        atom's own size in the fit test.

        """
        if self._atom.token_type == tokenize.COMMENT:
            reflowed_lines.add_comment(self)
            return

        total_size = extent or self.size

        if self._atom.token_string not in ',:([{}])':
            # Some atoms will need an extra 1-sized space token after them.
            total_size += 1

        prev_item = reflowed_lines.previous_item()
        stays_on_line = (
            is_list_comp_or_if_expr or
            reflowed_lines.fits_on_current_line(total_size) or
            (next_is_dot and
             reflowed_lines.fits_on_current_line(self.size + 1)) or
            reflowed_lines.line_empty() or
            self.is_colon or
            (prev_item and prev_item.is_name and unicode(self) == '(')
        )

        if stays_on_line:
            reflowed_lines.add_space_if_needed(unicode(self))
        else:
            # Start a new line: there is already something on this one and
            # adding the atom would go over the max line length.
            reflowed_lines.add_line_break(continued_indent)

        reflowed_lines.add(self, len(continued_indent),
                           break_after_open_bracket)

    def emit(self):
        """Return the text of the atom."""
        return self.__repr__()

    @property
    def is_keyword(self):
        """Whether the token text is a Python keyword."""
        return keyword.iskeyword(self._atom.token_string)

    @property
    def is_string(self):
        """Whether the token is a string literal."""
        return self._atom.token_type == tokenize.STRING

    @property
    def is_name(self):
        """Whether the token has the NAME token type."""
        return self._atom.token_type == tokenize.NAME

    @property
    def is_number(self):
        """Whether the token is a numeric literal."""
        return self._atom.token_type == tokenize.NUMBER

    @property
    def is_comma(self):
        """Whether the token text is a comma."""
        return self._atom.token_string == ','

    @property
    def is_colon(self):
        """Whether the token text is a colon."""
        return self._atom.token_string == ':'

    @property
    def size(self):
        """Length of the token text."""
        return len(self._atom.token_string)
1873
1874
class Container(object):

    """Base class for all container types.

    A container wraps an ordered sequence of items (atoms and nested
    containers) and knows how to render itself as one line of text and how
    to reflow itself into a reformatted-lines collector.

    """

    def __init__(self, items):
        self._items = items

    def __repr__(self):
        """Render the items as a single line of text."""
        no_space_after = tuple('([{,.:}]) ')
        no_space_before = tuple('([{,.:}])')
        text = ''
        after_keyword = False

        for element in self._items:
            if element.is_comma:
                text += ', '
            elif element.is_colon:
                text += ': '
            else:
                piece = unicode(element)
                # A separating space is needed after a keyword, or when
                # neither side of the join is a bracket/delimiter.
                if text and (
                    after_keyword or
                    not (text.endswith(no_space_after) or
                         piece.startswith(no_space_before))
                ):
                    text += ' '
                text += piece

            after_keyword = element.is_keyword

        return text

    def __iter__(self):
        for member in self._items:
            yield member

    def __getitem__(self, idx):
        return self._items[idx]

    def reflow(self, reflowed_lines, continued_indent,
               break_after_open_bracket=False):
        """Feed every item of the container into ``reflowed_lines``.

        Atoms reflow themselves; nested containers are added through
        ``reflowed_lines.add``. Line breaks using ``continued_indent`` are
        inserted after the opening bracket when requested, after a comma
        that follows a nested container, and ahead of an element whose
        extent would not fit on the current line.

        """
        is_comp_or_if = isinstance(self, (ListComprehension, IfExpression))
        previous_was_container = False

        for (position, element) in enumerate(self._items):
            following = get_item(self._items, position + 1)

            if isinstance(element, Atom):
                element.reflow(
                    reflowed_lines, continued_indent,
                    self._get_extent(position),
                    is_list_comp_or_if_expr=is_comp_or_if,
                    next_is_dot=(following and unicode(following) == '.'))
                if previous_was_container and element.is_comma:
                    reflowed_lines.add_line_break(continued_indent)
                previous_was_container = False
            else:  # isinstance(element, Container)
                reflowed_lines.add(element, len(continued_indent),
                                   break_after_open_bracket)
                previous_was_container = not isinstance(
                    element, (ListComprehension, IfExpression))

            if (
                break_after_open_bracket and position == 0 and
                # Prefer to keep empty containers together instead of
                # separating them.
                unicode(element) == self.open_bracket and
                (not following or
                 unicode(following) != self.close_bracket) and
                (len(self._items) != 3 or not isinstance(following, Atom))
            ):
                reflowed_lines.add_line_break(continued_indent)
                break_after_open_bracket = False
                continue

            two_ahead = get_item(self._items, position + 2)
            if (
                unicode(element) not in ('.', '%', 'in') and
                following and not isinstance(following, Container) and
                unicode(following) != ':' and
                two_ahead and (not isinstance(two_ahead, Atom) or
                               unicode(following) == 'not') and
                not reflowed_lines.line_empty() and
                not reflowed_lines.fits_on_current_line(
                    self._get_extent(position + 1) + 2)
            ):
                reflowed_lines.add_line_break(continued_indent)

    def _get_extent(self, index):
        """Return the size of the full element that starts at ``index``.

        E.g., the length of a function call or keyword.

        """
        total = 0
        prev_item = get_item(self._items, index - 1)
        seen_dot = prev_item and unicode(prev_item) == '.'

        while index < len(self._items):
            element = get_item(self._items, index)
            index += 1

            if isinstance(element, (ListComprehension, IfExpression)):
                break

            if isinstance(element, Container):
                if prev_item and prev_item.is_name:
                    # A container right after a name: once a dot has been
                    # seen only a single unit is counted for it.
                    total += 1 if seen_dot else element.size
                    prev_item = element
                    continue
            elif (unicode(element) not in ('.', '=', ':', 'not') and
                  not element.is_name and not element.is_string):
                break

            if unicode(element) == '.':
                seen_dot = True

            total += element.size
            prev_item = element

        return total

    @property
    def is_string(self):
        """Containers are never string literals."""
        return False

    @property
    def size(self):
        """Length of the container's one-line text rendering."""
        return len(self.__repr__())

    @property
    def is_keyword(self):
        """Containers are never keywords."""
        return False

    @property
    def is_name(self):
        """Containers are never names."""
        return False

    @property
    def is_comma(self):
        """Containers are never commas."""
        return False

    @property
    def is_colon(self):
        """Containers are never colons."""
        return False

    @property
    def open_bracket(self):
        """Opening bracket text; None in the base class."""
        return None

    @property
    def close_bracket(self):
        """Closing bracket text; None in the base class."""
        return None
2027
2028
class Tuple(Container):

    """A high-level representation of a tuple.

    _parse_container builds this for any group closed by ')'.

    """

    @property
    def open_bracket(self):
        """Opening bracket text for this container type."""
        return '('

    @property
    def close_bracket(self):
        """Closing bracket text for this container type."""
        return ')'
2040
2041
class List(Container):

    """A high-level representation of a list.

    _parse_container builds this for any group closed by ']'.

    """

    @property
    def open_bracket(self):
        """Opening bracket text for this container type."""
        return '['

    @property
    def close_bracket(self):
        """Closing bracket text for this container type."""
        return ']'
2053
2054
class DictOrSet(Container):

    """A high-level representation of a dictionary or set.

    _parse_container builds this for any group closed by '}'.

    """

    @property
    def open_bracket(self):
        """Opening bracket text for this container type."""
        return '{'

    @property
    def close_bracket(self):
        """Closing bracket text for this container type."""
        return '}'
2066
2067
class ListComprehension(Container):

    """A high-level representation of a list comprehension."""

    @property
    def size(self):
        """Summed size of the items that precede the first if-expression."""
        sizes = []
        for element in self._items:
            if isinstance(element, IfExpression):
                # A trailing if-expression is not counted.
                break
            sizes.append(element.size)
        return sum(sizes)
2080
2081
class IfExpression(Container):

    """A high-level representation of an if-expression.

    _parse_container builds this for the tokens that start at an 'if'
    inside a container and run up to the next ',' or closing bracket.

    """
2085
2086
def _parse_container(tokens, index, for_or_if=None):
    """Parse a high-level container, such as a list, tuple, etc.

    Parsing starts at ``tokens[index]``, which is stored as the first item:
    the opening bracket or, when ``for_or_if`` is 'for' or 'if', the
    keyword that starts a list comprehension or if-expression. Nested
    brackets, 'for' and 'if' are parsed recursively.

    Returns a ``(container, index)`` pair. For a bracketed container the
    index is that of its closing bracket. For a list comprehension or
    if-expression it is the index just before the terminating ',' or
    closing bracket, so that the enclosing container consumes that token.
    ``(None, None)`` is returned when the tokens run out before the
    container is closed, including when that happens in a nested construct.

    """
    # Store the opening bracket.
    items = [Atom(Token(*tokens[index]))]
    index += 1

    num_tokens = len(tokens)
    while index < num_tokens:
        tok = Token(*tokens[index])

        if tok.token_string in ',)]}':
            # First check if we're at the end of a list comprehension or
            # if-expression. Don't add the ending token as part of the list
            # comprehension or if-expression, because they aren't part of
            # those constructs.
            if for_or_if == 'for':
                return (ListComprehension(items), index - 1)

            elif for_or_if == 'if':
                return (IfExpression(items), index - 1)

            # Otherwise the token belongs to this container.
            items.append(Atom(tok))

            if tok.token_string == ')':
                # The end of a tuple.
                return (Tuple(items), index)

            elif tok.token_string == ']':
                # The end of a list.
                return (List(items), index)

            elif tok.token_string == '}':
                # The end of a dictionary or set.
                return (DictOrSet(items), index)

        elif tok.token_string in '([{' or tok.token_string in ('for', 'if'):
            # A sub-container, list comprehension or if-expression starts
            # here.
            nested_kind = (
                tok.token_string if tok.token_string in ('for', 'if')
                else None
            )
            (container, index) = _parse_container(tokens, index, nested_kind)
            if container is None:
                # The nested construct was never closed. Report the failure
                # instead of tripping over ``None + 1`` below.
                return (None, None)
            items.append(container)

        else:
            items.append(Atom(tok))

        index += 1

    return (None, None)
2144
2145
def _parse_tokens(tokens):
    """Turn raw tokens into a list of atoms and containers.

    The result is easier to manipulate than the flat token list: every
    bracketed group is collapsed into a single container item. Parsing
    stops at the first NEWLINE token. None is returned when a container
    cannot be parsed.

    """
    parsed = []
    position = 0
    total = len(tokens)

    while position < total:
        current = Token(*tokens[position])

        assert current.token_type != token.INDENT
        if current.token_type == tokenize.NEWLINE:
            # There's only one newline and it's at the end.
            break

        if current.token_string not in '([{':
            parsed.append(Atom(current))
        else:
            # The container parser hands back the index it stopped at.
            (container, position) = _parse_container(tokens, position)
            if not container:
                return None
            parsed.append(container)

        position += 1

    return parsed
2177
2178
def _reflow_lines(parsed_tokens, indentation, max_line_length,
                  start_on_prefix_line):
    """Reflow the lines so that it looks nice.

    ``parsed_tokens`` is the sequence of atoms and containers making up one
    logical line and ``indentation`` is its leading whitespace. When
    ``start_on_prefix_line`` is true, the first container continues on the
    same line as the text before it; otherwise a break after the opening
    bracket is requested.

    Returns the reflowed text, or None when splitting after the opening
    bracket would leave the first element aligned weirdly.

    """
    if unicode(parsed_tokens[0]) == 'def':
        # A function definition gets indented a bit more.
        continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE
    else:
        continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE

    break_after_open_bracket = not start_on_prefix_line

    lines = ReformattedLines(max_line_length)
    lines.add_indent(len(indentation.lstrip('\r\n')))

    if not start_on_prefix_line:
        # If splitting after the opening bracket will cause the first
        # element to be aligned weirdly, don't try it.
        first_token = get_item(parsed_tokens, 0)
        second_token = get_item(parsed_tokens, 1)

        if (
            first_token and second_token and
            unicode(second_token)[0] == '(' and
            # ``size`` rather than len(): containers define no __len__, so
            # len() would raise TypeError when the line starts with one.
            (len(indentation) + first_token.size + 1 ==
             len(continued_indent))
        ):
            return None

    for item in parsed_tokens:
        lines.add_space_if_needed(unicode(item), equal=True)

        # The first container on the prefix line is aligned with the
        # current position; everything else uses the regular indent.
        item_indent = continued_indent
        if start_on_prefix_line and isinstance(item, Container):
            start_on_prefix_line = False
            item_indent = ' ' * (lines.current_size() + 1)

        # NOTE(review): for Atom items the third positional parameter of
        # reflow() is ``extent``, not ``break_after_open_bracket``. Left
        # as is because changing it would alter the produced formatting.
        item.reflow(lines, item_indent, break_after_open_bracket)

    return lines.emit()
2219
2220
def _shorten_line_at_tokens_new(tokens, source, indentation,
                                max_line_length):
    """Yield candidate replacements for an over-long line.

    The input is expected to be free of newlines except for inside
    multiline strings and at the end.

    The original line comes first, followed by every reflowed variant that
    passes the syntax check.

    """
    # Yield the original source so to see if it's a better choice than the
    # shortened candidate lines we generate here.
    yield indentation + source

    parsed_tokens = _parse_tokens(tokens)
    if not parsed_tokens:
        return

    # Perform two reflows. The first one starts on the same line as the
    # prefix. The second starts on the line after the prefix.
    for on_prefix_line in (True, False):
        candidate = _reflow_lines(parsed_tokens, indentation,
                                  max_line_length,
                                  start_on_prefix_line=on_prefix_line)
        if candidate and check_syntax(
                normalize_multiline(candidate.lstrip())):
            yield candidate
2247
2248
def _shorten_line_at_tokens(tokens, source, indentation, indent_word,
                            key_token_strings, aggressive):
    """Break the line after tokens found in key_token_strings.

    The input is expected to be free of newlines except for inside
    multiline strings and at the end.

    Returns the indented, re-split source when it passes the syntax check,
    and None otherwise.

    """
    closing_for = {
        '(': ')',
        '[': ']',
        '{': '}'}

    offsets = []
    for (index, entry) in enumerate(token_offsets(tokens)):
        (token_type, token_string, start_offset, end_offset) = entry

        assert token_type != token.INDENT

        if token_string not in key_token_strings:
            # Break at adjacent strings. These were probably meant to be on
            # separate lines in the first place.
            previous_token = get_item(tokens, index - 1)
            if (
                token_type == tokenize.STRING and
                previous_token and previous_token[0] == tokenize.STRING
            ):
                offsets.append(start_offset)
            continue

        # Do not break in containers with zero or one items.
        unwanted_next_token = closing_for.get(token_string)
        if unwanted_next_token and (
            get_item(tokens,
                     index + 1,
                     default=[None, None])[1] == unwanted_next_token or
            get_item(tokens,
                     index + 2,
                     default=[None, None])[1] == unwanted_next_token
        ):
            continue

        if (
            index > 2 and token_string == '(' and
            tokens[index - 1][1] in ',(%['
        ):
            # Don't split after a tuple start, or before a tuple start if
            # the tuple is in a list.
            continue

        if end_offset < len(source) - 1:
            # Don't split right before newline.
            offsets.append(end_offset)

    current_indent = None
    fixed = None
    for line in split_at_offsets(source, offsets):
        if not fixed:
            # First line.
            fixed = line
            assert not current_indent
            current_indent = indent_word
            continue

        fixed += '\n' + current_indent + line

        # A piece ending in an opening bracket deepens the indentation of
        # the pieces that follow.
        if line.endswith(('(', '[', '{')):
            current_indent += indent_word

    assert fixed is not None

    candidate = normalize_multiline(fixed) if aggressive > 1 else fixed
    if check_syntax(candidate):
        return indentation + fixed
    return None
2326
2327
def token_offsets(tokens):
    """Yield (type, string, start offset, end offset) for each token.

    Offsets are running character counts. Each token starts where the
    previous one ended plus the whitespace in between. For a token on a
    new row, the whole start column is added.

    """
    cursor = 0
    last_end_row = 0
    last_end_column = 0

    for tok in tokens:
        (kind, text) = (tok[0], tok[1])
        (start_row, start_column) = tok[2]
        (end_row, end_column) = tok[3]

        # Whitespace between tokens: on the same row it is the gap since
        # the previous token, otherwise the full start column.
        gap = start_column
        if start_row == last_end_row:
            gap -= last_end_column

        start_offset = cursor + gap
        cursor = start_offset + len(text)

        yield (kind, text, start_offset, cursor)

        last_end_row = end_row
        last_end_column = end_column
2357
2358
def normalize_multiline(line):
    """Pad a code fragment so that it can be syntax-checked on its own.

    Fragments such as a bare function header, a return statement or a
    decorator are not valid Python by themselves. This wraps or completes
    them purely for the purpose of checking syntax.

    """
    if line.startswith('def ') and line.rstrip().endswith(':'):
        return line + ' pass'

    if line.startswith('return '):
        return 'def _(): ' + line

    if line.startswith('@'):
        return line + 'def _(): pass'

    if line.startswith(('class ', 'if ')):
        return line + ' pass'

    return line
2377
2378
def fix_whitespace(line, offset, replacement):
    """Replace the whitespace around offset and return the fixed line.

    Escaped newlines (a backslash and the line break after it) count as
    whitespace too. The line is returned untouched when a comment begins
    right after the whitespace.

    """
    strippable = '\n\r \t\\'
    head = line[:offset].rstrip(strippable)
    tail = line[offset:].lstrip(strippable)
    return line if tail.startswith('#') else head + replacement + tail
2388
2389
def _execute_pep8(pep8_options, source):
    """Run pep8 through its Python API and return the errors it reports.

    The result is a list of dictionaries with the keys 'id', 'line',
    'column' and 'info'.

    """
    class QuietReport(pep8.BaseReport):

        """Report that records errors instead of printing them."""

        def __init__(self, options):
            super(QuietReport, self).__init__(options)
            self.__full_error_results = []

        def error(self, line_number, offset, text, _):
            """Record an error for which the base class returns a code."""
            code = super(QuietReport, self).error(line_number, offset, text, _)
            if not code:
                return

            # The stored column is one-based.
            entry = {'id': code,
                     'line': line_number,
                     'column': offset + 1,
                     'info': text}
            self.__full_error_results.append(entry)

        def full_error_results(self):
            """Return the recorded errors in detail.

            Results are in the form of a list of dictionaries. Each
            dictionary contains 'id', 'line', 'column', and 'info'.

            """
            return self.__full_error_results

    checker = pep8.Checker('', lines=source,
                           reporter=QuietReport, **pep8_options)
    checker.check_all()

    report = checker.report
    return report.full_error_results()
2423
2424
def _remove_leading_and_normalize(line):
    """Drop leading whitespace and end the line with a single newline.

    Trailing CR/LF characters are stripped before the newline is appended.

    """
    without_indent = line.lstrip()
    return without_indent.rstrip(CR + LF) + '\n'
2427
2428
class Reindenter(object):

    """Reindents badly-indented code to uniformly use four-space indentation.

    The indentation width actually applied is the ``indent_size`` argument
    of run().

    Released to the public domain, by Tim Peters, 03 October 2000.

    """

    def __init__(self, input_text):
        """Split input_text into lines and normalize their indentation.

        Leading tabs are expanded and line endings normalized, except on
        the lines reported by multiline_string_lines(), which are kept
        verbatim.

        """
        sio = io.StringIO(input_text)
        source_lines = sio.readlines()

        # Presumably the numbers of the lines that lie inside multiline
        # strings; the helper is defined elsewhere in this file.
        self.string_content_line_numbers = multiline_string_lines(input_text)

        # File lines, rstripped & tab-expanded. Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it is a newline.
        self.lines = []
        for line_number, line in enumerate(source_lines, start=1):
            # Do not modify if inside a multiline string.
            if line_number in self.string_content_line_numbers:
                self.lines.append(line)
            else:
                # Only expand leading tabs.
                self.lines.append(_get_indentation(line).expandtabs() +
                                  _remove_leading_and_normalize(line))

        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line
        self.input_text = input_text

    def run(self, indent_size=DEFAULT_INDENT_SIZE):
        """Fix indentation and return the resulting source text.

        The original input text is returned unchanged when indent_size is
        smaller than 1 or when the input cannot be tokenized.

        """
        if indent_size < 1:
            return self.input_text

        try:
            # One (line number, indent level) pair per statement and
            # comment line; the level is -1 for comment lines.
            stats = _reindent_stats(tokenize.generate_tokens(self.getline))
        except (SyntaxError, tokenize.TokenError):
            return self.input_text
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == '\n':
            lines.pop()
        # Sentinel.
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = _leading_space_count(lines[thisstmt])
            want = thislevel * indent_size
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line. If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == _leading_space_count(lines[jline]):
                                    want = jlevel * indent_size
                                break
                    if want < 0:            # Maybe it's a hanging
                                            # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = (have + _leading_space_count(
                                        after[jline - 1]) -
                                        _leading_space_count(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                # Nothing to shift: copy the statement's lines verbatim.
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line_number, line in enumerate(lines[thisstmt:nextstmt],
                                                   start=thisstmt):
                    if line_number in self.string_content_line_numbers:
                        # Content of multiline strings is never shifted.
                        after.append(line)
                    elif diff > 0:
                        if line == '\n':
                            after.append(line)
                        else:
                            after.append(' ' * diff + line)
                    else:
                        # Dedent, but never remove more than the leading
                        # spaces the line actually has.
                        remove = min(_leading_space_count(line), -diff)
                        after.append(line[remove:])

        return ''.join(after)

    def getline(self):
        """Line-getter for tokenize.

        Returns the next stored line, or an empty string once all lines
        have been handed out.

        """
        if self.index >= len(self.lines):
            line = ''
        else:
            line = self.lines[self.index]
            self.index += 1
        return line
2552
2553
def _reindent_stats(tokens):
    """Return list of (lineno, indentlevel) pairs.

    There is one pair for each statement and one for each comment that is
    met while a new statement is being looked for. indentlevel is -1 for
    those comment lines, as a signal that tokenize doesn't know what to do
    about them; settling their indentation is left to the caller.

    """
    awaiting_stmt = True  # Does the next real token begin a fresh stmt?
    depth = 0  # Current indent level.
    stats = []

    for token in tokens:
        kind = token[0]
        start_row = token[2][0]
        physical_line = token[4]

        if kind == tokenize.NL:
            # A non-logical line break tells us nothing.
            continue

        if kind == tokenize.COMMENT:
            # Record the comment, but keep looking for the statement: the
            # search state is deliberately left untouched.
            if awaiting_stmt:
                stats.append((start_row, -1))
            continue

        if kind in (tokenize.NEWLINE, tokenize.INDENT, tokenize.DEDENT):
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            if kind == tokenize.INDENT:
                depth += 1
            elif kind == tokenize.DEDENT:
                depth -= 1
            awaiting_stmt = True
            continue

        if awaiting_stmt:
            # First "real" token since the last NEWLINE, so it opens the
            # next program statement -- unless it is the ENDMARKER, whose
            # line is empty.
            awaiting_stmt = False
            if physical_line:
                stats.append((start_row, depth))

    return stats
2603
2604
def _leading_space_count(line):
    """Return number of leading spaces in line.

    Only the space character is counted; a tab or any other character ends
    the run.

    """
    return len(line) - len(line.lstrip(' '))
2611
2612
def refactor_with_2to3(source_text, fixer_names):
    """Use lib2to3 to refactor the source.

    fixer_names are the short fixer names; each one is expanded to its
    "lib2to3.fixes.fix_<name>" module path and enabled explicitly.

    Return the refactored source code, or source_text unchanged when lib2to3
    fails to tokenize it.

    """
    from lib2to3.refactor import RefactoringTool
    qualified_fixers = ['lib2to3.fixes.fix_' + name for name in fixer_names]
    tool = RefactoringTool(fixer_names=qualified_fixers,
                           explicit=qualified_fixers)

    from lib2to3.pgen2 import tokenize as lib2to3_tokenize
    try:
        refactored = unicode(tool.refactor_string(source_text, name=''))
    except lib2to3_tokenize.TokenError:
        refactored = source_text
    return refactored
2628
2629
def check_syntax(code):
    """Return a truthy value if syntax is okay, otherwise False.

    On success the compiled code object is returned, so callers should rely
    only on the truthiness of the result. False is returned for anything
    that cannot be compiled, including non-string input.

    """
    try:
        return compile(code, '<string>', 'exec')
    except (SyntaxError, TypeError, ValueError):
        # ValueError covers UnicodeDecodeError (its subclass) as well as the
        # error compile() raises on some Python versions when the source
        # contains null bytes.
        return False
2636
2637
def filter_results(source, results, aggressive):
    """Filter out spurious reports from pep8.

    Yield only the results that should be acted upon. aggressive is the
    aggressiveness level: the possibly unsafe fixes are let through only
    when it is high enough (E711 and W6 need at least 1, E712 and E713 need
    at least 2).

    """
    string_rows = multiline_string_lines(source, include_docstrings=False)
    string_and_docstring_rows = multiline_string_lines(
        source, include_docstrings=True)
    commented_code_rows = commented_out_code_lines(source)

    for result in results:
        code = result['id'].lower()
        row = result['line']

        # Leave the contents of (non-docstring) multiline strings alone.
        if row in string_rows and code.startswith(('e1', 'e501', 'w191')):
            continue

        # Long lines inside any multiline string, docstrings included.
        if row in string_and_docstring_rows and code == 'e501':
            continue

        # Do not modify multiline strings in non-aggressive mode; removing
        # trailing whitespace could break doctests. The row is offset by 1
        # to reach lines that hold the trailing contents of such strings.
        if (
            not aggressive and
            (row + 1) in string_and_docstring_rows and
            code.startswith(('w29', 'w39'))
        ):
            continue

        if aggressive <= 0 and code.startswith(('e711', 'w6')):
            continue

        if aggressive <= 1 and code.startswith(('e712', 'e713')):
            continue

        # Reformatting commented-out code only adds clutter.
        if row in commented_code_rows and code.startswith(('e26', 'e501')):
            continue

        yield result
2683
2684
def multiline_string_lines(source, include_docstrings=False):
    """Return line numbers that are within multiline strings.

    The line numbers are indexed at 1. The line a string starts on is not
    included; every later line of the string, up to and including the one
    it ends on, is.

    Docstrings are ignored unless include_docstrings is set. A multiline
    string counts as a docstring here when the token just before it is an
    INDENT.

    If tokenizing fails part way through, the line numbers collected so far
    are returned.

    """
    rows = set()
    last_kind = ''
    try:
        for token in generate_tokens(source):
            kind = token[0]
            first_row = token[2][0]
            final_row = token[3][0]

            spans_lines = kind == tokenize.STRING and first_row != final_row
            if spans_lines and (
                include_docstrings or last_kind != tokenize.INDENT
            ):
                # Shift by one since we want the contents of the string,
                # not the line it opens on.
                rows.update(range(first_row + 1, final_row + 1))

            last_kind = kind
    except (SyntaxError, tokenize.TokenError):
        pass

    return rows
2715
2716
def commented_out_code_lines(source):
    """Return line numbers of comments that are likely code.

    A comment is treated as code when it fills its whole line and its text
    (with the leading "#" characters removed) contains a space, contains no
    further "#" and passes check_syntax().

    Commented-out code is bad practice, but modifying it just adds even more
    clutter.

    """
    rows = []
    try:
        for token in generate_tokens(source):
            kind = token[0]
            text = token[1]
            row = token[2][0]
            physical_line = token[4]

            # Inline comments (code before the "#") are not considered.
            is_whole_line_comment = (
                physical_line.lstrip().startswith('#') and
                kind == tokenize.COMMENT
            )
            if not is_whole_line_comment:
                continue

            candidate = text.lstrip('#').strip()
            if ' ' not in candidate or '#' in candidate:
                continue

            if check_syntax(candidate):
                rows.append(row)
    except (SyntaxError, tokenize.TokenError):
        pass

    return rows
2748
2749
def shorten_comment(line, max_line_length, last_comment=False):
    """Return trimmed or split long comment line.

    A comment that ends in a run of at least five identical non-alphanumeric
    characters (a divider such as "-----") is cut off at the maximum length.

    Otherwise, if last_comment is set (no comments immediately follow this
    one), the comment text is wrapped over several lines. Wrapping all
    comments in general would lead to jagged comment text, so any other
    comment is returned with only its trailing whitespace normalized.

    The maximum length is capped so that the comment text itself does not
    exceed 72 characters.

    """
    assert len(line) > max_line_length
    line = line.rstrip()

    # PEP 8 recommends 72 characters for comment text.
    indentation = _get_indentation(line) + '# '
    max_line_length = min(max_line_length, len(indentation) + 72)

    MIN_CHARACTER_REPEAT = 5
    final_character = line[-1]
    trailing_run = len(line) - len(line.rstrip(final_character))
    if (
        trailing_run >= MIN_CHARACTER_REPEAT and
        not final_character.isalnum()
    ):
        # Trim comments that end with things like ---------
        return line[:max_line_length] + '\n'

    if last_comment and re.match(r'\s*#+\s*\w+', line):
        import textwrap
        wrapped = textwrap.wrap(line.lstrip(' \t#'),
                                width=max_line_length,
                                initial_indent=indentation,
                                subsequent_indent=indentation,
                                break_long_words=False,
                                break_on_hyphens=False)
        return '\n'.join(wrapped) + '\n'

    return line + '\n'
2784
2785
def normalize_line_endings(lines, newline):
    """Return lines with every line ending replaced by newline.

    Trailing carriage return and line feed characters are stripped from each
    line before newline is appended, so a line without any ending gains one.

    """
    normalized = []
    for line in lines:
        normalized.append(line.rstrip('\r\n') + newline)
    return normalized
2793
2794
def mutual_startswith(a, b):
    """Return True if either string is a prefix of the other."""
    if b.startswith(a):
        return True
    return a.startswith(b)
2797
2798
def code_match(code, select, ignore):
    """Return True if code should be handled given select and ignore.

    select and ignore are collections of codes (never a single string).
    Codes are compared case-insensitively and match when one is a prefix of
    the other. An ignore match always wins; otherwise, if select is
    non-empty, code must match one of its entries.

    """
    if ignore:
        assert not isinstance(ignore, unicode)
        ignored_codes = [entry.strip() for entry in ignore]
        if any(mutual_startswith(code.lower(), ignored.lower())
               for ignored in ignored_codes):
            return False

    if select:
        assert not isinstance(select, unicode)
        selected_codes = [entry.strip() for entry in select]
        return any(mutual_startswith(code.lower(), selected.lower())
                   for selected in selected_codes)

    return True
2814
2815
def fix_code(source, options=None):
    """Return fixed source code.

    source may be unicode text or a byte string; a byte string is decoded
    with the locale's preferred encoding first. When options is not given,
    the result of parse_args(['']) is used.

    """
    options = options or parse_args([''])

    if not isinstance(source, unicode):
        source = source.decode(locale.getpreferredencoding())

    source_lines = io.StringIO(source).readlines()
    return fix_lines(source_lines, options=options)
2826
2827
def fix_lines(source_lines, options, filename=''):
    """Return fixed source code.

    The lines are converted to line feed endings while being worked on and
    converted back to the original line ending before being returned.

    The one-off fixes run first (the local variant when options.line_range
    is set), then FixPEP8 is applied repeatedly until the source stops
    changing, a previously seen state comes back, or the pass limit from
    options.pep8_passes is exceeded.

    """
    original_newline = find_newline(source_lines)
    tmp_source = ''.join(normalize_line_endings(source_lines, '\n'))

    if options.line_range:
        fixed_source = apply_local_fixes(tmp_source, options)
    else:
        # Apply global fixes only once (for efficiency).
        fixed_source = apply_global_fixes(tmp_source, options)

    # Hashes of every state already processed, to break out of cycles.
    seen_hashes = set()
    long_line_ignore_cache = set()
    passes = 0
    while True:
        digest = hash(fixed_source)
        if digest in seen_hashes:
            break
        if options.pep8_passes >= 0 and passes > options.pep8_passes:
            break
        passes += 1
        seen_hashes.add(digest)

        tmp_source = copy.copy(fixed_source)
        fixed_source = FixPEP8(
            filename,
            options,
            contents=tmp_source,
            long_line_ignore_cache=long_line_ignore_cache).fix()

    fixed_lines = io.StringIO(fixed_source).readlines()
    return ''.join(normalize_line_endings(fixed_lines, original_newline))
2865
2866
def fix_file(filename, options=None, output=None):
    """Fix the file named filename and deliver the result.

    When options is not given, it is built with parse_args([filename]).

    What happens to the result depends on the options:

    - options.diff: a diff against the original is written to output, or
      returned when there is no output.
    - options.in_place: the fixed source is written back to filename using
      the file's detected encoding.
    - otherwise: the fixed source is written to output, or returned when
      there is no output.

    Nothing is returned when the result has been written somewhere.

    """
    if not options:
        options = parse_args([filename])

    original_source = readlines_from_file(filename)

    fixed_source = original_source

    # The encoding is only needed when something is going to be written.
    if options.in_place or output:
        encoding = detect_encoding(filename)

    if output:
        # Write encoded bytes to the underlying binary buffer when the
        # stream has one, otherwise to the stream itself.
        output = codecs.getwriter(encoding)(output.buffer
                                            if hasattr(output, 'buffer')
                                            else output)

        output = LineEndingWrapper(output)

    fixed_source = fix_lines(fixed_source, options, filename=filename)

    if options.diff:
        new = io.StringIO(fixed_source)
        new = new.readlines()
        diff = get_diff_text(original_source, new, filename)
        if output:
            output.write(diff)
            output.flush()
        else:
            return diff
    elif options.in_place:
        fp = open_with_encoding(filename, encoding=encoding,
                                mode='w')
        try:
            fp.write(fixed_source)
        finally:
            # Close the file even if the write fails, so the handle is not
            # leaked.
            fp.close()
    else:
        if output:
            output.write(fixed_source)
            output.flush()
        else:
            return fixed_source
2907
2908
def global_fixes():
    """Yield multiple (code, function) tuples.

    Every module-level function whose first parameter is named "source" is
    considered; it is yielded together with its code when
    extract_code_from_function() returns one for it.

    """
    try:
        # inspect.getargspec() was removed in Python 3.11; prefer its
        # replacement where it exists and fall back for Python 2.
        get_spec = inspect.getfullargspec
    except AttributeError:
        get_spec = inspect.getargspec

    # Iterate over a snapshot: this is a generator, so the module globals
    # could otherwise change between two yields and invalidate the view.
    for function in list(globals().values()):
        if inspect.isfunction(function):
            arguments = get_spec(function)[0]
            if arguments[:1] != ['source']:
                continue

            code = extract_code_from_function(function)
            if code:
                yield (code, function)
2920
2921
def apply_global_fixes(source, options, where='global'):
    """Run global fixes on source code.

    These are fixes that only need be done once (unlike those in
    FixPEP8, which are dependent on pep8): reindenting for E101, every
    function found by global_fixes() and finally the 2to3 based fixes. Each
    one only runs when its code passes options.select / options.ignore.

    where is only used in the verbose message.

    """
    select = options.select
    ignore = options.ignore

    if code_match('E101', select=select, ignore=ignore):
        source = reindent(source, indent_size=options.indent_size)

    for (code, function) in global_fixes():
        if not code_match(code, select=select, ignore=ignore):
            continue

        if options.verbose:
            message = '--->  Applying {0} fix for {1}'.format(where,
                                                              code.upper())
            print(message, file=sys.stderr)

        source = function(source, aggressive=options.aggressive)

    return fix_2to3(source,
                    aggressive=options.aggressive,
                    select=select,
                    ignore=ignore)
2948
2949
def apply_local_fixes(source, options):
    """Analogous to apply_global_fixes, but runs only those which make sense
    for the given line_range.

    Do as much as we can without breaking code.

    source is the whole source as a single string; the fixed source is
    returned as a single string too. options.line_range holds the 1-indexed,
    inclusive range of lines to fix. Note that options.line_range[1] may be
    updated in place (see local_fix).

    """
    def find_ge(a, x):
        """Find leftmost item greater than or equal to x.

        Return an (index, item) pair. If there is no such item, the last
        item of a is returned instead.

        """
        i = bisect.bisect_left(a, x)
        if i != len(a):
            return i, a[i]
        return len(a) - 1, a[-1]

    def find_le(a, x):
        """Find rightmost value less than or equal to x.

        Return an (index, item) pair. If there is no such item, the first
        item of a is returned instead.

        """
        i = bisect.bisect_right(a, x)
        if i:
            return i - 1, a[i - 1]
        return 0, a[0]

    def local_fix(source, start_log, end_log,
                  start_lines, end_lines, indents, last_line):
        """apply_global_fixes to the source between start_log and end_log.

        source is a list of lines here; start_log and end_log are indexes of
        logical lines, which start_lines and end_lines map to line indexes.
        A new list of lines is returned.

        The subsource must be the correct syntax of a complete python program
        (but all lines may share an indentation). The subsource's shared indent
        is removed, fixes are applied and the indent prepended back. Taking
        care to not reindent strings.

        last_line is the strict cut off (options.line_range[1]), so that
        lines after last_line are not modified.

        """
        # Nothing to do for an empty range of logical lines.
        if end_log < start_log:
            return source

        # ind is the width of the shared indent, indent its actual text.
        ind = indents[start_log]
        indent = _get_indentation(source[start_lines[start_log]])

        # All physical lines from the start of the first logical line to the
        # end of the last one.
        sl = slice(start_lines[start_log], end_lines[end_log] + 1)

        subsource = source[sl]
        # Remove indent from subsource.
        # Only the first physical line of each logical line is touched.
        if ind:
            for line_no in start_lines[start_log:end_log + 1]:
                pos = line_no - start_lines[start_log]
                subsource[pos] = subsource[pos][ind:]

        # Fix indentation of subsource.
        fixed_subsource = apply_global_fixes(''.join(subsource),
                                             options,
                                             where='local')
        fixed_subsource = fixed_subsource.splitlines(True)

        # Add back indent for non multi-line strings lines.
        # Lines that consist of a bare newline are left without indent.
        msl = multiline_string_lines(''.join(fixed_subsource),
                                     include_docstrings=False)
        for i, line in enumerate(fixed_subsource):
            if not i + 1 in msl:
                fixed_subsource[i] = indent + line if line != '\n' else line

        # We make a special case to look at the final line, if it's a multiline
        # *and* the cut off is somewhere inside it, we take the fixed
        # subset up until last_line, this assumes that the number of lines
        # does not change in this multiline line.
        changed_lines = len(fixed_subsource)
        if (start_lines[end_log] != end_lines[end_log]
                and end_lines[end_log] > last_line):
            after_end = end_lines[end_log] - last_line
            # Keep the original text for the lines past the cut off.
            fixed_subsource = (fixed_subsource[:-after_end] +
                               source[sl][-after_end:])
            changed_lines -= after_end

            # This mutates the caller's options object.
            options.line_range[1] = (options.line_range[0] +
                                     changed_lines - 1)

        # Splice the fixed lines between the untouched head and tail.
        return (source[:start_lines[start_log]] +
                fixed_subsource +
                source[end_lines[end_log] + 1:])

    def is_continued_stmt(line,
                          continued_stmts=frozenset(['else', 'elif',
                                                     'finally', 'except'])):
        """Return True if line starts with else, elif, finally or except."""
        return re.split('[ :]', line.strip(), 1)[0] in continued_stmts

    assert options.line_range
    start, end = options.line_range
    # Switch from 1-indexed line numbers to 0-indexed line indexes.
    start -= 1
    end -= 1
    last_line = end  # We shouldn't modify lines after this cut-off.

    try:
        logical = _find_logical(source)
    except (SyntaxError, tokenize.TokenError):
        # source is still a single string at this point, so the join hands
        # it back unchanged.
        return ''.join(source)

    if not logical[0]:
        # Just blank lines, this should imply that it will become '\n' ?
        return apply_global_fixes(source, options)

    # logical[0] holds (line, indent) pairs for the start of each logical
    # line; only the first item of each logical[1] pair is used, as the line
    # each logical line ends on.
    start_lines, indents = zip(*logical[0])
    end_lines, _ = zip(*logical[1])

    source = source.splitlines(True)

    # Snap the requested range to logical line starts. From here on the
    # *_log names are indexes of logical lines, while start and end are the
    # matching line indexes.
    start_log, start = find_ge(start_lines, start)
    end_log, end = find_le(start_lines, end)

    # Look behind one line, if it's indented less than current indent
    # then we can move to this previous line knowing that its
    # indentation level will not be changed.
    # NOTE(review): source is indexed with a logical line index here
    # (start_log - 1), while the other is_continued_stmt() calls below pass
    # a line index -- confirm whether start_lines[start_log - 1] was meant.
    if (start_log > 0
            and indents[start_log - 1] < indents[start_log]
            and not is_continued_stmt(source[start_log - 1])):
        start_log -= 1
        start = start_lines[start_log]

    while start < end:

        # A block cannot start at else/elif/finally/except, so skip ahead.
        if is_continued_stmt(source[start]):
            start_log += 1
            start = start_lines[start_log]
            continue

        # The loop body runs at least once, since the logical line at
        # start_log has exactly the indent ind, so n_log and n are always
        # bound afterwards. They end up describing the last consecutive
        # logical line that is indented at least as much as start.
        ind = indents[start_log]
        for t in itertools.takewhile(lambda t: t[1][1] >= ind,
                                     enumerate(logical[0][start_log:])):
            n_log, n = start_log + t[0], t[1][0]
        # start shares indent up to n.

        if n <= end:
            # The whole run lies inside the range: fix it and carry on with
            # whatever follows it.
            source = local_fix(source, start_log, n_log,
                               start_lines, end_lines,
                               indents, last_line)
            start_log = n_log if n == end else n_log + 1
            start = start_lines[start_log]
            continue

        else:
            # Look at the line after end and see if allows us to reindent.
            after_end_log, after_end = find_ge(start_lines, end + 1)

            # The line after the range is indented deeper than start, so
            # retry from the next logical line.
            if indents[after_end_log] > indents[start_log]:
                start_log, start = find_ge(start_lines, start + 1)
                continue

            if (indents[after_end_log] == indents[start_log]
                    and is_continued_stmt(source[after_end])):
                # find n, the beginning of the last continued statement
                # Apply fix to previous block if there is one.
                only_block = True
                for n, n_ind in logical[0][start_log:end_log + 1][::-1]:
                    if n_ind == ind and not is_continued_stmt(source[n]):
                        n_log = start_lines.index(n)
                        source = local_fix(source, start_log, n_log - 1,
                                           start_lines, end_lines,
                                           indents, last_line)
                        start_log = n_log + 1
                        start = start_lines[start_log]
                        only_block = False
                        break
                if only_block:
                    # Nothing could be fixed; shrink the range from its end.
                    end_log, end = find_le(start_lines, end - 1)
                continue

            source = local_fix(source, start_log, end_log,
                               start_lines, end_lines,
                               indents, last_line)
            break

    return ''.join(source)
3122
3123
def extract_code_from_function(function):
    """Return code handled by function.

    The code is whatever follows the "fix_" prefix of the function's name.
    None is returned unless the name has that prefix and everything after
    the first character of the code is accepted by int().

    """
    prefix = 'fix_'
    name = function.__name__
    if not name.startswith(prefix):
        return None

    code = name[len(prefix):]
    if code == '':
        return None

    try:
        int(code[1:])
    except ValueError:
        code = None

    return code
3139
3140
def create_parser():
    """Return command-line parser.

    The parser only declares the options; checks that involve several
    options at once are done in parse_args().

    """
    # Do import locally to be friendly to those who use autopep8 as a library
    # and are supporting Python 2.6.
    import argparse

    parser = argparse.ArgumentParser(
        prog='autopep8',
        description=docstring_summary(__doc__))
    add_option = parser.add_argument

    add_option('--version',
               action='version',
               version='%(prog)s ' + __version__)
    add_option('-v', '--verbose',
               action='count',
               dest='verbose',
               default=0,
               help='print verbose messages; '
                    'multiple -v result in more verbose messages')
    add_option('-d', '--diff',
               action='store_true',
               dest='diff',
               help='print the diff for the fixed source')
    add_option('-i', '--in-place',
               action='store_true',
               help='make changes to files in place')
    add_option('-r', '--recursive',
               action='store_true',
               help='run recursively over directories; '
                    'must be used with --in-place or --diff')
    add_option('-j', '--jobs',
               type=int,
               metavar='n',
               default=1,
               help='number of parallel jobs; '
                    'match CPU count if value is less than 1')
    add_option('-p', '--pep8-passes',
               metavar='n',
               default=-1,
               type=int,
               help='maximum number of additional pep8 passes '
                    '(default: infinite)')
    add_option('-a', '--aggressive',
               action='count',
               default=0,
               help='enable non-whitespace changes; '
                    'multiple -a result in more aggressive changes')
    add_option('--experimental',
               action='store_true',
               help='enable experimental fixes')
    add_option('--exclude',
               metavar='globs',
               help='exclude file/directory names that match these '
                    'comma-separated globs')
    add_option('--list-fixes',
               action='store_true',
               help='list codes for fixes; '
                    'used by --ignore and --select')
    add_option('--ignore',
               metavar='errors',
               default='',
               help='do not fix these errors/warnings '
                    '(default: {0})'.format(DEFAULT_IGNORE))
    add_option('--select',
               metavar='errors',
               default='',
               help='fix only these errors/warnings (e.g. E4,W)')
    add_option('--max-line-length',
               metavar='n',
               default=79,
               type=int,
               help='set maximum allowed line length '
                    '(default: %(default)s)')
    add_option('--range',
               metavar='line',
               dest='line_range',
               default=None,
               type=int,
               nargs=2,
               help='only fix errors found within this inclusive '
                    'range of line numbers (e.g. 1 99); '
                    'line numbers are indexed at 1')
    add_option('--indent-size',
               default=DEFAULT_INDENT_SIZE,
               type=int,
               metavar='n',
               help='number of spaces per indent level '
                    '(default %(default)s)')
    add_option('files',
               nargs='*',
               help="files to format or '-' for standard in")

    return parser
3201
3202
def parse_args(arguments):
    """Parse command-line options.

    Besides parsing, this rejects invalid combinations of options through
    parser.error() and normalizes the result: file names are decoded, the
    comma-separated select, ignore and exclude values are split into lists
    and a jobs value below 1 is replaced by the CPU count.

    """
    parser = create_parser()
    args = parser.parse_args(arguments)

    if not (args.files or args.list_fixes):
        parser.error('incorrect number of arguments')

    args.files = [decode_filename(name) for name in args.files]

    if '-' in args.files:
        if len(args.files) > 1:
            parser.error('cannot mix stdin and regular files')

        stdin_conflicts = (
            (args.diff, '--diff cannot be used with standard input'),
            (args.in_place, '--in-place cannot be used with standard input'),
            (args.recursive,
             '--recursive cannot be used with standard input'),
        )
        for enabled, message in stdin_conflicts:
            if enabled:
                parser.error(message)

    writes_or_diffs = args.in_place or args.diff

    if len(args.files) > 1 and not writes_or_diffs:
        parser.error('autopep8 only takes one filename as argument '
                     'unless the "--in-place" or "--diff" args are '
                     'used')

    if args.recursive and not writes_or_diffs:
        parser.error('--recursive must be used with --in-place or --diff')

    if args.exclude and not args.recursive:
        parser.error('--exclude is only relevant when used with --recursive')

    if args.in_place and args.diff:
        parser.error('--in-place and --diff are mutually exclusive')

    if args.max_line_length <= 0:
        parser.error('--max-line-length must be greater than 0')

    if args.select:
        args.select = args.select.split(',')

    if args.ignore:
        args.ignore = args.ignore.split(',')
    elif not args.select:
        # Neither given: fall back to a default that depends on whether
        # aggressive mode was requested.
        if args.aggressive:
            # Enable everything by default if aggressive.
            args.select = ['E', 'W']
        else:
            args.ignore = DEFAULT_IGNORE.split(',')

    args.exclude = args.exclude.split(',') if args.exclude else []

    if args.jobs < 1:
        # Do not import multiprocessing globally in case it is not supported
        # on the platform.
        import multiprocessing
        args.jobs = multiprocessing.cpu_count()

    if args.jobs > 1 and not args.in_place:
        parser.error('parallel jobs requires --in-place')

    if args.line_range:
        range_start = args.line_range[0]
        if range_start <= 0:
            parser.error('--range must be positive numbers')
        if range_start > args.line_range[1]:
            parser.error('First value of --range should be less than or equal '
                         'to the second')

    return args
3277
3278
def decode_filename(filename):
    """Return Unicode filename.

    A name that is not unicode already is decoded using the file system
    encoding.

    """
    if not isinstance(filename, unicode):
        filename = filename.decode(sys.getfilesystemencoding())
    return filename
3285
3286
def supported_fixes():
    """Yield pep8 error codes that autopep8 fixes.

    Each item we yield is a tuple of the code followed by its
    description. The description is the summary line of the docstring of
    whatever implements the fix.

    """
    def padded(code):
        # Upper-case the code and pad it with spaces to four characters.
        return code.upper() + (4 - len(code)) * ' '

    def one_line(text):
        # Collapse every run of whitespace into a single space.
        return re.sub(r'\s+', ' ', text)

    yield ('E101', docstring_summary(reindent.__doc__))

    # The fixes implemented as fix_<code> methods of FixPEP8.
    instance = FixPEP8(filename=None, options=None, contents='')
    for attribute in dir(instance):
        match = re.match('fix_([ew][0-9][0-9][0-9])', attribute)
        if not match:
            continue
        yield (
            match.group(1).upper(),
            one_line(docstring_summary(getattr(instance, attribute).__doc__))
        )

    # The module-level fix_<code> functions.
    for (code, function) in sorted(global_fixes()):
        yield (padded(code),
               one_line(docstring_summary(function.__doc__)))

    # The 2to3 based fixes all share the description of fix_2to3.
    for code in sorted(CODE_TO_2TO3):
        yield (padded(code),
               one_line(docstring_summary(fix_2to3.__doc__)))
3313
3314
def docstring_summary(docstring):
    """Return summary of docstring.

    The summary is the first line of docstring. An empty string is returned
    when docstring is None or empty, which is what __doc__ is when
    docstrings are missing or have been stripped (python -OO).

    """
    if not docstring:
        return ''
    return docstring.split('\n')[0]
3318
3319
def line_shortening_rank(candidate, indent_word, max_line_length,
                         experimental=False):
    """Return rank of candidate.

    This is for sorting candidates. Undesirable traits add to the rank
    (penalties) and preferred traits subtract from it; the result is
    never negative.

    candidate is the proposed source text, possibly spanning several
    lines separated by newlines. A candidate consisting only of
    whitespace ranks 0. Only the length of indent_word is used (to
    detect "lonely" opening lines). max_line_length is the limit that
    overly long lines are measured against. experimental raises the
    penalty for a backslash continuation whose joined length is not
    below the limit from 1 to 100.

    A candidate whose first line is empty or blank (but which has
    content on later lines) is ranked normally rather than raising
    IndexError.

    """
    if not candidate.strip():
        return 0

    rank = 0
    lines = candidate.split('\n')
    first_line = lines[0]

    offset = 0
    if (
        not first_line.lstrip().startswith('#') and
        # endswith() rather than [-1] so that a blank first line is safe.
        not first_line.rstrip().endswith(('(', '[', '{'))
    ):
        for (opening, closing) in ('()', '[]', '{}'):
            # Don't penalize empty containers that aren't split up. Things like
            # this "foo(\n    )" aren't particularly good.
            opening_loc = first_line.find(opening)
            closing_loc = first_line.find(closing)
            if opening_loc >= 0:
                if closing_loc < 0 or closing_loc != opening_loc + 1:
                    offset = max(offset, 1 + opening_loc)

    current_longest = max(offset + len(x.strip()) for x in lines)

    rank += 4 * max(0, current_longest - max_line_length)

    rank += len(lines)

    # Too much variation in line length is ugly.
    rank += 2 * standard_deviation(len(line) for line in lines)

    # Closing bracket matching an opening bracket that ends the first line.
    # Slicing ([-1:]) yields '' for an empty first line, which maps to None.
    bad_starting_symbol = {
        '(': ')',
        '[': ']',
        '{': '}'}.get(first_line[-1:])

    if len(lines) > 1:
        if (
            bad_starting_symbol and
            lines[1].lstrip().startswith(bad_starting_symbol)
        ):
            rank += 20

    for lineno, current_line in enumerate(lines):
        current_line = current_line.strip()

        if current_line.startswith('#'):
            continue

        for bad_start in ['.', '%', '+', '-', '/']:
            if current_line.startswith(bad_start):
                rank += 100

            # Do not tolerate operators on their own line.
            if current_line == bad_start:
                rank += 1000

        if current_line.endswith(('(', '[', '{', '.')):
            # Avoid lonely opening. They result in longer lines.
            if len(current_line) <= len(indent_word):
                rank += 100

            # Avoid the ugliness of ", (\n".
            if (
                current_line.endswith('(') and
                current_line[:-1].rstrip().endswith(',')
            ):
                rank += 100

            # Also avoid the ugliness of "foo.\nbar"
            if current_line.endswith('.'):
                rank += 100

            if has_arithmetic_operator(current_line):
                rank += 100

        if current_line.endswith(('%', '(', '[', '{')):
            rank -= 20

        # Try to break list comprehensions at the "for".
        if current_line.startswith('for '):
            rank -= 50

        if current_line.endswith('\\'):
            # If a line ends in \-newline, it may be part of a
            # multiline string. In that case, we would like to know
            # how long that line is without the \-newline. If it's
            # longer than the maximum, or has comments, then we assume
            # that the \-newline is an okay candidate and only
            # penalize it a bit.
            total_len = len(current_line)
            # lineno doubles as a look-ahead cursor here; enumerate()
            # rebinds it at the start of the next outer iteration.
            lineno += 1
            while lineno < len(lines):
                total_len += len(lines[lineno])

                if lines[lineno].lstrip().startswith('#'):
                    total_len = max_line_length
                    break

                if not lines[lineno].endswith('\\'):
                    break

                lineno += 1

            if total_len < max_line_length:
                rank += 10
            else:
                rank += 100 if experimental else 1

        # Prefer breaking at commas rather than colon.
        if ',' in current_line and current_line.endswith(':'):
            rank += 10

        rank += 10 * count_unbalanced_brackets(current_line)

    return max(0, rank)
3441
3442
def standard_deviation(numbers):
    """Return the population standard deviation of numbers.

    numbers may be any iterable (it is materialized first). An empty
    iterable yields 0.

    """
    values = list(numbers)
    count = len(values)
    if count == 0:
        return 0

    average = sum(values) / count
    variance = sum((value - average) ** 2 for value in values) / count
    return variance ** .5
3451
3452
def has_arithmetic_operator(line):
    """Return True if any entry of pep8.ARITHMETIC_OP occurs in line."""
    return any(symbol in line for symbol in pep8.ARITHMETIC_OP)
3460
3461
def count_unbalanced_brackets(line):
    """Return number of unmatched open/close brackets.

    For each of (), [] and {} the absolute difference between the
    number of opening and closing characters is taken; the three
    differences are added up. Ordering of the brackets is not checked.

    """
    return sum(
        abs(line.count(pair[0]) - line.count(pair[1]))
        for pair in ('()', '[]', '{}')
    )
3469
3470
def split_at_offsets(line, offsets):
    """Split line at offsets.

    Return list of strings.

    offsets may be given in any order and may contain duplicates; they
    are processed in sorted order. Every piece except the last one is
    stripped of surrounding whitespace. Offsets at or beyond the end of
    the line are ignored, so no text is ever dropped from the tail of
    the line.

    """
    result = []

    previous_offset = 0
    for current_offset in sorted(offsets):
        if current_offset >= len(line):
            # Offsets are sorted, so every remaining one is out of range
            # too. Leave previous_offset alone so the final slice still
            # returns the rest of the line.
            break

        if current_offset != previous_offset:
            result.append(line[previous_offset:current_offset].strip())
        previous_offset = current_offset

    result.append(line[previous_offset:])

    return result
3489
3490
class LineEndingWrapper(object):

    r"""Normalize line endings before handing text to a wrapped stream.

    Written for sys.stdout, which appears to expect '\n' as the only line
    ending on every platform; passing other endings through results in
    repeated line endings.

    """

    def __init__(self, output):
        # The wrapped stream; it must provide write() and flush().
        self.__output = output

    def write(self, s):
        """Write s with every '\r\n' and lone '\r' turned into '\n'."""
        # Handle '\r\n' first so that it does not become two newlines.
        normalized = s.replace('\r\n', '\n')
        normalized = normalized.replace('\r', '\n')
        self.__output.write(normalized)

    def flush(self):
        """Flush the wrapped stream."""
        self.__output.flush()
3508
3509
def match_file(filename, exclude):
    """Return True if file is okay for modifying/recursing.

    A path is rejected when its base name starts with a dot, when its
    base name matches any fnmatch pattern in exclude, or when it is
    neither a directory nor a Python file.

    """
    base_name = os.path.basename(filename)

    if base_name.startswith('.'):
        return False

    if any(fnmatch.fnmatch(base_name, pattern) for pattern in exclude):
        return False

    # Directories are always acceptable; anything else must look like Python.
    return os.path.isdir(filename) or is_python_file(filename)
3525
3526
def find_files(filenames, recursive, exclude):
    """Yield filenames.

    filenames is an iterable of paths; it is copied, so the caller's
    object is left untouched. When recursive is true, each directory
    among them is walked: files accepted by match_file() are queued to
    be yielded later, and sub-directories rejected by match_file() are
    pruned from the walk. Every other name (including directories when
    recursive is false) is yielded as is. exclude is the list of
    patterns handed to match_file().

    """
    from collections import deque

    # Work on a private queue: popping from the left of a deque is O(1) and
    # the argument is not drained as a side effect.
    pending = deque(filenames)
    while pending:
        name = pending.popleft()
        if recursive and os.path.isdir(name):
            for root, directories, children in os.walk(name):
                pending.extend(
                    os.path.join(root, f) for f in children
                    if match_file(os.path.join(root, f), exclude))
                # In-place assignment so that os.walk() skips the
                # excluded directories.
                directories[:] = [d for d in directories
                                  if match_file(os.path.join(root, d),
                                                exclude)]
        else:
            yield name
3541
3542
def _fix_file(parameters):
    """Run fix_file() on one packed argument tuple.

    parameters holds the positional arguments of fix_file(): the file
    name, the options and optionally an output stream. Taking a single
    tuple lets the function be used with a process pool's map(). The
    file name is announced on stderr in verbose mode, and an IOError is
    reported on stderr instead of being propagated.

    """
    options = parameters[1]
    if options.verbose:
        filename = parameters[0]
        print('[file:{0}]'.format(filename), file=sys.stderr)

    try:
        fix_file(*parameters)
    except IOError as error:
        print(unicode(error), file=sys.stderr)
3551
3552
def fix_multiple_files(filenames, options, output=None):
    """Fix list of files.

    Optionally fix files recursively.

    The names are expanded with find_files() using options.recursive
    and options.exclude. When options.jobs is greater than 1 the files
    are processed by a multiprocessing pool of that many workers; in
    that mode output is not passed on to the workers. Otherwise the
    files are processed one by one and output is forwarded to
    fix_file() through _fix_file().

    """
    filenames = find_files(filenames, options.recursive, options.exclude)
    if options.jobs > 1:
        import multiprocessing
        pool = multiprocessing.Pool(options.jobs)
        try:
            pool.map(_fix_file,
                     [(name, options) for name in filenames])
        except BaseException:
            # Do not wait for workers that may never finish after a
            # failure or an interruption.
            pool.terminate()
            raise
        else:
            # All work is done; let the workers exit normally.
            pool.close()
        finally:
            # Reap the worker processes so none are left behind.
            pool.join()
    else:
        for name in filenames:
            _fix_file((name, options, output))
3568
3569
def is_python_file(filename):
    """Return True if filename is Python file.

    A name ending in '.py' is accepted without touching the file.
    Otherwise the first line of the file is read and must match
    PYTHON_SHEBANG_REGEX; a file that cannot be read or is empty is not
    a Python file.

    """
    if filename.endswith('.py'):
        return True

    try:
        with open_with_encoding(filename) as input_file:
            # readlines(1) returns [] for an empty file -> IndexError.
            first_line = input_file.readlines(1)[0]
    except (IOError, IndexError):
        return False

    return bool(PYTHON_SHEBANG_REGEX.match(first_line))
3585
3586
def is_probably_part_of_multiline(line):
    """Return True if line is likely part of a multiline string.

    pep8 reports errors inside a multiline string at the position where
    the string starts, which is not usable for fixing. A line is
    considered suspicious when it contains a triple quote or, ignoring
    trailing whitespace, ends with a backslash.

    """
    if '"""' in line or "'''" in line:
        return True

    return line.rstrip().endswith('\\')
3599
3600
def main():
    """Tool main.

    Parses sys.argv and then either lists the supported fixes, fixes
    code read from standard input, or fixes the named files. Returns 0
    after listing the fixes, 1 when interrupted from the keyboard, and
    None otherwise.

    """
    try:
        # Exit on broken pipe.
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
    except AttributeError:  # pragma: no cover
        # SIGPIPE is not available on Windows.
        pass

    try:
        args = parse_args(sys.argv[1:])

        if args.list_fixes:
            for code, description in sorted(supported_fixes()):
                entry = '{code} - {description}'.format(
                    code=code, description=description)
                print(entry)
            return 0

        reading_stdin = args.files == ['-']
        if not reading_stdin:
            if not (args.in_place or args.diff):
                # Output goes to stdout, so only one plain file makes sense.
                assert len(args.files) == 1
                assert not args.recursive
            else:
                # Drop duplicate names so that no file is handled twice.
                args.files = list(set(args.files))

            fix_multiple_files(args.files, args, sys.stdout)
        else:
            assert not args.in_place

            # LineEndingWrapper is unnecessary here due to the symmetry between
            # standard in and standard out.
            sys.stdout.write(fix_code(sys.stdin.read(), args))
    except KeyboardInterrupt:
        return 1  # pragma: no cover
3635
3636
class CachedTokenizer(object):

    """Remember the token list of the most recently tokenized text.

    Wraps tokenize.generate_tokens() with a cache holding exactly one
    entry. Original code written by Ned Batchelder, in coverage.py.

    """

    def __init__(self):
        # Text of the last call and the token list produced for it.
        self.last_text = None
        self.last_tokens = None

    def generate_tokens(self, text):
        """A stand-in for tokenize.generate_tokens().

        Returns a list (not a generator); the same list object is
        returned again while text equals the previously seen text.

        """
        if text == self.last_text:
            return self.last_tokens

        readline = io.StringIO(text).readline
        tokens = list(tokenize.generate_tokens(readline))
        # Only update the cache once tokenizing has succeeded.
        self.last_tokens = tokens
        self.last_text = text
        return tokens

# Module-wide tokenizer instance; generate_tokens is its bound method.
_cached_tokenizer = CachedTokenizer()
generate_tokens = _cached_tokenizer.generate_tokens
3661
3662
if __name__ == '__main__':
    # Use the value returned by main() as the process exit status.
    exit_status = main()
    sys.exit(exit_status)