1 # Copyright (C) 2010-2011 Hideo Hattori
2 # Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
3 # Copyright (C) 2013-2014 Hideo Hattori, Steven Myint, Bill Wendling
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 """Automatically formats Python code to conform to the PEP 8 style guide.
27 Fixes that only need to be done once can be added as a function of the form
28 "fix_<code>(source)" to this module. They should return the fixed source code.
29 These fixes are picked up by apply_global_fixes().
31 Fixes that depend on pep8 should be added as methods to FixPEP8. See the class
32 documentation for more information.
36 from __future__ import absolute_import
37 from __future__ import division
38 from __future__ import print_function
39 from __future__ import unicode_literals
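# Illustrative sketch of the global-fix convention described in the module
# docstring (the function name and body here are hypothetical, not one of the
# real fixers): a module-level "fix_<code>(source)" function receives the full
# source text and returns the fixed text, and apply_global_fixes() picks it up
# by name.
#
#     def fix_x999(source):
#         # Hypothetical fix: strip trailing whitespace from every line.
#         return ''.join(line.rstrip() + '\n' for line in source.splitlines())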
76 PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')
79 # For generating line shortening candidates.
80 SHORTEN_OPERATOR_GROUPS = frozenset([
83 frozenset([',', '(', '[', '{']),
84 frozenset(['%', '(', '[', '{']),
85 frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
86 frozenset(['%', '+', '-', '*', '/', '//']),
90 DEFAULT_IGNORE = 'E24'
91 DEFAULT_INDENT_SIZE = 4
94 # W602 is handled separately due to the need to avoid "with_traceback".
116 def open_with_encoding(filename, encoding=None, mode='r'):
117 """Return opened file with a specific encoding."""
119 encoding = detect_encoding(filename)
121 return io.open(filename, mode=mode, encoding=encoding,
122 newline='') # Preserve line endings
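# Example usage (illustrative; 'example.py' is a hypothetical path): read a
# file using its detected encoding while preserving its line endings.
#
#     with open_with_encoding('example.py') as input_file:
#         contents = input_file.read()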
125 def detect_encoding(filename):
126 """Return file encoding."""
128 with open(filename, 'rb') as input_file:
129 from lib2to3.pgen2 import tokenize as lib2to3_tokenize
130 encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]
132 # Check for correctness of encoding
133 with open_with_encoding(filename, encoding) as test_file:
137 except (LookupError, SyntaxError, UnicodeDecodeError):
141 def readlines_from_file(filename):
142 """Return contents of file."""
143 with open_with_encoding(filename) as input_file:
144 return input_file.readlines()
147 def extended_blank_lines(logical_line,
151 """Check for missing blank lines after class declaration."""
152 if previous_logical.startswith('class '):
154 logical_line.startswith(('def ', 'class ', '@')) or
155 pep8.DOCSTRING_REGEX.match(logical_line)
157 if indent_level and not blank_lines:
158 yield (0, 'E309 expected 1 blank line after class declaration')
159 elif previous_logical.startswith('def '):
160 if blank_lines and pep8.DOCSTRING_REGEX.match(logical_line):
161 yield (0, 'E303 too many blank lines ({0})'.format(blank_lines))
162 elif pep8.DOCSTRING_REGEX.match(previous_logical):
163 # Missing blank line between class docstring and method declaration.
167 logical_line.startswith(('def ')) and
168 '(self' in logical_line
170 yield (0, 'E301 expected 1 blank line, found 0')
171 pep8.register_check(extended_blank_lines)
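# Illustrative input for the extended_blank_lines() check above (the class is
# hypothetical): a method that directly follows the class declaration with no
# blank line in between triggers E309.
#
#     class Foo(object):
#         def bar(self):  # E309 expected 1 blank line after class declaration
#             pass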
174 def continued_indentation(logical_line, tokens, indent_level, indent_char,
176 """Override pep8's function to provide indentation information."""
177 first_row = tokens[0][2][0]
178 nrows = 1 + tokens[-1][2][0] - first_row
179 if noqa or nrows == 1:
182 # indent_next tells us whether the next block is indented. Assuming
183 # that it is indented by 4 spaces, we should not allow 4-space
184 # indents on the final continuation line. In turn, some other
185 # indents are allowed to have an extra 4 spaces.
186 indent_next = logical_line.endswith(':')
190 (DEFAULT_INDENT_SIZE,)
191 if indent_char != '\t' else (DEFAULT_INDENT_SIZE,
192 2 * DEFAULT_INDENT_SIZE)
195 # Remember how many brackets were opened on each line.
198 # Relative indents of physical lines.
199 rel_indent = [0] * nrows
201 # For each depth, collect a list of opening rows.
203 # For each depth, memorize the hanging indentation.
208 last_indent = tokens[0][2]
209 indent = [last_indent[1]]
211 last_token_multiline = None
214 last_line_begins_with_multiline = False
215 for token_type, text, start, end, line in tokens:
217 newline = row < start[0] - first_row
219 row = start[0] - first_row
220 newline = (not last_token_multiline and
221 token_type not in (tokenize.NL, tokenize.NEWLINE))
222 last_line_begins_with_multiline = last_token_multiline
225 # This is the beginning of a continuation line.
228 # Record the initial indent.
229 rel_indent[row] = pep8.expand_indent(line) - indent_level
231 # Identify closing bracket.
232 close_bracket = (token_type == tokenize.OP and text in ']})')
234 # Is the indent relative to an opening bracket line?
235 for open_row in reversed(open_rows[depth]):
236 hang = rel_indent[row] - rel_indent[open_row]
237 hanging_indent = hang in valid_hangs
241 hanging_indent = (hang == hangs[depth])
243 visual_indent = (not close_bracket and hang > 0 and
244 indent_chances.get(start[1]))
246 if close_bracket and indent[depth]:
247 # Closing bracket for visual indent.
248 if start[1] != indent[depth]:
249 yield (start, 'E124 {0}'.format(indent[depth]))
250 elif close_bracket and not hang:
252 elif indent[depth] and start[1] < indent[depth]:
253 # Visual indent is broken.
254 yield (start, 'E128 {0}'.format(indent[depth]))
255 elif (hanging_indent or
257 rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)):
258 # Hanging indent is verified.
260 yield (start, 'E123 {0}'.format(indent_level +
261 rel_indent[open_row]))
263 elif visual_indent is True:
264 # Visual indent is verified.
265 indent[depth] = start[1]
266 elif visual_indent in (text, unicode):
267 # Ignore token lined up with matching one from a previous line.
270 one_indented = (indent_level + rel_indent[open_row] +
274 error = ('E122', one_indented)
276 error = ('E127', indent[depth])
277 elif hang > DEFAULT_INDENT_SIZE:
278 error = ('E126', one_indented)
281 error = ('E121', one_indented)
283 yield (start, '{0} {1}'.format(*error))
285 # Look for visual indenting.
286 if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
287 and not indent[depth]):
288 indent[depth] = start[1]
289 indent_chances[start[1]] = True
290 # Deal with implicit string concatenation.
291 elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
292 text in ('u', 'ur', 'b', 'br')):
293 indent_chances[start[1]] = unicode
294 # Special case for the "if" statement because len("if (") is equal to
296 elif not indent_chances and not row and not depth and text == 'if':
297 indent_chances[end[1] + 1] = True
298 elif text == ':' and line[end[1]:].isspace():
299 open_rows[depth].append(row)
301 # Keep track of bracket depth.
302 if token_type == tokenize.OP:
307 if len(open_rows) == depth:
309 open_rows[depth].append(row)
311 elif text in ')]}' and depth > 0:
312 # Parent indents should not be more than this one.
313 prev_indent = indent.pop() or last_indent[1]
315 for d in range(depth):
316 if indent[d] > prev_indent:
318 for ind in list(indent_chances):
319 if ind >= prev_indent:
320 del indent_chances[ind]
321 del open_rows[depth + 1:]
324 indent_chances[indent[depth]] = True
325 for idx in range(row, -1, -1):
329 assert len(indent) == depth + 1
331 start[1] not in indent_chances and
332 # This is for purposes of speeding up E121 (GitHub #90).
333 not last_line.rstrip().endswith(',')
335 # Allow lining up tokens.
336 indent_chances[start[1]] = text
338 last_token_multiline = (start[0] != end[0])
339 if last_token_multiline:
340 rel_indent[end[0] - first_row] = rel_indent[row]
346 not last_line_begins_with_multiline and
347 pep8.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE
349 pos = (start[0], indent[0] + 4)
350 yield (pos, 'E125 {0}'.format(indent_level +
351 2 * DEFAULT_INDENT_SIZE))
352 del pep8._checks['logical_line'][pep8.continued_indentation]
353 pep8.register_check(continued_indentation)
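# Note: unlike the stock pep8 check, the override above embeds the desired
# indent column in the message text (e.g. 'E127 8'); _fix_reindent() later
# recovers that column with int(result['info'].split()[1]).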
356 class FixPEP8(object):
360 Fixer methods are prefixed "fix_". The _fix_source() method looks for these
363 The fixer method can take either one or two arguments (in addition to
364 self). The first argument is "result", which is the error information from
365 pep8. The second argument, "logical", is required only for logical-line
368 The fixer method can return the list of modified lines or None. An empty
369 list would mean that no changes were made. None would mean that only the
370 line reported in the pep8 error was modified. Note that the modified line
371 numbers that are returned are indexed at 1. This typically would correspond
372 with the line number reported in the pep8 error information.
375 - e121,e122,e123,e124,e125,e126,e127,e128,e129
378 - e221,e222,e223,e224,e225
382 - e271,e272,e273,e274
392 def __init__(self, filename,
395 long_line_ignore_cache=None):
396 self.filename = filename
398 self.source = readlines_from_file(filename)
400 sio = io.StringIO(contents)
401 self.source = sio.readlines()
402 self.options = options
403 self.indent_word = _get_indentword(''.join(self.source))
405 self.long_line_ignore_cache = (
406 set() if long_line_ignore_cache is None
407 else long_line_ignore_cache)
409 # Many fixers are the same even though pep8 categorizes them
411 self.fix_e115 = self.fix_e112
412 self.fix_e116 = self.fix_e113
413 self.fix_e121 = self._fix_reindent
414 self.fix_e122 = self._fix_reindent
415 self.fix_e123 = self._fix_reindent
416 self.fix_e124 = self._fix_reindent
417 self.fix_e126 = self._fix_reindent
418 self.fix_e127 = self._fix_reindent
419 self.fix_e128 = self._fix_reindent
420 self.fix_e129 = self._fix_reindent
421 self.fix_e202 = self.fix_e201
422 self.fix_e203 = self.fix_e201
423 self.fix_e211 = self.fix_e201
424 self.fix_e221 = self.fix_e271
425 self.fix_e222 = self.fix_e271
426 self.fix_e223 = self.fix_e271
427 self.fix_e226 = self.fix_e225
428 self.fix_e227 = self.fix_e225
429 self.fix_e228 = self.fix_e225
430 self.fix_e241 = self.fix_e271
431 self.fix_e242 = self.fix_e224
432 self.fix_e261 = self.fix_e262
433 self.fix_e272 = self.fix_e271
434 self.fix_e273 = self.fix_e271
435 self.fix_e274 = self.fix_e271
436 self.fix_e309 = self.fix_e301
438 self.fix_long_line_logically if
439 options and (options.aggressive >= 2 or options.experimental) else
440 self.fix_long_line_physically)
441 self.fix_e703 = self.fix_e702
443 self._ws_comma_done = False
445 def _fix_source(self, results):
447 (logical_start, logical_end) = _find_logical(self.source)
448 logical_support = True
449 except (SyntaxError, tokenize.TokenError): # pragma: no cover
450 logical_support = False
452 completed_lines = set()
453 for result in sorted(results, key=_priority_key):
454 if result['line'] in completed_lines:
457 fixed_methodname = 'fix_' + result['id'].lower()
458 if hasattr(self, fixed_methodname):
459 fix = getattr(self, fixed_methodname)
461 line_index = result['line'] - 1
462 original_line = self.source[line_index]
464 is_logical_fix = len(inspect.getargspec(fix).args) > 2
468 logical = _get_logical(self.source,
472 if logical and set(range(
474 logical[1][0] + 1)).intersection(
478 modified_lines = fix(result, logical)
480 modified_lines = fix(result)
482 if modified_lines is None:
483 # Force logical fixes to report what they modified.
484 assert not is_logical_fix
486 if self.source[line_index] == original_line:
490 completed_lines.update(modified_lines)
491 elif modified_lines == []: # Empty list means no fix
492 if self.options.verbose >= 2:
494 '---> Not fixing {f} on line {l}'.format(
495 f=result['id'], l=result['line']),
497 else: # We assume one-line fix when None.
498 completed_lines.add(result['line'])
500 if self.options.verbose >= 3:
502 "---> '{0}' is not defined.".format(fixed_methodname),
505 info = result['info'].strip()
506 print('---> {0}:{1}:{2}:{3}'.format(self.filename,
513 """Return a version of the source code with PEP 8 violations fixed."""
515 'ignore': self.options.ignore,
516 'select': self.options.select,
517 'max_line_length': self.options.max_line_length,
519 results = _execute_pep8(pep8_options, self.source)
521 if self.options.verbose:
524 if r['id'] not in progress:
525 progress[r['id']] = set()
526 progress[r['id']].add(r['line'])
527 print('---> {n} issue(s) to fix {progress}'.format(
528 n=len(results), progress=progress), file=sys.stderr)
530 if self.options.line_range:
531 start, end = self.options.line_range
532 results = [r for r in results
533 if start <= r['line'] <= end]
535 self._fix_source(filter_results(source=''.join(self.source),
537 aggressive=self.options.aggressive))
539 if self.options.line_range:
540 # If number of lines has changed then change line_range.
541 count = sum(sline.count('\n')
542 for sline in self.source[start - 1:end])
543 self.options.line_range[1] = start + count - 1
545 return ''.join(self.source)
547 def _fix_reindent(self, result):
548 """Fix a badly indented line.
550 This is done by adding to or removing from its initial indent only.
553 num_indent_spaces = int(result['info'].split()[1])
554 line_index = result['line'] - 1
555 target = self.source[line_index]
557 self.source[line_index] = ' ' * num_indent_spaces + target.lstrip()
559 def fix_e112(self, result):
560 """Fix under-indented comments."""
561 line_index = result['line'] - 1
562 target = self.source[line_index]
564 if not target.lstrip().startswith('#'):
565 # Don't screw with invalid syntax.
568 self.source[line_index] = self.indent_word + target
570 def fix_e113(self, result):
571 """Fix over-indented comments."""
572 line_index = result['line'] - 1
573 target = self.source[line_index]
575 indent = _get_indentation(target)
576 stripped = target.lstrip()
578 if not stripped.startswith('#'):
579 # Don't screw with invalid syntax.
582 self.source[line_index] = indent[1:] + stripped
584 def fix_e125(self, result):
585 """Fix indentation undistinguish from the next logical line."""
586 num_indent_spaces = int(result['info'].split()[1])
587 line_index = result['line'] - 1
588 target = self.source[line_index]
590 spaces_to_add = num_indent_spaces - len(_get_indentation(target))
591 indent = len(_get_indentation(target))
594 while len(_get_indentation(self.source[line_index])) >= indent:
595 self.source[line_index] = (' ' * spaces_to_add +
596 self.source[line_index])
597 modified_lines.append(1 + line_index) # Line indexed at 1.
600 return modified_lines
602 def fix_e201(self, result):
603 """Remove extraneous whitespace."""
604 line_index = result['line'] - 1
605 target = self.source[line_index]
606 offset = result['column'] - 1
608 if is_probably_part_of_multiline(target):
611 fixed = fix_whitespace(target,
615 self.source[line_index] = fixed
617 def fix_e224(self, result):
618 """Remove extraneous whitespace around operator."""
619 target = self.source[result['line'] - 1]
620 offset = result['column'] - 1
621 fixed = target[:offset] + target[offset:].replace('\t', ' ')
622 self.source[result['line'] - 1] = fixed
624 def fix_e225(self, result):
625 """Fix missing whitespace around operator."""
626 target = self.source[result['line'] - 1]
627 offset = result['column'] - 1
628 fixed = target[:offset] + ' ' + target[offset:]
630 # Only proceed if non-whitespace characters match.
631 # And make sure we don't break the indentation.
633 fixed.replace(' ', '') == target.replace(' ', '') and
634 _get_indentation(fixed) == _get_indentation(target)
636 self.source[result['line'] - 1] = fixed
640 def fix_e231(self, result):
641 """Add missing whitespace."""
642 # Optimize for comma case. This will fix all commas in the full source
643 # code in one pass. Don't do this more than once. If it fails the first
644 # time, there is no point in trying again.
645 if ',' in result['info'] and not self._ws_comma_done:
646 self._ws_comma_done = True
647 original = ''.join(self.source)
648 new = refactor(original, ['ws_comma'])
649 if original.strip() != new.strip():
651 return range(1, 1 + len(original))
653 line_index = result['line'] - 1
654 target = self.source[line_index]
655 offset = result['column']
656 fixed = target[:offset] + ' ' + target[offset:]
657 self.source[line_index] = fixed
659 def fix_e251(self, result):
660 """Remove whitespace around parameter '=' sign."""
661 line_index = result['line'] - 1
662 target = self.source[line_index]
664 # This is necessary since pep8 sometimes reports columns that go
665 # past the end of the physical line. This happens in cases like,
667 c = min(result['column'] - 1,
670 if target[c].strip():
673 fixed = target[:c].rstrip() + target[c:].lstrip()
675 # There could be an escaped newline
679 if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')):
680 self.source[line_index] = fixed.rstrip('\n\r \t\\')
681 self.source[line_index + 1] = self.source[line_index + 1].lstrip()
682 return [line_index + 1, line_index + 2] # Line indexed at 1
684 self.source[result['line'] - 1] = fixed
686 def fix_e262(self, result):
687 """Fix spacing after comment hash."""
688 target = self.source[result['line'] - 1]
689 offset = result['column']
691 code = target[:offset].rstrip(' \t#')
692 comment = target[offset:].lstrip(' \t#')
694 fixed = code + (' # ' + comment if comment.strip() else '\n')
696 self.source[result['line'] - 1] = fixed
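# Illustrative effect of fix_e262() above (hypothetical input): an inline
# comment such as "x = 1  ## comment" is rewritten so that the comment text
# follows a single "# " after the code, i.e. "x = 1 # comment".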
698 def fix_e271(self, result):
699 """Fix extraneous whitespace around keywords."""
700 line_index = result['line'] - 1
701 target = self.source[line_index]
702 offset = result['column'] - 1
704 if is_probably_part_of_multiline(target):
707 fixed = fix_whitespace(target,
714 self.source[line_index] = fixed
716 def fix_e301(self, result):
717 """Add missing blank line."""
719 self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]
721 def fix_e302(self, result):
722 """Add missing 2 blank lines."""
723 add_linenum = 2 - int(result['info'].split()[-1])
724 cr = '\n' * add_linenum
725 self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]
727 def fix_e303(self, result):
728 """Remove extra blank lines."""
729 delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2
730 delete_linenum = max(1, delete_linenum)
732 # We need to count because pep8 reports an offset line number if there
735 line = result['line'] - 2
737 while cnt < delete_linenum and line >= 0:
738 if not self.source[line].strip():
739 self.source[line] = ''
740 modified_lines.append(1 + line) # Line indexed at 1
744 return modified_lines
746 def fix_e304(self, result):
747 """Remove blank line following function decorator."""
748 line = result['line'] - 2
749 if not self.source[line].strip():
750 self.source[line] = ''
752 def fix_e401(self, result):
753 """Put imports on separate lines."""
754 line_index = result['line'] - 1
755 target = self.source[line_index]
756 offset = result['column'] - 1
758 if not target.lstrip().startswith('import'):
761 indentation = re.split(pattern=r'\bimport\b',
762 string=target, maxsplit=1)[0]
763 fixed = (target[:offset].rstrip('\t ,') + '\n' +
764 indentation + 'import ' + target[offset:].lstrip('\t ,'))
765 self.source[line_index] = fixed
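# Illustrative effect of fix_e401() above (hypothetical input):
#
#     import os, sys    ->    import os
#                             import sys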
767 def fix_long_line_logically(self, result, logical):
768 """Try to make lines fit within --max-line-length characters."""
771 len(logical[2]) == 1 or
772 self.source[result['line'] - 1].lstrip().startswith('#')
774 return self.fix_long_line_physically(result)
776 start_line_index = logical[0][0]
777 end_line_index = logical[1][0]
778 logical_lines = logical[2]
780 previous_line = get_item(self.source, start_line_index - 1, default='')
781 next_line = get_item(self.source, end_line_index + 1, default='')
783 single_line = join_logical_line(''.join(logical_lines))
786 fixed = self.fix_long_line(
788 previous_line=previous_line,
790 original=''.join(logical_lines))
791 except (SyntaxError, tokenize.TokenError):
792 return self.fix_long_line_physically(result)
795 for line_index in range(start_line_index, end_line_index + 1):
796 self.source[line_index] = ''
797 self.source[start_line_index] = fixed
798 return range(start_line_index + 1, end_line_index + 1)
802 def fix_long_line_physically(self, result):
803 """Try to make lines fit within --max-line-length characters."""
804 line_index = result['line'] - 1
805 target = self.source[line_index]
807 previous_line = get_item(self.source, line_index - 1, default='')
808 next_line = get_item(self.source, line_index + 1, default='')
811 fixed = self.fix_long_line(
813 previous_line=previous_line,
816 except (SyntaxError, tokenize.TokenError):
820 self.source[line_index] = fixed
821 return [line_index + 1]
825 def fix_long_line(self, target, previous_line,
826 next_line, original):
827 cache_entry = (target, previous_line, next_line)
828 if cache_entry in self.long_line_ignore_cache:
831 if target.lstrip().startswith('#'):
832 # Wrap commented lines.
833 return shorten_comment(
835 max_line_length=self.options.max_line_length,
836 last_comment=not next_line.lstrip().startswith('#'))
838 fixed = get_fixed_long_line(
840 previous_line=previous_line,
842 indent_word=self.indent_word,
843 max_line_length=self.options.max_line_length,
844 aggressive=self.options.aggressive,
845 experimental=self.options.experimental,
846 verbose=self.options.verbose)
847 if fixed and not code_almost_equal(original, fixed):
850 self.long_line_ignore_cache.add(cache_entry)
853 def fix_e502(self, result):
854 """Remove extraneous escape of newline."""
855 line_index = result['line'] - 1
856 target = self.source[line_index]
857 self.source[line_index] = target.rstrip('\n\r \t\\') + '\n'
859 def fix_e701(self, result):
860 """Put colon-separated compound statement on separate lines."""
861 line_index = result['line'] - 1
862 target = self.source[line_index]
865 fixed_source = (target[:c] + '\n' +
866 _get_indentation(target) + self.indent_word +
867 target[c:].lstrip('\n\r \t\\'))
868 self.source[result['line'] - 1] = fixed_source
869 return [result['line'], result['line'] + 1]
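# Illustrative effect of fix_e701() above (hypothetical input):
#
#     if x: y = 1       ->    if x:
#                                 y = 1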
871 def fix_e702(self, result, logical):
872 """Put semicolon-separated compound statement on separate lines."""
874 return [] # pragma: no cover
875 logical_lines = logical[2]
877 line_index = result['line'] - 1
878 target = self.source[line_index]
880 if target.rstrip().endswith('\\'):
881 # Normalize '1; \\\n2' into '1; 2'.
882 self.source[line_index] = target.rstrip('\n \r\t\\')
883 self.source[line_index + 1] = self.source[line_index + 1].lstrip()
884 return [line_index + 1, line_index + 2]
886 if target.rstrip().endswith(';'):
887 self.source[line_index] = target.rstrip('\n \r\t;') + '\n'
888 return [line_index + 1]
890 offset = result['column'] - 1
891 first = target[:offset].rstrip(';').rstrip()
892 second = (_get_indentation(logical_lines[0]) +
893 target[offset:].lstrip(';').lstrip())
895 self.source[line_index] = first + '\n' + second
896 return [line_index + 1]
898 def fix_e711(self, result):
899 """Fix comparison with None."""
900 line_index = result['line'] - 1
901 target = self.source[line_index]
902 offset = result['column'] - 1
904 right_offset = offset + 2
905 if right_offset >= len(target):
908 left = target[:offset].rstrip()
909 center = target[offset:right_offset]
910 right = target[right_offset:].lstrip()
912 if not right.startswith('None'):
915 if center.strip() == '==':
917 elif center.strip() == '!=':
918 new_center = 'is not'
922 self.source[line_index] = ' '.join([left, new_center, right])
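# Illustrative effect of fix_e711() above (hypothetical input):
# "if x == None:" becomes "if x is None:", and "if x != None:" becomes
# "if x is not None:".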
924 def fix_e712(self, result):
925 """Fix comparison with boolean."""
926 line_index = result['line'] - 1
927 target = self.source[line_index]
928 offset = result['column'] - 1
930 # Handle very easy "not" special cases.
931 if re.match(r'^\s*if \w+ == False:$', target):
932 self.source[line_index] = re.sub(r'if (\w+) == False:',
933 r'if not \1:', target, count=1)
934 elif re.match(r'^\s*if \w+ != True:$', target):
935 self.source[line_index] = re.sub(r'if (\w+) != True:',
936 r'if not \1:', target, count=1)
938 right_offset = offset + 2
939 if right_offset >= len(target):
942 left = target[:offset].rstrip()
943 center = target[offset:right_offset]
944 right = target[right_offset:].lstrip()
946 # Handle simple cases only.
948 if center.strip() == '==':
949 if re.match(r'\bTrue\b', right):
950 new_right = re.sub(r'\bTrue\b *', '', right, count=1)
951 elif center.strip() == '!=':
952 if re.match(r'\bFalse\b', right):
953 new_right = re.sub(r'\bFalse\b *', '', right, count=1)
955 if new_right is None:
958 if new_right[0].isalnum():
959 new_right = ' ' + new_right
961 self.source[line_index] = left + new_right
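# Illustrative effect of the "not" special case at the top of fix_e712()
# above (hypothetical input): "if x == False:" becomes "if not x:".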
963 def fix_e713(self, result):
964 """Fix non-membership check."""
965 line_index = result['line'] - 1
966 target = self.source[line_index]
968 # Handle very easy case only.
969 if re.match(r'^\s*if not \w+ in \w+:$', target):
970 self.source[line_index] = re.sub(r'if not (\w+) in (\w+):',
975 def fix_w291(self, result):
976 """Remove trailing whitespace."""
977 fixed_line = self.source[result['line'] - 1].rstrip()
978 self.source[result['line'] - 1] = fixed_line + '\n'
981 def get_fixed_long_line(target, previous_line, original,
982 indent_word=' ', max_line_length=79,
983 aggressive=False, experimental=False, verbose=False):
984 """Break up long line and return result.
986 Do this by generating multiple reformatted candidates and then
987 ranking the candidates to heuristically select the best option.
990 indent = _get_indentation(target)
991 source = target[len(indent):]
992 assert source.lstrip() == source
994 # Check for partial multiline.
995 tokens = list(generate_tokens(source))
997 candidates = shorten_line(
998 tokens, source, indent,
1001 aggressive=aggressive,
1002 experimental=experimental,
1003 previous_line=previous_line)
1005 # Also sort alphabetically as a tie breaker (for determinism).
1006 candidates = sorted(
1007 sorted(set(candidates).union([target, original])),
1008 key=lambda x: line_shortening_rank(x,
1014 print(('-' * 79 + '\n').join([''] + candidates + ['']),
1015 file=codecs.getwriter('utf-8')(sys.stderr.buffer
1016 if hasattr(sys.stderr,
1021 return candidates[0]
1024 def join_logical_line(logical_line):
1025 """Return single line based on logical line input."""
1026 indentation = _get_indentation(logical_line)
1028 return indentation + untokenize_without_newlines(
1029 generate_tokens(logical_line.lstrip())) + '\n'
1032 def untokenize_without_newlines(tokens):
1033 """Return source code based on tokens."""
1040 (start_row, start_column) = t[2]
1041 (end_row, end_column) = t[3]
1043 if start_row > last_row:
1046 (start_column > last_column or token_string == '\n') and
1047 not text.endswith(' ')
1051 if token_string != '\n':
1052 text += token_string
1055 last_column = end_column
1060 def _find_logical(source_lines):
1061 # Build lists of the positions where logical lines start and end.
1066 for t in generate_tokens(''.join(source_lines)):
1067 if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
1068 tokenize.INDENT, tokenize.NL,
1069 tokenize.ENDMARKER]:
1071 if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
1073 logical_end.append((t[3][0] - 1, t[2][1]))
1075 if last_newline and not parens:
1076 logical_start.append((t[2][0] - 1, t[2][1]))
1077 last_newline = False
1078 if t[0] == tokenize.OP:
1083 return (logical_start, logical_end)
1086 def _get_logical(source_lines, result, logical_start, logical_end):
1087 """Return the logical line corresponding to the result.
1089 Assumes input is already E702-clean.
1092 row = result['line'] - 1
1093 col = result['column'] - 1
1096 for i in range(0, len(logical_start), 1):
1099 if x[0] > row or (x[0] == row and x[1] > col):
1101 ls = logical_start[i]
1105 original = source_lines[ls[0]:le[0] + 1]
1106 return ls, le, original
1109 def get_item(items, index, default=None):
1110 if 0 <= index < len(items):
1116 def reindent(source, indent_size):
1117 """Reindent all lines."""
1118 reindenter = Reindenter(source)
1119 return reindenter.run(indent_size)
1122 def code_almost_equal(a, b):
1123 """Return True if code is similar.
1125 Whitespace is ignored when comparing individual lines.
1128 split_a = split_and_strip_non_empty_lines(a)
1129 split_b = split_and_strip_non_empty_lines(b)
1131 if len(split_a) != len(split_b):
1134 for index in range(len(split_a)):
1135 if ''.join(split_a[index].split()) != ''.join(split_b[index].split()):
1141 def split_and_strip_non_empty_lines(text):
1142 """Return lines split by newline.
1147 return [line.strip() for line in text.splitlines() if line.strip()]
1150 def fix_e265(source, aggressive=False): # pylint: disable=unused-argument
1151 """Format block comments."""
1152 if '#' not in source:
1156 ignored_line_numbers = multiline_string_lines(
1158 include_docstrings=True) | set(commented_out_code_lines(source))
1161 sio = io.StringIO(source)
1162 for (line_number, line) in enumerate(sio.readlines(), start=1):
1164 line.lstrip().startswith('#') and
1165 line_number not in ignored_line_numbers
1167 indentation = _get_indentation(line)
1168 line = line.lstrip()
1170 # Normalize beginning if not a shebang.
1173 # Leave multiple spaces like '# ' alone.
1174 (line.count('#') > 1 or line[1].isalnum())
1175 # Leave stylistic outlined blocks alone.
1176 and not line.rstrip().endswith('#')
1178 line = '# ' + line.lstrip('# \t')
1180 fixed_lines.append(indentation + line)
1182 fixed_lines.append(line)
1184 return ''.join(fixed_lines)
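# Illustrative effect of fix_e265() above (hypothetical input): a block
# comment written as "#comment" becomes "# comment", while shebang lines and
# stylistic outlined blocks ending in "#" are left alone.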
1187 def refactor(source, fixer_names, ignore=None):
1188 """Return refactored code using lib2to3.
1190 Skip the refactoring if it introduces the ignore string.
1193 from lib2to3 import pgen2
1195 new_text = refactor_with_2to3(source,
1196 fixer_names=fixer_names)
1197 except (pgen2.parse.ParseError,
1200 UnicodeEncodeError):
1204 if ignore in new_text and ignore not in source:
1210 def code_to_2to3(select, ignore):
1212 for code, fix in CODE_TO_2TO3.items():
1213 if code_match(code, select=select, ignore=ignore):
1218 def fix_2to3(source, aggressive=True, select=None, ignore=None):
1219 """Fix various deprecated code (via lib2to3)."""
1223 select = select or []
1224 ignore = ignore or []
1226 return refactor(source,
1227 code_to_2to3(select=select,
1231 def fix_w602(source, aggressive=True):
1232 """Fix deprecated form of raising exception."""
1236 return refactor(source, ['raise'],
1237 ignore='with_traceback')
1240 def find_newline(source):
1241 """Return type of newline used in source.
1243 Input is a list of lines.
1246 assert not isinstance(source, unicode)
1248 counter = collections.defaultdict(int)
1250 if line.endswith(CRLF):
1252 elif line.endswith(CR):
1254 elif line.endswith(LF):
1257 return (sorted(counter, key=counter.get, reverse=True) or [LF])[0]
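# Illustrative behavior of find_newline() above: if most lines end in '\r\n'
# and a few end in '\n', CRLF wins as the most frequent ending; with no line
# endings at all, LF is returned as the default.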
1260 def _get_indentword(source):
1261 """Return indentation type."""
1262 indent_word = ' ' # Default in case source has no indentation
1264 for t in generate_tokens(source):
1265 if t[0] == token.INDENT:
1268 except (SyntaxError, tokenize.TokenError):
1273 def _get_indentation(line):
1274 """Return leading whitespace."""
1276 non_whitespace_index = len(line) - len(line.lstrip())
1277 return line[:non_whitespace_index]
1282 def get_diff_text(old, new, filename):
1283 """Return text of unified diff between old and new."""
1285 diff = difflib.unified_diff(
1287 'original/' + filename,
1288 'fixed/' + filename,
1295 # Work around missing newline (http://bugs.python.org/issue2142).
1296 if text and not line.endswith(newline):
1297 text += newline + r'\ No newline at end of file' + newline
1302 def _priority_key(pep8_result):
1303 """Key for sorting PEP8 results.
1305 Global fixes should be done first. This is important for things like
1310 # Fix multiline colon-based statements before semicolon-based ones.
1312 # Break multiline statements early.
1314 # Things that make lines longer.
1316 # Remove extraneous whitespace before breaking lines.
1318 # Shorten whitespace in comment before resorting to wrapping.
1321 middle_index = 10000
1323 # We need to shorten lines last since the logical fixer can get in a
1324 # loop, which causes us to exit early.
1327 key = pep8_result['id'].lower()
1329 return priority.index(key)
1332 return middle_index + lowest_priority.index(key) + 1
1337 def shorten_line(tokens, source, indentation, indent_word, max_line_length,
1338 aggressive=False, experimental=False, previous_line=''):
1339 """Separate line at OPERATOR.
1341 Multiple candidates will be yielded.
1344 for candidate in _shorten_line(tokens=tokens,
1346 indentation=indentation,
1347 indent_word=indent_word,
1348 aggressive=aggressive,
1349 previous_line=previous_line):
1353 for key_token_strings in SHORTEN_OPERATOR_GROUPS:
1354 shortened = _shorten_line_at_tokens(
1357 indentation=indentation,
1358 indent_word=indent_word,
1359 key_token_strings=key_token_strings,
1360 aggressive=aggressive)
1362 if shortened is not None and shortened != source:
1366 for shortened in _shorten_line_at_tokens_new(
1369 indentation=indentation,
1370 max_line_length=max_line_length):
1375 def _shorten_line(tokens, source, indentation, indent_word,
1376 aggressive=False, previous_line=''):
1377 """Separate line at OPERATOR.
1379 The input is expected to be free of newlines except for inside multiline
1380 strings and at the end.
1382 Multiple candidates will be yielded.
1388 end_offset) in token_offsets(tokens):
1391 token_type == tokenize.COMMENT and
1392 not is_probably_part_of_multiline(previous_line) and
1393 not is_probably_part_of_multiline(source) and
1394 not source[start_offset + 1:].strip().lower().startswith(
1395 ('noqa', 'pragma:', 'pylint:'))
1397 # Move inline comments to previous line.
1398 first = source[:start_offset]
1399 second = source[start_offset:]
1400 yield (indentation + second.strip() + '\n' +
1401 indentation + first.strip() + '\n')
1402 elif token_type == token.OP and token_string != '=':
1403 # Don't break on '=' after a keyword, as this violates PEP 8.
1405 assert token_type != token.INDENT
1407 first = source[:end_offset]
1409 second_indent = indentation
1410 if first.rstrip().endswith('('):
1411 second_indent += indent_word
1413 second_indent += ' ' * (1 + first.find('('))
1415 second_indent += indent_word
1417 second = (second_indent + source[end_offset:].lstrip())
1419 not second.strip() or
1420 second.lstrip().startswith('#')
1424 # Do not begin a line with a comma
1425 if second.lstrip().startswith(','):
1428 # Do not end a line with a dot.
1428 if first.rstrip().endswith('.'):
1430 if token_string in '+-*/':
1431 fixed = first + ' \\' + '\n' + second
1433 fixed = first + '\n' + second
1435 # Only fix if syntax is okay.
1436 if check_syntax(normalize_multiline(fixed)
1437 if aggressive else fixed):
1438 yield indentation + fixed
1441 # A convenient way to handle tokens.
1442 Token = collections.namedtuple('Token', ['token_type', 'token_string',
1443 'spos', 'epos', 'line'])
1446 class ReformattedLines(object):
1448 """The reflowed lines of atoms.
1450 Each part of the line is represented as an "atom." They can be moved
1451 around as needed to get the optimal formatting.
1455 ###########################################################################
1458 class _Indent(object):
1460 """Represent an indentation in the atom stream."""
1462 def __init__(self, indent_amt):
1463 self._indent_amt = indent_amt
1466 return ' ' * self._indent_amt
1470 return self._indent_amt
1472 class _Space(object):
1474 """Represent a space in the atom stream."""
1483 class _LineBreak(object):
1485 """Represent a line break in the atom stream."""
1494 def __init__(self, max_line_length):
1495 self._max_line_length = max_line_length
1497 self._bracket_depth = 0
1498 self._prev_item = None
1499 self._prev_prev_item = None
1504 ###########################################################################
1507 def add(self, obj, indent_amt, break_after_open_bracket):
1508 if isinstance(obj, Atom):
1509 self._add_item(obj, indent_amt)
1512 self._add_container(obj, indent_amt, break_after_open_bracket)
1514 def add_comment(self, item):
1516 if len(self._lines) > 1:
1517 if isinstance(self._lines[-1], self._Space):
1519 if len(self._lines) > 2:
1520 if isinstance(self._lines[-2], self._Space):
1523 while num_spaces > 0:
1524 self._lines.append(self._Space())
1526 self._lines.append(item)
1528 def add_indent(self, indent_amt):
1529 self._lines.append(self._Indent(indent_amt))
1531 def add_line_break(self, indent):
1532 self._lines.append(self._LineBreak())
1533 self.add_indent(len(indent))
1535 def add_line_break_at(self, index, indent_amt):
1536 self._lines.insert(index, self._LineBreak())
1537 self._lines.insert(index + 1, self._Indent(indent_amt))
1539 def add_space_if_needed(self, curr_text, equal=False):
1541 not self._lines or isinstance(
1542 self._lines[-1], (self._LineBreak, self._Indent, self._Space))
1546 prev_text = unicode(self._prev_item)
1548 unicode(self._prev_prev_item) if self._prev_prev_item else '')
1551 # The previous item was a keyword or identifier and the current
1552 # item isn't an operator that doesn't require a space.
1553 ((self._prev_item.is_keyword or self._prev_item.is_string or
1554 self._prev_item.is_name or self._prev_item.is_number) and
1555 (curr_text[0] not in '([{.,:}])' or
1556 (curr_text[0] == '=' and equal))) or
1558 # Don't place spaces around a '.', unless it's in an 'import'
1560 ((prev_prev_text != 'from' and prev_text[-1] != '.' and
1561 curr_text != 'import') and
1563 # Don't place a space before a colon.
1564 curr_text[0] != ':' and
1566 # Don't split up ending brackets by spaces.
1567 ((prev_text[-1] in '}])' and curr_text[0] not in '.,}])') or
1569 # Put a space after a colon or comma.
1570 prev_text[-1] in ':,' or
1572 # Put space around '=' if asked to.
1573 (equal and prev_text == '=') or
1575 # Put spaces around non-unary arithmetic operators.
1576 ((self._prev_prev_item and
1577 (prev_text not in '+-' and
1578 (self._prev_prev_item.is_name or
1579 self._prev_prev_item.is_number or
1580 self._prev_prev_item.is_string)) and
1581 prev_text in ('+', '-', '%', '*', '/', '//', '**')))))
1583 self._lines.append(self._Space())
1585 def previous_item(self):
1586 """Return the previous non-whitespace item."""
1587 return self._prev_item
1589 def fits_on_current_line(self, item_extent):
1590 return self.current_size() + item_extent <= self._max_line_length
1592 def current_size(self):
1593 """The size of the current line minus the indentation."""
1595 for item in reversed(self._lines):
1597 if isinstance(item, self._LineBreak):
1602 def line_empty(self):
1603 return (self._lines and
1604 isinstance(self._lines[-1],
1605 (self._LineBreak, self._Indent)))
1609 for item in self._lines:
1610 if isinstance(item, self._LineBreak):
1611 string = string.rstrip()
1612 string += item.emit()
1614 return string.rstrip() + '\n'
1616 ###########################################################################
1619 def _add_item(self, item, indent_amt):
1620 """Add an item to the line.
1622 Reflow the line to get the best formatting after the item is
1623 inserted. The bracket depth indicates if the item is being
1624 inserted inside of a container or not.
1627 if self._prev_item and self._prev_item.is_string and item.is_string:
1628 # Place consecutive string literals on separate lines.
1629 self._lines.append(self._LineBreak())
1630 self._lines.append(self._Indent(indent_amt))
1632 item_text = unicode(item)
1633 if self._lines and self._bracket_depth:
1634 # Adding the item into a container.
1635 self._prevent_default_initializer_splitting(item, indent_amt)
1637 if item_text in '.,)]}':
1638 self._split_after_delimiter(item, indent_amt)
1640 elif self._lines and not self.line_empty():
1641 # Adding the item outside of a container.
1642 if self.fits_on_current_line(len(item_text)):
1643 self._enforce_space(item)
1646 # Line break for the new item.
1647 self._lines.append(self._LineBreak())
1648 self._lines.append(self._Indent(indent_amt))
1650 self._lines.append(item)
1651 self._prev_item, self._prev_prev_item = item, self._prev_item
1653 if item_text in '([{':
1654 self._bracket_depth += 1
1656 elif item_text in '}])':
1657 self._bracket_depth -= 1
1658 assert self._bracket_depth >= 0
1660 def _add_container(self, container, indent_amt, break_after_open_bracket):
1661 actual_indent = indent_amt + 1
1664 unicode(self._prev_item) != '=' and
1665 not self.line_empty() and
1666 not self.fits_on_current_line(
1667 container.size + self._bracket_depth + 2)
1670 if unicode(container)[0] == '(' and self._prev_item.is_name:
1671 # Don't split before the opening bracket of a call.
1672 break_after_open_bracket = True
1673 actual_indent = indent_amt + 4
1675 break_after_open_bracket or
1676 unicode(self._prev_item) not in '([{'
1678 # If the container doesn't fit on the current line and the
1679 # current line isn't empty, place the container on the next
1681 self._lines.append(self._LineBreak())
1682 self._lines.append(self._Indent(indent_amt))
1683 break_after_open_bracket = False
1685 actual_indent = self.current_size() + 1
1686 break_after_open_bracket = False
1688 if isinstance(container, (ListComprehension, IfExpression)):
1689 actual_indent = indent_amt
1691 # Increase the continued indentation only if recursing on a
1693 container.reflow(self, ' ' * actual_indent,
1694 break_after_open_bracket=break_after_open_bracket)
1696 def _prevent_default_initializer_splitting(self, item, indent_amt):
1697 """Prevent splitting between a default initializer.
1699 When there is a default initializer, it's best to keep it all on
1700 the same line. It's nicer and more readable, even if it goes
1701 over the maximum allowable line length. This goes back along the
1702 current line to determine if we have a default initializer, and,
1703 if so, to remove extraneous whitespace and add a line
1704 break/indent before it if needed.
1707 if unicode(item) == '=':
1708 # This is the assignment in the initializer. Just remove spaces for
1710 self._delete_whitespace()
1713 if (not self._prev_item or not self._prev_prev_item or
1714 unicode(self._prev_item) != '='):
1717 self._delete_whitespace()
1718 prev_prev_index = self._lines.index(self._prev_prev_item)
1721 isinstance(self._lines[prev_prev_index - 1], self._Indent) or
1722 self.fits_on_current_line(item.size + 1)
1724 # The default initializer is already the only item on this line.
1725 # Don't insert a newline here.
1728 # Replace the space with a newline/indent combo.
1729 if isinstance(self._lines[prev_prev_index - 1], self._Space):
1730 del self._lines[prev_prev_index - 1]
1732 self.add_line_break_at(self._lines.index(self._prev_prev_item),
1735 def _split_after_delimiter(self, item, indent_amt):
1736 """Split the line only after a delimiter."""
1737 self._delete_whitespace()
1739 if self.fits_on_current_line(item.size):
1743 for item in reversed(self._lines):
1746 (not isinstance(item, Atom) or not item.is_colon)
1751 if isinstance(item, self._Space):
1753 if isinstance(item, (self._LineBreak, self._Indent)):
1759 self.add_line_break_at(self._lines.index(last_space), indent_amt)
1761 def _enforce_space(self, item):
1762 """Enforce a space in certain situations.
1764 There are cases where we will want a space where normally we
1765 wouldn't put one. This just enforces the addition of a space.
1768 if isinstance(self._lines[-1],
1769 (self._Space, self._LineBreak, self._Indent)):
1772 if not self._prev_item:
1775 item_text = unicode(item)
1776 prev_text = unicode(self._prev_item)
1778 # Prefer a space around a '.' in an import statement, and between the
1781 (item_text == '.' and prev_text == 'from') or
1782 (item_text == 'import' and prev_text == '.') or
1783 (item_text == '(' and prev_text == 'import')
1785 self._lines.append(self._Space())
1787 def _delete_whitespace(self):
1788 """Delete all whitespace from the end of the line."""
1789 while isinstance(self._lines[-1], (self._Space, self._LineBreak,
1796 """The smallest unbreakable unit that can be reflowed."""
1798 def __init__(self, atom):
1802 return self._atom.token_string
1808 self, reflowed_lines, continued_indent, extent,
1809 break_after_open_bracket=False,
1810 is_list_comp_or_if_expr=False,
1813 if self._atom.token_type == tokenize.COMMENT:
1814 reflowed_lines.add_comment(self)
1817 total_size = extent if extent else self.size
1819 if self._atom.token_string not in ',:([{}])':
1820 # Some atoms will need an extra 1-sized space token after them.
1823 prev_item = reflowed_lines.previous_item()
1825 not is_list_comp_or_if_expr and
1826 not reflowed_lines.fits_on_current_line(total_size) and
1827 not (next_is_dot and
1828 reflowed_lines.fits_on_current_line(self.size + 1)) and
1829 not reflowed_lines.line_empty() and
1830 not self.is_colon and
1831 not (prev_item and prev_item.is_name and
1832 unicode(self) == '(')
1834 # Start a new line if there is already something on the line and
1835 # adding this atom would make it go over the max line length.
1836 reflowed_lines.add_line_break(continued_indent)
1838 reflowed_lines.add_space_if_needed(unicode(self))
1840 reflowed_lines.add(self, len(continued_indent),
1841 break_after_open_bracket)
1844 return self.__repr__()
1847 def is_keyword(self):
1848 return keyword.iskeyword(self._atom.token_string)
1851 def is_string(self):
1852 return self._atom.token_type == tokenize.STRING
1856 return self._atom.token_type == tokenize.NAME
1859 def is_number(self):
1860 return self._atom.token_type == tokenize.NUMBER
1864 return self._atom.token_string == ','
1868 return self._atom.token_string == ':'
1872 return len(self._atom.token_string)
1875 class Container(object):
1877 """Base class for all container types."""
1879 def __init__(self, items):
1884 last_was_keyword = False
1886 for item in self._items:
1892 item_string = unicode(item)
1895 (last_was_keyword or
1896 (not string.endswith(tuple('([{,.:}]) ')) and
1897 not item_string.startswith(tuple('([{,.:}])'))))
1900 string += item_string
1902 last_was_keyword = item.is_keyword
1906 for element in self._items:
1909 def __getitem__(self, idx):
1910 return self._items[idx]
1912 def reflow(self, reflowed_lines, continued_indent,
1913 break_after_open_bracket=False):
1914 last_was_container = False
1915 for (index, item) in enumerate(self._items):
1916 next_item = get_item(self._items, index + 1)
1918 if isinstance(item, Atom):
1919 is_list_comp_or_if_expr = (
1920 isinstance(self, (ListComprehension, IfExpression)))
1921 item.reflow(reflowed_lines, continued_indent,
1922 self._get_extent(index),
1923 is_list_comp_or_if_expr=is_list_comp_or_if_expr,
1924 next_is_dot=(next_item and
1925 unicode(next_item) == '.'))
1926 if last_was_container and item.is_comma:
1927 reflowed_lines.add_line_break(continued_indent)
1928 last_was_container = False
1929 else: # isinstance(item, Container)
1930 reflowed_lines.add(item, len(continued_indent),
1931 break_after_open_bracket)
1932 last_was_container = not isinstance(item, (ListComprehension,
1936 break_after_open_bracket and index == 0 and
1937 # Prefer to keep empty containers together instead of
1939 unicode(item) == self.open_bracket and
1940 (not next_item or unicode(next_item) != self.close_bracket) and
1941 (len(self._items) != 3 or not isinstance(next_item, Atom))
1943 reflowed_lines.add_line_break(continued_indent)
1944 break_after_open_bracket = False
1946 next_next_item = get_item(self._items, index + 2)
1948 unicode(item) not in ['.', '%', 'in'] and
1949 next_item and not isinstance(next_item, Container) and
1950 unicode(next_item) != ':' and
1951 next_next_item and (not isinstance(next_next_item, Atom) or
1952 unicode(next_item) == 'not') and
1953 not reflowed_lines.line_empty() and
1954 not reflowed_lines.fits_on_current_line(
1955 self._get_extent(index + 1) + 2)
1957 reflowed_lines.add_line_break(continued_indent)
1959 def _get_extent(self, index):
1960 """The extent of the full element.
1962 E.g., the length of a function call or keyword.
1966 prev_item = get_item(self._items, index - 1)
1967 seen_dot = prev_item and unicode(prev_item) == '.'
1968 while index < len(self._items):
1969 item = get_item(self._items, index)
1972 if isinstance(item, (ListComprehension, IfExpression)):
1975 if isinstance(item, Container):
1976 if prev_item and prev_item.is_name:
1984 elif (unicode(item) not in ['.', '=', ':', 'not'] and
1985 not item.is_name and not item.is_string):
1988 if unicode(item) == '.':
1997 def is_string(self):
2002 return len(self.__repr__())
2005 def is_keyword(self):
2021 def open_bracket(self):
2025 def close_bracket(self):
2029 class Tuple(Container):
2031 """A high-level representation of a tuple."""
2034 def open_bracket(self):
2038 def close_bracket(self):
2042 class List(Container):
2044 """A high-level representation of a list."""
2047 def open_bracket(self):
2051 def close_bracket(self):
2055 class DictOrSet(Container):
2057 """A high-level representation of a dictionary or set."""
2060 def open_bracket(self):
2064 def close_bracket(self):
2068 class ListComprehension(Container):
2070 """A high-level representation of a list comprehension."""
2075 for item in self._items:
2076 if isinstance(item, IfExpression):
2082 class IfExpression(Container):
2084 """A high-level representation of an if-expression."""
2087 def _parse_container(tokens, index, for_or_if=None):
2088 """Parse a high-level container, such as a list, tuple, etc."""
2090 # Store the opening bracket.
2091 items = [Atom(Token(*tokens[index]))]
2094 num_tokens = len(tokens)
2095 while index < num_tokens:
2096 tok = Token(*tokens[index])
2098 if tok.token_string in ',)]}':
2099 # First check if we're at the end of a list comprehension or
2100 # if-expression. Don't add the ending token as part of the list
2101 # comprehension or if-expression, because they aren't part of those
2103 if for_or_if == 'for':
2104 return (ListComprehension(items), index - 1)
2106 elif for_or_if == 'if':
2107 return (IfExpression(items), index - 1)
2109 # We've reached the end of a container.
2110 items.append(Atom(tok))
2112 # Determine which kind of container has just ended.
2113 if tok.token_string == ')':
2114 # The end of a tuple.
2115 return (Tuple(items), index)
2117 elif tok.token_string == ']':
2118 # The end of a list.
2119 return (List(items), index)
2121 elif tok.token_string == '}':
2122 # The end of a dictionary or set.
2123 return (DictOrSet(items), index)
2125 elif tok.token_string in '([{':
2126 # A sub-container is being defined.
2127 (container, index) = _parse_container(tokens, index)
2128 items.append(container)
2130 elif tok.token_string == 'for':
2131 (container, index) = _parse_container(tokens, index, 'for')
2132 items.append(container)
2134 elif tok.token_string == 'if':
2135 (container, index) = _parse_container(tokens, index, 'if')
2136 items.append(container)
2139 items.append(Atom(tok))
2146 def _parse_tokens(tokens):
2147 """Parse the tokens.
2149 This converts the tokens into a form where we can manipulate them
2157 num_tokens = len(tokens)
2158 while index < num_tokens:
2159 tok = Token(*tokens[index])
2161 assert tok.token_type != token.INDENT
2162 if tok.token_type == tokenize.NEWLINE:
2163 # There's only one newline and it's at the end.
2166 if tok.token_string in '([{':
2167 (container, index) = _parse_container(tokens, index)
2170 parsed_tokens.append(container)
2172 parsed_tokens.append(Atom(tok))
2176 return parsed_tokens
2179 def _reflow_lines(parsed_tokens, indentation, max_line_length,
2180 start_on_prefix_line):
2181 """Reflow the lines so that it looks nice."""
2183 if unicode(parsed_tokens[0]) == 'def':
2184 # A function definition gets indented a bit more.
2185 continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE
2187 continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE
2189 break_after_open_bracket = not start_on_prefix_line
2191 lines = ReformattedLines(max_line_length)
2192 lines.add_indent(len(indentation.lstrip('\r\n')))
2194 if not start_on_prefix_line:
2195 # If splitting after the opening bracket will cause the first element
2196 # to be aligned weirdly, don't try it.
2197 first_token = get_item(parsed_tokens, 0)
2198 second_token = get_item(parsed_tokens, 1)
2201 first_token and second_token and
2202 unicode(second_token)[0] == '(' and
2203 len(indentation) + len(first_token) + 1 == len(continued_indent)
2207 for item in parsed_tokens:
2208 lines.add_space_if_needed(unicode(item), equal=True)
2210 save_continued_indent = continued_indent
2211 if start_on_prefix_line and isinstance(item, Container):
2212 start_on_prefix_line = False
2213 continued_indent = ' ' * (lines.current_size() + 1)
2215 item.reflow(lines, continued_indent, break_after_open_bracket)
2216 continued_indent = save_continued_indent
2221 def _shorten_line_at_tokens_new(tokens, source, indentation,
2223 """Shorten the line taking its length into account.
2225 The input is expected to be free of newlines except for inside
2226 multiline strings and at the end.
2229 # Yield the original source so we can see if it's a better choice than the
2230 # shortened candidate lines we generate here.
2231 yield indentation + source
2233 parsed_tokens = _parse_tokens(tokens)
2236 # Perform two reflows. The first one starts on the same line as the
2237 # prefix. The second starts on the line after the prefix.
2238 fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
2239 start_on_prefix_line=True)
2240 if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
2243 fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
2244 start_on_prefix_line=False)
2245 if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
2249 def _shorten_line_at_tokens(tokens, source, indentation, indent_word,
2250 key_token_strings, aggressive):
2251 """Separate line by breaking at tokens in key_token_strings.
2253 The input is expected to be free of newlines except for inside
2254 multiline strings and at the end.
2258 for (index, _t) in enumerate(token_offsets(tokens)):
2264 assert token_type != token.INDENT
2266 if token_string in key_token_strings:
2267 # Do not break in containers with zero or one item.
2268 unwanted_next_token = {
2271 '{': '}'}.get(token_string)
2272 if unwanted_next_token:
2276 default=[None, None])[1] == unwanted_next_token or
2279 default=[None, None])[1] == unwanted_next_token
2284 index > 2 and token_string == '(' and
2285 tokens[index - 1][1] in ',(%['
2287 # Don't split after a tuple start, or before a tuple start if
2288 # the tuple is in a list.
2291 if end_offset < len(source) - 1:
2292 # Don't split right before newline.
2293 offsets.append(end_offset)
2295 # Break at adjacent strings. These were probably meant to be on
2296 # separate lines in the first place.
2297 previous_token = get_item(tokens, index - 1)
2299 token_type == tokenize.STRING and
2300 previous_token and previous_token[0] == tokenize.STRING
2302 offsets.append(start_offset)
2304 current_indent = None
2306 for line in split_at_offsets(source, offsets):
2308 fixed += '\n' + current_indent + line
2310 for symbol in '([{':
2311 if line.endswith(symbol):
2312 current_indent += indent_word
2316 assert not current_indent
2317 current_indent = indent_word
2319 assert fixed is not None
2321 if check_syntax(normalize_multiline(fixed)
2322 if aggressive > 1 else fixed):
2323 return indentation + fixed
2328 def token_offsets(tokens):
2329 """Yield tokens and offsets."""
2331 previous_end_row = 0
2332 previous_end_column = 0
2336 (start_row, start_column) = t[2]
2337 (end_row, end_column) = t[3]
2339 # Account for the whitespace between tokens.
2340 end_offset += start_column
2341 if previous_end_row == start_row:
2342 end_offset -= previous_end_column
2344 # Record the start offset of the token.
2345 start_offset = end_offset
2347 # Account for the length of the token itself.
2348 end_offset += len(token_string)
2355 previous_end_row = end_row
2356 previous_end_column = end_column
2359 def normalize_multiline(line):
2360 """Normalize multiline-related code that will cause syntax error.
2362 This is for purposes of checking syntax.
2365 if line.startswith('def ') and line.rstrip().endswith(':'):
2366 return line + ' pass'
2367 elif line.startswith('return '):
2368 return 'def _(): ' + line
2369 elif line.startswith('@'):
2370 return line + 'def _(): pass'
2371 elif line.startswith('class '):
2372 return line + ' pass'
2373 elif line.startswith('if '):
2374 return line + ' pass'
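# A rough doctest-style sketch of the behaviour implied by the branches above
# (illustrative inputs): incomplete statements are padded just enough to
# compile on their own.
#
#     >>> normalize_multiline('def foo(x):')
#     'def foo(x): pass'
#     >>> normalize_multiline('return x + 1')
#     'def _(): return x + 1'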
2379 def fix_whitespace(line, offset, replacement):
2380 """Replace whitespace at offset and return fixed line."""
2381 # Replace escaped newlines too
2382 left = line[:offset].rstrip('\n\r \t\\')
2383 right = line[offset:].lstrip('\n\r \t\\')
2384 if right.startswith('#'):
2387 return left + replacement + right
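# A rough doctest-style sketch of the intended behaviour (illustrative input):
# the whitespace run around the given offset is collapsed to the replacement.
#
#     >>> fix_whitespace('x  = 1', offset=1, replacement=' ')
#     'x = 1'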
2390 def _execute_pep8(pep8_options, source):
2391 """Execute pep8 via python method calls."""
2392 class QuietReport(pep8.BaseReport):
2394 """Version of checker that does not print."""
2396 def __init__(self, options):
2397 super(QuietReport, self).__init__(options)
2398 self.__full_error_results = []
2400 def error(self, line_number, offset, text, _):
2401 """Collect errors."""
2402 code = super(QuietReport, self).error(line_number, offset, text, _)
2404 self.__full_error_results.append(
2406 'line': line_number,
2407 'column': offset + 1,
2410 def full_error_results(self):
2411 """Return error results in detail.
2413 Results are in the form of a list of dictionaries. Each
2414 dictionary contains 'id', 'line', 'column', and 'info'.
2417 return self.__full_error_results
2419 checker = pep8.Checker('', lines=source,
2420 reporter=QuietReport, **pep8_options)
2422 return checker.report.full_error_results()
2425 def _remove_leading_and_normalize(line):
2426 return line.lstrip().rstrip(CR + LF) + '\n'
2429 class Reindenter(object):
2431 """Reindents badly-indented code to uniformly use four-space indentation.
2433 Released to the public domain, by Tim Peters, 03 October 2000.
2437 def __init__(self, input_text):
2438 sio = io.StringIO(input_text)
2439 source_lines = sio.readlines()
2441 self.string_content_line_numbers = multiline_string_lines(input_text)
2443 # File lines, rstripped & tab-expanded. Dummy at start is so
2444 # that we can use tokenize's 1-based line numbering easily.
2445 # Note that a line is all-blank iff it is a newline.
2447 for line_number, line in enumerate(source_lines, start=1):
2448 # Do not modify if inside a multiline string.
2449 if line_number in self.string_content_line_numbers:
2450 self.lines.append(line)
2452 # Only expand leading tabs.
2453 self.lines.append(_get_indentation(line).expandtabs() +
2454 _remove_leading_and_normalize(line))
2456 self.lines.insert(0, None)
2457 self.index = 1 # index into self.lines of next line
2458 self.input_text = input_text
2460 def run(self, indent_size=DEFAULT_INDENT_SIZE):
2461 """Fix indentation and return modified line numbers.
2463 Line numbers are indexed at 1.
2467 return self.input_text
2470 stats = _reindent_stats(tokenize.generate_tokens(self.getline))
2471 except (SyntaxError, tokenize.TokenError):
2472 return self.input_text
2473 # Remove trailing empty lines.
2475 while lines and lines[-1] == '\n':
2478 stats.append((len(lines), 0))
2479 # Map count of leading spaces to # we want.
2481 # Program after transformation.
2483 # Copy over initial empty lines -- there's nothing to do until
2484 # we see a line with *something* on it.
2486 after.extend(lines[1:i])
2487 for i in range(len(stats) - 1):
2488 thisstmt, thislevel = stats[i]
2489 nextstmt = stats[i + 1][0]
2490 have = _leading_space_count(lines[thisstmt])
2491 want = thislevel * indent_size
2495 # An indented comment line. If we saw the same
2496 # indentation before, reuse what it most recently
2498 want = have2want.get(have, -1)
2500 # Then it probably belongs to the next real stmt.
2501 for j in range(i + 1, len(stats) - 1):
2502 jline, jlevel = stats[j]
2504 if have == _leading_space_count(lines[jline]):
2505 want = jlevel * indent_size
2507 if want < 0: # Maybe it's a hanging
2508 # comment like this one,
2509 # in which case we should shift it like its base
2511 for j in range(i - 1, -1, -1):
2512 jline, jlevel = stats[j]
2514 want = (have + _leading_space_count(
2516 _leading_space_count(lines[jline]))
2519 # Still no luck -- leave it alone.
2524 have2want[have] = want
2526 if diff == 0 or have == 0:
2527 after.extend(lines[thisstmt:nextstmt])
2529 for line_number, line in enumerate(lines[thisstmt:nextstmt],
2531 if line_number in self.string_content_line_numbers:
2537 after.append(' ' * diff + line)
2539 remove = min(_leading_space_count(line), -diff)
2540 after.append(line[remove:])
2542 return ''.join(after)
2545 """Line-getter for tokenize."""
2546 if self.index >= len(self.lines):
2549 line = self.lines[self.index]
2554 def _reindent_stats(tokens):
2555 """Return list of (lineno, indentlevel) pairs.
2557 One for each stmt and comment line. indentlevel is -1 for comment lines, as
2558 a signal that tokenize doesn't know what to do about them; indeed, they're
2562 find_stmt = 1 # Next token begins a fresh stmt?
2563 level = 0 # Current indent level.
2571 if token_type == tokenize.NEWLINE:
2572 # A program statement, or ENDMARKER, will eventually follow,
2573 # after some (possibly empty) run of tokens of the form
2574 # (NL | COMMENT)* (INDENT | DEDENT+)?
2577 elif token_type == tokenize.INDENT:
2581 elif token_type == tokenize.DEDENT:
2585 elif token_type == tokenize.COMMENT:
2587 stats.append((sline, -1))
2588 # But we're still looking for a new stmt, so leave
2591 elif token_type == tokenize.NL:
2595 # This is the first "real token" following a NEWLINE, so it
2596 # must be the first token of the next program statement, or an
2599 if line: # Not endmarker.
2600 stats.append((sline, level))
2605 def _leading_space_count(line):
2606 """Return number of leading spaces in line."""
2608 while i < len(line) and line[i] == ' ':
2613 def refactor_with_2to3(source_text, fixer_names):
2614 """Use lib2to3 to refactor the source.
2616 Return the refactored source code.
2619 from lib2to3.refactor import RefactoringTool
2620 fixers = ['lib2to3.fixes.fix_' + name for name in fixer_names]
2621 tool = RefactoringTool(fixer_names=fixers, explicit=fixers)
2623 from lib2to3.pgen2 import tokenize as lib2to3_tokenize
2625 return unicode(tool.refactor_string(source_text, name=''))
2626 except lib2to3_tokenize.TokenError:
2630 def check_syntax(code):
2631 """Return True if syntax is okay."""
2633 return compile(code, '<string>', 'exec')
2634 except (SyntaxError, TypeError, UnicodeDecodeError):
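# Rough usage sketch: the compiled code object is truthy for valid source,
# while the (elided) except branch presumably yields a falsy value.
#
#     >>> bool(check_syntax('x = 1\n'))
#     True
#     >>> bool(check_syntax('def :\n'))
#     False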
2638 def filter_results(source, results, aggressive):
2639 """Filter out spurious reports from pep8.
2641 If aggressive is True, we allow possibly unsafe fixes (E711, E712).
2644 non_docstring_string_line_numbers = multiline_string_lines(
2645 source, include_docstrings=False)
2646 all_string_line_numbers = multiline_string_lines(
2647 source, include_docstrings=True)
2649 commented_out_code_line_numbers = commented_out_code_lines(source)
2652 issue_id = r['id'].lower()
2654 if r['line'] in non_docstring_string_line_numbers:
2655 if issue_id.startswith(('e1', 'e501', 'w191')):
2658 if r['line'] in all_string_line_numbers:
2659 if issue_id in ['e501']:
2662 # We must offset by 1 for lines that contain the trailing contents of
2663 # multiline strings.
2664 if not aggressive and (r['line'] + 1) in all_string_line_numbers:
2665         # Do not modify multiline strings in non-aggressive mode. Removing
2666         # trailing whitespace could break doctests.
2667 if issue_id.startswith(('w29', 'w39')):
2671 if issue_id.startswith(('e711', 'w6')):
2675 if issue_id.startswith(('e712', 'e713')):
2678 if r['line'] in commented_out_code_line_numbers:
2679 if issue_id.startswith(('e26', 'e501')):
2685 def multiline_string_lines(source, include_docstrings=False):
2686 """Return line numbers that are within multiline strings.
2688 The line numbers are indexed at 1.
2690     Docstrings are ignored unless include_docstrings is True.
2693 line_numbers = set()
2694 previous_token_type = ''
2696 for t in generate_tokens(source):
2701 if token_type == tokenize.STRING and start_row != end_row:
2703 include_docstrings or
2704 previous_token_type != tokenize.INDENT
2706 # We increment by one since we want the contents of the
2708 line_numbers |= set(range(1 + start_row, 1 + end_row))
2710 previous_token_type = token_type
2711 except (SyntaxError, tokenize.TokenError):
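# A rough doctest-style sketch with an illustrative snippet: the assignment's
# triple-quoted string spans rows 1-4, so rows 2-4 should be reported.
#
#     >>> sorted(multiline_string_lines('x = """\n1\n2\n"""\n'))
#     [2, 3, 4]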
2717 def commented_out_code_lines(source):
2718 """Return line numbers of comments that are likely code.
2720 Commented-out code is bad practice, but modifying it just adds even more
2726 for t in generate_tokens(source):
2732 # Ignore inline comments.
2733 if not line.lstrip().startswith('#'):
2736 if token_type == tokenize.COMMENT:
2737 stripped_line = token_string.lstrip('#').strip()
2739 ' ' in stripped_line and
2740 '#' not in stripped_line and
2741 check_syntax(stripped_line)
2743 line_numbers.append(start_row)
2744 except (SyntaxError, tokenize.TokenError):
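# Rough sketch with an illustrative snippet: only the comment that parses as
# code (and contains a space but no '#') should be reported.
#
#     >>> commented_out_code_lines('x = 1\n# x = 2\n# just a note\n')
#     [2]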
2750 def shorten_comment(line, max_line_length, last_comment=False):
2751 """Return trimmed or split long comment line.
2753 If there are no comments immediately following it, do a text wrap.
2754 Doing this wrapping on all comments in general would lead to jagged
2758 assert len(line) > max_line_length
2759 line = line.rstrip()
2761 # PEP 8 recommends 72 characters for comment text.
2762 indentation = _get_indentation(line) + '# '
2763 max_line_length = min(max_line_length,
2764 len(indentation) + 72)
2766 MIN_CHARACTER_REPEAT = 5
2768 len(line) - len(line.rstrip(line[-1])) >= MIN_CHARACTER_REPEAT and
2769 not line[-1].isalnum()
2771 # Trim comments that end with things like ---------
2772 return line[:max_line_length] + '\n'
2773 elif last_comment and re.match(r'\s*#+\s*\w+', line):
2775 split_lines = textwrap.wrap(line.lstrip(' \t#'),
2776 initial_indent=indentation,
2777 subsequent_indent=indentation,
2778 width=max_line_length,
2779 break_long_words=False,
2780 break_on_hyphens=False)
2781 return '\n'.join(split_lines) + '\n'
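# Rough sketch of the visible branches (illustrative input): a trailing run of
# repeated punctuation is simply trimmed to the effective limit, which here is
# len('# ') + 72 = 74 characters; an ordinary last comment is instead
# re-wrapped by textwrap.
#
#     >>> trimmed = shorten_comment('# ' + '-' * 100, max_line_length=79)
#     >>> len(trimmed.rstrip('\n'))
#     74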
2786 def normalize_line_endings(lines, newline):
2787 """Return fixed line endings.
2789 All lines will be modified to use the most common line ending.
2792 return [line.rstrip('\n\r') + newline for line in lines]
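# A rough doctest-style sketch (illustrative input lines):
#
#     >>> normalize_line_endings(['a\r\n', 'b\n', 'c\r'], '\n')
#     ['a\n', 'b\n', 'c\n']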
2795 def mutual_startswith(a, b):
2796 return b.startswith(a) or a.startswith(b)
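# Rough sketch: this is what makes prefix matching of error codes symmetric,
# so 'e5' matches 'e501' and vice versa (illustrative values).
#
#     >>> mutual_startswith('e5', 'e501'), mutual_startswith('e501', 'e5')
#     (True, True)
#     >>> mutual_startswith('e5', 'w6')
#     False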
2799 def code_match(code, select, ignore):
2801 assert not isinstance(ignore, unicode)
2802 for ignored_code in [c.strip() for c in ignore]:
2803 if mutual_startswith(code.lower(), ignored_code.lower()):
2807 assert not isinstance(select, unicode)
2808 for selected_code in [c.strip() for c in select]:
2809 if mutual_startswith(code.lower(), selected_code.lower()):
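# Rough sketch of the intended selection logic (illustrative arguments):
# ignore appears to win first, then select is consulted.
#
#     >>> code_match('E501', select=['E5'], ignore=[])
#     True
#     >>> code_match('E501', select=[], ignore=['E501'])
#     False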
2816 def fix_code(source, options=None):
2817 """Return fixed source code."""
2819 options = parse_args([''])
2821 if not isinstance(source, unicode):
2822 source = source.decode(locale.getpreferredencoding())
2824 sio = io.StringIO(source)
2825 return fix_lines(sio.readlines(), options=options)
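# Rough library-usage sketch (illustrative source; the exact output depends on
# the enabled fixes and options):
#
#     >>> fix_code('x=1;  y=2\n')
#     'x = 1\ny = 2\n'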
2828 def fix_lines(source_lines, options, filename=''):
2829 """Return fixed source code."""
2830     # Transform everything to line feeds, then change them back to the original
2831     # line ending before returning the fixed source code.
2832 original_newline = find_newline(source_lines)
2833 tmp_source = ''.join(normalize_line_endings(source_lines, '\n'))
2835 # Keep a history to break out of cycles.
2836 previous_hashes = set()
2838 if options.line_range:
2839 fixed_source = apply_local_fixes(tmp_source, options)
2841 # Apply global fixes only once (for efficiency).
2842 fixed_source = apply_global_fixes(tmp_source, options)
2845 long_line_ignore_cache = set()
2846 while hash(fixed_source) not in previous_hashes:
2847 if options.pep8_passes >= 0 and passes > options.pep8_passes:
2851 previous_hashes.add(hash(fixed_source))
2853 tmp_source = copy.copy(fixed_source)
2858 contents=tmp_source,
2859 long_line_ignore_cache=long_line_ignore_cache)
2861 fixed_source = fix.fix()
2863 sio = io.StringIO(fixed_source)
2864 return ''.join(normalize_line_endings(sio.readlines(), original_newline))
2867 def fix_file(filename, options=None, output=None):
2869 options = parse_args([filename])
2871 original_source = readlines_from_file(filename)
2873 fixed_source = original_source
2875 if options.in_place or output:
2876 encoding = detect_encoding(filename)
2879 output = codecs.getwriter(encoding)(output.buffer
2880 if hasattr(output, 'buffer')
2883 output = LineEndingWrapper(output)
2885 fixed_source = fix_lines(fixed_source, options, filename=filename)
2888 new = io.StringIO(fixed_source)
2889 new = new.readlines()
2890 diff = get_diff_text(original_source, new, filename)
2896 elif options.in_place:
2897 fp = open_with_encoding(filename, encoding=encoding,
2899 fp.write(fixed_source)
2903 output.write(fixed_source)
2910 """Yield multiple (code, function) tuples."""
2911 for function in globals().values():
2912 if inspect.isfunction(function):
2913 arguments = inspect.getargspec(function)[0]
2914 if arguments[:1] != ['source']:
2917 code = extract_code_from_function(function)
2919 yield (code, function)
2922 def apply_global_fixes(source, options, where='global'):
2923 """Run global fixes on source code.
2925 These are fixes that only need be done once (unlike those in
2926 FixPEP8, which are dependent on pep8).
2929 if code_match('E101', select=options.select, ignore=options.ignore):
2930 source = reindent(source,
2931 indent_size=options.indent_size)
2933 for (code, function) in global_fixes():
2934 if code_match(code, select=options.select, ignore=options.ignore):
2936 print('---> Applying {0} fix for {1}'.format(where,
2939 source = function(source,
2940 aggressive=options.aggressive)
2942 source = fix_2to3(source,
2943 aggressive=options.aggressive,
2944 select=options.select,
2945 ignore=options.ignore)
2950 def apply_local_fixes(source, options):
2951 """Ananologus to apply_global_fixes, but runs only those which makes sense
2952 for the given line_range.
2954 Do as much as we can without breaking code.
2958 """Find leftmost item greater than or equal to x."""
2959 i = bisect.bisect_left(a, x)
2962 return len(a) - 1, a[-1]
2965 """Find rightmost value less than or equal to x."""
2966 i = bisect.bisect_right(a, x)
2968 return i - 1, a[i - 1]
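    # Rough sketch of the two bisect helpers above, as used inside
    # apply_local_fixes (illustrative values):
    #
    #     >>> find_ge([2, 5, 9], 6)   # leftmost item >= 6
    #     (2, 9)
    #     >>> find_le([2, 5, 9], 6)   # rightmost item <= 6
    #     (1, 5)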
2971 def local_fix(source, start_log, end_log,
2972 start_lines, end_lines, indents, last_line):
2973 """apply_global_fixes to the source between start_log and end_log.
2975 The subsource must be the correct syntax of a complete python program
2976 (but all lines may share an indentation). The subsource's shared indent
2977 is removed, fixes are applied and the indent prepended back. Taking
2978 care to not reindent strings.
2980 last_line is the strict cut off (options.line_range[1]), so that
2981 lines after last_line are not modified.
2984 if end_log < start_log:
2987 ind = indents[start_log]
2988 indent = _get_indentation(source[start_lines[start_log]])
2990 sl = slice(start_lines[start_log], end_lines[end_log] + 1)
2992 subsource = source[sl]
2993 # Remove indent from subsource.
2995 for line_no in start_lines[start_log:end_log + 1]:
2996 pos = line_no - start_lines[start_log]
2997 subsource[pos] = subsource[pos][ind:]
2999 # Fix indentation of subsource.
3000 fixed_subsource = apply_global_fixes(''.join(subsource),
3003 fixed_subsource = fixed_subsource.splitlines(True)
3005         # Add the indent back for lines that are not inside multiline strings.
3006 msl = multiline_string_lines(''.join(fixed_subsource),
3007 include_docstrings=False)
3008 for i, line in enumerate(fixed_subsource):
3009             if i + 1 not in msl:
3010 fixed_subsource[i] = indent + line if line != '\n' else line
3012         # Special-case the final line: if it is a multiline string *and* the
3013         # cut-off falls somewhere inside it, take the fixed subset only up to
3014         # last_line. This assumes that the number of lines in the multiline
3015         # string does not change.
3016 changed_lines = len(fixed_subsource)
3017 if (start_lines[end_log] != end_lines[end_log]
3018 and end_lines[end_log] > last_line):
3019 after_end = end_lines[end_log] - last_line
3020 fixed_subsource = (fixed_subsource[:-after_end] +
3021 source[sl][-after_end:])
3022 changed_lines -= after_end
3024 options.line_range[1] = (options.line_range[0] +
3027 return (source[:start_lines[start_log]] +
3029 source[end_lines[end_log] + 1:])
3031 def is_continued_stmt(line,
3032 continued_stmts=frozenset(['else', 'elif',
3033 'finally', 'except'])):
3034 return re.split('[ :]', line.strip(), 1)[0] in continued_stmts
3036 assert options.line_range
3037 start, end = options.line_range
3040 last_line = end # We shouldn't modify lines after this cut-off.
3043 logical = _find_logical(source)
3044 except (SyntaxError, tokenize.TokenError):
3045 return ''.join(source)
3048         # Just blank lines; presumably this means the result will become '\n'.
3049 return apply_global_fixes(source, options)
3051 start_lines, indents = zip(*logical[0])
3052 end_lines, _ = zip(*logical[1])
3054 source = source.splitlines(True)
3056 start_log, start = find_ge(start_lines, start)
3057 end_log, end = find_le(start_lines, end)
3059     # Look back one line; if it is indented less than the current indent,
3060     # then we can move to this previous line, knowing that its
3061     # indentation level will not be changed.
3063 and indents[start_log - 1] < indents[start_log]
3064 and not is_continued_stmt(source[start_log - 1])):
3066 start = start_lines[start_log]
3070 if is_continued_stmt(source[start]):
3072 start = start_lines[start_log]
3075 ind = indents[start_log]
3076 for t in itertools.takewhile(lambda t: t[1][1] >= ind,
3077 enumerate(logical[0][start_log:])):
3078 n_log, n = start_log + t[0], t[1][0]
3079 # start shares indent up to n.
3082 source = local_fix(source, start_log, n_log,
3083 start_lines, end_lines,
3085 start_log = n_log if n == end else n_log + 1
3086 start = start_lines[start_log]
3090         # Look at the line after end and see if it allows us to reindent.
3091 after_end_log, after_end = find_ge(start_lines, end + 1)
3093 if indents[after_end_log] > indents[start_log]:
3094 start_log, start = find_ge(start_lines, start + 1)
3097 if (indents[after_end_log] == indents[start_log]
3098 and is_continued_stmt(source[after_end])):
3099             # Find n, the beginning of the last continued statement.
3100             # Apply the fix to the previous block if there is one.
3102 for n, n_ind in logical[0][start_log:end_log + 1][::-1]:
3103 if n_ind == ind and not is_continued_stmt(source[n]):
3104 n_log = start_lines.index(n)
3105 source = local_fix(source, start_log, n_log - 1,
3106 start_lines, end_lines,
3108 start_log = n_log + 1
3109 start = start_lines[start_log]
3113 end_log, end = find_le(start_lines, end - 1)
3116 source = local_fix(source, start_log, end_log,
3117 start_lines, end_lines,
3121 return ''.join(source)
3124 def extract_code_from_function(function):
3125 """Return code handled by function."""
3126 if not function.__name__.startswith('fix_'):
3129 code = re.sub('^fix_', '', function.__name__)
3141 def create_parser():
3142 """Return command-line parser."""
3143 # Do import locally to be friendly to those who use autopep8 as a library
3144 # and are supporting Python 2.6.
3147 parser = argparse.ArgumentParser(description=docstring_summary(__doc__),
3149 parser.add_argument('--version', action='version',
3150 version='%(prog)s ' + __version__)
3151 parser.add_argument('-v', '--verbose', action='count', dest='verbose',
3153 help='print verbose messages; '
3154 'multiple -v result in more verbose messages')
3155 parser.add_argument('-d', '--diff', action='store_true', dest='diff',
3156 help='print the diff for the fixed source')
3157 parser.add_argument('-i', '--in-place', action='store_true',
3158 help='make changes to files in place')
3159 parser.add_argument('-r', '--recursive', action='store_true',
3160 help='run recursively over directories; '
3161 'must be used with --in-place or --diff')
3162 parser.add_argument('-j', '--jobs', type=int, metavar='n', default=1,
3163 help='number of parallel jobs; '
3164 'match CPU count if value is less than 1')
3165 parser.add_argument('-p', '--pep8-passes', metavar='n',
3166 default=-1, type=int,
3167 help='maximum number of additional pep8 passes '
3168 '(default: infinite)')
3169 parser.add_argument('-a', '--aggressive', action='count', default=0,
3170 help='enable non-whitespace changes; '
3171 'multiple -a result in more aggressive changes')
3172 parser.add_argument('--experimental', action='store_true',
3173 help='enable experimental fixes')
3174 parser.add_argument('--exclude', metavar='globs',
3175 help='exclude file/directory names that match these '
3176 'comma-separated globs')
3177 parser.add_argument('--list-fixes', action='store_true',
3178 help='list codes for fixes; '
3179 'used by --ignore and --select')
3180 parser.add_argument('--ignore', metavar='errors', default='',
3181 help='do not fix these errors/warnings '
3182 '(default: {0})'.format(DEFAULT_IGNORE))
3183 parser.add_argument('--select', metavar='errors', default='',
3184 help='fix only these errors/warnings (e.g. E4,W)')
3185 parser.add_argument('--max-line-length', metavar='n', default=79, type=int,
3186 help='set maximum allowed line length '
3187 '(default: %(default)s)')
3188 parser.add_argument('--range', metavar='line', dest='line_range',
3189 default=None, type=int, nargs=2,
3190 help='only fix errors found within this inclusive '
3191 'range of line numbers (e.g. 1 99); '
3192 'line numbers are indexed at 1')
3193 parser.add_argument('--indent-size', default=DEFAULT_INDENT_SIZE,
3194 type=int, metavar='n',
3195 help='number of spaces per indent level '
3196 '(default %(default)s)')
3197 parser.add_argument('files', nargs='*',
3198 help="files to format or '-' for standard in")
3203 def parse_args(arguments):
3204 """Parse command-line options."""
3205 parser = create_parser()
3206 args = parser.parse_args(arguments)
3208 if not args.files and not args.list_fixes:
3209 parser.error('incorrect number of arguments')
3211 args.files = [decode_filename(name) for name in args.files]
3213 if '-' in args.files:
3214 if len(args.files) > 1:
3215 parser.error('cannot mix stdin and regular files')
3218 parser.error('--diff cannot be used with standard input')
3221 parser.error('--in-place cannot be used with standard input')
3224 parser.error('--recursive cannot be used with standard input')
3226 if len(args.files) > 1 and not (args.in_place or args.diff):
3227 parser.error('autopep8 only takes one filename as argument '
3228 'unless the "--in-place" or "--diff" args are '
3231 if args.recursive and not (args.in_place or args.diff):
3232 parser.error('--recursive must be used with --in-place or --diff')
3234 if args.exclude and not args.recursive:
3235 parser.error('--exclude is only relevant when used with --recursive')
3237 if args.in_place and args.diff:
3238 parser.error('--in-place and --diff are mutually exclusive')
3240 if args.max_line_length <= 0:
3241 parser.error('--max-line-length must be greater than 0')
3244 args.select = args.select.split(',')
3247 args.ignore = args.ignore.split(',')
3248 elif not args.select:
3250 # Enable everything by default if aggressive.
3251 args.select = ['E', 'W']
3253 args.ignore = DEFAULT_IGNORE.split(',')
3256 args.exclude = args.exclude.split(',')
3261 # Do not import multiprocessing globally in case it is not supported
3263 import multiprocessing
3264 args.jobs = multiprocessing.cpu_count()
3266 if args.jobs > 1 and not args.in_place:
3267 parser.error('parallel jobs requires --in-place')
3270 if args.line_range[0] <= 0:
3271 parser.error('--range must be positive numbers')
3272 if args.line_range[0] > args.line_range[1]:
3273 parser.error('First value of --range should be less than or equal '
3279 def decode_filename(filename):
3280 """Return Unicode filename."""
3281 if isinstance(filename, unicode):
3284 return filename.decode(sys.getfilesystemencoding())
3287 def supported_fixes():
3288 """Yield pep8 error codes that autopep8 fixes.
3290 Each item we yield is a tuple of the code followed by its
3294 yield ('E101', docstring_summary(reindent.__doc__))
3296 instance = FixPEP8(filename=None, options=None, contents='')
3297 for attribute in dir(instance):
3298 code = re.match('fix_([ew][0-9][0-9][0-9])', attribute)
3301 code.group(1).upper(),
3303 docstring_summary(getattr(instance, attribute).__doc__))
3306 for (code, function) in sorted(global_fixes()):
3307 yield (code.upper() + (4 - len(code)) * ' ',
3308 re.sub(r'\s+', ' ', docstring_summary(function.__doc__)))
3310 for code in sorted(CODE_TO_2TO3):
3311 yield (code.upper() + (4 - len(code)) * ' ',
3312 re.sub(r'\s+', ' ', docstring_summary(fix_2to3.__doc__)))
3315 def docstring_summary(docstring):
3316 """Return summary of docstring."""
3317 return docstring.split('\n')[0]
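# A rough doctest-style sketch (illustrative docstring):
#
#     >>> docstring_summary('Fix things.\n\nLonger description here.')
#     'Fix things.'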
3320 def line_shortening_rank(candidate, indent_word, max_line_length,
3321 experimental=False):
3322 """Return rank of candidate.
3324 This is for sorting candidates.
3327 if not candidate.strip():
3331 lines = candidate.split('\n')
3335 not lines[0].lstrip().startswith('#') and
3336 lines[0].rstrip()[-1] not in '([{'
3338 for (opening, closing) in ('()', '[]', '{}'):
3339 # Don't penalize empty containers that aren't split up. Things like
3340 # this "foo(\n )" aren't particularly good.
3341 opening_loc = lines[0].find(opening)
3342 closing_loc = lines[0].find(closing)
3343 if opening_loc >= 0:
3344 if closing_loc < 0 or closing_loc != opening_loc + 1:
3345 offset = max(offset, 1 + opening_loc)
3347 current_longest = max(offset + len(x.strip()) for x in lines)
3349 rank += 4 * max(0, current_longest - max_line_length)
3353 # Too much variation in line length is ugly.
3354 rank += 2 * standard_deviation(len(line) for line in lines)
3356     bad_starting_symbol = {
3359 '{': '}'}.get(lines[0][-1])
3363             bad_starting_symbol and
3364             lines[1].lstrip().startswith(bad_starting_symbol)
3368 for lineno, current_line in enumerate(lines):
3369 current_line = current_line.strip()
3371 if current_line.startswith('#'):
3374 for bad_start in ['.', '%', '+', '-', '/']:
3375 if current_line.startswith(bad_start):
3378 # Do not tolerate operators on their own line.
3379 if current_line == bad_start:
3382 if current_line.endswith(('(', '[', '{', '.')):
3383             # Avoid lonely openings. They result in longer lines.
3384 if len(current_line) <= len(indent_word):
3387 # Avoid the ugliness of ", (\n".
3389 current_line.endswith('(') and
3390 current_line[:-1].rstrip().endswith(',')
3394 # Also avoid the ugliness of "foo.\nbar"
3395 if current_line.endswith('.'):
3398 if has_arithmetic_operator(current_line):
3401 if current_line.endswith(('%', '(', '[', '{')):
3404 # Try to break list comprehensions at the "for".
3405 if current_line.startswith('for '):
3408 if current_line.endswith('\\'):
3409 # If a line ends in \-newline, it may be part of a
3410 # multiline string. In that case, we would like to know
3411 # how long that line is without the \-newline. If it's
3412 # longer than the maximum, or has comments, then we assume
3413 # that the \-newline is an okay candidate and only
3414 # penalize it a bit.
3415 total_len = len(current_line)
3417 while lineno < len(lines):
3418 total_len += len(lines[lineno])
3420 if lines[lineno].lstrip().startswith('#'):
3421 total_len = max_line_length
3424 if not lines[lineno].endswith('\\'):
3429 if total_len < max_line_length:
3432 rank += 100 if experimental else 1
3434         # Prefer breaking at commas rather than at a colon.
3435 if ',' in current_line and current_line.endswith(':'):
3438 rank += 10 * count_unbalanced_brackets(current_line)
3443 def standard_deviation(numbers):
3444 """Return standard devation."""
3445 numbers = list(numbers)
3448 mean = sum(numbers) / len(numbers)
3449 return (sum((n - mean) ** 2 for n in numbers) /
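# Rough sketch: this appears to be the population standard deviation,
# sqrt(sum((n - mean)^2) / len(numbers)); with illustrative values:
#
#     >>> standard_deviation([2, 4, 4, 4, 5, 5, 7, 9])
#     2.0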
3453 def has_arithmetic_operator(line):
3454 """Return True if line contains any arithmetic operators."""
3455 for operator in pep8.ARITHMETIC_OP:
3456 if operator in line:
3462 def count_unbalanced_brackets(line):
3463 """Return number of unmatched open/close brackets."""
3465 for opening, closing in ['()', '[]', '{}']:
3466 count += abs(line.count(opening) - line.count(closing))
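# A rough doctest-style sketch (illustrative line):
#
#     >>> count_unbalanced_brackets('foo(bar[0]')   # one unmatched '('
#     1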
3471 def split_at_offsets(line, offsets):
3472 """Split line at offsets.
3474 Return list of strings.
3481 for current_offset in sorted(offsets):
3482 if current_offset < len(line) and previous_offset != current_offset:
3483 result.append(line[previous_offset:current_offset].strip())
3484 previous_offset = current_offset
3486 result.append(line[current_offset:])
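# A rough doctest-style sketch (illustrative offsets): the pieces are
# stripped, except for the trailing remainder.
#
#     >>> split_at_offsets('foo(bar, baz)', [4, 9])
#     ['foo(', 'bar,', 'baz)']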
3491 class LineEndingWrapper(object):
3493 r"""Replace line endings to work with sys.stdout.
3495 It seems that sys.stdout expects only '\n' as the line ending, no matter
3496 the platform. Otherwise, we get repeated line endings.
3500 def __init__(self, output):
3501 self.__output = output
3504 self.__output.write(s.replace('\r\n', '\n').replace('\r', '\n'))
3507 self.__output.flush()
3510 def match_file(filename, exclude):
3511 """Return True if file is okay for modifying/recursing."""
3512 base_name = os.path.basename(filename)
3514 if base_name.startswith('.'):
3517 for pattern in exclude:
3518 if fnmatch.fnmatch(base_name, pattern):
3521 if not os.path.isdir(filename) and not is_python_file(filename):
3527 def find_files(filenames, recursive, exclude):
3528 """Yield filenames."""
3530 name = filenames.pop(0)
3531 if recursive and os.path.isdir(name):
3532 for root, directories, children in os.walk(name):
3533 filenames += [os.path.join(root, f) for f in children
3534 if match_file(os.path.join(root, f),
3536 directories[:] = [d for d in directories
3537 if match_file(os.path.join(root, d),
3543 def _fix_file(parameters):
3544 """Helper function for optionally running fix_file() in parallel."""
3545 if parameters[1].verbose:
3546 print('[file:{0}]'.format(parameters[0]), file=sys.stderr)
3548 fix_file(*parameters)
3549 except IOError as error:
3550 print(unicode(error), file=sys.stderr)
3553 def fix_multiple_files(filenames, options, output=None):
3554 """Fix list of files.
3556 Optionally fix files recursively.
3559 filenames = find_files(filenames, options.recursive, options.exclude)
3560 if options.jobs > 1:
3561 import multiprocessing
3562 pool = multiprocessing.Pool(options.jobs)
3564 [(name, options) for name in filenames])
3566 for name in filenames:
3567 _fix_file((name, options, output))
3570 def is_python_file(filename):
3571 """Return True if filename is Python file."""
3572 if filename.endswith('.py'):
3576 with open_with_encoding(filename) as f:
3577 first_line = f.readlines(1)[0]
3578 except (IOError, IndexError):
3581 if not PYTHON_SHEBANG_REGEX.match(first_line):
3587 def is_probably_part_of_multiline(line):
3588 """Return True if line is likely part of a multiline string.
3590 When multiline strings are involved, pep8 reports the error as being
3591 at the start of the multiline string, which doesn't work for us.
3597 line.rstrip().endswith('\\')
3604 # Exit on broken pipe.
3605 signal.signal(signal.SIGPIPE, signal.SIG_DFL)
3606 except AttributeError: # pragma: no cover
3607 # SIGPIPE is not available on Windows.
3611 args = parse_args(sys.argv[1:])
3614 for code, description in sorted(supported_fixes()):
3615 print('{code} - {description}'.format(
3616 code=code, description=description))
3619 if args.files == ['-']:
3620 assert not args.in_place
3622 # LineEndingWrapper is unnecessary here due to the symmetry between
3623 # standard in and standard out.
3624 sys.stdout.write(fix_code(sys.stdin.read(), args))
3626 if args.in_place or args.diff:
3627 args.files = list(set(args.files))
3629 assert len(args.files) == 1
3630 assert not args.recursive
3632 fix_multiple_files(args.files, args, sys.stdout)
3633 except KeyboardInterrupt:
3634 return 1 # pragma: no cover
3637 class CachedTokenizer(object):
3639 """A one-element cache around tokenize.generate_tokens().
3641 Original code written by Ned Batchelder, in coverage.py.
3646 self.last_text = None
3647 self.last_tokens = None
3649 def generate_tokens(self, text):
3650 """A stand-in for tokenize.generate_tokens()."""
3651 if text != self.last_text:
3652 string_io = io.StringIO(text)
3653 self.last_tokens = list(
3654 tokenize.generate_tokens(string_io.readline)
3656 self.last_text = text
3657 return self.last_tokens
3659 _cached_tokenizer = CachedTokenizer()
3660 generate_tokens = _cached_tokenizer.generate_tokens
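# Rough usage sketch: the module-level generate_tokens() defined above reuses
# the last token list when asked to tokenize identical text, so repeated
# tokenization of the same source should be essentially free.
#
#     >>> generate_tokens('x = 1\n') is generate_tokens('x = 1\n')
#     True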
3663 if __name__ == '__main__':