3 # Released to the public domain, by Tim Peters, 03 October 2000.
5 """reindent [-d][-r][-n][-v][-h] [ path ... ]
7 -d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
8 -r (--recurse) Recurse. Search for all .py files in subdirectories too.
9 -n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
10 -v (--verbose) Verbose. Print informative msgs; else no output.
11 -h (--help) Help. Print this usage information and exit.
13 Change Python (.py) files to use 4-space indents and no hard tab characters.
14 Also trim excess spaces and tabs from ends of lines, and remove empty lines
15 at the end of files. Also ensure the last line ends with a newline.
17 If no paths are given on the command line, reindent operates as a filter,
18 reading a single source file from standard input and writing the transformed
19 source to standard output. In this case, the -d, -r and -v flags are ignored.
22 You can pass one or more file and/or directory paths. When a path is a
23 directory, all .py files within the directory will be examined, and, if the -r
24 option is given, likewise recursively for subdirectories.
26 If output is not to standard output, reindent overwrites files in place,
27 saving a copy of each original with a .bak extension. If it finds nothing to
28 change, the file is left alone. If reindent does change a file, the changed
29 file is a fixed-point for future runs (i.e., running reindent on the
30 resulting .py file won't change it again).
32 The hard part of reindenting is figuring out what to do with comment
33 lines. So long as the input files get a clean bill of health from
34 tabnanny.py, reindent should do a good job.
36 The backup file is a copy of the one that is being reindented. The ".bak"
37 file is generated with shutil.copyfile(), but some corner cases regarding
38 user/group and permissions could leave the backup file more readable than
39 you'd prefer. You can always use the --nobackup option to prevent this.
56 print >> sys.stderr, msg
57 print >> sys.stderr, __doc__
62 sys.stderr.write(sep + str(arg))
64 sys.stderr.write("\n")
68 global verbose, recurse, dryrun, makebackup
70 opts, args = getopt.getopt(sys.argv[1:], "drnvh",
71 ["dryrun", "recurse", "nobackup", "verbose", "help"])
72 except getopt.error, msg:
76 if o in ('-d', '--dryrun'):
78 elif o in ('-r', '--recurse'):
80 elif o in ('-n', '--nobackup'):
82 elif o in ('-v', '--verbose'):
84 elif o in ('-h', '--help'):
88 r = Reindenter(sys.stdin)
96 if os.path.isdir(file) and not os.path.islink(file):
98 print "listing directory", file
99 names = os.listdir(file)
101 fullname = os.path.join(file, name)
102 if ((recurse and os.path.isdir(fullname) and
103 not os.path.islink(fullname) and
104 not os.path.split(fullname)[1].startswith("."))
105 or name.lower().endswith(".py")):
110 print "checking", file, "...",
114 errprint("%s: I/O Error: %s" % (file, str(msg)))
121 if isinstance(newline, tuple):
122 errprint("%s: mixed newlines detected; cannot process file" % file)
129 print "But this is a dry run, so leaving it alone."
133 shutil.copyfile(file, bak)
135 print "backed up", file, "to", bak
136 f = io.open(file, "w", newline=newline)
140 print "wrote new", file
147 def _rstrip(line, JUNK='\n \t'):
148 """Return line stripped of trailing spaces, tabs, newlines.
150 Note that line.rstrip() instead also strips sundry control characters,
151 but at least one known Emacs user expects to keep junk like that, not
152 mentioning Barry by name or anything <wink>.
156 while i > 0 and line[i-1] in JUNK:
162 def __init__(self, f):
163 self.find_stmt = 1 # next token begins a fresh stmt?
164 self.level = 0 # current indent level
167 self.raw = f.readlines()
169 # File lines, rstripped & tab-expanded. Dummy at start is so
170 # that we can use tokenize's 1-based line numbering easily.
171 # Note that a line is all-blank iff it's "\n".
172 self.lines = [_rstrip(line).expandtabs() + "\n"
173 for line in self.raw]
174 self.lines.insert(0, None)
175 self.index = 1 # index into self.lines of next line
177 # List of (lineno, indentlevel) pairs, one for each stmt and
178 # comment line. indentlevel is -1 for comment lines, as a
179 # signal that tokenize doesn't know what to do about them;
180 # indeed, they're our headache!
183 # Save the newlines found in the file so they can be used to
184 # create output without mutating the newlines.
185 self.newlines = f.newlines
188 tokenize.tokenize(self.getline, self.tokeneater)
189 # Remove trailing empty lines.
191 while lines and lines[-1] == "\n":
195 stats.append((len(lines), 0))
196 # Map count of leading spaces to # we want.
198 # Program after transformation.
199 after = self.after = []
200 # Copy over initial empty lines -- there's nothing to do until
201 # we see a line with *something* on it.
203 after.extend(lines[1:i])
204 for i in range(len(stats)-1):
205 thisstmt, thislevel = stats[i]
206 nextstmt = stats[i+1][0]
207 have = getlspace(lines[thisstmt])
212 # An indented comment line. If we saw the same
213 # indentation before, reuse what it most recently mapped to.
215 want = have2want.get(have, -1)
217 # Then it probably belongs to the next real stmt.
218 for j in xrange(i+1, len(stats)-1):
219 jline, jlevel = stats[j]
221 if have == getlspace(lines[jline]):
224 if want < 0: # Maybe it's a hanging
225 # comment like this one,
226 # in which case we should shift it like its base line got shifted.
228 for j in xrange(i-1, -1, -1):
229 jline, jlevel = stats[j]
231 want = have + getlspace(after[jline-1]) - \
232 getlspace(lines[jline])
235 # Still no luck -- leave it alone.
240 have2want[have] = want
242 if diff == 0 or have == 0:
243 after.extend(lines[thisstmt:nextstmt])
245 for line in lines[thisstmt:nextstmt]:
250 after.append(" " * diff + line)
252 remove = min(getlspace(line), -diff)
253 after.append(line[remove:])
254 return self.raw != self.after
257 f.writelines(self.after)
259 # Line-getter for tokenize.
261 if self.index >= len(self.lines):
264 line = self.lines[self.index]
268 # Line-eater for tokenize.
269 def tokeneater(self, type, token, (sline, scol), end, line,
270 INDENT=tokenize.INDENT,
271 DEDENT=tokenize.DEDENT,
272 NEWLINE=tokenize.NEWLINE,
273 COMMENT=tokenize.COMMENT,
277 # A program statement, or ENDMARKER, will eventually follow,
278 # after some (possibly empty) run of tokens of the form
279 # (NL | COMMENT)* (INDENT | DEDENT+)?
290 elif type == COMMENT:
292 self.stats.append((sline, -1))
293 # but we're still looking for a new stmt, so leave find_stmt alone.
300 # This is the first "real token" following a NEWLINE, so it
301 # must be the first token of the next program statement, or an ENDMARKER.
304 if line: # not endmarker
305 self.stats.append((sline, self.level))
307 # Count number of leading blanks.
310 while i < n and line[i] == " ":
314 if __name__ == '__main__':