Tools/scripts/reindent.py

   1 #! /usr/bin/env python
   2
   3 # Released to the public domain, by Tim Peters, 03 October 2000.
   4
   5 """reindent [-d][-r][-n][-v][-h] [ path ... ]
   6
   7 -d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
   8 -r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
   9 -n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
  10 -v (--verbose)  Verbose.   Print informative msgs; else no output.
  11 -h (--help)     Help.      Print this usage information and exit.
  12
  13 Change Python (.py) files to use 4-space indents and no hard tab characters.
  14 Also trim excess spaces and tabs from ends of lines, and remove empty lines
  15 at the end of files.  Also ensure the last line ends with a newline.
  16
  17 If no paths are given on the command line, reindent operates as a filter,
  18 reading a single source file from standard input and writing the transformed
  19 source to standard output.  In this case, the -d, -r and -v flags are
  20 ignored.
  21
  22 You can pass one or more file and/or directory paths.  When a directory
  23 path, all .py files within the directory will be examined, and, if the -r
  24 option is given, likewise recursively for subdirectories.
  25
  26 If output is not to standard output, reindent overwrites files in place,
  27 renaming the originals with a .bak extension.  If it finds nothing to
  28 change, the file is left alone.  If reindent does change a file, the changed
  29 file is a fixed-point for future runs (i.e., running reindent on the
  30 resulting .py file won't change it again).
  31
  32 The hard part of reindenting is figuring out what to do with comment
  33 lines.  So long as the input files get a clean bill of health from
  34 tabnanny.py, reindent should do a good job.
  35
  36 The backup file is a copy of the one that is being reindented. The ".bak"
  37 file is generated with shutil.copyfile(), but some corner cases regarding
  38 user/group and permissions could leave the backup file more readable than
  39 you'd prefer. You can always use the --nobackup option to prevent this.
  40 """
  41
  42 __version__ = "1"
  43
  44 import tokenize
  45 import os, shutil
  46 import sys
  47 import io
  48
# Module-level option state.  main() rebinds these from the command-line
# flags and check() reads them.
verbose    = 0      # incremented per -v/--verbose; nonzero enables progress output
recurse    = 0      # incremented per -r/--recurse; nonzero descends into subdirectories
dryrun     = 0      # incremented per -d/--dryrun; nonzero means files are not rewritten
makebackup = True   # set to False by -n/--nobackup; controls the ".bak" copy
  53
  54 def usage(msg=None):
  55     if msg is not None:
  56         print >> sys.stderr, msg
  57     print >> sys.stderr, __doc__
  58
  59 def errprint(*args):
  60     sep = ""
  61     for arg in args:
  62         sys.stderr.write(sep + str(arg))
  63         sep = " "
  64     sys.stderr.write("\n")
  65
  66 def main():
  67     import getopt
  68     global verbose, recurse, dryrun, makebackup
  69     try:
  70         opts, args = getopt.getopt(sys.argv[1:], "drnvh",
  71                         ["dryrun", "recurse", "nobackup", "verbose", "help"])
  72     except getopt.error, msg:
  73         usage(msg)
  74         return
  75     for o, a in opts:
  76         if o in ('-d', '--dryrun'):
  77             dryrun += 1
  78         elif o in ('-r', '--recurse'):
  79             recurse += 1
  80         elif o in ('-n', '--nobackup'):
  81             makebackup = False
  82         elif o in ('-v', '--verbose'):
  83             verbose += 1
  84         elif o in ('-h', '--help'):
  85             usage()
  86             return
  87     if not args:
  88         r = Reindenter(sys.stdin)
  89         r.run()
  90         r.write(sys.stdout)
  91         return
  92     for arg in args:
  93         check(arg)
  94
  95 def check(file):
  96     if os.path.isdir(file) and not os.path.islink(file):
  97         if verbose:
  98             print "listing directory", file
  99         names = os.listdir(file)
 100         for name in names:
 101             fullname = os.path.join(file, name)
 102             if ((recurse and os.path.isdir(fullname) and
 103                  not os.path.islink(fullname) and
 104                  not os.path.split(fullname)[1].startswith("."))
 105                 or name.lower().endswith(".py")):
 106                 check(fullname)
 107         return
 108
 109     if verbose:
 110         print "checking", file, "...",
 111     try:
 112         f = io.open(file)
 113     except IOError, msg:
 114         errprint("%s: I/O Error: %s" % (file, str(msg)))
 115         return
 116
 117     r = Reindenter(f)
 118     f.close()
 119
 120     newline = r.newlines
 121     if isinstance(newline, tuple):
 122         errprint("%s: mixed newlines detected; cannot process file" % file)
 123         return
 124
 125     if r.run():
 126         if verbose:
 127             print "changed."
 128             if dryrun:
 129                 print "But this is a dry run, so leaving it alone."
 130         if not dryrun:
 131             bak = file + ".bak"
 132             if makebackup:
 133                 shutil.copyfile(file, bak)
 134                 if verbose:
 135                     print "backed up", file, "to", bak
 136             f = io.open(file, "w", newline=newline)
 137             r.write(f)
 138             f.close()
 139             if verbose:
 140                 print "wrote new", file
 141         return True
 142     else:
 143         if verbose:
 144             print "unchanged."
 145         return False
 146
 147 def _rstrip(line, JUNK='\n \t'):
 148     """Return line stripped of trailing spaces, tabs, newlines.
 149
 150     Note that line.rstrip() instead also strips sundry control characters,
 151     but at least one known Emacs user expects to keep junk like that, not
 152     mentioning Barry by name or anything <wink>.
 153     """
 154
 155     i = len(line)
 156     while i > 0 and line[i-1] in JUNK:
 157         i -= 1
 158     return line[:i]
 159
 160 class Reindenter:
 161
 162     def __init__(self, f):
 163         self.find_stmt = 1  # next token begins a fresh stmt?
 164         self.level = 0      # current indent level
 165
 166         # Raw file lines.
 167         self.raw = f.readlines()
 168
 169         # File lines, rstripped & tab-expanded.  Dummy at start is so
 170         # that we can use tokenize's 1-based line numbering easily.
 171         # Note that a line is all-blank iff it's "\n".
 172         self.lines = [_rstrip(line).expandtabs() + "\n"
 173                       for line in self.raw]
 174         self.lines.insert(0, None)
 175         self.index = 1  # index into self.lines of next line
 176
 177         # List of (lineno, indentlevel) pairs, one for each stmt and
 178         # comment line.  indentlevel is -1 for comment lines, as a
 179         # signal that tokenize doesn't know what to do about them;
 180         # indeed, they're our headache!
 181         self.stats = []
 182
 183         # Save the newlines found in the file so they can be used to
 184         #  create output without mutating the newlines.
 185         self.newlines = f.newlines
 186
 187     def run(self):
 188         tokenize.tokenize(self.getline, self.tokeneater)
 189         # Remove trailing empty lines.
 190         lines = self.lines
 191         while lines and lines[-1] == "\n":
 192             lines.pop()
 193         # Sentinel.
 194         stats = self.stats
 195         stats.append((len(lines), 0))
 196         # Map count of leading spaces to # we want.
 197         have2want = {}
 198         # Program after transformation.
 199         after = self.after = []
 200         # Copy over initial empty lines -- there's nothing to do until
 201         # we see a line with *something* on it.
 202         i = stats[0][0]
 203         after.extend(lines[1:i])
 204         for i in range(len(stats)-1):
 205             thisstmt, thislevel = stats[i]
 206             nextstmt = stats[i+1][0]
 207             have = getlspace(lines[thisstmt])
 208             want = thislevel * 4
 209             if want < 0:
 210                 # A comment line.
 211                 if have:
 212                     # An indented comment line.  If we saw the same
 213                     # indentation before, reuse what it most recently
 214                     # mapped to.
 215                     want = have2want.get(have, -1)
 216                     if want < 0:
 217                         # Then it probably belongs to the next real stmt.
 218                         for j in xrange(i+1, len(stats)-1):
 219                             jline, jlevel = stats[j]
 220                             if jlevel >= 0:
 221                                 if have == getlspace(lines[jline]):
 222                                     want = jlevel * 4
 223                                 break
 224                     if want < 0:           # Maybe it's a hanging
 225                                            # comment like this one,
 226                         # in which case we should shift it like its base
 227                         # line got shifted.
 228                         for j in xrange(i-1, -1, -1):
 229                             jline, jlevel = stats[j]
 230                             if jlevel >= 0:
 231                                 want = have + getlspace(after[jline-1]) - \
 232                                        getlspace(lines[jline])
 233                                 break
 234                     if want < 0:
 235                         # Still no luck -- leave it alone.
 236                         want = have
 237                 else:
 238                     want = 0
 239             assert want >= 0
 240             have2want[have] = want
 241             diff = want - have
 242             if diff == 0 or have == 0:
 243                 after.extend(lines[thisstmt:nextstmt])
 244             else:
 245                 for line in lines[thisstmt:nextstmt]:
 246                     if diff > 0:
 247                         if line == "\n":
 248                             after.append(line)
 249                         else:
 250                             after.append(" " * diff + line)
 251                     else:
 252                         remove = min(getlspace(line), -diff)
 253                         after.append(line[remove:])
 254         return self.raw != self.after
 255
 256     def write(self, f):
 257         f.writelines(self.after)
 258
 259     # Line-getter for tokenize.
 260     def getline(self):
 261         if self.index >= len(self.lines):
 262             line = ""
 263         else:
 264             line = self.lines[self.index]
 265             self.index += 1
 266         return line
 267
 268     # Line-eater for tokenize.
 269     def tokeneater(self, type, token, (sline, scol), end, line,
 270                    INDENT=tokenize.INDENT,
 271                    DEDENT=tokenize.DEDENT,
 272                    NEWLINE=tokenize.NEWLINE,
 273                    COMMENT=tokenize.COMMENT,
 274                    NL=tokenize.NL):
 275
 276         if type == NEWLINE:
 277             # A program statement, or ENDMARKER, will eventually follow,
 278             # after some (possibly empty) run of tokens of the form
 279             #     (NL | COMMENT)* (INDENT | DEDENT+)?
 280             self.find_stmt = 1
 281
 282         elif type == INDENT:
 283             self.find_stmt = 1
 284             self.level += 1
 285
 286         elif type == DEDENT:
 287             self.find_stmt = 1
 288             self.level -= 1
 289
 290         elif type == COMMENT:
 291             if self.find_stmt:
 292                 self.stats.append((sline, -1))
 293                 # but we're still looking for a new stmt, so leave
 294                 # find_stmt alone
 295
 296         elif type == NL:
 297             pass
 298
 299         elif self.find_stmt:
 300             # This is the first "real token" following a NEWLINE, so it
 301             # must be the first token of the next program statement, or an
 302             # ENDMARKER.
 303             self.find_stmt = 0
 304             if line:   # not endmarker
 305                 self.stats.append((sline, self.level))
 306
 307 # Count number of leading blanks.
 308 def getlspace(line):
 309     i, n = 0, len(line)
 310     while i < n and line[i] == " ":
 311         i += 1
 312     return i
 313
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()