patch.py

   1 """ Patch utility to apply unified diffs
   2
   3     Brute-force line-by-line non-recursive parsing
   4
   5     Copyright (c) 2008-2010 anatoly techtonik
   6     Available under the terms of MIT license
   7
   8     NOTE: This version has been patched by Alex Stewart <alex@foogod.com> for
   9     Python 3.x support and other misc fixups.
  10
  11     Project home: http://code.google.com/p/python-patch/
  12
  13
  14     $Id: patch.py 92 2010-07-02 06:04:57Z techtonik $
  15     $HeadURL: http://python-patch.googlecode.com/svn/trunk/patch.py $
  16 """
  17
  18 __author__ = "techtonik.rainforce.org"
  19 __version__ = "10.04-2.pAS1"
  20
  21 import copy
  22 import logging
  23 import re
  24 from logging import debug, info, warning
  25 import sys
  26
  27 try:
  28   # cStringIO doesn't support unicode in 2.5
  29   from StringIO import StringIO
  30 except ImportError:
  31   # StringIO has been renamed to 'io' in 3.x
  32   from io import StringIO
  33
  34 from os.path import exists, isfile, abspath
  35 from os import unlink
  36
  37 _open = open
  38
  39 if sys.version_info >= (3,):
  40     # Open files with universal newline support but no newline translation (3.x)
  41     def open(filename, mode='r'):
  42         return _open(filename, mode, newline='')
  43 else:
  44     # Open files with universal newline support but no newline translation (2.x)
  45     def open(filename, mode='r'):
  46         return _open(filename, mode + 'b')
  47
  48     # Python 3.x has changed iter.next() to be next(iter) instead, so for
  49     # backwards compatibility, we'll just define a next() function under 2.x
  50     def next(iter):
  51         return iter.next()
  52
  53
  54 #------------------------------------------------
  55 # Logging is controlled by "python_patch" logger
  56
  57 debugmode = False
  58
  59 logger = logging.getLogger("python_patch")
  60 loghandler = logging.StreamHandler()
  61 logger.addHandler(loghandler)
  62
  63 debug = logger.debug
  64 info = logger.info
  65 warning = logger.warning
  66
  67 # If called as a library, don't log info/debug messages by default.
  68 logger.setLevel(logging.WARN)
  69
  70 #------------------------------------------------
  71
  72 # constants for patch types
  73
  74 DIFF = PLAIN = "plain"
  75 HG = MERCURIAL = "mercurial"
  76 SVN = SUBVERSION = "svn"
  77
  78
  79 def fromfile(filename):
  80   """ Parse patch file and return Patch() object
  81   """
  82   info("reading patch from file %s" % filename)
  83   fp = open(filename, "r")
  84   patch = Patch(fp)
  85   fp.close()
  86   return patch
  87
  88
  89 def fromstring(s):
  90   """ Parse text string and return Patch() object
  91   """
  92   return Patch( StringIO(s) )
  93
  94
  95
  96 class HunkInfo(object):
  97   """ Parsed hunk data container (hunk starts with @@ -R +R @@) """
  98
  99   def __init__(self):
 100     self.startsrc=None #: line count starts with 1
 101     self.linessrc=None
 102     self.starttgt=None
 103     self.linestgt=None
 104     self.invalid=False
 105     self.text=[]
 106
 107   def copy(self):
 108     return copy.copy(self)
 109
 110 #  def apply(self, estream):
 111 #    """ write hunk data into enumerable stream
 112 #        return strings one by one until hunk is
 113 #        over
 114 #
 115 #        enumerable stream are tuples (lineno, line)
 116 #        where lineno starts with 0
 117 #    """
 118 #    pass
 119
 120
 121
 122 class Patch(object):
 123
 124   def __init__(self, stream=None):
 125
 126     # define Patch data members
 127     # table with a row for every source file
 128
 129     #: list of source filenames
 130     self.source=None
 131     self.target=None
 132     #: list of lists of hunks
 133     self.hunks=None
 134     #: file endings statistics for every hunk
 135     self.hunkends=None
 136     #: headers for each file
 137     self.header=None
 138
 139     #: patch type - one of constants
 140     self.type = None
 141
 142     if stream:
 143       self.parse(stream)
 144
 145   def copy(self):
 146     return copy.copy(self)
 147
 148   def parse(self, stream):
 149     """ parse unified diff """
 150     self.header = []
 151
 152     self.source = []
 153     self.target = []
 154     self.hunks = []
 155     self.hunkends = []
 156
 157     # define possible file regions that will direct the parser flow
 158     headscan  = False # scanning header before the patch body
 159     filenames = False # lines starting with --- and +++
 160
 161     hunkhead = False  # @@ -R +R @@ sequence
 162     hunkbody = False  #
 163     hunkskip = False  # skipping invalid hunk mode
 164
 165     headscan = True
 166     lineends = dict(lf=0, crlf=0, cr=0)
 167     nextfileno = 0
 168     nexthunkno = 0    #: even if index starts with 0 user messages number hunks from 1
 169
 170     # hunkinfo holds parsed values, hunkactual - calculated
 171     hunkinfo = HunkInfo()
 172     hunkactual = dict(linessrc=None, linestgt=None)
 173
 174
 175     fe = enumerate(stream)
 176     for lineno, line in fe:
 177
 178       # read out header
 179       if headscan:
 180         header = ''
 181         try:
 182           while not line.startswith("--- "):
 183             header += line
 184             lineno, line = next(fe)
 185         except StopIteration:
 186             # this is actually a loop exit
 187             continue
 188         self.header.append(header)
 189
 190         headscan = False
 191         # switch to filenames state
 192         filenames = True
 193
 194       # hunkskip and hunkbody code skipped until definition of hunkhead is parsed
 195       if hunkbody:
 196         # process line first
 197         if re.match(r"^[- \+\\]", line):
 198             # gather stats about line endings
 199             if line.endswith("\r\n"):
 200               self.hunkends[nextfileno-1]["crlf"] += 1
 201             elif line.endswith("\n"):
 202               self.hunkends[nextfileno-1]["lf"] += 1
 203             elif line.endswith("\r"):
 204               self.hunkends[nextfileno-1]["cr"] += 1
 205
 206             if line.startswith("-"):
 207               hunkactual["linessrc"] += 1
 208             elif line.startswith("+"):
 209               hunkactual["linestgt"] += 1
 210             elif not line.startswith("\\"):
 211               hunkactual["linessrc"] += 1
 212               hunkactual["linestgt"] += 1
 213             hunkinfo.text.append(line)
 214             # todo: handle \ No newline cases
 215         else:
 216             warning("invalid hunk no.%d at %d for target file %s" % (nexthunkno, lineno+1, self.target[nextfileno-1]))
 217             # add hunk status node
 218             self.hunks[nextfileno-1].append(hunkinfo.copy())
 219             self.hunks[nextfileno-1][nexthunkno-1]["invalid"] = True
 220             # switch to hunkskip state
 221             hunkbody = False
 222             hunkskip = True
 223
 224         # check exit conditions
 225         if hunkactual["linessrc"] > hunkinfo.linessrc or hunkactual["linestgt"] > hunkinfo.linestgt:
 226             warning("extra hunk no.%d lines at %d for target %s" % (nexthunkno, lineno+1, self.target[nextfileno-1]))
 227             # add hunk status node
 228             self.hunks[nextfileno-1].append(hunkinfo.copy())
 229             self.hunks[nextfileno-1][nexthunkno-1]["invalid"] = True
 230             # switch to hunkskip state
 231             hunkbody = False
 232             hunkskip = True
 233         elif hunkinfo.linessrc == hunkactual["linessrc"] and hunkinfo.linestgt == hunkactual["linestgt"]:
 234             self.hunks[nextfileno-1].append(hunkinfo.copy())
 235             # switch to hunkskip state
 236             hunkbody = False
 237             hunkskip = True
 238
 239             # detect mixed window/unix line ends
 240             ends = self.hunkends[nextfileno-1]
 241             if ((ends["cr"]!=0) + (ends["crlf"]!=0) + (ends["lf"]!=0)) > 1:
 242               warning("inconsistent line ends in patch hunks for %s" % self.source[nextfileno-1])
 243             if debugmode:
 244               debuglines = dict(ends)
 245               debuglines.update(file=self.target[nextfileno-1], hunk=nexthunkno)
 246               debug("crlf: %(crlf)d  lf: %(lf)d  cr: %(cr)d\t - file: %(file)s hunk: %(hunk)d" % debuglines)
 247
 248       if hunkskip:
 249         match = re.match("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?", line)
 250         if match:
 251           # switch to hunkhead state
 252           hunkskip = False
 253           hunkhead = True
 254         elif line.startswith("--- "):
 255           # switch to filenames state
 256           hunkskip = False
 257           filenames = True
 258           if debugmode and len(self.source) > 0:
 259             debug("- %2d hunks for %s" % (len(self.hunks[nextfileno-1]), self.source[nextfileno-1]))
 260
 261       if filenames:
 262         if line.startswith("--- "):
 263           if nextfileno in self.source:
 264             warning("skipping invalid patch for %s" % self.source[nextfileno])
 265             del self.source[nextfileno]
 266             # double source filename line is encountered
 267             # attempt to restart from this second line
 268           re_filename = "^--- ([^\t]+)"
 269           match = re.match(re_filename, line)
 270           # todo: support spaces in filenames
 271           if match:
 272             self.source.append(match.group(1).strip())
 273           else:
 274             warning("skipping invalid filename at line %d" % lineno)
 275             # switch back to headscan state
 276             filenames = False
 277             headscan = True
 278         elif not line.startswith("+++ "):
 279           if nextfileno in self.source:
 280             warning("skipping invalid patch with no target for %s" % self.source[nextfileno])
 281             del self.source[nextfileno]
 282           else:
 283             # this should be unreachable
 284             warning("skipping invalid target patch")
 285           filenames = False
 286           headscan = True
 287         else:
 288           if nextfileno in self.target:
 289             warning("skipping invalid patch - double target at line %d" % lineno)
 290             del self.source[nextfileno]
 291             del self.target[nextfileno]
 292             nextfileno -= 1
 293             # double target filename line is encountered
 294             # switch back to headscan state
 295             filenames = False
 296             headscan = True
 297           else:
 298             re_filename = "^\+\+\+ ([^\t]+)"
 299             match = re.match(re_filename, line)
 300             if not match:
 301               warning("skipping invalid patch - no target filename at line %d" % lineno)
 302               # switch back to headscan state
 303               filenames = False
 304               headscan = True
 305             else:
 306               self.target.append(match.group(1).strip())
 307               nextfileno += 1
 308               # switch to hunkhead state
 309               filenames = False
 310               hunkhead = True
 311               nexthunkno = 0
 312               self.hunks.append([])
 313               self.hunkends.append(lineends.copy())
 314               continue
 315
 316       if hunkhead:
 317         match = re.match("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?", line)
 318         if not match:
 319           if nextfileno-1 not in self.hunks:
 320             warning("skipping invalid patch with no hunks for file %s" % self.target[nextfileno-1])
 321             # switch to headscan state
 322             hunkhead = False
 323             headscan = True
 324             continue
 325           else:
 326             # switch to headscan state
 327             hunkhead = False
 328             headscan = True
 329         else:
 330           hunkinfo.startsrc = int(match.group(1))
 331           hunkinfo.linessrc = 1
 332           if match.group(3): hunkinfo.linessrc = int(match.group(3))
 333           hunkinfo.starttgt = int(match.group(4))
 334           hunkinfo.linestgt = 1
 335           if match.group(6): hunkinfo.linestgt = int(match.group(6))
 336           hunkinfo.invalid = False
 337           hunkinfo.text = []
 338
 339           hunkactual["linessrc"] = hunkactual["linestgt"] = 0
 340
 341           # switch to hunkbody state
 342           hunkhead = False
 343           hunkbody = True
 344           nexthunkno += 1
 345           continue
 346
 347     if not hunkskip:
 348       warning("patch file incomplete - %s" % filename)
 349       # sys.exit(?)
 350     else:
 351       # duplicated message when an eof is reached
 352       if debugmode and len(self.source) > 0:
 353           debug("- %2d hunks for %s" % (len(self.hunks[nextfileno-1]), self.source[nextfileno-1]))
 354
 355     info("total files: %d  total hunks: %d" % (len(self.source), sum([len(hset) for hset in self.hunks])))
 356
 357
 358   def apply(self):
 359     """ apply parsed patch """
 360
 361     total = len(self.source)
 362     for fileno, filename in enumerate(self.source):
 363
 364       f2patch = filename
 365       if not exists(f2patch):
 366         f2patch = self.target[fileno]
 367         if not exists(f2patch):
 368           warning("source/target file does not exist\n--- %s\n+++ %s" % (filename, f2patch))
 369           continue
 370       if not isfile(f2patch):
 371         warning("not a file - %s" % f2patch)
 372         continue
 373       filename = f2patch
 374
 375       info("processing %d/%d:\t %s" % (fileno+1, total, filename))
 376
 377       # validate before patching
 378       f2fp = open(filename)
 379       hunkno = 0
 380       hunk = self.hunks[fileno][hunkno]
 381       hunkfind = []
 382       hunkreplace = []
 383       validhunks = 0
 384       canpatch = False
 385       for lineno, line in enumerate(f2fp):
 386         if lineno+1 < hunk.startsrc:
 387           continue
 388         elif lineno+1 == hunk.startsrc:
 389           hunkfind = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " -"]
 390           hunkreplace = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " +"]
 391           #pprint(hunkreplace)
 392           hunklineno = 0
 393
 394           # todo \ No newline at end of file
 395
 396         # check hunks in source file
 397         if lineno+1 < hunk.startsrc+len(hunkfind)-1:
 398           if line.rstrip("\r\n") == hunkfind[hunklineno]:
 399             hunklineno+=1
 400           else:
 401             debug("hunk no.%d doesn't match source file %s" % (hunkno+1, filename))
 402             # file may be already patched, but we will check other hunks anyway
 403             hunkno += 1
 404             if hunkno < len(self.hunks[fileno]):
 405               hunk = self.hunks[fileno][hunkno]
 406               continue
 407             else:
 408               break
 409
 410         # check if processed line is the last line
 411         if lineno+1 == hunk.startsrc+len(hunkfind)-1:
 412           debug("file %s hunk no.%d -- is ready to be patched" % (filename, hunkno+1))
 413           hunkno+=1
 414           validhunks+=1
 415           if hunkno < len(self.hunks[fileno]):
 416             hunk = self.hunks[fileno][hunkno]
 417           else:
 418             if validhunks == len(self.hunks[fileno]):
 419               # patch file
 420               canpatch = True
 421               break
 422       else:
 423         if hunkno < len(self.hunks[fileno]):
 424           warning("premature end of source file %s at hunk %d" % (filename, hunkno+1))
 425
 426       f2fp.close()
 427
 428       if validhunks < len(self.hunks[fileno]):
 429         if self._match_file_hunks(filename, self.hunks[fileno]):
 430           warning("already patched  %s" % filename)
 431         else:
 432           warning("source file is different - %s" % filename)
 433       if canpatch:
 434         backupname = filename+".orig"
 435         if exists(backupname):
 436           warning("can't backup original file to %s - aborting" % backupname)
 437         else:
 438           import shutil
 439           shutil.move(filename, backupname)
 440           if self.write_hunks(backupname, filename, self.hunks[fileno]):
 441             info("successfully patched %s" % filename)
 442             unlink(backupname)
 443           else:
 444             warning("error patching file %s" % filename)
 445             shutil.copy(filename, filename+".invalid")
 446             warning("invalid version is saved to %s" % filename+".invalid")
 447             # todo: proper rejects
 448             shutil.move(backupname, filename)
 449
 450     # todo: check for premature eof
 451
 452
 453   def can_patch(self, filename):
 454     """ Check if specified filename can be patched. Returns None if file can
 455     not be found among source filenames. False if patch can not be applied
 456     clearly. True otherwise.
 457
 458     :returns: True, False or None
 459     """
 460     idx = self._get_file_idx(filename, source=True)
 461     if idx == None:
 462       return None
 463     return self._match_file_hunks(filename, self.hunks[idx])
 464
 465
 466   def _match_file_hunks(self, filepath, hunks):
 467     matched = True
 468     fp = open(abspath(filepath))
 469
 470     class NoMatch(Exception):
 471       pass
 472
 473     lineno = 1
 474     line = fp.readline()
 475     hno = None
 476     try:
 477       for hno, h in enumerate(hunks):
 478         # skip to first line of the hunk
 479         while lineno < h.starttgt:
 480           if not len(line): # eof
 481             debug("check failed - premature eof before hunk: %d" % (hno+1))
 482             raise NoMatch
 483           line = fp.readline()
 484           lineno += 1
 485         for hline in h.text:
 486           if hline.startswith("-"):
 487             continue
 488           if not len(line):
 489             debug("check failed - premature eof on hunk: %d" % (hno+1))
 490             # todo: \ No newline at the end of file
 491             raise NoMatch
 492           if line.rstrip("\r\n") != hline[1:].rstrip("\r\n"):
 493             debug("file is not patched - failed hunk: %d" % (hno+1))
 494             raise NoMatch
 495           line = fp.readline()
 496           lineno += 1
 497
 498     except NoMatch:
 499       matched = False
 500       # todo: display failed hunk, i.e. expected/found
 501
 502     fp.close()
 503     return matched
 504
 505
 506   def patch_stream(self, instream, hunks):
 507     """ Generator that yields stream patched with hunks iterable
 508
 509         Converts lineends in hunk lines to the best suitable format
 510         autodetected from input
 511     """
 512
 513     # todo: At the moment substituted lineends may not be the same
 514     #       at the start and at the end of patching. Also issue a
 515     #       warning/throw about mixed lineends (is it really needed?)
 516
 517     hunks = iter(hunks)
 518
 519     srclineno = 1
 520
 521     lineends = {'\n':0, '\r\n':0, '\r':0}
 522     def get_line():
 523       """
 524       local utility function - return line from source stream
 525       collecting line end statistics on the way
 526       """
 527       line = instream.readline()
 528         # 'U' mode works only with text files
 529       if line.endswith("\r\n"):
 530         lineends["\r\n"] += 1
 531       elif line.endswith("\n"):
 532         lineends["\n"] += 1
 533       elif line.endswith("\r"):
 534         lineends["\r"] += 1
 535       return line
 536
 537     for hno, h in enumerate(hunks):
 538       debug("hunk %d" % (hno+1))
 539       # skip to line just before hunk starts
 540       while srclineno < h.startsrc:
 541         yield get_line()
 542         srclineno += 1
 543
 544       for hline in h.text:
 545         # todo: check \ No newline at the end of file
 546         if hline.startswith("-") or hline.startswith("\\"):
 547           get_line()
 548           srclineno += 1
 549           continue
 550         else:
 551           if not hline.startswith("+"):
 552             get_line()
 553             srclineno += 1
 554           line2write = hline[1:]
 555           # detect if line ends are consistent in source file
 556           if sum([bool(lineends[x]) for x in lineends]) == 1:
 557             newline = [x for x in lineends if lineends[x] != 0][0]
 558             yield line2write.rstrip("\r\n")+newline
 559           else: # newlines are mixed
 560             yield line2write
 561
 562     for line in instream:
 563       yield line
 564
 565
 566   def write_hunks(self, srcname, tgtname, hunks):
 567     src = open(srcname, "r")
 568     tgt = open(tgtname, "w")
 569
 570     debug("processing target file %s" % tgtname)
 571
 572     tgt.writelines(self.patch_stream(src, hunks))
 573
 574     tgt.close()
 575     src.close()
 576     return True
 577
 578
 579   def _get_file_idx(self, filename, source=None):
 580     """ Detect index of given filename within patch.
 581
 582         :param filename:
 583         :param source: search filename among sources (True),
 584                        targets (False), or both (None)
 585         :returns: int or None
 586     """
 587     filename = abspath(filename)
 588     if source == True or source == None:
 589       for i,fnm in enumerate(self.source):
 590         if filename == abspath(fnm):
 591           return i
 592     if source == False or source == None:
 593       for i,fnm in enumerate(self.target):
 594         if filename == abspath(fnm):
 595           return i
 596
 597
 598
 599
 600 if __name__ == "__main__":
 601   from optparse import OptionParser
 602   from os.path import exists
 603   import sys
 604
 605   opt = OptionParser(usage="%prog [options] unipatch-file", version="python-patch %s" % __version__)
 606   opt.add_option("-d", "--debug", action="store_true", dest="debugmode", help="Print debugging messages")
 607   opt.add_option("-q", "--quiet", action="store_true", dest="quiet", help="Only print messages on warning/error")
 608   (options, args) = opt.parse_args()
 609
 610   if not args:
 611     opt.print_version()
 612     opt.print_help()
 613     sys.exit()
 614   debugmode = options.debugmode
 615   patchfile = args[0]
 616   if not exists(patchfile) or not isfile(patchfile):
 617     sys.exit("patch file does not exist - %s" % patchfile)
 618
 619
 620   if debugmode:
 621     loglevel = logging.DEBUG
 622     logformat = "%(levelname)8s %(message)s"
 623   elif options.quiet:
 624     loglevel = logging.WARN
 625     logformat = "%(message)s"
 626   else:
 627     loglevel = logging.INFO
 628     logformat = "%(message)s"
 629   logger.setLevel(loglevel)
 630   loghandler.setFormatter(logging.Formatter(logformat))
 631
 632
 633
 634   patch = fromfile(patchfile)
 635   #pprint(patch)
 636   patch.apply()
 637
 638   # todo: document and test line ends handling logic - patch.py detects proper line-endings
 639   #       for inserted hunks and issues a warning if patched file has incosistent line ends