1 """ Patch utility to apply unified diffs
3 Brute-force line-by-line non-recursive parsing
5 Copyright (c) 2008-2010 anatoly techtonik
6 Available under the terms of MIT license
8 NOTE: This version has been patched by Alex Stewart <alex@foogod.com> for
9 Python 3.x support and other misc fixups.
11 Project home: http://code.google.com/p/python-patch/
14 $Id: patch.py 92 2010-07-02 06:04:57Z techtonik $
15 $HeadURL: http://python-patch.googlecode.com/svn/trunk/patch.py $
18 __author__ = "techtonik.rainforce.org"
19 __version__ = "10.04-2.pAS1"
24 from logging import debug, info, warning
28 # cStringIO doesn't support unicode in 2.5
29 from StringIO import StringIO
31 # the StringIO class lives in the 'io' module in 3.x
32 from io import StringIO
34 from os.path import exists, isfile, abspath
39 if sys.version_info >= (3,):
40 # Open files with universal newline support but no newline translation (3.x)
41 def open(filename, mode='r'):
42 return _open(filename, mode, newline='')
44 # Open files with universal newline support but no newline translation (2.x)
45 def open(filename, mode='r'):
46 return _open(filename, mode + 'b')
48 # Python 3.x has changed iter.next() to be next(iter) instead, so for
49 # backwards compatibility, we'll just define a next() function under 2.x
54 #------------------------------------------------
55 # Logging is controlled by "python_patch" logger
59 logger = logging.getLogger("python_patch")
60 loghandler = logging.StreamHandler()
61 logger.addHandler(loghandler)
65 warning = logger.warning
67 # If called as a library, don't log info/debug messages by default.
68 logger.setLevel(logging.WARN)
70 #------------------------------------------------
72 # constants for patch types
74 DIFF = PLAIN = "plain"
75 HG = MERCURIAL = "mercurial"
76 SVN = SUBVERSION = "svn"
79 def fromfile(filename):
80 """ Parse patch file and return Patch() object
82 info("reading patch from file %s" % filename)
83 fp = open(filename, "r")
90 """ Parse text string and return Patch() object
92 return Patch( StringIO(s) )
96 class HunkInfo(object):
97 """ Parsed hunk data container (hunk starts with @@ -R +R @@) """
100 self.startsrc=None #: line count starts with 1
108 return copy.copy(self)
110 # def apply(self, estream):
111 # """ write hunk data into enumerable stream
112 # return strings one by one until hunk is
115 # enumerable stream are tuples (lineno, line)
116 # where lineno starts with 0
124 def __init__(self, stream=None):
126 # define Patch data members
127 # table with a row for every source file
129 #: list of source filenames
132 #: list of lists of hunks
134 #: file endings statistics for every hunk
136 #: headers for each file
139 #: patch type - one of constants
146 return copy.copy(self)
148 def parse(self, stream):
149 """ parse unified diff """
157 # define possible file regions that will direct the parser flow
158 headscan = False # scanning header before the patch body
159 filenames = False # lines starting with --- and +++
161 hunkhead = False # @@ -R +R @@ sequence
163 hunkskip = False # skipping invalid hunk mode
166 lineends = dict(lf=0, crlf=0, cr=0)
168 nexthunkno = 0 #: even if index starts with 0 user messages number hunks from 1
170 # hunkinfo holds parsed values, hunkactual - calculated
171 hunkinfo = HunkInfo()
172 hunkactual = dict(linessrc=None, linestgt=None)
175 fe = enumerate(stream)
176 for lineno, line in fe:
182 while not line.startswith("--- "):
184 lineno, line = next(fe)
185 except StopIteration:
186 # this is actually a loop exit
188 self.header.append(header)
191 # switch to filenames state
194 # hunkskip and hunkbody code skipped until definition of hunkhead is parsed
197 if re.match(r"^[- \+\\]", line):
198 # gather stats about line endings
199 if line.endswith("\r\n"):
200 self.hunkends[nextfileno-1]["crlf"] += 1
201 elif line.endswith("\n"):
202 self.hunkends[nextfileno-1]["lf"] += 1
203 elif line.endswith("\r"):
204 self.hunkends[nextfileno-1]["cr"] += 1
206 if line.startswith("-"):
207 hunkactual["linessrc"] += 1
208 elif line.startswith("+"):
209 hunkactual["linestgt"] += 1
210 elif not line.startswith("\\"):
211 hunkactual["linessrc"] += 1
212 hunkactual["linestgt"] += 1
213 hunkinfo.text.append(line)
214 # todo: handle \ No newline cases
216 warning("invalid hunk no.%d at %d for target file %s" % (nexthunkno, lineno+1, self.target[nextfileno-1]))
217 # add hunk status node
218 self.hunks[nextfileno-1].append(hunkinfo.copy())
219 self.hunks[nextfileno-1][nexthunkno-1]["invalid"] = True
220 # switch to hunkskip state
224 # check exit conditions
225 if hunkactual["linessrc"] > hunkinfo.linessrc or hunkactual["linestgt"] > hunkinfo.linestgt:
226 warning("extra hunk no.%d lines at %d for target %s" % (nexthunkno, lineno+1, self.target[nextfileno-1]))
227 # add hunk status node
228 self.hunks[nextfileno-1].append(hunkinfo.copy())
229 self.hunks[nextfileno-1][nexthunkno-1]["invalid"] = True
230 # switch to hunkskip state
233 elif hunkinfo.linessrc == hunkactual["linessrc"] and hunkinfo.linestgt == hunkactual["linestgt"]:
234 self.hunks[nextfileno-1].append(hunkinfo.copy())
235 # switch to hunkskip state
239 # detect mixed windows/unix line ends
240 ends = self.hunkends[nextfileno-1]
241 if ((ends["cr"]!=0) + (ends["crlf"]!=0) + (ends["lf"]!=0)) > 1:
242 warning("inconsistent line ends in patch hunks for %s" % self.source[nextfileno-1])
244 debuglines = dict(ends)
245 debuglines.update(file=self.target[nextfileno-1], hunk=nexthunkno)
246 debug("crlf: %(crlf)d lf: %(lf)d cr: %(cr)d\t - file: %(file)s hunk: %(hunk)d" % debuglines)
249 match = re.match("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?", line)
251 # switch to hunkhead state
254 elif line.startswith("--- "):
255 # switch to filenames state
258 if debugmode and len(self.source) > 0:
259 debug("- %2d hunks for %s" % (len(self.hunks[nextfileno-1]), self.source[nextfileno-1]))
262 if line.startswith("--- "):
263 if nextfileno in self.source:
264 warning("skipping invalid patch for %s" % self.source[nextfileno])
265 del self.source[nextfileno]
266 # double source filename line is encountered
267 # attempt to restart from this second line
268 re_filename = "^--- ([^\t]+)"
269 match = re.match(re_filename, line)
270 # todo: support spaces in filenames
272 self.source.append(match.group(1).strip())
274 warning("skipping invalid filename at line %d" % lineno)
275 # switch back to headscan state
278 elif not line.startswith("+++ "):
279 if nextfileno in self.source:
280 warning("skipping invalid patch with no target for %s" % self.source[nextfileno])
281 del self.source[nextfileno]
283 # this should be unreachable
284 warning("skipping invalid target patch")
288 if nextfileno in self.target:
289 warning("skipping invalid patch - double target at line %d" % lineno)
290 del self.source[nextfileno]
291 del self.target[nextfileno]
293 # double target filename line is encountered
294 # switch back to headscan state
298 re_filename = "^\+\+\+ ([^\t]+)"
299 match = re.match(re_filename, line)
301 warning("skipping invalid patch - no target filename at line %d" % lineno)
302 # switch back to headscan state
306 self.target.append(match.group(1).strip())
308 # switch to hunkhead state
312 self.hunks.append([])
313 self.hunkends.append(lineends.copy())
317 match = re.match("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?", line)
319 if nextfileno-1 not in self.hunks:
320 warning("skipping invalid patch with no hunks for file %s" % self.target[nextfileno-1])
321 # switch to headscan state
326 # switch to headscan state
330 hunkinfo.startsrc = int(match.group(1))
331 hunkinfo.linessrc = 1
332 if match.group(3): hunkinfo.linessrc = int(match.group(3))
333 hunkinfo.starttgt = int(match.group(4))
334 hunkinfo.linestgt = 1
335 if match.group(6): hunkinfo.linestgt = int(match.group(6))
336 hunkinfo.invalid = False
339 hunkactual["linessrc"] = hunkactual["linestgt"] = 0
341 # switch to hunkbody state
348 warning("patch file incomplete - %s" % filename)
351 # duplicated message when an eof is reached
352 if debugmode and len(self.source) > 0:
353 debug("- %2d hunks for %s" % (len(self.hunks[nextfileno-1]), self.source[nextfileno-1]))
355 info("total files: %d total hunks: %d" % (len(self.source), sum([len(hset) for hset in self.hunks])))
359 """ apply parsed patch """
361 total = len(self.source)
362 for fileno, filename in enumerate(self.source):
365 if not exists(f2patch):
366 f2patch = self.target[fileno]
367 if not exists(f2patch):
368 warning("source/target file does not exist\n--- %s\n+++ %s" % (filename, f2patch))
370 if not isfile(f2patch):
371 warning("not a file - %s" % f2patch)
375 info("processing %d/%d:\t %s" % (fileno+1, total, filename))
377 # validate before patching
378 f2fp = open(filename)
380 hunk = self.hunks[fileno][hunkno]
385 for lineno, line in enumerate(f2fp):
386 if lineno+1 < hunk.startsrc:
388 elif lineno+1 == hunk.startsrc:
389 hunkfind = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " -"]
390 hunkreplace = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " +"]
394 # todo \ No newline at end of file
396 # check hunks in source file
397 if lineno+1 < hunk.startsrc+len(hunkfind)-1:
398 if line.rstrip("\r\n") == hunkfind[hunklineno]:
401 debug("hunk no.%d doesn't match source file %s" % (hunkno+1, filename))
402 # file may be already patched, but we will check other hunks anyway
404 if hunkno < len(self.hunks[fileno]):
405 hunk = self.hunks[fileno][hunkno]
410 # check if processed line is the last line
411 if lineno+1 == hunk.startsrc+len(hunkfind)-1:
412 debug("file %s hunk no.%d -- is ready to be patched" % (filename, hunkno+1))
415 if hunkno < len(self.hunks[fileno]):
416 hunk = self.hunks[fileno][hunkno]
418 if validhunks == len(self.hunks[fileno]):
423 if hunkno < len(self.hunks[fileno]):
424 warning("premature end of source file %s at hunk %d" % (filename, hunkno+1))
428 if validhunks < len(self.hunks[fileno]):
429 if self._match_file_hunks(filename, self.hunks[fileno]):
430 warning("already patched %s" % filename)
432 warning("source file is different - %s" % filename)
434 backupname = filename+".orig"
435 if exists(backupname):
436 warning("can't backup original file to %s - aborting" % backupname)
439 shutil.move(filename, backupname)
440 if self.write_hunks(backupname, filename, self.hunks[fileno]):
441 info("successfully patched %s" % filename)
444 warning("error patching file %s" % filename)
445 shutil.copy(filename, filename+".invalid")
446 warning("invalid version is saved to %s" % filename+".invalid")
447 # todo: proper rejects
448 shutil.move(backupname, filename)
450 # todo: check for premature eof
453 def can_patch(self, filename):
454 """ Check if specified filename can be patched. Returns None if file can
455 not be found among source filenames. False if patch can not be applied
456 cleanly. True otherwise.
458 :returns: True, False or None
460 idx = self._get_file_idx(filename, source=True)
463 return self._match_file_hunks(filename, self.hunks[idx])
466 def _match_file_hunks(self, filepath, hunks):
468 fp = open(abspath(filepath))
470 class NoMatch(Exception):
477 for hno, h in enumerate(hunks):
478 # skip to first line of the hunk
479 while lineno < h.starttgt:
480 if not len(line): # eof
481 debug("check failed - premature eof before hunk: %d" % (hno+1))
486 if hline.startswith("-"):
489 debug("check failed - premature eof on hunk: %d" % (hno+1))
490 # todo: \ No newline at the end of file
492 if line.rstrip("\r\n") != hline[1:].rstrip("\r\n"):
493 debug("file is not patched - failed hunk: %d" % (hno+1))
500 # todo: display failed hunk, i.e. expected/found
506 def patch_stream(self, instream, hunks):
507 """ Generator that yields stream patched with hunks iterable
509 Converts lineends in hunk lines to the best suitable format
510 autodetected from input
513 # todo: At the moment substituted lineends may not be the same
514 # at the start and at the end of patching. Also issue a
515 # warning/throw about mixed lineends (is it really needed?)
521 lineends = {'\n':0, '\r\n':0, '\r':0}
524 local utility function - return line from source stream
525 collecting line end statistics on the way
527 line = instream.readline()
528 # 'U' mode works only with text files
529 if line.endswith("\r\n"):
530 lineends["\r\n"] += 1
531 elif line.endswith("\n"):
533 elif line.endswith("\r"):
537 for hno, h in enumerate(hunks):
538 debug("hunk %d" % (hno+1))
539 # skip to line just before hunk starts
540 while srclineno < h.startsrc:
545 # todo: check \ No newline at the end of file
546 if hline.startswith("-") or hline.startswith("\\"):
551 if not hline.startswith("+"):
554 line2write = hline[1:]
555 # detect if line ends are consistent in source file
556 if sum([bool(lineends[x]) for x in lineends]) == 1:
557 newline = [x for x in lineends if lineends[x] != 0][0]
558 yield line2write.rstrip("\r\n")+newline
559 else: # newlines are mixed
562 for line in instream:
566 def write_hunks(self, srcname, tgtname, hunks):
567 src = open(srcname, "r")
568 tgt = open(tgtname, "w")
570 debug("processing target file %s" % tgtname)
572 tgt.writelines(self.patch_stream(src, hunks))
579 def _get_file_idx(self, filename, source=None):
580 """ Detect index of given filename within patch.
583 :param source: search filename among sources (True),
584 targets (False), or both (None)
585 :returns: int or None
587 filename = abspath(filename)
588 if source == True or source == None:
589 for i,fnm in enumerate(self.source):
590 if filename == abspath(fnm):
592 if source == False or source == None:
593 for i,fnm in enumerate(self.target):
594 if filename == abspath(fnm):
600 if __name__ == "__main__":
601 from optparse import OptionParser
602 from os.path import exists
605 opt = OptionParser(usage="%prog [options] unipatch-file", version="python-patch %s" % __version__)
606 opt.add_option("-d", "--debug", action="store_true", dest="debugmode", help="Print debugging messages")
607 opt.add_option("-q", "--quiet", action="store_true", dest="quiet", help="Only print messages on warning/error")
608 (options, args) = opt.parse_args()
614 debugmode = options.debugmode
616 if not exists(patchfile) or not isfile(patchfile):
617 sys.exit("patch file does not exist - %s" % patchfile)
621 loglevel = logging.DEBUG
622 logformat = "%(levelname)8s %(message)s"
624 loglevel = logging.WARN
625 logformat = "%(message)s"
627 loglevel = logging.INFO
628 logformat = "%(message)s"
629 logger.setLevel(loglevel)
630 loghandler.setFormatter(logging.Formatter(logformat))
634 patch = fromfile(patchfile)
638 # todo: document and test line ends handling logic - patch.py detects proper line-endings
639 # for inserted hunks and issues a warning if patched file has inconsistent line ends