1 '''This module contains a parser which understands unified diff results'''
7 class LookAhead(object):
8 '''Iterable but can also push back'''
9 def __init__(self, iterable):
10 self.iterable = iterable
def push_back(self, token):
    '''Put token onto this object's push-back stack.'''
    pending = self.stack
    pending.append(token)
20 return self.stack.pop()
21 return self.iterable.next()
28 class MessageParser(object):
29 '''Message in diff result. This class is an abstract class. All its
30 children should implement its interface:
33 Method: parse(self, line, match)
36 # it should be implemented by subclasses
def parse(self, line, mres):
    '''Build the message for a matched line; subclasses must override this.

    match() calls this with the line it was given and the result of
    PATTERN.match(line). The base implementation always raises
    NotImplementedError.
    '''
    raise NotImplementedError()
def match(self, line):
    '''Test line against PATTERN and parse it when it matches.

    Returns whatever self.parse(line, match_result) returns, or None
    when PATTERN does not match line.
    '''
    found = self.PATTERN.match(line)
    if not found:
        return None
    return self.parse(line, found)
49 class OnlyInOneSide(MessageParser):
51 Only in img2/root/home/tizen: .bash_profile
54 PATTERN = re.compile(r'Only in (.*?): (.*)')
56 def parse(self, line, match):
57 '''Return the concrete message'''
58 side = 'left' if match.group(1).startswith('img1/') else 'right'
59 filename = os.path.join(match.group(1), match.group(2))
62 'filetype': 'Only in %s side' % side,
69 class SpecialFile(MessageParser):
71 File img1/partx/p2/dev/full is a character special file while file
72 img2/partx/p2/dev/full is a character special file
75 PATTERN = re.compile(r'File (.*?) is a (.*) while file (.*?) is a (.*)')
77 def parse(self, line, match):
78 '''Return the concrete message'''
79 fromfile, tofile = match.group(1), match.group(3)
82 'filetype': match.group(2),
83 'message': line[:-1], # strip the last \n
90 class BinaryFile(MessageParser):
92 Binary files img1/partx/p2/var/lib/random-seed and
93 img2/partx/p2/var/lib/random-seed differ
96 PATTERN = re.compile(r'Binary files (.*?) and (.*?) differ')
98 def parse(self, line, match):
99 '''Return the concrete message'''
100 fromfile, tofile = match.group(1), match.group(2)
103 'filetype': 'Binary files',
104 'message': line[:-1], # strip the last \n
105 'fromfile': fromfile,
107 'filename': fromfile,
# One instance of every module-level object that lists MessageParser
# among its direct bases (__bases__); the iteration order is that of
# globals().
MESSAGE_PARSERS = [
    obj()
    for name, obj in globals().items()
    if MessageParser in getattr(obj, '__bases__', ())
]
118 Message for files that can't be compared, such as binary or device files
122 def parse(cls, stream):
123 "Parse message text into dict"
125 for parser in MESSAGE_PARSERS:
126 data = parser.match(line)
129 stream.push_back(line)
133 return self['message']
136 class OneFileDiff(dict):
138 Diff result for one same file name in two sides
142 def parse(cls, stream):
143 '''Parse a patch which should contain the following parts:
146 Several sections, each of which consists of:
147 Range: start and count
148 Hunks: context and different text
151 diff -r -u /home/xxx/tmp/images/img1/partition_table.txt
152 /home/xxx/tmp/images/img2/partition_table.txt
153 --- img1/partition_tab.txt 2013-10-28 11:05:11.814220566 +0800
154 +++ img2/partition_tab.txt 2013-10-28 11:05:14.954220642 +0800
157 -Disk /home/xxx/tmp/images/192.raw: 3998237696B
158 +Disk /home/xxx/tmp/images/20.raw: 3998237696B
159 Sector size (logical/physical): 512B/512B
163 if not line.startswith('diff '):
164 stream.push_back(line)
167 startline = line[:-1]
168 cols = ('path', 'date', 'time', 'timezone')
170 def parse_header(line):
172 return dict(zip(cols, line.rstrip().split()[1:]))
174 fromfile = parse_header(stream.next())
175 tofile = parse_header(stream.next())
176 sections = cls._parse_sections(stream)
178 'type': 'onefilediff',
179 'startline': startline,
180 'sections': sections,
181 'fromfile': fromfile,
183 'filename': fromfile['path'],
187 "back to unified format"
188 header = '%(path)s\t%(date)s %(time)s %(timezone)s'
189 fromfile = '--- ' + (header % self['fromfile'])
190 tofile = '+++ ' + (header % self['tofile'])
def start_count(start, count):
    '''Format one side of a hunk range for an "@@" header.

    Returns just the start line when count is exactly 1, otherwise
    "start,count". A count of 0 (an empty side, e.g. "-0,0") must be
    written out explicitly, because a bare start is read back as
    count 1.
    '''
    return str(start) if count == 1 else '%d,%d' % (start, count)
197 for i in self['sections']:
198 sec = ['@@ -%s +%s @@' %
199 (start_count(*i['range']['delete']),
200 start_count(*i['range']['insert']))
203 typ, txt = j['type'], j['text']
205 sec.append(' ' + txt)
206 elif typ == 'delete':
207 sec.append('-' + txt)
208 elif typ == 'insert':
209 sec.append('+' + txt)
210 elif typ == 'no_newline_at_eof':
211 sec.append('\\' + txt)
214 sections.append('\n'.join(sec))
215 return '\n'.join([self['startline'],
222 def _parse_sections(cls, stream):
223 '''Range and Hunks'''
226 if not line.startswith('@@ '):
227 stream.push_back(line)
230 range_ = cls._parse_range(line)
231 hunks = cls._parse_hunks(stream)
232 sections.append({'range': range_,
238 def _parse_range(cls, line):
239 '''Start and Count'''
def parse_start_count(chars):
    '''Split a range token such as "-3,4" or "+5" into (start, count).

    The first character (the sign) is dropped. The count is omitted
    from the token when it is 1, so a token without a comma yields
    count 1.
    '''
    fields = chars[1:].split(',')
    start = fields[0]
    count = fields[1] if len(fields) > 1 else '1'
    return int(start), int(count)
245 _, delete, insert, _ = line.split()
247 'delete': parse_start_count(delete),
248 'insert': parse_start_count(insert),
252 def _parse_hunks(cls, stream):
256 if line.startswith(' '):
258 elif line.startswith('-'):
260 elif line.startswith('+'):
262 elif line.startswith('\\ No newline at end of file'):
263 type_ = 'no_newline_at_eof'
265 stream.push_back(line)
267 text = line[1:-1] # remove the last \n
268 hunks.append({'type': type_, 'text': text})
274 Unified diff result parser
275 Reference: http://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html#Detailed-Unified # flake8: noqa
278 stream = LookAhead(stream)
281 one = Message.parse(stream) or \
282 OneFileDiff.parse(stream)
283 except StopIteration:
292 except StopIteration:
293 # one equals None means the stream hasn't stopped but no parser can
294 # understand the input. If we are here there must be a bug
295 # in the previous parsing logic
296 raise Exception('Unknown error in parsing diff output')
298 print >> sys.stderr, '[WARN] Unknown diff output:', line,