imgdiff/unified.py

'''This module contains a parser which understands unified diff output'''
   2 import os
   3 import re
   4 import sys
   5
   6
   7 class LookAhead(object):
   8     '''Iterable but can also push back'''
   9     def __init__(self, iterable):
  10         self.iterable = iterable
  11         self.stack = []
  12
  13     def push_back(self, token):
  14         "push token back to this iterable"
  15         self.stack.append(token)
  16
  17     def next(self):
  18         "next token"
  19         if self.stack:
  20             return self.stack.pop()
  21         return self.iterable.next()
  22
  23     def __iter__(self):
  24         "iterable"
  25         return self
  26
  27
  28 class MessageParser(object):
  29     '''Message in diff result. This class is a abstract class. All its
  30     children should implement its interface:
  31
  32     Attr: self.PATTERN
  33     Method: parse(self, line, match)
  34     '''
  35
  36     # it should be implemented by subclasses
  37     PATTERN = None
  38
  39     def parse(self, line, mres):
  40         "it should be implemented by subclass"
  41         raise NotImplementedError
  42
  43     def match(self, line):
  44         '''determine whether the line is a message'''
  45         mres = self.PATTERN.match(line)
  46         return self.parse(line, mres) if mres else None
  47
  48
  49 class OnlyInOneSide(MessageParser):
  50     '''Message like this:
  51     Only in img2/root/home/tizen: .bash_profile
  52     '''
  53
  54     PATTERN = re.compile(r'Only in (.*?): (.*)')
  55
  56     def parse(self, line, match):
  57         '''Return the concrete message'''
  58         side = 'left' if match.group(1).startswith('img1/') else 'right'
  59         filename = os.path.join(match.group(1), match.group(2))
  60         return {
  61             'type': 'message',
  62             'filetype': 'Only in %s side' % side,
  63             'message': line[:-1],
  64             'filename': filename,
  65             'side': side,
  66             }
  67
  68
  69 class SpecialFile(MessageParser):
  70     '''Message like this:
  71     File img1/partx/p2/dev/full is a character special file while file
  72     img2/partx/p2/dev/full is a character special file
  73     '''
  74
  75     PATTERN = re.compile(r'File (.*?) is a (.*) while file (.*?) is a (.*)')
  76
  77     def parse(self, line, match):
  78         '''Return the concrete message'''
  79         fromfile, tofile = match.group(1), match.group(3)
  80         return {
  81             'type': 'message',
  82             'filetype': match.group(2),
  83             'message': line[:-1],  # strip the last \n
  84             'fromfile': fromfile,
  85             'tofile': tofile,
  86             'filename': fromfile,
  87             }
  88
  89
  90 class BinaryFile(MessageParser):
  91     '''Message like this:
  92     Binary files img1/partx/p2/var/lib/random-seed and
  93     img2/partx/p2/var/lib/random-seed differ
  94     '''
  95
  96     PATTERN = re.compile(r'Binary files (.*?) and (.*?) differ')
  97
  98     def parse(self, line, match):
  99         '''Return the concrete message'''
 100         fromfile, tofile = match.group(1), match.group(2)
 101         return {
 102             'type': 'message',
 103             'filetype': 'Binary files',
 104             'message': line[:-1],  # strip the last \n
 105             'fromfile': fromfile,
 106             'tofile': tofile,
 107             'filename': fromfile,
 108             }
 109
 110
# One instance of every module-level object whose direct bases include
# MessageParser. The list is built once, when this statement runs, so only
# names already bound in the module at that point are considered. `name` is
# not used by the expression.
MESSAGE_PARSERS = [obj() for name, obj in globals().items()
                   if hasattr(obj, '__bases__') and
                   MessageParser in obj.__bases__]
 114
 115
 116 class Message(dict):
 117     """
 118     Message that file can't be compare, such as binary, device files
 119     """
 120
 121     @classmethod
 122     def parse(cls, stream):
 123         "Parse message text into dict"
 124         line = stream.next()
 125         for parser in MESSAGE_PARSERS:
 126             data = parser.match(line)
 127             if data:
 128                 return cls(data)
 129         stream.push_back(line)
 130
 131     def __str__(self):
 132         "to message text"
 133         return self['message']
 134
 135
 136 class OneFileDiff(dict):
 137     """
 138     Diff result for one same file name in two sides
 139     """
 140
 141     @classmethod
 142     def parse(cls, stream):
 143         '''Parse a patch which should contains following parts:
 144         Start line
 145         Two lines header
 146         Serveral sections which of each is consist of:
 147             Range: start and count
 148             Hunks: context and different text
 149
 150         Example:
 151         diff -r -u /home/xxx/tmp/images/img1/partition_table.txt
 152             /home/xxx/tmp/images/img2/partition_table.txt
 153         --- img1/partition_tab.txt      2013-10-28 11:05:11.814220566 +0800
 154         +++ img2/partition_tab.txt      2013-10-28 11:05:14.954220642 +0800
 155         @@ -1,5 +1,5 @@
 156          Model:  (file)
 157         -Disk /home/xxx/tmp/images/192.raw: 3998237696B
 158         +Disk /home/xxx/tmp/images/20.raw: 3998237696B
 159          Sector size (logical/physical): 512B/512B
 160          Partition Table: gpt
 161         '''
 162         line = stream.next()
 163         if not line.startswith('diff '):
 164             stream.push_back(line)
 165             return
 166
 167         startline = line[:-1]
 168         cols = ('path', 'date', 'time', 'timezone')
 169
 170         def parse_header(line):
 171             '''header'''
 172             return dict(zip(cols, line.rstrip().split()[1:]))
 173
 174         fromfile = parse_header(stream.next())
 175         tofile = parse_header(stream.next())
 176         sections = cls._parse_sections(stream)
 177         return cls({
 178             'type': 'onefilediff',
 179             'startline': startline,
 180             'sections': sections,
 181             'fromfile': fromfile,
 182             'tofile': tofile,
 183             'filename': fromfile['path'],
 184             })
 185
 186     def __str__(self):
 187         "back to unified format"
 188         header = '%(path)s\t%(date)s %(time)s %(timezone)s'
 189         fromfile = '--- ' + (header % self['fromfile'])
 190         tofile = '+++ ' + (header % self['tofile'])
 191         sections = []
 192
 193         def start_count(start, count):
 194             "make start count string"
 195             return str(start) if count <= 1 else '%d,%d' % (start, count)
 196
 197         for i in self['sections']:
 198             sec = ['@@ -%s +%s @@' %
 199                    (start_count(*i['range']['delete']),
 200                     start_count(*i['range']['insert']))
 201                    ]
 202             for j in i['hunks']:
 203                 typ, txt = j['type'], j['text']
 204                 if typ == 'context':
 205                     sec.append(' ' + txt)
 206                 elif typ == 'delete':
 207                     sec.append('-' + txt)
 208                 elif typ == 'insert':
 209                     sec.append('+' + txt)
 210                 elif typ == 'no_newline_at_eof':
 211                     sec.append('\\' + txt)
 212                 else:
 213                     sec.append(txt)
 214             sections.append('\n'.join(sec))
 215         return '\n'.join([self['startline'],
 216                           fromfile,
 217                           tofile,
 218                           '\n'.join(sections),
 219                           ])
 220
 221     @classmethod
 222     def _parse_sections(cls, stream):
 223         '''Range and Hunks'''
 224         sections = []
 225         for line in stream:
 226             if not line.startswith('@@ '):
 227                 stream.push_back(line)
 228                 return sections
 229
 230             range_ = cls._parse_range(line)
 231             hunks = cls._parse_hunks(stream)
 232             sections.append({'range': range_,
 233                              'hunks': hunks,
 234                              })
 235         return sections
 236
 237     @classmethod
 238     def _parse_range(cls, line):
 239         '''Start and Count'''
 240         def parse_start_count(chars):
 241             '''Count ommit when it's 1'''
 242             start, count = (chars[1:] + ',1').split(',')[:2]
 243             return int(start), int(count)
 244
 245         _, delete, insert, _ = line.split()
 246         return {
 247             'delete': parse_start_count(delete),
 248             'insert': parse_start_count(insert),
 249             }
 250
 251     @classmethod
 252     def _parse_hunks(cls, stream):
 253         '''Hunks'''
 254         hunks = []
 255         for line in stream:
 256             if line.startswith(' '):
 257                 type_ = 'context'
 258             elif line.startswith('-'):
 259                 type_ = 'delete'
 260             elif line.startswith('+'):
 261                 type_ = 'insert'
 262             elif line.startswith('\\ No newline at end of file'):
 263                 type_ = 'no_newline_at_eof'
 264             else:
 265                 stream.push_back(line)
 266                 break
 267             text = line[1:-1]  # remove the last \n
 268             hunks.append({'type': type_, 'text': text})
 269         return hunks
 270
 271
 272 def parse(stream):
 273     '''
 274     Unified diff result parser
 275     Reference: http://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html#Detailed-Unified  # flake8: noqa
 276
 277     '''
 278     stream = LookAhead(stream)
 279     while 1:
 280         try:
 281             one = Message.parse(stream) or \
 282                 OneFileDiff.parse(stream)
 283         except StopIteration:
 284             break
 285
 286         if one:
 287             yield one
 288             continue
 289
 290         try:
 291             line = stream.next()
 292         except StopIteration:
 293             # one equals None means steam hasn't stop but no one can
 294             # understand the input. If we are here there must be bug
 295             # in previous parsing logic
 296             raise Exception('Unknown error in parsing diff output')
 297         else:
 298             print >> sys.stderr, '[WARN] Unknown diff output:', line,