giscanner/annotationparser.py

   1 # -*- Mode: Python -*-
   2 # GObject-Introspection - a framework for introspecting GObject libraries
   3 # Copyright (C) 2008-2010 Johan Dahlin
   4 # Copyright (C) 2012 Dieter Verfaillie <dieterv@optionexplicit.be>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20 #
  21
  22
  23 # AnnotationParser - extract annotations from GTK-Doc comment blocks
  24
  25
  26 import re
  27
  28 from . import message
  29 from .odict import odict
  30
  31
  32 # GTK-Doc comment block parts
  33 PART_IDENTIFIER = 'identifier'
  34 PART_PARAMETERS = 'parameters'
  35 PART_DESCRIPTION = 'description'
  36 PART_TAGS = 'tags'
  37
  38 # Identifiers
  39 IDENTIFIER_SECTION = 'section'
  40 IDENTIFIER_SYMBOL = 'symbol'
  41 IDENTIFIER_PROPERTY = 'property'
  42 IDENTIFIER_SIGNAL = 'signal'
  43
  44 # Tags - annotations applied to comment blocks
  45 TAG_VFUNC = 'virtual'
  46 TAG_SINCE = 'since'
  47 TAG_STABILITY = 'stability'
  48 TAG_DEPRECATED = 'deprecated'
  49 TAG_RETURNS = 'returns'
  50 TAG_RETURNVALUE = 'return value'
  51 TAG_DESCRIPTION = 'description'
  52 TAG_ATTRIBUTES = 'attributes'
  53 TAG_RENAME_TO = 'rename to'
  54 TAG_TYPE = 'type'
  55 TAG_UNREF_FUNC = 'unref func'
  56 TAG_REF_FUNC = 'ref func'
  57 TAG_SET_VALUE_FUNC = 'set value func'
  58 TAG_GET_VALUE_FUNC = 'get value func'
  59 TAG_TRANSFER = 'transfer'
  60 TAG_VALUE = 'value'
  61 _ALL_TAGS = [TAG_VFUNC,
  62              TAG_SINCE,
  63              TAG_STABILITY,
  64              TAG_DEPRECATED,
  65              TAG_RETURNS,
  66              TAG_RETURNVALUE,
  67              TAG_DESCRIPTION,
  68              TAG_ATTRIBUTES,
  69              TAG_RENAME_TO,
  70              TAG_TYPE,
  71              TAG_UNREF_FUNC,
  72              TAG_REF_FUNC,
  73              TAG_SET_VALUE_FUNC,
  74              TAG_GET_VALUE_FUNC,
  75              TAG_TRANSFER,
  76              TAG_VALUE]
  77
  78 # Options - annotations for parameters and return values
  79 OPT_ALLOW_NONE = 'allow-none'
  80 OPT_ARRAY = 'array'
  81 OPT_ATTRIBUTE = 'attribute'
  82 OPT_CLOSURE = 'closure'
  83 OPT_DESTROY = 'destroy'
  84 OPT_ELEMENT_TYPE = 'element-type'
  85 OPT_FOREIGN = 'foreign'
  86 OPT_IN = 'in'
  87 OPT_INOUT = 'inout'
  88 OPT_INOUT_ALT = 'in-out'
  89 OPT_OUT = 'out'
  90 OPT_SCOPE = 'scope'
  91 OPT_TRANSFER = 'transfer'
  92 OPT_TYPE = 'type'
  93 OPT_SKIP = 'skip'
  94 OPT_CONSTRUCTOR = 'constructor'
  95 OPT_METHOD = 'method'
  96
  97 ALL_OPTIONS = [
  98     OPT_ALLOW_NONE,
  99     OPT_ARRAY,
 100     OPT_ATTRIBUTE,
 101     OPT_CLOSURE,
 102     OPT_DESTROY,
 103     OPT_ELEMENT_TYPE,
 104     OPT_FOREIGN,
 105     OPT_IN,
 106     OPT_INOUT,
 107     OPT_INOUT_ALT,
 108     OPT_OUT,
 109     OPT_SCOPE,
 110     OPT_TRANSFER,
 111     OPT_TYPE,
 112     OPT_SKIP,
 113     OPT_CONSTRUCTOR,
 114     OPT_METHOD]
 115
 116 # Array options - array specific annotations
 117 OPT_ARRAY_FIXED_SIZE = 'fixed-size'
 118 OPT_ARRAY_LENGTH = 'length'
 119 OPT_ARRAY_ZERO_TERMINATED = 'zero-terminated'
 120
 121 # Out options
 122 OPT_OUT_CALLER_ALLOCATES = 'caller-allocates'
 123 OPT_OUT_CALLEE_ALLOCATES = 'callee-allocates'
 124
 125 # Scope options
 126 OPT_SCOPE_ASYNC = 'async'
 127 OPT_SCOPE_CALL = 'call'
 128 OPT_SCOPE_NOTIFIED = 'notified'
 129
 130 # Transfer options
 131 OPT_TRANSFER_NONE = 'none'
 132 OPT_TRANSFER_CONTAINER = 'container'
 133 OPT_TRANSFER_FULL = 'full'
 134 OPT_TRANSFER_FLOATING = 'floating'
 135
 136
# The following regular expression programs are built to:
#  - match (or substitute) a single comment block line at a time;
#  - support LOCALE and UNICODE modes (although this remains untested).
#
# In these programs ``[^\S\n\r]`` stands for "one whitespace character
# that is neither a line feed nor a carriage return".

# Program matching the start of a comment block: a line holding only
# ``/**``, optionally surrounded by whitespace.
#
# Results in 0 symbolic groups.
COMMENT_START_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    /                                        # 1 forward slash character
    \*{2}                                    # exactly 2 asterisk characters
    [^\S\n\r]*                               # 0 or more whitespace characters
    $                                        # end
    ''',
    re.VERBOSE)
 153
# Program matching the end of a comment block. We need to take care
# of comment ends that aren't on their own line for legacy support
# reasons. See https://bugzilla.gnome.org/show_bug.cgi?id=689354
#
# Results in 1 symbolic group:
#   - group 1 = description (any text preceding the closing asterisk(s)
#               and slash on the same line; the empty string when the
#               comment end sits on its own line)
COMMENT_END_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<description>.*?)                     # description text
    [^\S\n\r]*                               # 0 or more whitespace characters
    \*+                                      # 1 or more asterisk characters
    /                                        # 1 forward slash character
    [^\S\n\r]*                               # 0 or more whitespace characters
    $                                        # end
    ''',
    re.VERBOSE)
 171
# Program matching the ' * ' at the beginning of every
# line inside a comment block.
#
# The program is only anchored at the start of the line, so it matches
# just the leading prefix and never the text that follows it.
#
# Results in 0 symbolic groups.
COMMENT_ASTERISK_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    \*                                       # 1 asterisk character
    [^\S\n\r]?                               # 0 or 1 whitespace characters. Careful,
                                             # removing more than 1 whitespace
                                             # character would break embedded
                                             # example program indentation
    ''',
    re.VERBOSE)
 186
# Program matching the indentation at the beginning of every
# line (stripped from the ' * ') inside a comment block.
#
# Results in 1 symbolic group:
#   - group 1 = indentation (the leading whitespace of the line, which
#               may be the empty string)
COMMENT_INDENTATION_RE = re.compile(r'''
    ^
    (?P<indentation>[^\S\n\r]*)              # 0 or more whitespace characters
    .*
    $
    ''',
    re.VERBOSE)
 199
# Program matching an empty line, i.e. a line that holds nothing
# but (optional) whitespace.
#
# Results in 0 symbolic groups.
EMPTY_LINE_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    $                                        # end
    ''',
    re.VERBOSE)
 209
# Program matching SECTION identifiers.
#
# Both the colon and the section name are optional as far as this
# program is concerned. When present, the section name starts with a
# word character and is followed by 1 or more non-whitespace characters.
#
# Results in 2 symbolic groups:
#   - group 1 = colon
#   - group 2 = section_name
SECTION_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    SECTION                                  # SECTION
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<colon>:?)                            # colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<section_name>\w\S+)?                 # section name
    [^\S\n\r]*                               # 0 or more whitespace characters
    $
    ''',
    re.VERBOSE)
 227
# Program matching symbol (function, constant, struct and enum) identifiers.
#
# The symbol name is made of word characters and hyphens and must end
# with a word character. The colon is optional as far as this program is
# concerned. Annotations are 0 or more parenthesized groups.
#
# Results in 3 symbolic groups:
#   - group 1 = symbol_name
#   - group 2 = colon
#   - group 3 = annotations
SYMBOL_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<symbol_name>[\w-]*\w)                # symbol name
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<colon>:?)                            # colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
    [^\S\n\r]*                               # 0 or more whitespace characters
    $                                        # end
    ''',
    re.VERBOSE)
 246
# Program matching property identifiers, written as a class name and a
# property name separated by a single colon.
#
# The trailing colon (group 3) is optional as far as this program is
# concerned. Annotations are 0 or more parenthesized groups.
#
# Results in 4 symbolic groups:
#   - group 1 = class_name
#   - group 2 = property_name
#   - group 3 = colon
#   - group 4 = annotations
PROPERTY_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<class_name>[\w]+)                    # class name
    [^\S\n\r]*                               # 0 or more whitespace characters
    :{1}                                     # required colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<property_name>[\w-]*\w)              # property name
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<colon>:?)                            # colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
    [^\S\n\r]*                               # 0 or more whitespace characters
    $                                        # end
    ''',
    re.VERBOSE)
 270
# Program matching signal identifiers, written as a class name and a
# signal name separated by exactly two colons.
#
# The trailing colon (group 3) is optional as far as this program is
# concerned. Annotations are 0 or more parenthesized groups.
#
# Results in 4 symbolic groups:
#   - group 1 = class_name
#   - group 2 = signal_name
#   - group 3 = colon
#   - group 4 = annotations
SIGNAL_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<class_name>[\w]+)                    # class name
    [^\S\n\r]*                               # 0 or more whitespace characters
    :{2}                                     # 2 required colons
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<signal_name>[\w-]*\w)                # signal name
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<colon>:?)                            # colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
    [^\S\n\r]*                               # 0 or more whitespace characters
    $                                        # end
    ''',
    re.VERBOSE)
 294
# Program matching parameters: an ``@`` character, the parameter name
# (or the literal ``...``), a required colon and then optional
# annotations, an optional second colon and an optional description.
#
# Results in 4 symbolic groups:
#   - group 1 = parameter_name
#   - group 2 = annotations
#   - group 3 = colon (the optional colon following the annotations)
#   - group 4 = description
PARAMETER_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    @                                        # @ character
    (?P<parameter_name>[\w-]*\w|\.\.\.)      # parameter name
    [^\S\n\r]*                               # 0 or more whitespace characters
    :{1}                                     # required colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
    (?P<colon>:?)                            # colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<description>.*?)                     # description
    [^\S\n\r]*                               # 0 or more whitespace characters
    $                                        # end
    ''',
    re.VERBOSE)
 318
# Program matching tags: one of the known tag names (matched case
# insensitively), a required colon and then optional annotations, an
# optional second colon and an optional description.
#
# Results in 4 symbolic groups:
#   - group 1 = tag_name
#   - group 2 = annotations
#   - group 3 = colon (the optional colon following the annotations)
#   - group 4 = description

# Alternation of all known tag names. Spaces inside tag names are
# backslash-escaped because re.VERBOSE ignores unescaped whitespace
# in the pattern.
_all_tags = '|'.join(_ALL_TAGS).replace(' ', '\\ ')
TAG_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<tag_name>''' + _all_tags + r''')     # tag name
    [^\S\n\r]*                               # 0 or more whitespace characters
    :{1}                                     # required colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
    (?P<colon>:?)                            # colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<description>.*?)                     # description
    [^\S\n\r]*                               # 0 or more whitespace characters
    $                                        # end
    ''',
    re.VERBOSE | re.IGNORECASE)
 342
# Program matching multiline annotation continuations.
# This is used on multiline parameters and tags (but not on the first line) to
# generate warnings about invalid annotations spanning multiple lines.
#
# Unlike in the parameter and tag programs, the colon following the
# annotations is required here.
#
# Results in 3 symbolic groups:
#   - group 1 = annotations
#   - group 2 = colon
#   - group 3 = description
MULTILINE_ANNOTATION_CONTINUATION_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
    (?P<colon>:)                             # colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<description>.*?)                     # description
    [^\S\n\r]*                               # 0 or more whitespace characters
    $                                        # end
    ''',
    re.VERBOSE)
 362
 363
 364 class DocBlock(object):
 365
 366     def __init__(self, name):
 367         self.name = name
 368         self.options = DocOptions()
 369         self.value = None
 370         self.tags = odict()
 371         self.comment = None
 372         self.params = odict()
 373         self.position = None
 374
 375     def __cmp__(self, other):
 376         return cmp(self.name, other.name)
 377
 378     def __repr__(self):
 379         return '<DocBlock %r %r>' % (self.name, self.options)
 380
 381     def get_tag(self, name):
 382         return self.tags.get(name)
 383
 384     def get_param(self, name):
 385         return self.params.get(name)
 386
 387     def to_gtk_doc(self):
 388         options = ''
 389         if self.options:
 390             options += ' '
 391             options += ' '.join('(%s)' % o for o in self.options)
 392         lines = [self.name]
 393         if 'SECTION' not in self.name:
 394             lines[0] += ':'
 395         lines[0] += options
 396         for param in self.params.values():
 397             lines.append(param.to_gtk_doc_param())
 398         if self.comment:
 399             lines.append('')
 400             for l in self.comment.split('\n'):
 401                 lines.append(l)
 402         if self.tags:
 403             lines.append('')
 404             for tag in self.tags.values():
 405                 lines.append(tag.to_gtk_doc_tag())
 406
 407         comment = ''
 408         comment += '/**\n'
 409         for line in lines:
 410             line = line.rstrip()
 411             if line:
 412                 comment += ' * %s\n' % (line, )
 413             else:
 414                 comment += ' *\n'
 415         comment += ' */\n'
 416         return comment
 417
 418     def validate(self):
 419         for param in self.params.values():
 420             param.validate()
 421
 422         for tag in self.tags.values():
 423             tag.validate()
 424
 425
 426 class DocTag(object):
 427
 428     def __init__(self, block, name):
 429         self.block = block
 430         self.name = name
 431         self.options = DocOptions()
 432         self.comment = None
 433         self.value = ''
 434         self.position = None
 435
 436     def __repr__(self):
 437         return '<DocTag %r %r>' % (self.name, self.options)
 438
 439     def _validate_option(self, name, value, required=False,
 440                          n_params=None, choices=None):
 441         if required and value is None:
 442             message.warn('%s annotation needs a value' % (
 443                 name, ), self.position)
 444             return
 445
 446         if n_params is not None:
 447             if n_params == 0:
 448                 s = 'no value'
 449             elif n_params == 1:
 450                 s = 'one value'
 451             else:
 452                 s = '%d values' % (n_params, )
 453             if ((n_params > 0 and (value is None or value.length() != n_params)) or
 454                 n_params == 0 and value is not None):
 455                 if value is None:
 456                     length = 0
 457                 else:
 458                     length = value.length()
 459                 message.warn('%s annotation needs %s, not %d' % (
 460                     name, s, length), self.position)
 461                 return
 462
 463         if choices is not None:
 464             valuestr = value.one()
 465             if valuestr not in choices:
 466                 message.warn('invalid %s annotation value: %r' % (
 467                     name, valuestr, ), self.position)
 468                 return
 469
 470     def _validate_array(self, option, value):
 471         if value is None:
 472             return
 473
 474         for name, v in value.all().items():
 475             if name in [OPT_ARRAY_ZERO_TERMINATED, OPT_ARRAY_FIXED_SIZE]:
 476                 try:
 477                     int(v)
 478                 except (TypeError, ValueError):
 479                     if v is None:
 480                         message.warn(
 481                             'array option %s needs a value' % (
 482                             name, ),
 483                             positions=self.position)
 484                     else:
 485                         message.warn(
 486                             'invalid array %s option value %r, '
 487                             'must be an integer' % (name, v, ),
 488                             positions=self.position)
 489             elif name == OPT_ARRAY_LENGTH:
 490                 if v is None:
 491                     message.warn(
 492                         'array option length needs a value',
 493                         positions=self.position)
 494             else:
 495                 message.warn(
 496                     'invalid array annotation value: %r' % (
 497                     name, ), self.position)
 498
 499     def _validate_closure(self, option, value):
 500         if value is not None and value.length() > 1:
 501             message.warn(
 502                 'closure takes at most 1 value, %d given' % (
 503                 value.length()), self.position)
 504
 505     def _validate_element_type(self, option, value):
 506         self._validate_option(option, value, required=True)
 507         if value is None:
 508             message.warn(
 509                 'element-type takes at least one value, none given',
 510                 self.position)
 511             return
 512         if value.length() > 2:
 513             message.warn(
 514                 'element-type takes at most 2 values, %d given' % (
 515                 value.length()), self.position)
 516             return
 517
 518     def _validate_out(self, option, value):
 519         if value is None:
 520             return
 521         if value.length() > 1:
 522             message.warn(
 523                 'out annotation takes at most 1 value, %d given' % (
 524                 value.length()), self.position)
 525             return
 526         value_str = value.one()
 527         if value_str not in [OPT_OUT_CALLEE_ALLOCATES,
 528                              OPT_OUT_CALLER_ALLOCATES]:
 529             message.warn("out annotation value is invalid: %r" % (
 530                 value_str), self.position)
 531             return
 532
 533     def _get_gtk_doc_value(self):
 534         def serialize_one(option, value, fmt, fmt2):
 535             if value:
 536                 if type(value) != str:
 537                     value = ' '.join((serialize_one(k, v, '%s=%s', '%s')
 538                                       for k, v in value.all().items()))
 539                 return fmt % (option, value)
 540             else:
 541                 return fmt2 % (option, )
 542         annotations = []
 543         for option, value in self.options.items():
 544             annotations.append(
 545                 serialize_one(option, value, '(%s %s)', '(%s)'))
 546         if annotations:
 547             return ' '.join(annotations) + ': '
 548         else:
 549             return self.value
 550
 551     def to_gtk_doc_param(self):
 552         return '@%s: %s%s' % (self.name, self._get_gtk_doc_value(), self.comment)
 553
 554     def to_gtk_doc_tag(self):
 555         return '%s: %s%s' % (self.name.capitalize(),
 556                              self._get_gtk_doc_value(),
 557                              self.comment or '')
 558
 559     def validate(self):
 560         if self.name == TAG_ATTRIBUTES:
 561             # The 'Attributes:' tag allows free form annotations so the
 562             # validation below is most certainly going to fail.
 563             return
 564
 565         for option, value in self.options.items():
 566             if option == OPT_ALLOW_NONE:
 567                 self._validate_option(option, value, n_params=0)
 568             elif option == OPT_ARRAY:
 569                 self._validate_array(option, value)
 570             elif option == OPT_ATTRIBUTE:
 571                 self._validate_option(option, value, n_params=2)
 572             elif option == OPT_CLOSURE:
 573                 self._validate_closure(option, value)
 574             elif option == OPT_DESTROY:
 575                 self._validate_option(option, value, n_params=1)
 576             elif option == OPT_ELEMENT_TYPE:
 577                 self._validate_element_type(option, value)
 578             elif option == OPT_FOREIGN:
 579                 self._validate_option(option, value, n_params=0)
 580             elif option == OPT_IN:
 581                 self._validate_option(option, value, n_params=0)
 582             elif option in [OPT_INOUT, OPT_INOUT_ALT]:
 583                 self._validate_option(option, value, n_params=0)
 584             elif option == OPT_OUT:
 585                 self._validate_out(option, value)
 586             elif option == OPT_SCOPE:
 587                 self._validate_option(
 588                     option, value, required=True,
 589                     n_params=1,
 590                     choices=[OPT_SCOPE_ASYNC,
 591                              OPT_SCOPE_CALL,
 592                              OPT_SCOPE_NOTIFIED])
 593             elif option == OPT_SKIP:
 594                 self._validate_option(option, value, n_params=0)
 595             elif option == OPT_TRANSFER:
 596                 self._validate_option(
 597                     option, value, required=True,
 598                     n_params=1,
 599                     choices=[OPT_TRANSFER_FULL,
 600                              OPT_TRANSFER_CONTAINER,
 601                              OPT_TRANSFER_NONE,
 602                              OPT_TRANSFER_FLOATING])
 603             elif option == OPT_TYPE:
 604                 self._validate_option(option, value, required=True,
 605                                       n_params=1)
 606             elif option == OPT_CONSTRUCTOR:
 607                 self._validate_option(option, value, n_params=0)
 608             elif option == OPT_METHOD:
 609                 self._validate_option(option, value, n_params=0)
 610             else:
 611                 message.warn('invalid annotation option: %s' % (option, ),
 612                              self.position)
 613
 614
 615 class DocOptions(object):
 616     def __init__(self):
 617         self.values = []
 618         self.position = None
 619
 620     def __repr__(self):
 621         return '<DocOptions %r>' % (self.values, )
 622
 623     def __getitem__(self, item):
 624         for key, value in self.values:
 625             if key == item:
 626                 return value
 627         raise KeyError
 628
 629     def __nonzero__(self):
 630         return bool(self.values)
 631
 632     def __iter__(self):
 633         return (k for k, v in self.values)
 634
 635     def add(self, name, value):
 636         self.values.append((name, value))
 637
 638     def get(self, item, default=None):
 639         for key, value in self.values:
 640             if key == item:
 641                 return value
 642         return default
 643
 644     def getall(self, item):
 645         for key, value in self.values:
 646             if key == item:
 647                 yield value
 648
 649     def items(self):
 650         return iter(self.values)
 651
 652
 653 class DocOption(object):
 654
 655     def __init__(self, tag, option):
 656         self.tag = tag
 657         self._array = []
 658         self._dict = odict()
 659         # (annotation option1=value1 option2=value2) etc
 660         for p in option.split(' '):
 661             if '=' in p:
 662                 name, value = p.split('=', 1)
 663             else:
 664                 name = p
 665                 value = None
 666             self._dict[name] = value
 667             if value is None:
 668                 self._array.append(name)
 669             else:
 670                 self._array.append((name, value))
 671
 672     def __repr__(self):
 673         return '<DocOption %r>' % (self._array, )
 674
 675     def length(self):
 676         return len(self._array)
 677
 678     def one(self):
 679         assert len(self._array) == 1
 680         return self._array[0]
 681
 682     def flat(self):
 683         return self._array
 684
 685     def all(self):
 686         return self._dict
 687
 688
 689 class AnnotationParser(object):
 690     """
 691     GTK-Doc comment block parser.
 692
 693     Parses GTK-Doc comment blocks into a parse tree built out of :class:`DocBlock`,
 694     :class:`DocTag`, :class:`DocOptions` and :class:`DocOption` objects. This
 695     parser tries to accept malformed input whenever possible and does not emit
 696     syntax errors. However, it does emit warnings at the slightest indication
 697     of malformed input when possible. It is usually a good idea to heed these
 698     warnings as malformed input is known to result in invalid GTK-Doc output.
 699
 700     A GTK-Doc comment block can be constructed out of multiple parts that can
 701     be combined to write different types of documentation.
 702     See `GTK-Doc's documentation`_ to learn more about possible valid combinations.
 703     Each part can be further divided into fields which are separated by `:` characters.
 704
 705     Possible parts and the fields they are constructed from look like the
 706     following (optional fields are enclosed in square brackets):
 707
 708     .. code-block:: c
 709         /**
 710          * identifier_name [:annotations]
 711          * @parameter_name [:annotations] [:description]
 712          *
 713          * comment_block_description
 714          * tag_name [:annotations] [:description]
 715          */
 716
 717     The order in which the different parts have to be specified is important::
 718
 719         - There has to be exactly 1 `identifier` part on the first line of the
 720           comment block which consists of:
 721               * an `identifier_name` field
 722               * an optional `annotations` field
 723         - Followed by 0 or more `parameters` parts, each consisting of:
 724               * a `parameter_name` field
 725               * an optional `annotations` field
 726               * an optional `description` field
 727         - Followed by at least 1 empty line signaling the beginning of
 728           the `comment_block_description` part
 729         - Followed by an optional `comment block description` part.
 730         - Followed by 0 or more `tag` parts, each consisting of:
 731               * a `tag_name` field
 732               * an optional `annotations` field
 733               * an optional `description` field
 734
 735     Additionally, the following restrictions are in effect::
 736
 737         - Parts can optionally be separated by an empty line, except between
 738           the `parameter` parts and the `comment block description` part where
 739           an empty line is required (see above).
 740         - Parts and fields cannot span multiple lines, except for
 741           `parameter descriptions`, `tag descriptions` and the
 742           `comment_block_description` fields.
 743         - `parameter descriptions` fields can not span multiple paragraphs.
 744         - `tag descriptions` and `comment block description` fields can
 745           span multiple paragraphs.
 746
 747     .. NOTE:: :class:`AnnotationParser` functionality is heavily based on gtkdoc-mkdb's
 748         `ScanSourceFile()`_ function and is currently in sync with GTK-Doc
 749         commit `47abcd5`_.
 750
 751     .. _GTK-Doc's documentation:
 752             http://developer.gnome.org/gtk-doc-manual/1.18/documenting.html.en
 753     .. _ScanSourceFile():
 754             http://git.gnome.org/browse/gtk-doc/tree/gtkdoc-mkdb.in#n3722
 755     .. _47abcd5: 47abcd53b8489ebceec9e394676512a181c1f1f6
 756     """
 757
 758     def parse(self, comments):
 759         """
 760         Parses multiple GTK-Doc comment blocks.
 761
 762         :param comments: a list of (comment, filename, lineno) tuples
 763         :returns: a dictionary mapping identifier names to :class:`DocBlock` objects
 764         """
 765
 766         comment_blocks = {}
 767
 768         for comment in comments:
 769             try:
 770                 comment_block = self.parse_comment_block(comment)
 771             except Exception:
 772                 message.warn('unrecoverable parse error, please file a GObject-Introspection '
 773                              'bug report including the complete comment block at the '
 774                              'indicated location.', message.Position(comment[1], comment[2]))
 775                 continue
 776
 777             if comment_block is not None:
 778                 # Note: previous versions of this parser did not check
 779                 # if an identifier was already stored in comment_blocks,
 780                 # so when multiple comment blocks where encountered documenting
 781                 # the same identifier the last one seen "wins".
 782                 # Keep this behavior for backwards compatibility, but
 783                 # emit a warning.
 784                 if comment_block.name in comment_blocks:
 785                     message.warn("multiple comment blocks documenting '%s:' identifier." %
 786                                  (comment_block.name),
 787                                  comment_block.position)
 788
 789                 comment_blocks[comment_block.name] = comment_block
 790
 791         return comment_blocks
 792
 793     def parse_comment_block(self, comment):
 794         """
 795         Parses a single GTK-Doc comment block.
 796
 797         :param comment: a (comment, filename, lineno) tuple
 798         :returns: a :class:`DocBlock` object or ``None``
 799         """
 800
 801         comment, filename, lineno = comment
 802
 803         # Assign line numbers to each line of the comment block,
 804         # which will later be used as the offset to calculate the
 805         # real line number in the source file
 806         comment_lines = list(enumerate(comment.split('\n')))
 807
 808         # Check for the start the comment block.
 809         if COMMENT_START_RE.match(comment_lines[0][1]):
 810             del comment_lines[0]
 811         else:
 812             # Not a GTK-Doc comment block.
 813             return None
 814
 815         # Check for the end the comment block.
 816         line_offset, line = comment_lines[-1]
 817         result = COMMENT_END_RE.match(line)
 818         if result:
 819             description = result.group('description')
 820             if description:
 821                 comment_lines[-1] = (line_offset, description)
 822                 position = message.Position(filename, lineno + line_offset)
 823                 marker = ' '*result.end('description') + '^'
 824                 message.warn("Comments should end with */ on a new line:\n%s\n%s" %
 825                              (line, marker),
 826                              position)
 827             else:
 828                 del comment_lines[-1]
 829         else:
 830             # Not a GTK-Doc comment block.
 831             return None
 832
 833         # If we get this far, we are inside a GTK-Doc comment block.
 834         return self._parse_comment_block(comment_lines, filename, lineno)
 835
    def _parse_comment_block(self, comment_lines, filename, lineno):
        """
        Parses a single GTK-Doc comment block already stripped from its
        comment start (/**) and comment end (*/) marker lines.

        The lines are processed in order by a small state machine whose
        current state is kept in `in_part` (one of the ``PART_*`` constants).
        When the first line does not hold a recognizable identifier a warning
        is emitted and ``None`` is returned. Otherwise the resulting
        :class:`DocBlock` is cleaned up, validated and returned.

        :param comment_lines: list of (line_offset, line) tuples representing a
                              GTK-Doc comment block already stripped from its
                              start (/**) and end (*/) marker lines
        :param filename: source file name where the comment block originated from
        :param lineno:  line in the source file where the comment block starts
        :returns: a :class:`DocBlock` object or ``None``

        .. NOTE:: If you are tempted to refactor this method and split it
            further up (for example into _parse_identifier(), _parse_parameters(),
            _parse_description(), _parse_tags() methods) then please resist the
            urge. It is considered important that this method should be more or
            less easily comparable with gtkdoc-mkdb's `ScanSourceFile()`_ function.

            The different parsing steps are marked with a comment surrounded
            by `#` characters in an attempt to make it clear what is going on.

        .. _ScanSourceFile():
                http://git.gnome.org/browse/gtk-doc/tree/gtkdoc-mkdb.in#n3722
        """
        # Parser state shared between loop iterations:
        # - comment_block: the DocBlock being built, unset until an identifier is found
        # - part_indent:   indentation of the line that last opened or continued a part
        # - line_indent:   indentation of the line currently being processed
        # - in_part:       the PART_* constant naming the part we are currently in
        # - identifier:    the IDENTIFIER_* kind once an identifier line matched
        # - current_param / current_tag: the DocTag that continuation lines are
        #                  appended to while in the parameters / tags part
        # - returns_seen:  set once a 'Returns' parameter or tag was encountered
        comment_block = None
        part_indent = None
        line_indent = None
        in_part = None
        identifier = None
        current_param = None
        current_tag = None
        returns_seen = False

        for line_offset, line in comment_lines:
            # line_offset is relative to the start of the comment, so adding
            # lineno yields the line number in the source file.
            position = message.Position(filename, line_offset + lineno)

            # Store the original line (without \n) and column offset
            # so we can generate meaningful warnings later on.
            original_line = line
            column_offset = 0

            # Get rid of ' * ' at start of the line.
            result = COMMENT_ASTERISK_RE.match(line)
            if result:
                column_offset = result.end(0)
                line = line[result.end(0):]

            # Store indentation level of the line.
            # A tab character counts as two columns of indentation here.
            result = COMMENT_INDENTATION_RE.match(line)
            line_indent = len(result.group('indentation').replace('\t', '  '))

            ####################################################################
            # Check for GTK-Doc comment block identifier.
            ####################################################################
            # comment_block is still unset, so this line has to carry the
            # identifier. The patterns are tried in a fixed order and the
            # first one that matches decides the identifier kind.
            if not comment_block:
                if not identifier:
                    result = SECTION_RE.match(line)
                    if result:
                        identifier = IDENTIFIER_SECTION
                        identifier_name = 'SECTION:%s' % (result.group('section_name'))
                        column = result.start('section_name') + column_offset

                if not identifier:
                    result = SYMBOL_RE.match(line)
                    if result:
                        identifier = IDENTIFIER_SYMBOL
                        identifier_name = '%s' % (result.group('symbol_name'))
                        column = result.start('symbol_name') + column_offset

                if not identifier:
                    result = PROPERTY_RE.match(line)
                    if result:
                        identifier = IDENTIFIER_PROPERTY
                        identifier_name = '%s:%s' % (result.group('class_name'),
                                                     result.group('property_name'))
                        column = result.start('property_name') + column_offset

                if not identifier:
                    result = SIGNAL_RE.match(line)
                    if result:
                        identifier = IDENTIFIER_SIGNAL
                        identifier_name = '%s::%s' % (result.group('class_name'),
                                                      result.group('signal_name'))
                        column = result.start('signal_name') + column_offset

                if identifier:
                    # `result` is the match object of whichever identifier
                    # pattern matched above.
                    in_part = PART_IDENTIFIER
                    part_indent = line_indent

                    comment_block = DocBlock(identifier_name)
                    comment_block.position = position

                    # Only patterns that define a 'colon' group can report a
                    # missing ':' after the identifier.
                    if 'colon' in result.groupdict() and result.group('colon') != ':':
                        colon_start = result.start('colon')
                        colon_column = column_offset + colon_start
                        marker = ' '*colon_column + '^'
                        message.warn("missing ':' at column %s:\n%s\n%s" %
                                     (colon_column + 1, original_line, marker),
                                     position)

                    # Likewise, only patterns that define an 'annotations'
                    # group can carry annotations on the identifier line.
                    if 'annotations' in result.groupdict():
                        comment_block.options = self.parse_options(comment_block,
                                                                   result.group('annotations'))

                    continue
                else:
                    # If we get here, the identifier was not recognized, so
                    # ignore the rest of the block just like the old annotation
                    # parser did. Doing this is a bit more strict than
                    # gtkdoc-mkdb (which continues to search for the identifier
                    # until either it is found or the end of the block is
                    # reached). In practice, however, ignoring the block is the
                    # right thing to do because sooner or later some long
                    # descriptions will contain something matching an identifier
                    # pattern by accident.
                    marker = ' '*column_offset + '^'
                    message.warn('ignoring unrecognized GTK-Doc comment block, identifier not '
                                 'found:\n%s\n%s' % (original_line, marker),
                                 position)

                    return None

            ####################################################################
            # Check for comment block parameters.
            ####################################################################
            result = PARAMETER_RE.match(line)
            if result:
                param_name = result.group('parameter_name')
                param_annotations = result.group('annotations')
                param_description = result.group('description')

                # The first parameter right after the identifier line opens
                # the parameters part.
                if in_part == PART_IDENTIFIER:
                    in_part = PART_PARAMETERS

                part_indent = line_indent

                # A parameter seen in any other part is still stored below,
                # but reported as misplaced.
                if in_part != PART_PARAMETERS:
                    column = result.start('parameter_name') + column_offset
                    marker = ' '*column + '^'
                    message.warn("'@%s' parameter unexpected at this location:\n%s\n%s" %
                                 (param_name, original_line, marker),
                                 position)

                # Old style GTK-Doc allowed return values to be specified as
                # parameters instead of tags.
                if param_name.lower() == TAG_RETURNS:
                    # Normalize the spelling so it is stored under TAG_RETURNS.
                    param_name = TAG_RETURNS

                    if not returns_seen:
                        returns_seen = True
                    else:
                        message.warn("encountered multiple 'Returns' parameters or tags for "
                                     "'%s'." % (comment_block.name),
                                     position)
                elif param_name in comment_block.params.keys():
                    column = result.start('parameter_name') + column_offset
                    marker = ' '*column + '^'
                    message.warn("multiple '@%s' parameters for identifier '%s':\n%s\n%s" %
                                 (param_name, comment_block.name, original_line, marker),
                                 position)

                tag = DocTag(comment_block, param_name)
                tag.position = position
                tag.comment = param_description
                if param_annotations:
                    tag.options = self.parse_options(tag, param_annotations)
                # A '@returns' parameter is stored with the tags, every other
                # parameter with the params. A duplicate replaces the entry
                # stored earlier under the same name.
                if param_name == TAG_RETURNS:
                    comment_block.tags[param_name] = tag
                else:
                    comment_block.params[param_name] = tag
                current_param = tag
                continue

            ####################################################################
            # Check for comment block description.
            #
            # When we are parsing comment block parameters or the comment block
            # identifier (when there are no parameters) and encounter an empty
            # line, we must be parsing the comment block description.
            ####################################################################
            if (EMPTY_LINE_RE.match(line)
            and in_part in [PART_IDENTIFIER, PART_PARAMETERS]):
                in_part = PART_DESCRIPTION
                part_indent = line_indent
                continue

            ####################################################################
            # Check for GTK-Doc comment block tags.
            ####################################################################
            # A line matching the tag pattern is only treated as a tag when it
            # is not indented deeper than the current part; otherwise it falls
            # through and is handled as a continuation line below.
            result = TAG_RE.match(line)
            if result and line_indent <= part_indent:
                tag_name = result.group('tag_name')
                tag_annotations = result.group('annotations')
                tag_description = result.group('description')

                marker = ' '*(result.start('tag_name') + column_offset) + '^'

                # Deprecated GTK-Doc Description: tag
                if tag_name.lower() == TAG_DESCRIPTION:
                    message.warn("GTK-Doc tag \"Description:\" has been deprecated:\n%s\n%s" %
                                 (original_line, marker),
                                 position)

                    # The text after "Description:" becomes part of the
                    # comment block description instead of a tag.
                    in_part = PART_DESCRIPTION
                    part_indent = line_indent

                    if not comment_block.comment:
                        comment_block.comment = tag_description
                    else:
                        comment_block.comment += '\n' + tag_description
                    continue

                # Now that the deprecated stuff is out of the way, continue parsing real tags
                if in_part == PART_DESCRIPTION:
                    in_part = PART_TAGS

                part_indent = line_indent

                # A tag seen in any part other than the description or tags
                # part is still stored below, but reported as misplaced.
                if in_part != PART_TAGS:
                    column = result.start('tag_name') + column_offset
                    marker = ' '*column + '^'
                    message.warn("'%s:' tag unexpected at this location:\n%s\n%s" %
                                 (tag_name, original_line, marker),
                                 position)

                if tag_name.lower() in [TAG_RETURNS, TAG_RETURNVALUE]:
                    if not returns_seen:
                        returns_seen = True
                    else:
                        message.warn("encountered multiple 'Returns' parameters or tags for "
                                     "'%s'." % (comment_block.name),
                                     position)

                    # Both spellings are stored under TAG_RETURNS, with the
                    # text kept in `comment` and annotations always parsed.
                    tag = DocTag(comment_block, TAG_RETURNS)
                    tag.position = position
                    tag.comment = tag_description
                    if tag_annotations:
                        tag.options = self.parse_options(tag, tag_annotations)
                    comment_block.tags[TAG_RETURNS] = tag
                    current_tag = tag
                    continue
                else:
                    if tag_name.lower() in comment_block.tags.keys():
                        column = result.start('tag_name') + column_offset
                        marker = ' '*column + '^'
                        message.warn("multiple '%s:' tags for identifier '%s':\n%s\n%s" %
                                     (tag_name, comment_block.name, original_line, marker),
                                     position)

                    # All other tags are stored under their lowercased name,
                    # with the text kept in `value`.
                    tag = DocTag(comment_block, tag_name.lower())
                    tag.position = position
                    tag.value = tag_description
                    if tag_annotations:
                        # Annotations are only parsed for the 'attributes' tag;
                        # on any other tag they are reported and dropped.
                        if tag_name.lower() == TAG_ATTRIBUTES:
                            tag.options = self.parse_options(tag, tag_annotations)
                        else:
                            message.warn("annotations not supported for tag '%s:'." %
                                         (tag_name),
                                         position)
                    comment_block.tags[tag_name.lower()] = tag
                    current_tag = tag
                    continue

            ####################################################################
            # If we get here, we must be in the middle of a multiline
            # comment block, parameter or tag description.
            ####################################################################
            if in_part in [PART_IDENTIFIER, PART_DESCRIPTION]:
                # Description lines are joined with newlines, so the line
                # structure of the description is kept.
                if not comment_block.comment:
                    comment_block.comment = line
                else:
                    comment_block.comment += '\n' + line
                continue
            elif in_part == PART_PARAMETERS:
                self._validate_multiline_annotation_continuation(line, original_line,
                                                                 column_offset, position)
                # Append to parameter description.
                current_param.comment += ' ' + line.strip()
                continue
            elif in_part == PART_TAGS:
                self._validate_multiline_annotation_continuation(line, original_line,
                                                                 column_offset, position)
                # Append to tag description.
                # 'Returns' tags keep their text in `comment`, all other tags
                # keep it in `value` (matching how they were created above).
                if current_tag.name.lower() in [TAG_RETURNS, TAG_RETURNVALUE]:
                    current_tag.comment += ' ' + line.strip()
                else:
                    current_tag.value += ' ' + line.strip()
                continue

        ########################################################################
        # Finished parsing this comment block.
        ########################################################################
        if comment_block:
            # We have picked up a couple of \n characters that where not
            # intended. Strip those.
            if comment_block.comment:
                comment_block.comment = comment_block.comment.strip()

            for tag in comment_block.tags.values():
                self._clean_comment_block_part(tag)

            for param in comment_block.params.values():
                self._clean_comment_block_part(param)

            # Validate and store block.
            comment_block.validate()
            return comment_block
        else:
            return None
1145
1146     def _clean_comment_block_part(self, part):
1147         if part.comment:
1148             part.comment = part.comment.strip()
1149         else:
1150             part.comment = None
1151
1152         if part.value:
1153             part.value = part.value.strip()
1154         else:
1155             part.value = ''
1156
1157     def _validate_multiline_annotation_continuation(self, line, original_line,
1158                                                           column_offset, position):
1159         '''
1160         Validate parameters and tags (except the first line) and generate
1161         warnings about invalid annotations spanning multiple lines.
1162
1163         :param line: line to validate, stripped from ' * ' at start of the line.
1164         :param original_line: original line to validate (used in warning messages)
1165         :param column_offset: column width of ' * ' at the time it was stripped from `line`
1166         :param position: position of `line` in the source file
1167         '''
1168
1169         result = MULTILINE_ANNOTATION_CONTINUATION_RE.match(line)
1170         if result:
1171             column = result.start('annotations') + column_offset
1172             marker = ' '*column + '^'
1173             message.warn('ignoring invalid multiline annotation continuation:\n'
1174                          '%s\n%s' % (original_line, marker),
1175                          position)
1176
1177     @classmethod
1178     def parse_options(cls, tag, value):
1179         # (annotation)
1180         # (annotation opt1 opt2 ...)
1181         # (annotation opt1=value1 opt2=value2 ...)
1182         opened = -1
1183         options = DocOptions()
1184         options.position = tag.position
1185
1186         for i, c in enumerate(value):
1187             if c == '(' and opened == -1:
1188                 opened = i+1
1189             if c == ')' and opened != -1:
1190                 segment = value[opened:i]
1191                 parts = segment.split(' ', 1)
1192                 if len(parts) == 2:
1193                     name, option = parts
1194                 elif len(parts) == 1:
1195                     name = parts[0]
1196                     option = None
1197                 else:
1198                     raise AssertionError
1199                 if option is not None:
1200                     option = DocOption(tag, option)
1201                 options.add(name, option)
1202                 opened = -1
1203
1204         return options