giscanner/annotationparser.py

   1 # -*- Mode: Python -*-
   2 # GObject-Introspection - a framework for introspecting GObject libraries
   3 # Copyright (C) 2008-2010 Johan Dahlin
   4 # Copyright (C) 2012 Dieter Verfaillie <dieterv@optionexplicit.be>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20 #
  21
  22
  23 # AnnotationParser - extract annotations from GTK-Doc comment blocks
  24
  25
  26 import re
  27
  28 from . import message
  29 from .odict import odict
  30
  31
  32 # GTK-Doc comment block parts
  33 PART_IDENTIFIER = 'identifier'
  34 PART_PARAMETERS = 'parameters'
  35 PART_DESCRIPTION = 'description'
  36 PART_TAGS = 'tags'
  37
  38 # Identifiers
  39 IDENTIFIER_SECTION = 'section'
  40 IDENTIFIER_SYMBOL = 'symbol'
  41 IDENTIFIER_PROPERTY = 'property'
  42 IDENTIFIER_SIGNAL = 'signal'
  43
  44 # Tags - annotations applied to comment blocks
  45 TAG_VFUNC = 'virtual'
  46 TAG_SINCE = 'since'
  47 TAG_STABILITY = 'stability'
  48 TAG_DEPRECATED = 'deprecated'
  49 TAG_RETURNS = 'returns'
  50 TAG_RETURNVALUE = 'return value'
  51 TAG_DESCRIPTION = 'description'
  52 TAG_ATTRIBUTES = 'attributes'
  53 TAG_RENAME_TO = 'rename to'
  54 TAG_TYPE = 'type'
  55 TAG_UNREF_FUNC = 'unref func'
  56 TAG_REF_FUNC = 'ref func'
  57 TAG_SET_VALUE_FUNC = 'set value func'
  58 TAG_GET_VALUE_FUNC = 'get value func'
  59 TAG_TRANSFER = 'transfer'
  60 TAG_VALUE = 'value'
  61 _ALL_TAGS = [TAG_VFUNC,
  62              TAG_SINCE,
  63              TAG_STABILITY,
  64              TAG_DEPRECATED,
  65              TAG_RETURNS,
  66              TAG_RETURNVALUE,
  67              TAG_DESCRIPTION,
  68              TAG_ATTRIBUTES,
  69              TAG_RENAME_TO,
  70              TAG_TYPE,
  71              TAG_UNREF_FUNC,
  72              TAG_REF_FUNC,
  73              TAG_SET_VALUE_FUNC,
  74              TAG_GET_VALUE_FUNC,
  75              TAG_TRANSFER,
  76              TAG_VALUE]
  77
  78 # Options - annotations for parameters and return values
  79 OPT_ALLOW_NONE = 'allow-none'
  80 OPT_ARRAY = 'array'
  81 OPT_ATTRIBUTE = 'attribute'
  82 OPT_CLOSURE = 'closure'
  83 OPT_DESTROY = 'destroy'
  84 OPT_ELEMENT_TYPE = 'element-type'
  85 OPT_FOREIGN = 'foreign'
  86 OPT_IN = 'in'
  87 OPT_INOUT = 'inout'
  88 OPT_INOUT_ALT = 'in-out'
  89 OPT_OUT = 'out'
  90 OPT_SCOPE = 'scope'
  91 OPT_TRANSFER = 'transfer'
  92 OPT_TYPE = 'type'
  93 OPT_SKIP = 'skip'
  94 OPT_CONSTRUCTOR = 'constructor'
  95 OPT_METHOD = 'method'
  96
  97 ALL_OPTIONS = [
  98     OPT_ALLOW_NONE,
  99     OPT_ARRAY,
 100     OPT_ATTRIBUTE,
 101     OPT_CLOSURE,
 102     OPT_DESTROY,
 103     OPT_ELEMENT_TYPE,
 104     OPT_FOREIGN,
 105     OPT_IN,
 106     OPT_INOUT,
 107     OPT_INOUT_ALT,
 108     OPT_OUT,
 109     OPT_SCOPE,
 110     OPT_TRANSFER,
 111     OPT_TYPE,
 112     OPT_SKIP,
 113     OPT_CONSTRUCTOR,
 114     OPT_METHOD]
 115
 116 # Array options - array specific annotations
 117 OPT_ARRAY_FIXED_SIZE = 'fixed-size'
 118 OPT_ARRAY_LENGTH = 'length'
 119 OPT_ARRAY_ZERO_TERMINATED = 'zero-terminated'
 120
 121 # Out options
 122 OPT_OUT_CALLER_ALLOCATES = 'caller-allocates'
 123 OPT_OUT_CALLEE_ALLOCATES = 'callee-allocates'
 124
 125 # Scope options
 126 OPT_SCOPE_ASYNC = 'async'
 127 OPT_SCOPE_CALL = 'call'
 128 OPT_SCOPE_NOTIFIED = 'notified'
 129
 130 # Transfer options
 131 OPT_TRANSFER_NONE = 'none'
 132 OPT_TRANSFER_CONTAINER = 'container'
 133 OPT_TRANSFER_FULL = 'full'
 134 OPT_TRANSFER_FLOATING = 'floating'
 135
 136
 137 # The following regular expression programs are built to:
 138 # - match (or substitute) a single comment block line at a time;
 139 # - support LOCALE and UNICODE modes (although this remains untested).
 140
 141 # Program matching the start of a comment block.
 142 #
 143 # Results in 0 symbolic groups.
 144 COMMENT_START_RE = re.compile(r'''
 145     ^                                        # start
 146     [^\S\n\r]*                               # 0 or more whitespace characters
 147     /                                        # 1 forward slash character
 148     \*{2}                                    # exactly 2 asterisk characters
 149     [^\S\n\r]*                               # 0 or more whitespace characters
 150     $                                        # end
 151     ''',
 152     re.VERBOSE)
 153
 154 # Program matching the end of a comment block.
 155 #
 156 # Results in 0 symbolic groups.
 157 COMMENT_END_RE = re.compile(r'''
 158     ^                                        # start
 159     [^\S\n\r]*                               # 0 or more whitespace characters
 160     \*+                                      # 1 or more asterisk characters
 161     /                                        # 1 forward slash character
 162     $                                        # end
 163     ''',
 164     re.VERBOSE)
 165
 166 # Program matching the ' * ' at the beginning of every
 167 # line inside a comment block.
 168 #
 169 # Results in 0 symbolic groups.
 170 COMMENT_ASTERISK_RE = re.compile(r'''
 171     ^                                        # start
 172     [^\S\n\r]*                               # 0 or more whitespace characters
 173     \*                                       # 1 asterisk character
 174     [^\S\n\r]?                               # 0 or 1 whitespace characters. Careful,
 175                                              # removing more than 1 whitespace
 176                                              # character would break embedded
 177                                              # example program indentation
 178     ''',
 179     re.VERBOSE)
 180
 181 # Program matching the indentation at the beginning of every
 182 # line (stripped from the ' * ') inside a comment block.
 183 #
 184 # Results in 1 symbolic group:
 185 #   - group 1 = indentation
 186 COMMENT_INDENTATION_RE = re.compile(r'''
 187     ^
 188     (?P<indentation>[^\S\n\r]*)              # 0 or more whitespace characters
 189     .*
 190     $
 191     ''',
 192     re.VERBOSE)
 193
 194 # Program matching an empty line.
 195 #
 196 # Results in 0 symbolic groups.
 197 EMPTY_LINE_RE = re.compile(r'''
 198     ^                                        # start
 199     [^\S\n\r]*                               # 0 or more whitespace characters
 200     $                                        # end
 201     ''',
 202     re.VERBOSE)
 203
 204 # Program matching SECTION identifiers.
 205 #
 206 # Results in 2 symbolic groups:
 207 #   - group 1 = colon
 208 #   - group 2 = section_name
 209 SECTION_RE = re.compile(r'''
 210     ^                                        # start
 211     [^\S\n\r]*                               # 0 or more whitespace characters
 212     SECTION                                  # SECTION
 213     [^\S\n\r]*                               # 0 or more whitespace characters
 214     (?P<colon>:?)                            # colon
 215     [^\S\n\r]*                               # 0 or more whitespace characters
 216     (?P<section_name>\w\S+)?                 # section name
 217     [^\S\n\r]*                               # 0 or more whitespace characters
 218     $
 219     ''',
 220     re.VERBOSE)
 221
 222 # Program matching symbol (function, constant, struct and enum) identifiers.
 223 #
 224 # Results in 3 symbolic groups:
 225 #   - group 1 = symbol_name
 226 #   - group 2 = colon
 227 #   - group 3 = annotations
 228 SYMBOL_RE = re.compile(r'''
 229     ^                                        # start
 230     [^\S\n\r]*                               # 0 or more whitespace characters
 231     (?P<symbol_name>[\w-]*\w)                # symbol name
 232     [^\S\n\r]*                               # 0 or more whitespace characters
 233     (?P<colon>:?)                            # colon
 234     [^\S\n\r]*                               # 0 or more whitespace characters
 235     (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
 236     [^\S\n\r]*                               # 0 or more whitespace characters
 237     $                                        # end
 238     ''',
 239     re.VERBOSE)
 240
 241 # Program matching property identifiers.
 242 #
 243 # Results in 4 symbolic groups:
 244 #   - group 1 = class_name
 245 #   - group 2 = property_name
 246 #   - group 3 = colon
 247 #   - group 4 = annotations
 248 PROPERTY_RE = re.compile(r'''
 249     ^                                        # start
 250     [^\S\n\r]*                               # 0 or more whitespace characters
 251     (?P<class_name>[\w]+)                    # class name
 252     [^\S\n\r]*                               # 0 or more whitespace characters
 253     :{1}                                     # required colon
 254     [^\S\n\r]*                               # 0 or more whitespace characters
 255     (?P<property_name>[\w-]*\w)              # property name
 256     [^\S\n\r]*                               # 0 or more whitespace characters
 257     (?P<colon>:?)                            # colon
 258     [^\S\n\r]*                               # 0 or more whitespace characters
 259     (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
 260     [^\S\n\r]*                               # 0 or more whitespace characters
 261     $                                        # end
 262     ''',
 263     re.VERBOSE)
 264
 265 # Program matching signal identifiers.
 266 #
 267 # Results in 4 symbolic groups:
 268 #   - group 1 = class_name
 269 #   - group 2 = signal_name
 270 #   - group 3 = colon
 271 #   - group 4 = annotations
 272 SIGNAL_RE = re.compile(r'''
 273     ^                                        # start
 274     [^\S\n\r]*                               # 0 or more whitespace characters
 275     (?P<class_name>[\w]+)                    # class name
 276     [^\S\n\r]*                               # 0 or more whitespace characters
 277     :{2}                                     # 2 required colons
 278     [^\S\n\r]*                               # 0 or more whitespace characters
 279     (?P<signal_name>[\w-]*\w)                # signal name
 280     [^\S\n\r]*                               # 0 or more whitespace characters
 281     (?P<colon>:?)                            # colon
 282     [^\S\n\r]*                               # 0 or more whitespace characters
 283     (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
 284     [^\S\n\r]*                               # 0 or more whitespace characters
 285     $                                        # end
 286     ''',
 287     re.VERBOSE)
 288
 289 # Program matching parameters.
 290 #
 291 # Results in 4 symbolic groups:
 292 #   - group 1 = parameter_name
 293 #   - group 2 = annotations
 294 #   - group 3 = colon
 295 #   - group 4 = description
 296 PARAMETER_RE = re.compile(r'''
 297     ^                                        # start
 298     [^\S\n\r]*                               # 0 or more whitespace characters
 299     @                                        # @ character
 300     (?P<parameter_name>[\w-]*\w|\.\.\.)      # parameter name
 301     [^\S\n\r]*                               # 0 or more whitespace characters
 302     :{1}                                     # required colon
 303     [^\S\n\r]*                               # 0 or more whitespace characters
 304     (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
 305     (?P<colon>:?)                            # colon
 306     [^\S\n\r]*                               # 0 or more whitespace characters
 307     (?P<description>.*?)                     # description
 308     [^\S\n\r]*                               # 0 or more whitespace characters
 309     $                                        # end
 310     ''',
 311     re.VERBOSE)
 312
 313 # Program matching tags.
 314 #
 315 # Results in 4 symbolic groups:
 316 #   - group 1 = tag_name
 317 #   - group 2 = annotations
 318 #   - group 3 = colon
 319 #   - group 4 = description
 320 _all_tags = '|'.join(_ALL_TAGS).replace(' ', '\\ ')
 321 TAG_RE = re.compile(r'''
 322     ^                                        # start
 323     [^\S\n\r]*                               # 0 or more whitespace characters
 324     (?P<tag_name>''' + _all_tags + r''')     # tag name
 325     [^\S\n\r]*                               # 0 or more whitespace characters
 326     :{1}                                     # required colon
 327     [^\S\n\r]*                               # 0 or more whitespace characters
 328     (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
 329     (?P<colon>:?)                            # colon
 330     [^\S\n\r]*                               # 0 or more whitespace characters
 331     (?P<description>.*?)                     # description
 332     [^\S\n\r]*                               # 0 or more whitespace characters
 333     $                                        # end
 334     ''',
 335     re.VERBOSE | re.IGNORECASE)
 336
 337 # Program matching multiline annotation continuations.
 338 # This is used on multiline parameters and tags (but not on the first line) to
 339 # generate warnings about invalid annotations spanning multiple lines.
 340 #
 341 # Results in 3 symbolic groups:
 342 #   - group 1 = annotations
 343 #   - group 2 = colon
 344 #   - group 3 = description
 345 MULTILINE_ANNOTATION_CONTINUATION_RE = re.compile(r'''
 346     ^                                        # start
 347     [^\S\n\r]*                               # 0 or more whitespace characters
 348     (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
 349     (?P<colon>:)                             # colon
 350     [^\S\n\r]*                               # 0 or more whitespace characters
 351     (?P<description>.*?)                     # description
 352     [^\S\n\r]*                               # 0 or more whitespace characters
 353     $                                        # end
 354     ''',
 355     re.VERBOSE)
 356
 357
 358 class DocBlock(object):
 359
 360     def __init__(self, name):
 361         self.name = name
 362         self.options = DocOptions()
 363         self.value = None
 364         self.tags = odict()
 365         self.comment = None
 366         self.params = odict()
 367         self.position = None
 368
 369     def __cmp__(self, other):
 370         return cmp(self.name, other.name)
 371
 372     def __repr__(self):
 373         return '<DocBlock %r %r>' % (self.name, self.options)
 374
 375     def get_tag(self, name):
 376         return self.tags.get(name)
 377
 378     def get_param(self, name):
 379         return self.params.get(name)
 380
 381     def to_gtk_doc(self):
 382         options = ''
 383         if self.options:
 384             options += ' '
 385             options += ' '.join('(%s)' % o for o in self.options)
 386         lines = [self.name]
 387         if 'SECTION' not in self.name:
 388             lines[0] += ':'
 389         lines[0] += options
 390         for param in self.params.values():
 391             lines.append(param.to_gtk_doc_param())
 392         if self.comment:
 393             lines.append('')
 394             for l in self.comment.split('\n'):
 395                 lines.append(l)
 396         if self.tags:
 397             lines.append('')
 398             for tag in self.tags.values():
 399                 lines.append(tag.to_gtk_doc_tag())
 400
 401         comment = ''
 402         comment += '/**\n'
 403         for line in lines:
 404             line = line.rstrip()
 405             if line:
 406                 comment += ' * %s\n' % (line, )
 407             else:
 408                 comment += ' *\n'
 409         comment += ' */\n'
 410         return comment
 411
 412     def validate(self):
 413         for param in self.params.values():
 414             param.validate()
 415
 416         for tag in self.tags.values():
 417             tag.validate()
 418
 419
 420 class DocTag(object):
 421
 422     def __init__(self, block, name):
 423         self.block = block
 424         self.name = name
 425         self.options = DocOptions()
 426         self.comment = None
 427         self.value = ''
 428         self.position = None
 429
 430     def __repr__(self):
 431         return '<DocTag %r %r>' % (self.name, self.options)
 432
 433     def _validate_option(self, name, value, required=False,
 434                          n_params=None, choices=None):
 435         if required and value is None:
 436             message.warn('%s annotation needs a value' % (
 437                 name, ), self.position)
 438             return
 439
 440         if n_params is not None:
 441             if n_params == 0:
 442                 s = 'no value'
 443             elif n_params == 1:
 444                 s = 'one value'
 445             else:
 446                 s = '%d values' % (n_params, )
 447             if ((n_params > 0 and (value is None or value.length() != n_params)) or
 448                 n_params == 0 and value is not None):
 449                 if value is None:
 450                     length = 0
 451                 else:
 452                     length = value.length()
 453                 message.warn('%s annotation needs %s, not %d' % (
 454                     name, s, length), self.position)
 455                 return
 456
 457         if choices is not None:
 458             valuestr = value.one()
 459             if valuestr not in choices:
 460                 message.warn('invalid %s annotation value: %r' % (
 461                     name, valuestr, ), self.position)
 462                 return
 463
 464     def _validate_array(self, option, value):
 465         if value is None:
 466             return
 467
 468         for name, v in value.all().items():
 469             if name in [OPT_ARRAY_ZERO_TERMINATED, OPT_ARRAY_FIXED_SIZE]:
 470                 try:
 471                     int(v)
 472                 except (TypeError, ValueError):
 473                     if v is None:
 474                         message.warn(
 475                             'array option %s needs a value' % (
 476                             name, ),
 477                             positions=self.position)
 478                     else:
 479                         message.warn(
 480                             'invalid array %s option value %r, '
 481                             'must be an integer' % (name, v, ),
 482                             positions=self.position)
 483             elif name == OPT_ARRAY_LENGTH:
 484                 if v is None:
 485                     message.warn(
 486                         'array option length needs a value',
 487                         positions=self.position)
 488             else:
 489                 message.warn(
 490                     'invalid array annotation value: %r' % (
 491                     name, ), self.position)
 492
 493     def _validate_closure(self, option, value):
 494         if value is not None and value.length() > 1:
 495             message.warn(
 496                 'closure takes at most 1 value, %d given' % (
 497                 value.length()), self.position)
 498
 499     def _validate_element_type(self, option, value):
 500         self._validate_option(option, value, required=True)
 501         if value is None:
 502             message.warn(
 503                 'element-type takes at least one value, none given',
 504                 self.position)
 505             return
 506         if value.length() > 2:
 507             message.warn(
 508                 'element-type takes at most 2 values, %d given' % (
 509                 value.length()), self.position)
 510             return
 511
 512     def _validate_out(self, option, value):
 513         if value is None:
 514             return
 515         if value.length() > 1:
 516             message.warn(
 517                 'out annotation takes at most 1 value, %d given' % (
 518                 value.length()), self.position)
 519             return
 520         value_str = value.one()
 521         if value_str not in [OPT_OUT_CALLEE_ALLOCATES,
 522                              OPT_OUT_CALLER_ALLOCATES]:
 523             message.warn("out annotation value is invalid: %r" % (
 524                 value_str), self.position)
 525             return
 526
 527     def _get_gtk_doc_value(self):
 528         def serialize_one(option, value, fmt, fmt2):
 529             if value:
 530                 if type(value) != str:
 531                     value = ' '.join((serialize_one(k, v, '%s=%s', '%s')
 532                                       for k, v in value.all().items()))
 533                 return fmt % (option, value)
 534             else:
 535                 return fmt2 % (option, )
 536         annotations = []
 537         for option, value in self.options.items():
 538             annotations.append(
 539                 serialize_one(option, value, '(%s %s)', '(%s)'))
 540         if annotations:
 541             return ' '.join(annotations) + ': '
 542         else:
 543             return self.value
 544
 545     def to_gtk_doc_param(self):
 546         return '@%s: %s%s' % (self.name, self._get_gtk_doc_value(), self.comment)
 547
 548     def to_gtk_doc_tag(self):
 549         return '%s: %s%s' % (self.name.capitalize(),
 550                              self._get_gtk_doc_value(),
 551                              self.comment or '')
 552
 553     def validate(self):
 554         if self.name == TAG_ATTRIBUTES:
 555             # The 'Attributes:' tag allows free form annotations so the
 556             # validation below is most certainly going to fail.
 557             return
 558
 559         for option, value in self.options.items():
 560             if option == OPT_ALLOW_NONE:
 561                 self._validate_option(option, value, n_params=0)
 562             elif option == OPT_ARRAY:
 563                 self._validate_array(option, value)
 564             elif option == OPT_ATTRIBUTE:
 565                 self._validate_option(option, value, n_params=2)
 566             elif option == OPT_CLOSURE:
 567                 self._validate_closure(option, value)
 568             elif option == OPT_DESTROY:
 569                 self._validate_option(option, value, n_params=1)
 570             elif option == OPT_ELEMENT_TYPE:
 571                 self._validate_element_type(option, value)
 572             elif option == OPT_FOREIGN:
 573                 self._validate_option(option, value, n_params=0)
 574             elif option == OPT_IN:
 575                 self._validate_option(option, value, n_params=0)
 576             elif option in [OPT_INOUT, OPT_INOUT_ALT]:
 577                 self._validate_option(option, value, n_params=0)
 578             elif option == OPT_OUT:
 579                 self._validate_out(option, value)
 580             elif option == OPT_SCOPE:
 581                 self._validate_option(
 582                     option, value, required=True,
 583                     n_params=1,
 584                     choices=[OPT_SCOPE_ASYNC,
 585                              OPT_SCOPE_CALL,
 586                              OPT_SCOPE_NOTIFIED])
 587             elif option == OPT_SKIP:
 588                 self._validate_option(option, value, n_params=0)
 589             elif option == OPT_TRANSFER:
 590                 self._validate_option(
 591                     option, value, required=True,
 592                     n_params=1,
 593                     choices=[OPT_TRANSFER_FULL,
 594                              OPT_TRANSFER_CONTAINER,
 595                              OPT_TRANSFER_NONE,
 596                              OPT_TRANSFER_FLOATING])
 597             elif option == OPT_TYPE:
 598                 self._validate_option(option, value, required=True,
 599                                       n_params=1)
 600             elif option == OPT_CONSTRUCTOR:
 601                 self._validate_option(option, value, n_params=0)
 602             elif option == OPT_METHOD:
 603                 self._validate_option(option, value, n_params=0)
 604             else:
 605                 message.warn('invalid annotation option: %s' % (option, ),
 606                              self.position)
 607
 608
 609 class DocOptions(object):
 610     def __init__(self):
 611         self.values = []
 612         self.position = None
 613
 614     def __repr__(self):
 615         return '<DocOptions %r>' % (self.values, )
 616
 617     def __getitem__(self, item):
 618         for key, value in self.values:
 619             if key == item:
 620                 return value
 621         raise KeyError
 622
 623     def __nonzero__(self):
 624         return bool(self.values)
 625
 626     def __iter__(self):
 627         return (k for k, v in self.values)
 628
 629     def add(self, name, value):
 630         self.values.append((name, value))
 631
 632     def get(self, item, default=None):
 633         for key, value in self.values:
 634             if key == item:
 635                 return value
 636         return default
 637
 638     def getall(self, item):
 639         for key, value in self.values:
 640             if key == item:
 641                 yield value
 642
 643     def items(self):
 644         return iter(self.values)
 645
 646
 647 class DocOption(object):
 648
 649     def __init__(self, tag, option):
 650         self.tag = tag
 651         self._array = []
 652         self._dict = odict()
 653         # (annotation option1=value1 option2=value2) etc
 654         for p in option.split(' '):
 655             if '=' in p:
 656                 name, value = p.split('=', 1)
 657             else:
 658                 name = p
 659                 value = None
 660             self._dict[name] = value
 661             if value is None:
 662                 self._array.append(name)
 663             else:
 664                 self._array.append((name, value))
 665
 666     def __repr__(self):
 667         return '<DocOption %r>' % (self._array, )
 668
 669     def length(self):
 670         return len(self._array)
 671
 672     def one(self):
 673         assert len(self._array) == 1
 674         return self._array[0]
 675
 676     def flat(self):
 677         return self._array
 678
 679     def all(self):
 680         return self._dict
 681
 682
 683 class AnnotationParser(object):
 684     """
 685     GTK-Doc comment block parser.
 686
 687     Parses GTK-Doc comment blocks into a parse tree built out of :class:`DocBlock`,
 688     :class:`DocTag`, :class:`DocOptions` and :class:`DocOption` objects. This
 689     parser tries to accept malformed input whenever possible and does not emit
 690     syntax errors. However, it does emit warnings at the slightest indication
 691     of malformed input when possible. It is usually a good idea to heed these
 692     warnings as malformed input is known to result in invalid GTK-Doc output.
 693
 694     A GTK-Doc comment block can be constructed out of multiple parts that can
 695     be combined to write different types of documentation.
 696     See `GTK-Doc's documentation`_ to learn more about possible valid combinations.
 697     Each part can be further divided into fields which are separated by `:` characters.
 698
 699     Possible parts and the fields they are constructed from look like the
 700     following (optional fields are enclosed in square brackets):
 701
 702     .. code-block:: c
 703         /**
 704          * identifier_name [:annotations]
 705          * @parameter_name [:annotations] [:description]
 706          *
 707          * comment_block_description
 708          * tag_name [:annotations] [:description]
 709          */
 710
 711     The order in which the different parts have to be specified is important::
 712
 713         - There has to be exactly 1 `identifier` part on the first line of the
 714           comment block which consists of:
 715               * an `identifier_name` field
 716               * an optional `annotations` field
 717         - Followed by 0 or more `parameters` parts, each consisting of:
 718               * a `parameter_name` field
 719               * an optional `annotations` field
 720               * an optional `description` field
 721         - Followed by at least 1 empty line signaling the beginning of
 722           the `comment_block_description` part
 723         - Followed by an optional `comment block description` part.
 724         - Followed by 0 or more `tag` parts, each consisting of:
 725               * a `tag_name` field
 726               * an optional `annotations` field
 727               * an optional `description` field
 728
 729     Additionally, the following restrictions are in effect::
 730
 731         - Parts can optionally be separated by an empty line, except between
 732           the `parameter` parts and the `comment block description` part where
 733           an empty line is required (see above).
 734         - Parts and fields cannot span multiple lines, except for
 735           `parameter descriptions`, `tag descriptions` and the
 736           `comment_block_description` fields.
 737         - `parameter descriptions` fields can not span multiple paragraphs.
 738         - `tag descriptions` and `comment block description` fields can
 739           span multiple paragraphs.
 740
 741     .. NOTE:: :class:`AnnotationParser` functionality is heavily based on gtkdoc-mkdb's
 742         `ScanSourceFile()`_ function and is currently in sync with GTK-Doc
 743         commit `47abcd5`_.
 744
 745     .. _GTK-Doc's documentation:
 746             http://developer.gnome.org/gtk-doc-manual/1.18/documenting.html.en
 747     .. _ScanSourceFile():
 748             http://git.gnome.org/browse/gtk-doc/tree/gtkdoc-mkdb.in#n3722
 749     .. _47abcd5: 47abcd53b8489ebceec9e394676512a181c1f1f6
 750     """
 751
 752     def parse(self, comments):
 753         """
 754         Parses multiple GTK-Doc comment blocks.
 755
 756         :param comments: a list of (comment, filename, lineno) tuples
 757         :returns: a dictionary mapping identifier names to :class:`DocBlock` objects
 758         """
 759
 760         comment_blocks = {}
 761
 762         for comment in comments:
 763             comment_block = self.parse_comment_block(comment)
 764
 765             if comment_block is not None:
 766                 # Note: previous versions of this parser did not check
 767                 # if an identifier was already stored in comment_blocks,
 768                 # so when multiple comment blocks where encountered documenting
 769                 # the same identifier the last one seen "wins".
 770                 # Keep this behavior for backwards compatibility, but
 771                 # emit a warning.
 772                 if comment_block.name in comment_blocks:
 773                     message.warn("multiple comment blocks documenting '%s:' identifier." %
 774                                  (comment_block.name),
 775                                  comment_block.position)
 776
 777                 comment_blocks[comment_block.name] = comment_block
 778
 779         return comment_blocks
 780
 781     def parse_comment_block(self, comment):
 782         """
 783         Parses a single GTK-Doc comment block.
 784
 785         :param comment: a (comment, filename, lineno) tuple
 786         :returns: a :class:`DocBlock` object or ``None``
 787         """
 788
 789         comment, filename, lineno = comment
 790
 791         # Assign line numbers to each line of the comment block,
 792         # which will later be used as the offset to calculate the
 793         # real line number in the source file
 794         comment_lines = list(enumerate(comment.split('\n')))
 795
 796         # Check for the start the comment block.
 797         if COMMENT_START_RE.match(comment_lines[0][1]):
 798             del comment_lines[0]
 799         else:
 800             # Not a GTK-Doc comment block.
 801             return None
 802
 803         # Check for the end the comment block.
 804         if COMMENT_END_RE.match(comment_lines[-1][1]):
 805             del comment_lines[-1]
 806         else:
 807             # Not a GTK-Doc comment block.
 808             return None
 809
 810         # If we get this far, we are inside a GTK-Doc comment block.
 811         return self._parse_comment_block(comment_lines, filename, lineno)
 812
 813     def _parse_comment_block(self, comment_lines, filename, lineno):
 814         """
 815         Parses a single GTK-Doc comment block already stripped from its
 816         comment start (/**) and comment end (*/) marker lines.
 817
 818         :param comment_lines: list of (line_offset, line) tuples representing a
 819                               GTK-Doc comment block already stripped from it's
 820                               start (/**) and end (*/) marker lines
 821         :param filename: source file name where the comment block originated from
 822         :param lineno:  line in the source file where the comment block starts
 823         :returns: a :class:`DocBlock` object or ``None``
 824
 825         .. NOTE:: If you are tempted to refactor this method and split it
 826             further up (for example into _parse_identifier(), _parse_parameters(),
 827             _parse_description(), _parse_tags() methods) then please resist the
 828             urge. It is considered important that this method should be more or
 829             less easily comparable with gtkdoc-mkdb's `ScanSourceFile()`_ function.
 830
 831             The different parsing steps are marked with a comment surrounded
 832             by `#` characters in an attempt to make it clear what is going on.
 833
 834         .. _ScanSourceFile():
 835                 http://git.gnome.org/browse/gtk-doc/tree/gtkdoc-mkdb.in#n3722
 836         """
 837         comment_block = None
 838         part_indent = None
 839         line_indent = None
 840         in_part = None
 841         identifier = None
 842         current_param = None
 843         current_tag = None
 844         returns_seen = False
 845
 846         for line_offset, line in comment_lines:
 847             position = message.Position(filename, line_offset + lineno)
 848
 849             # Store the original line (without \n) and column offset
 850             # so we can generate meaningful warnings later on.
 851             original_line = line
 852             column_offset = 0
 853
 854             # Get rid of ' * ' at start of the line.
 855             result = COMMENT_ASTERISK_RE.match(line)
 856             if result:
 857                 column_offset = result.end(0)
 858                 line = line[result.end(0):]
 859
 860             # Store indentation level of the line.
 861             result = COMMENT_INDENTATION_RE.match(line)
 862             line_indent = len(result.group('indentation').replace('\t', '  '))
 863
 864             ####################################################################
 865             # Check for GTK-Doc comment block identifier.
 866             ####################################################################
 867             if not comment_block:
 868                 if not identifier:
 869                     result = SECTION_RE.match(line)
 870                     if result:
 871                         identifier = IDENTIFIER_SECTION
 872                         identifier_name = 'SECTION:%s' % (result.group('section_name'))
 873                         column = result.start('section_name') + column_offset
 874
 875                 if not identifier:
 876                     result = SYMBOL_RE.match(line)
 877                     if result:
 878                         identifier = IDENTIFIER_SYMBOL
 879                         identifier_name = '%s' % (result.group('symbol_name'))
 880                         column = result.start('symbol_name') + column_offset
 881
 882                 if not identifier:
 883                     result = PROPERTY_RE.match(line)
 884                     if result:
 885                         identifier = IDENTIFIER_PROPERTY
 886                         identifier_name = '%s:%s' % (result.group('class_name'),
 887                                                      result.group('property_name'))
 888                         column = result.start('property_name') + column_offset
 889
 890                 if not identifier:
 891                     result = SIGNAL_RE.match(line)
 892                     if result:
 893                         identifier = IDENTIFIER_SIGNAL
 894                         identifier_name = '%s::%s' % (result.group('class_name'),
 895                                                       result.group('signal_name'))
 896                         column = result.start('signal_name') + column_offset
 897
 898                 if identifier:
 899                     in_part = PART_IDENTIFIER
 900                     part_indent = line_indent
 901
 902                     comment_block = DocBlock(identifier_name)
 903                     comment_block.position = position
 904
 905                     if 'colon' in result.groupdict() and result.group('colon') != ':':
 906                         colon_start = result.start('colon')
 907                         colon_column = column_offset + colon_start
 908                         marker = ' '*colon_column + '^'
 909                         message.warn("missing ':' at column %s:\n%s\n%s" %
 910                                      (colon_column + 1, original_line, marker),
 911                                      position)
 912
 913                     if 'annotations' in result.groupdict():
 914                         comment_block.options = self.parse_options(comment_block,
 915                                                                    result.group('annotations'))
 916
 917                     continue
 918                 else:
 919                     # If we get here, the identifier was not recognized, so
 920                     # ignore the rest of the block just like the old annotation
 921                     # parser did. Doing this is a bit more strict than
 922                     # gtkdoc-mkdb (which continues to search for the identifier
 923                     # until either it is found or the end of the block is
 924                     # reached). In practice, however, ignoring the block is the
 925                     # right thing to do because sooner or later some long
 926                     # descriptions will contain something matching an identifier
 927                     # pattern by accident.
 928                     marker = ' '*column_offset + '^'
 929                     message.warn('ignoring unrecognized GTK-Doc comment block, identifier not '
 930                                  'found:\n%s\n%s' % (original_line, marker),
 931                                  position)
 932
 933                     return None
 934
 935             ####################################################################
 936             # Check for comment block parameters.
 937             ####################################################################
 938             result = PARAMETER_RE.match(line)
 939             if result:
 940                 param_name = result.group('parameter_name')
 941                 param_annotations = result.group('annotations')
 942                 param_description = result.group('description')
 943
 944                 if in_part == PART_IDENTIFIER:
 945                     in_part = PART_PARAMETERS
 946
 947                 part_indent = line_indent
 948
 949                 if in_part != PART_PARAMETERS:
 950                     column = result.start('parameter_name') + column_offset
 951                     marker = ' '*column + '^'
 952                     message.warn("'@%s' parameter unexpected at this location:\n%s\n%s" %
 953                                  (param_name, original_line, marker),
 954                                  position)
 955
 956                 # Old style GTK-Doc allowed return values to be specified as
 957                 # parameters instead of tags.
 958                 if param_name.lower() == TAG_RETURNS:
 959                     param_name = TAG_RETURNS
 960
 961                     if not returns_seen:
 962                         returns_seen = True
 963                     else:
 964                         message.warn("encountered multiple 'Returns' parameters or tags for "
 965                                      "'%s'." % (comment_block.name),
 966                                      position)
 967                 elif param_name in comment_block.params.keys():
 968                     column = result.start('parameter_name') + column_offset
 969                     marker = ' '*column + '^'
 970                     message.warn("multiple '@%s' parameters for identifier '%s':\n%s\n%s" %
 971                                  (param_name, comment_block.name, original_line, marker),
 972                                  position)
 973
 974                 tag = DocTag(comment_block, param_name)
 975                 tag.position = position
 976                 tag.comment = param_description
 977                 if param_annotations:
 978                     tag.options = self.parse_options(tag, param_annotations)
 979                 if param_name == TAG_RETURNS:
 980                     comment_block.tags[param_name] = tag
 981                 else:
 982                     comment_block.params[param_name] = tag
 983                 current_param = tag
 984                 continue
 985
 986             ####################################################################
 987             # Check for comment block description.
 988             #
 989             # When we are parsing comment block parameters or the comment block
 990             # identifier (when there are no parameters) and encounter an empty
 991             # line, we must be parsing the comment block description.
 992             ####################################################################
 993             if (EMPTY_LINE_RE.match(line)
 994             and in_part in [PART_IDENTIFIER, PART_PARAMETERS]):
 995                 in_part = PART_DESCRIPTION
 996                 part_indent = line_indent
 997                 continue
 998
 999             ####################################################################
1000             # Check for GTK-Doc comment block tags.
1001             ####################################################################
1002             result = TAG_RE.match(line)
1003             if result and line_indent <= part_indent:
1004                 tag_name = result.group('tag_name')
1005                 tag_annotations = result.group('annotations')
1006                 tag_description = result.group('description')
1007
1008                 marker = ' '*(result.start('tag_name') + column_offset) + '^'
1009
1010                 # Deprecated GTK-Doc Description: tag
1011                 if tag_name.lower() == TAG_DESCRIPTION:
1012                     message.warn("GTK-Doc tag \"Description:\" has been deprecated:\n%s\n%s" %
1013                                  (original_line, marker),
1014                                  position)
1015
1016                     in_part = PART_DESCRIPTION
1017                     part_indent = line_indent
1018
1019                     if not comment_block.comment:
1020                         comment_block.comment = tag_description
1021                     else:
1022                         comment_block.comment += '\n' + tag_description
1023                     continue
1024
1025                 # Now that the deprecated stuff is out of the way, continue parsing real tags
1026                 if in_part == PART_DESCRIPTION:
1027                     in_part = PART_TAGS
1028
1029                 part_indent = line_indent
1030
1031                 if in_part != PART_TAGS:
1032                     column = result.start('tag_name') + column_offset
1033                     marker = ' '*column + '^'
1034                     message.warn("'%s:' tag unexpected at this location:\n%s\n%s" %
1035                                  (tag_name, original_line, marker),
1036                                  position)
1037
1038                 if tag_name.lower() in [TAG_RETURNS, TAG_RETURNVALUE]:
1039                     if not returns_seen:
1040                         returns_seen = True
1041                     else:
1042                         message.warn("encountered multiple 'Returns' parameters or tags for "
1043                                      "'%s'." % (comment_block.name),
1044                                      position)
1045
1046                     tag = DocTag(comment_block, TAG_RETURNS)
1047                     tag.position = position
1048                     tag.comment = tag_description
1049                     if tag_annotations:
1050                         tag.options = self.parse_options(tag, tag_annotations)
1051                     comment_block.tags[TAG_RETURNS] = tag
1052                     current_tag = tag
1053                     continue
1054                 else:
1055                     if tag_name.lower() in comment_block.tags.keys():
1056                         column = result.start('tag_name') + column_offset
1057                         marker = ' '*column + '^'
1058                         message.warn("multiple '%s:' tags for identifier '%s':\n%s\n%s" %
1059                                      (tag_name, comment_block.name, original_line, marker),
1060                                      position)
1061
1062                     tag = DocTag(comment_block, tag_name.lower())
1063                     tag.position = position
1064                     tag.value = tag_description
1065                     if tag_annotations:
1066                         if tag_name.lower() == TAG_ATTRIBUTES:
1067                             tag.options = self.parse_options(tag, tag_annotations)
1068                         else:
1069                             message.warn("annotations not supported for tag '%s:'." %
1070                                          (tag_name),
1071                                          position)
1072                     comment_block.tags[tag_name.lower()] = tag
1073                     current_tag = tag
1074                     continue
1075
1076             ####################################################################
1077             # If we get here, we must be in the middle of a multiline
1078             # comment block, parameter or tag description.
1079             ####################################################################
1080             if in_part in [PART_IDENTIFIER, PART_DESCRIPTION]:
1081                 if not comment_block.comment:
1082                     comment_block.comment = line
1083                 else:
1084                     comment_block.comment += '\n' + line
1085                 continue
1086             elif in_part == PART_PARAMETERS:
1087                 self._validate_multiline_annotation_continuation(line, original_line,
1088                                                                  column_offset, position)
1089
1090                 # Append to parameter description.
1091                 current_param.comment += ' ' + line.strip()
1092             elif in_part == PART_TAGS:
1093                 self._validate_multiline_annotation_continuation(line, original_line,
1094                                                                  column_offset, position)
1095
1096                 # Append to tag description.
1097                 if current_tag.name.lower() in [TAG_RETURNS, TAG_RETURNVALUE]:
1098                     current_tag.comment += ' ' + line.strip()
1099                 else:
1100                     current_tag.value += ' ' + line.strip()
1101
1102         ########################################################################
1103         # Finished parsing this comment block.
1104         ########################################################################
1105         # We have picked up a couple of \n characters that where not
1106         # intended. Strip those.
1107         if comment_block.comment:
1108             comment_block.comment = comment_block.comment.strip()
1109
1110         for tag in comment_block.tags.values():
1111             self._clean_comment_block_part(tag)
1112
1113         for param in comment_block.params.values():
1114             self._clean_comment_block_part(param)
1115
1116         # Validate and store block.
1117         comment_block.validate()
1118         return comment_block
1119
1120     def _clean_comment_block_part(self, part):
1121         if part.comment:
1122             part.comment = part.comment.strip()
1123         else:
1124             part.comment = None
1125
1126         if part.value:
1127             part.value = part.value.strip()
1128         else:
1129             part.value = ''
1130
1131     def _validate_multiline_annotation_continuation(self, line, original_line,
1132                                                           column_offset, position):
1133         '''
1134         Validate parameters and tags (except the first line) and generate
1135         warnings about invalid annotations spanning multiple lines.
1136
1137         :param line: line to validate, stripped from ' * ' at start of the line.
1138         :param original_line: original line to validate (used in warning messages)
1139         :param column_offset: column width of ' * ' at the time it was stripped from `line`
1140         :param position: position of `line` in the source file
1141         '''
1142
1143         result = MULTILINE_ANNOTATION_CONTINUATION_RE.match(line)
1144         if result:
1145             column = result.start('annotations') + column_offset
1146             marker = ' '*column + '^'
1147             message.warn('ignoring invalid multiline annotation continuation:\n'
1148                          '%s\n%s' % (original_line, marker),
1149                          position)
1150
1151     @classmethod
1152     def parse_options(cls, tag, value):
1153         # (annotation)
1154         # (annotation opt1 opt2 ...)
1155         # (annotation opt1=value1 opt2=value2 ...)
1156         opened = -1
1157         options = DocOptions()
1158         options.position = tag.position
1159
1160         for i, c in enumerate(value):
1161             if c == '(' and opened == -1:
1162                 opened = i+1
1163             if c == ')' and opened != -1:
1164                 segment = value[opened:i]
1165                 parts = segment.split(' ', 1)
1166                 if len(parts) == 2:
1167                     name, option = parts
1168                 elif len(parts) == 1:
1169                     name = parts[0]
1170                     option = None
1171                 else:
1172                     raise AssertionError
1173                 if option is not None:
1174                     option = DocOption(tag, option)
1175                 options.add(name, option)
1176                 opened = -1
1177
1178         return options