giscanner/annotationparser.py

   1 # -*- Mode: Python -*-
   2 # GObject-Introspection - a framework for introspecting GObject libraries
   3 # Copyright (C) 2008-2010 Johan Dahlin
   4 # Copyright (C) 2012 Dieter Verfaillie <dieterv@optionexplicit.be>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20 #
  21
  22
  23 # AnnotationParser - extract annotations from GTK-Doc comment blocks
  24
  25
  26 import re
  27
  28 from . import message
  29 from .odict import odict
  30
  31
# GTK-Doc comment block parts
#
# Names for the parts a comment block is made of: the identifier line,
# the parameters, the description and the tags.
PART_IDENTIFIER = 'identifier'
PART_PARAMETERS = 'parameters'
PART_DESCRIPTION = 'description'
PART_TAGS = 'tags'

# Identifiers
#
# Names for the kinds of identifier the *_RE programs below distinguish
# (SECTION_RE, SYMBOL_RE, PROPERTY_RE and SIGNAL_RE).
IDENTIFIER_SECTION = 'section'
IDENTIFIER_SYMBOL = 'symbol'
IDENTIFIER_PROPERTY = 'property'
IDENTIFIER_SIGNAL = 'signal'

# Tags - annotations applied to comment blocks
#
# Tag names are stored lower case; TAG_RE is compiled with re.IGNORECASE so
# the spelling used in a comment block may differ in case.
TAG_VFUNC = 'virtual'
TAG_SINCE = 'since'
TAG_STABILITY = 'stability'
TAG_DEPRECATED = 'deprecated'
TAG_RETURNS = 'returns'
TAG_RETURNVALUE = 'return value'
TAG_DESCRIPTION = 'description'
TAG_ATTRIBUTES = 'attributes'
TAG_RENAME_TO = 'rename to'
TAG_TYPE = 'type'
TAG_UNREF_FUNC = 'unref func'
TAG_REF_FUNC = 'ref func'
TAG_SET_VALUE_FUNC = 'set value func'
TAG_GET_VALUE_FUNC = 'get value func'
TAG_TRANSFER = 'transfer'
TAG_VALUE = 'value'
# Every tag name defined above; joined with '|' further down to build the
# tag name alternation of TAG_RE.
_ALL_TAGS = [TAG_VFUNC,
             TAG_SINCE,
             TAG_STABILITY,
             TAG_DEPRECATED,
             TAG_RETURNS,
             TAG_RETURNVALUE,
             TAG_DESCRIPTION,
             TAG_ATTRIBUTES,
             TAG_RENAME_TO,
             TAG_TYPE,
             TAG_UNREF_FUNC,
             TAG_REF_FUNC,
             TAG_SET_VALUE_FUNC,
             TAG_GET_VALUE_FUNC,
             TAG_TRANSFER,
             TAG_VALUE]

# Options - annotations for parameters and return values
#
# DocTag.validate() checks each of these; any other option name results in
# an 'invalid annotation option' warning.
OPT_ALLOW_NONE = 'allow-none'
OPT_ARRAY = 'array'
OPT_ATTRIBUTE = 'attribute'
OPT_CLOSURE = 'closure'
OPT_DESTROY = 'destroy'
OPT_ELEMENT_TYPE = 'element-type'
OPT_FOREIGN = 'foreign'
OPT_IN = 'in'
OPT_INOUT = 'inout'
OPT_INOUT_ALT = 'in-out'
OPT_OUT = 'out'
OPT_SCOPE = 'scope'
OPT_TRANSFER = 'transfer'
OPT_TYPE = 'type'
OPT_SKIP = 'skip'
OPT_CONSTRUCTOR = 'constructor'
OPT_METHOD = 'method'

# Every option name defined above.
ALL_OPTIONS = [
    OPT_ALLOW_NONE,
    OPT_ARRAY,
    OPT_ATTRIBUTE,
    OPT_CLOSURE,
    OPT_DESTROY,
    OPT_ELEMENT_TYPE,
    OPT_FOREIGN,
    OPT_IN,
    OPT_INOUT,
    OPT_INOUT_ALT,
    OPT_OUT,
    OPT_SCOPE,
    OPT_TRANSFER,
    OPT_TYPE,
    OPT_SKIP,
    OPT_CONSTRUCTOR,
    OPT_METHOD]

# Array options - array specific annotations
#
# The names accepted inside an (array ...) annotation, see
# DocTag._validate_array().
OPT_ARRAY_FIXED_SIZE = 'fixed-size'
OPT_ARRAY_LENGTH = 'length'
OPT_ARRAY_ZERO_TERMINATED = 'zero-terminated'

# Out options
#
# The values accepted by an (out ...) annotation, see DocTag._validate_out().
OPT_OUT_CALLER_ALLOCATES = 'caller-allocates'
OPT_OUT_CALLEE_ALLOCATES = 'callee-allocates'

# Scope options
#
# The values accepted by a (scope ...) annotation, see DocTag.validate().
OPT_SCOPE_ASYNC = 'async'
OPT_SCOPE_CALL = 'call'
OPT_SCOPE_NOTIFIED = 'notified'

# Transfer options
#
# The values accepted by a (transfer ...) annotation, see DocTag.validate().
OPT_TRANSFER_NONE = 'none'
OPT_TRANSFER_CONTAINER = 'container'
OPT_TRANSFER_FULL = 'full'
OPT_TRANSFER_FLOATING = 'floating'
 135
 136
# The following regular expression programs are built to:
# - match (or substitute) a single comment block line at a time;
# - support LOCALE and UNICODE modes (although this remains untested).
#
# Throughout these programs the character class [^\S\n\r] stands for
# "whitespace": it excludes non-whitespace, line feed and carriage return,
# so it matches any whitespace character other than those two line
# terminators.

# Program matching the start of a comment block: a line holding nothing
# but '/**', optionally surrounded by whitespace.
#
# Results in 0 symbolic groups.
COMMENT_START_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    /                                        # 1 forward slash character
    \*{2}                                    # exactly 2 asterisk characters
    [^\S\n\r]*                               # 0 or more whitespace characters
    $                                        # end
    ''',
    re.VERBOSE)

# Program matching the end of a comment block: optional leading whitespace,
# one or more asterisks and a forward slash. The pattern ends right after
# the slash, so trailing whitespace is not accepted.
#
# Results in 0 symbolic groups.
COMMENT_END_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    \*+                                      # 1 or more asterisk characters
    /                                        # 1 forward slash character
    $                                        # end
    ''',
    re.VERBOSE)

# Program matching the ' * ' at the beginning of every
# line inside a comment block. It is anchored at the start of the line
# only, so it matches just that prefix and not the rest of the line.
#
# Results in 0 symbolic groups.
COMMENT_ASTERISK_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    \*                                       # 1 asterisk character
    [^\S\n\r]?                               # 0 or 1 whitespace characters. Careful,
                                             # removing more than 1 whitespace
                                             # character would break embedded
                                             # example program indentation
    ''',
    re.VERBOSE)

# Program matching the indentation at the beginning of every
# line (stripped from the ' * ') inside a comment block.
#
# Results in 1 symbolic group:
#   - group 1 = indentation
COMMENT_INDENTATION_RE = re.compile(r'''
    ^
    (?P<indentation>[^\S\n\r]*)              # 0 or more whitespace characters
    .*
    $
    ''',
    re.VERBOSE)

# Program matching an empty line, that is a line holding nothing
# but (0 or more) whitespace characters.
#
# Results in 0 symbolic groups.
EMPTY_LINE_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    $                                        # end
    ''',
    re.VERBOSE)
 203
# Program matching SECTION identifiers.
#
# Both the colon and the section name are optional as far as this program
# is concerned. When present, the section name is a word character
# followed by 1 or more non-whitespace characters.
#
# Results in 2 symbolic groups:
#   - group 1 = colon
#   - group 2 = section_name
SECTION_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    SECTION                                  # SECTION
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<colon>:?)                            # colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<section_name>\w\S+)?                 # section name
    [^\S\n\r]*                               # 0 or more whitespace characters
    $
    ''',
    re.VERBOSE)

# Program matching symbol (function, constant, struct and enum) identifiers.
#
# The symbol name consists of word characters and '-' and has to end in
# a word character. The colon group is empty when the colon is missing.
# The annotations group holds 0 or more parenthesized annotations.
#
# Results in 3 symbolic groups:
#   - group 1 = symbol_name
#   - group 2 = colon
#   - group 3 = annotations
SYMBOL_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<symbol_name>[\w-]*\w)                # symbol name
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<colon>:?)                            # colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
    [^\S\n\r]*                               # 0 or more whitespace characters
    $                                        # end
    ''',
    re.VERBOSE)

# Program matching property identifiers.
#
# Class name and property name are separated by a single, required colon.
# The colon group refers to the optional colon following the property name.
#
# Results in 4 symbolic groups:
#   - group 1 = class_name
#   - group 2 = property_name
#   - group 3 = colon
#   - group 4 = annotations
PROPERTY_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<class_name>[\w]+)                    # class name
    [^\S\n\r]*                               # 0 or more whitespace characters
    :{1}                                     # required colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<property_name>[\w-]*\w)              # property name
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<colon>:?)                            # colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
    [^\S\n\r]*                               # 0 or more whitespace characters
    $                                        # end
    ''',
    re.VERBOSE)

# Program matching signal identifiers.
#
# Class name and signal name are separated by two required colons.
# The colon group refers to the optional colon following the signal name.
#
# Results in 4 symbolic groups:
#   - group 1 = class_name
#   - group 2 = signal_name
#   - group 3 = colon
#   - group 4 = annotations
SIGNAL_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<class_name>[\w]+)                    # class name
    [^\S\n\r]*                               # 0 or more whitespace characters
    :{2}                                     # 2 required colons
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<signal_name>[\w-]*\w)                # signal name
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<colon>:?)                            # colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
    [^\S\n\r]*                               # 0 or more whitespace characters
    $                                        # end
    ''',
    re.VERBOSE)
 288
# Program matching parameters.
#
# A parameter line starts with '@' followed by the parameter name, which is
# either made of word characters and '-' (ending in a word character) or
# the literal '...'. The colon following the name is required, the colon
# group refers to the optional colon following the annotations.
#
# Results in 4 symbolic groups:
#   - group 1 = parameter_name
#   - group 2 = annotations
#   - group 3 = colon
#   - group 4 = description
PARAMETER_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    @                                        # @ character
    (?P<parameter_name>[\w-]*\w|\.\.\.)      # parameter name
    [^\S\n\r]*                               # 0 or more whitespace characters
    :{1}                                     # required colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
    (?P<colon>:?)                            # colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<description>.*?)                     # description
    [^\S\n\r]*                               # 0 or more whitespace characters
    $                                        # end
    ''',
    re.VERBOSE)

# Program matching tags.
#
# The tag name alternation is generated from _ALL_TAGS. Spaces in tag names
# (for example 'return value') are escaped because unescaped whitespace is
# ignored in re.VERBOSE mode. Tag names are matched case insensitively
# (re.IGNORECASE).
#
# Results in 4 symbolic groups:
#   - group 1 = tag_name
#   - group 2 = annotations
#   - group 3 = colon
#   - group 4 = description
_all_tags = '|'.join(_ALL_TAGS).replace(' ', '\\ ')
TAG_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<tag_name>''' + _all_tags + r''')     # tag name
    [^\S\n\r]*                               # 0 or more whitespace characters
    :{1}                                     # required colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
    (?P<colon>:?)                            # colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<description>.*?)                     # description
    [^\S\n\r]*                               # 0 or more whitespace characters
    $                                        # end
    ''',
    re.VERBOSE | re.IGNORECASE)

# Program matching multiline annotation continuations.
# This is used on multiline parameters and tags (but not on the first line) to
# generate warnings about invalid annotations spanning multiple lines.
#
# Unlike in PARAMETER_RE and TAG_RE, the colon following the annotations
# is required here.
#
# Results in 3 symbolic groups:
#   - group 1 = annotations
#   - group 2 = colon
#   - group 3 = description
MULTILINE_ANNOTATION_CONTINUATION_RE = re.compile(r'''
    ^                                        # start
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
    (?P<colon>:)                             # colon
    [^\S\n\r]*                               # 0 or more whitespace characters
    (?P<description>.*?)                     # description
    [^\S\n\r]*                               # 0 or more whitespace characters
    $                                        # end
    ''',
    re.VERBOSE)
 356
 357
 358 class DocBlock(object):
 359
 360     def __init__(self, name):
 361         self.name = name
 362         self.options = DocOptions()
 363         self.value = None
 364         self.tags = odict()
 365         self.comment = None
 366         self.params = odict()
 367         self.position = None
 368
 369     def __cmp__(self, other):
 370         return cmp(self.name, other.name)
 371
 372     def __repr__(self):
 373         return '<DocBlock %r %r>' % (self.name, self.options)
 374
 375     def get_tag(self, name):
 376         return self.tags.get(name)
 377
 378     def get_param(self, name):
 379         return self.params.get(name)
 380
 381     def to_gtk_doc(self):
 382         options = ''
 383         if self.options:
 384             options += ' '
 385             options += ' '.join('(%s)' % o for o in self.options)
 386         lines = [self.name]
 387         if 'SECTION' not in self.name:
 388             lines[0] += ':'
 389         lines[0] += options
 390         for param in self.params.values():
 391             lines.append(param.to_gtk_doc_param())
 392         lines.append('')
 393         for l in self.comment.split('\n'):
 394             lines.append(l)
 395         if self.tags:
 396             lines.append('')
 397             for tag in self.tags.values():
 398                 lines.append(tag.to_gtk_doc_tag())
 399
 400         comment = ''
 401         comment += '/**\n'
 402         for line in lines:
 403             line = line.rstrip()
 404             if line:
 405                 comment += ' * %s\n' % (line, )
 406             else:
 407                 comment += ' *\n'
 408         comment += ' */\n'
 409         return comment
 410
 411     def validate(self):
 412         for param in self.params.values():
 413             param.validate()
 414
 415         for tag in self.tags.values():
 416             tag.validate()
 417
 418
 419 class DocTag(object):
 420
 421     def __init__(self, block, name):
 422         self.block = block
 423         self.name = name
 424         self.options = DocOptions()
 425         self.comment = None
 426         self.value = ''
 427         self.position = None
 428
 429     def __repr__(self):
 430         return '<DocTag %r %r>' % (self.name, self.options)
 431
 432     def _validate_option(self, name, value, required=False,
 433                          n_params=None, choices=None):
 434         if required and value is None:
 435             message.warn('%s annotation needs a value' % (
 436                 name, ), self.position)
 437             return
 438
 439         if n_params is not None:
 440             if n_params == 0:
 441                 s = 'no value'
 442             elif n_params == 1:
 443                 s = 'one value'
 444             else:
 445                 s = '%d values' % (n_params, )
 446             if ((n_params > 0 and (value is None or value.length() != n_params)) or
 447                 n_params == 0 and value is not None):
 448                 if value is None:
 449                     length = 0
 450                 else:
 451                     length = value.length()
 452                 message.warn('%s annotation needs %s, not %d' % (
 453                     name, s, length), self.position)
 454                 return
 455
 456         if choices is not None:
 457             valuestr = value.one()
 458             if valuestr not in choices:
 459                 message.warn('invalid %s annotation value: %r' % (
 460                     name, valuestr, ), self.position)
 461                 return
 462
 463     def _validate_array(self, option, value):
 464         if value is None:
 465             return
 466
 467         for name, v in value.all().items():
 468             if name in [OPT_ARRAY_ZERO_TERMINATED, OPT_ARRAY_FIXED_SIZE]:
 469                 try:
 470                     int(v)
 471                 except (TypeError, ValueError):
 472                     if v is None:
 473                         message.warn(
 474                             'array option %s needs a value' % (
 475                             name, ),
 476                             positions=self.position)
 477                     else:
 478                         message.warn(
 479                             'invalid array %s option value %r, '
 480                             'must be an integer' % (name, v, ),
 481                             positions=self.position)
 482             elif name == OPT_ARRAY_LENGTH:
 483                 if v is None:
 484                     message.warn(
 485                         'array option length needs a value',
 486                         positions=self.position)
 487             else:
 488                 message.warn(
 489                     'invalid array annotation value: %r' % (
 490                     name, ), self.position)
 491
 492     def _validate_closure(self, option, value):
 493         if value is not None and value.length() > 1:
 494             message.warn(
 495                 'closure takes at most 1 value, %d given' % (
 496                 value.length()), self.position)
 497
 498     def _validate_element_type(self, option, value):
 499         self._validate_option(option, value, required=True)
 500         if value is None:
 501             message.warn(
 502                 'element-type takes at least one value, none given',
 503                 self.position)
 504             return
 505         if value.length() > 2:
 506             message.warn(
 507                 'element-type takes at most 2 values, %d given' % (
 508                 value.length()), self.position)
 509             return
 510
 511     def _validate_out(self, option, value):
 512         if value is None:
 513             return
 514         if value.length() > 1:
 515             message.warn(
 516                 'out annotation takes at most 1 value, %d given' % (
 517                 value.length()), self.position)
 518             return
 519         value_str = value.one()
 520         if value_str not in [OPT_OUT_CALLEE_ALLOCATES,
 521                              OPT_OUT_CALLER_ALLOCATES]:
 522             message.warn("out annotation value is invalid: %r" % (
 523                 value_str), self.position)
 524             return
 525
 526     def _get_gtk_doc_value(self):
 527         def serialize_one(option, value, fmt, fmt2):
 528             if value:
 529                 if type(value) != str:
 530                     value = ' '.join((serialize_one(k, v, '%s=%s', '%s')
 531                                       for k, v in value.all().items()))
 532                 return fmt % (option, value)
 533             else:
 534                 return fmt2 % (option, )
 535         annotations = []
 536         for option, value in self.options.items():
 537             annotations.append(
 538                 serialize_one(option, value, '(%s %s)', '(%s)'))
 539         if annotations:
 540             return ' '.join(annotations) + ': '
 541         else:
 542             return self.value
 543
 544     def to_gtk_doc_param(self):
 545         return '@%s: %s%s' % (self.name, self._get_gtk_doc_value(), self.comment)
 546
 547     def to_gtk_doc_tag(self):
 548         return '%s: %s%s' % (self.name.capitalize(),
 549                              self._get_gtk_doc_value(),
 550                              self.comment or '')
 551
 552     def validate(self):
 553         if self.name == TAG_ATTRIBUTES:
 554             # The 'Attributes:' tag allows free form annotations so the
 555             # validation below is most certainly going to fail.
 556             return
 557
 558         for option, value in self.options.items():
 559             if option == OPT_ALLOW_NONE:
 560                 self._validate_option(option, value, n_params=0)
 561             elif option == OPT_ARRAY:
 562                 self._validate_array(option, value)
 563             elif option == OPT_ATTRIBUTE:
 564                 self._validate_option(option, value, n_params=2)
 565             elif option == OPT_CLOSURE:
 566                 self._validate_closure(option, value)
 567             elif option == OPT_DESTROY:
 568                 self._validate_option(option, value, n_params=1)
 569             elif option == OPT_ELEMENT_TYPE:
 570                 self._validate_element_type(option, value)
 571             elif option == OPT_FOREIGN:
 572                 self._validate_option(option, value, n_params=0)
 573             elif option == OPT_IN:
 574                 self._validate_option(option, value, n_params=0)
 575             elif option in [OPT_INOUT, OPT_INOUT_ALT]:
 576                 self._validate_option(option, value, n_params=0)
 577             elif option == OPT_OUT:
 578                 self._validate_out(option, value)
 579             elif option == OPT_SCOPE:
 580                 self._validate_option(
 581                     option, value, required=True,
 582                     n_params=1,
 583                     choices=[OPT_SCOPE_ASYNC,
 584                              OPT_SCOPE_CALL,
 585                              OPT_SCOPE_NOTIFIED])
 586             elif option == OPT_SKIP:
 587                 self._validate_option(option, value, n_params=0)
 588             elif option == OPT_TRANSFER:
 589                 self._validate_option(
 590                     option, value, required=True,
 591                     n_params=1,
 592                     choices=[OPT_TRANSFER_FULL,
 593                              OPT_TRANSFER_CONTAINER,
 594                              OPT_TRANSFER_NONE,
 595                              OPT_TRANSFER_FLOATING])
 596             elif option == OPT_TYPE:
 597                 self._validate_option(option, value, required=True,
 598                                       n_params=1)
 599             elif option == OPT_CONSTRUCTOR:
 600                 self._validate_option(option, value, n_params=0)
 601             elif option == OPT_METHOD:
 602                 self._validate_option(option, value, n_params=0)
 603             else:
 604                 message.warn('invalid annotation option: %s' % (option, ),
 605                              self.position)
 606
 607
 608 class DocOptions(object):
 609     def __init__(self):
 610         self.values = []
 611         self.position = None
 612
 613     def __repr__(self):
 614         return '<DocOptions %r>' % (self.values, )
 615
 616     def __getitem__(self, item):
 617         for key, value in self.values:
 618             if key == item:
 619                 return value
 620         raise KeyError
 621
 622     def __nonzero__(self):
 623         return bool(self.values)
 624
 625     def __iter__(self):
 626         return (k for k, v in self.values)
 627
 628     def add(self, name, value):
 629         self.values.append((name, value))
 630
 631     def get(self, item, default=None):
 632         for key, value in self.values:
 633             if key == item:
 634                 return value
 635         return default
 636
 637     def getall(self, item):
 638         for key, value in self.values:
 639             if key == item:
 640                 yield value
 641
 642     def items(self):
 643         return iter(self.values)
 644
 645
 646 class DocOption(object):
 647
 648     def __init__(self, tag, option):
 649         self.tag = tag
 650         self._array = []
 651         self._dict = odict()
 652         # (annotation option1=value1 option2=value2) etc
 653         for p in option.split(' '):
 654             if '=' in p:
 655                 name, value = p.split('=', 1)
 656             else:
 657                 name = p
 658                 value = None
 659             self._dict[name] = value
 660             if value is None:
 661                 self._array.append(name)
 662             else:
 663                 self._array.append((name, value))
 664
 665     def __repr__(self):
 666         return '<DocOption %r>' % (self._array, )
 667
 668     def length(self):
 669         return len(self._array)
 670
 671     def one(self):
 672         assert len(self._array) == 1
 673         return self._array[0]
 674
 675     def flat(self):
 676         return self._array
 677
 678     def all(self):
 679         return self._dict
 680
 681
 682 class AnnotationParser(object):
 683     """
 684     GTK-Doc comment block parser.
 685
    Parses GTK-Doc comment blocks into a parse tree built out of :class:`DocBlock`,
 687     :class:`DocTag`, :class:`DocOptions` and :class:`DocOption` objects. This
 688     parser tries to accept malformed input whenever possible and does not emit
 689     syntax errors. However, it does emit warnings at the slightest indication
 690     of malformed input when possible. It is usually a good idea to heed these
 691     warnings as malformed input is known to result in invalid GTK-Doc output.
 692
 693     A GTK-Doc comment block can be constructed out of multiple parts that can
 694     be combined to write different types of documentation.
 695     See `GTK-Doc's documentation`_ to learn more about possible valid combinations.
 696     Each part can be further divided into fields which are separated by `:` characters.
 697
 698     Possible parts and the fields they are constructed from look like the
 699     following (optional fields are enclosed in square brackets):
 700
 701     .. code-block:: c
 702         /**
 703          * identifier_name [:annotations]
 704          * @parameter_name [:annotations] [:description]
 705          *
 706          * comment_block_description
 707          * tag_name [:annotations] [:description]
 708          */
 709
 710     The order in which the different parts have to be specified is important::
 711
 712         - There has to be exactly 1 `identifier` part on the first line of the
 713           comment block which consists of:
 714               * an `identifier_name` field
 715               * an optional `annotations` field
 716         - Followed by 0 or more `parameters` parts, each consisting of:
 717               * a `parameter_name` field
 718               * an optional `annotations` field
 719               * an optional `description` field
 720         - Followed by at least 1 empty line signaling the beginning of
 721           the `comment_block_description` part
 722         - Followed by an optional `comment block description` part.
 723         - Followed by 0 or more `tag` parts, each consisting of:
 724               * a `tag_name` field
 725               * an optional `annotations` field
 726               * an optional `description` field
 727
 728     Additionally, the following restrictions are in effect::
 729
 730         - Parts can optionally be separated by an empty line, except between
 731           the `parameter` parts and the `comment block description` part where
 732           an empty line is required (see above).
 733         - Parts and fields cannot span multiple lines, except for
 734           `parameter descriptions`, `tag descriptions` and the
 735           `comment_block_description` fields.
 736         - `parameter descriptions` fields can not span multiple paragraphs.
 737         - `tag descriptions` and `comment block description` fields can
 738           span multiple paragraphs.
 739
 740     .. NOTE:: :class:`AnnotationParser` functionality is heavily based on gtkdoc-mkdb's
 741         `ScanSourceFile()`_ function and is currently in sync with GTK-Doc
 742         commit `47abcd5`_.
 743
 744     .. _GTK-Doc's documentation:
 745             http://developer.gnome.org/gtk-doc-manual/1.18/documenting.html.en
 746     .. _ScanSourceFile():
 747             http://git.gnome.org/browse/gtk-doc/tree/gtkdoc-mkdb.in#n3722
 748     .. _47abcd5: 47abcd53b8489ebceec9e394676512a181c1f1f6
 749     """
 750
 751     def parse(self, comments):
 752         """
 753         Parses multiple GTK-Doc comment blocks.
 754
 755         :param comments: a list of (comment, filename, lineno) tuples
 756         :returns: a dictionary mapping identifier names to :class:`DocBlock` objects
 757         """
 758
 759         comment_blocks = {}
 760
 761         for comment in comments:
 762             comment_block = self.parse_comment_block(comment)
 763
 764             if comment_block is not None:
 765                 # Note: previous versions of this parser did not check
 766                 # if an identifier was already stored in comment_blocks,
 767                 # so when multiple comment blocks where encountered documenting
 768                 # the same identifier the last one seen "wins".
 769                 # Keep this behavior for backwards compatibility, but
 770                 # emit a warning.
 771                 if comment_block.name in comment_blocks:
 772                     message.warn("multiple comment blocks documenting '%s:' identifier." %
 773                                  (comment_block.name),
 774                                  comment_block.position)
 775
 776                 comment_blocks[comment_block.name] = comment_block
 777
 778         return comment_blocks
 779
 780     def parse_comment_block(self, comment):
 781         """
 782         Parses a single GTK-Doc comment block.
 783
 784         :param comment: a (comment, filename, lineno) tuple
 785         :returns: a :class:`DocBlock` object or ``None``
 786         """
 787
 788         comment, filename, lineno = comment
 789
 790         # Assign line numbers to each line of the comment block,
 791         # which will later be used as the offset to calculate the
 792         # real line number in the source file
 793         comment_lines = list(enumerate(comment.split('\n')))
 794
 795         # Check for the start the comment block.
 796         if COMMENT_START_RE.match(comment_lines[0][1]):
 797             del comment_lines[0]
 798         else:
 799             # Not a GTK-Doc comment block.
 800             return None
 801
 802         # Check for the end the comment block.
 803         if COMMENT_END_RE.match(comment_lines[-1][1]):
 804             del comment_lines[-1]
 805         else:
 806             # Not a GTK-Doc comment block.
 807             return None
 808
 809         # If we get this far, we are inside a GTK-Doc comment block.
 810         return self._parse_comment_block(comment_lines, filename, lineno)
 811
 812     def _parse_comment_block(self, comment_lines, filename, lineno):
 813         """
 814         Parses a single GTK-Doc comment block already stripped from its
 815         comment start (/**) and comment end (*/) marker lines.
 816
 817         :param comment_lines: list of (line_offset, line) tuples representing a
 818                               GTK-Doc comment block already stripped from it's
 819                               start (/**) and end (*/) marker lines
 820         :param filename: source file name where the comment block originated from
 821         :param lineno:  line in the source file where the comment block starts
 822         :returns: a :class:`DocBlock` object or ``None``
 823
 824         .. NOTE:: If you are tempted to refactor this method and split it
 825             further up (for example into _parse_identifier(), _parse_parameters(),
 826             _parse_description(), _parse_tags() methods) then please resist the
 827             urge. It is considered important that this method should be more or
 828             less easily comparable with gtkdoc-mkdb's `ScanSourceFile()`_ function.
 829
 830             The different parsing steps are marked with a comment surrounded
 831             by `#` characters in an attempt to make it clear what is going on.
 832
 833         .. _ScanSourceFile():
 834                 http://git.gnome.org/browse/gtk-doc/tree/gtkdoc-mkdb.in#n3722
 835         """
 836         comment_block = None
 837         part_indent = None
 838         line_indent = None
 839         in_part = None
 840         identifier = None
 841         current_param = None
 842         current_tag = None
 843         returns_seen = False
 844
 845         for line_offset, line in comment_lines:
 846             position = message.Position(filename, line_offset + lineno)
 847
 848             # Store the original line (without \n) and column offset
 849             # so we can generate meaningful warnings later on.
 850             original_line = line
 851             column_offset = 0
 852
 853             # Get rid of ' * ' at start of the line.
 854             result = COMMENT_ASTERISK_RE.match(line)
 855             if result:
 856                 column_offset = result.end(0)
 857                 line = line[result.end(0):]
 858
 859             # Store indentation level of the line.
 860             result = COMMENT_INDENTATION_RE.match(line)
 861             line_indent = len(result.group('indentation').replace('\t', '  '))
 862
 863             ####################################################################
 864             # Check for GTK-Doc comment block identifier.
 865             ####################################################################
 866             if not comment_block:
 867                 if not identifier:
 868                     result = SECTION_RE.match(line)
 869                     if result:
 870                         identifier = IDENTIFIER_SECTION
 871                         identifier_name = 'SECTION:%s' % (result.group('section_name'))
 872                         column = result.start('section_name') + column_offset
 873
 874                 if not identifier:
 875                     result = SYMBOL_RE.match(line)
 876                     if result:
 877                         identifier = IDENTIFIER_SYMBOL
 878                         identifier_name = '%s' % (result.group('symbol_name'))
 879                         column = result.start('symbol_name') + column_offset
 880
 881                 if not identifier:
 882                     result = PROPERTY_RE.match(line)
 883                     if result:
 884                         identifier = IDENTIFIER_PROPERTY
 885                         identifier_name = '%s:%s' % (result.group('class_name'),
 886                                                      result.group('property_name'))
 887                         column = result.start('property_name') + column_offset
 888
 889                 if not identifier:
 890                     result = SIGNAL_RE.match(line)
 891                     if result:
 892                         identifier = IDENTIFIER_SIGNAL
 893                         identifier_name = '%s::%s' % (result.group('class_name'),
 894                                                       result.group('signal_name'))
 895                         column = result.start('signal_name') + column_offset
 896
 897                 if identifier:
 898                     in_part = PART_IDENTIFIER
 899                     part_indent = line_indent
 900
 901                     comment_block = DocBlock(identifier_name)
 902                     comment_block.position = position
 903
 904                     if 'colon' in result.groupdict() and result.group('colon') != ':':
 905                         colon_start = result.start('colon')
 906                         colon_column = column_offset + colon_start
 907                         marker = ' '*colon_column + '^'
 908                         message.warn("missing ':' at column %s:\n%s\n%s" %
 909                                      (colon_column + 1, original_line, marker),
 910                                      position)
 911
 912                     if 'annotations' in result.groupdict():
 913                         comment_block.options = self.parse_options(comment_block,
 914                                                                    result.group('annotations'))
 915
 916                     continue
 917                 else:
 918                     # If we get here, the identifier was not recognized, so
 919                     # ignore the rest of the block just like the old annotation
 920                     # parser did. Doing this is a bit more strict than
 921                     # gtkdoc-mkdb (which continues to search for the identifier
 922                     # until either it is found or the end of the block is
 923                     # reached). In practice, however, ignoring the block is the
 924                     # right thing to do because sooner or later some long
 925                     # descriptions will contain something matching an identifier
 926                     # pattern by accident.
 927                     marker = ' '*column_offset + '^'
 928                     message.warn('ignoring unrecognized GTK-Doc comment block, identifier not '
 929                                  'found:\n%s\n%s' % (original_line, marker),
 930                                  position)
 931
 932                     return None
 933
 934             ####################################################################
 935             # Check for comment block parameters.
 936             ####################################################################
 937             result = PARAMETER_RE.match(line)
 938             if result:
 939                 param_name = result.group('parameter_name')
 940                 param_annotations = result.group('annotations')
 941                 param_description = result.group('description')
 942
 943                 if in_part == PART_IDENTIFIER:
 944                     in_part = PART_PARAMETERS
 945
 946                 part_indent = line_indent
 947
 948                 if in_part != PART_PARAMETERS:
 949                     column = result.start('parameter_name') + column_offset
 950                     marker = ' '*column + '^'
 951                     message.warn("'@%s' parameter unexpected at this location:\n%s\n%s" %
 952                                  (param_name, original_line, marker),
 953                                  position)
 954
 955                 # Old style GTK-Doc allowed return values to be specified as
 956                 # parameters instead of tags.
 957                 if param_name.lower() == TAG_RETURNS:
 958                     param_name = TAG_RETURNS
 959
 960                     if not returns_seen:
 961                         returns_seen = True
 962                     else:
 963                         message.warn("encountered multiple 'Returns' parameters or tags for "
 964                                      "'%s'." % (comment_block.name),
 965                                      position)
 966                 elif param_name in comment_block.params.keys():
 967                     column = result.start('parameter_name') + column_offset
 968                     marker = ' '*column + '^'
 969                     message.warn("multiple '@%s' parameters for identifier '%s':\n%s\n%s" %
 970                                  (param_name, comment_block.name, original_line, marker),
 971                                  position)
 972
 973                 tag = DocTag(comment_block, param_name)
 974                 tag.position = position
 975                 tag.comment = param_description
 976                 if param_annotations:
 977                     tag.options = self.parse_options(tag, param_annotations)
 978                 if param_name == TAG_RETURNS:
 979                     comment_block.tags[param_name] = tag
 980                 else:
 981                     comment_block.params[param_name] = tag
 982                 current_param = tag
 983                 continue
 984
 985             ####################################################################
 986             # Check for comment block description.
 987             #
 988             # When we are parsing comment block parameters or the comment block
 989             # identifier (when there are no parameters) and encounter an empty
 990             # line, we must be parsing the comment block description.
 991             ####################################################################
 992             if (EMPTY_LINE_RE.match(line)
 993             and in_part in [PART_IDENTIFIER, PART_PARAMETERS]):
 994                 in_part = PART_DESCRIPTION
 995                 part_indent = line_indent
 996                 continue
 997
 998             ####################################################################
 999             # Check for GTK-Doc comment block tags.
1000             ####################################################################
1001             result = TAG_RE.match(line)
1002             if result and line_indent <= part_indent:
1003                 tag_name = result.group('tag_name')
1004                 tag_annotations = result.group('annotations')
1005                 tag_description = result.group('description')
1006
1007                 marker = ' '*(result.start('tag_name') + column_offset) + '^'
1008
1009                 # Deprecated GTK-Doc Description: tag
1010                 if tag_name.lower() == TAG_DESCRIPTION:
1011                     message.warn("GTK-Doc tag \"Description:\" has been deprecated:\n%s\n%s" %
1012                                  (original_line, marker),
1013                                  position)
1014
1015                     in_part = PART_DESCRIPTION
1016                     part_indent = line_indent
1017
1018                     if not comment_block.comment:
1019                         comment_block.comment = tag_description
1020                     else:
1021                         comment_block.comment += '\n' + tag_description
1022                     continue
1023
1024                 # Now that the deprecated stuff is out of the way, continue parsing real tags
1025                 if in_part == PART_DESCRIPTION:
1026                     in_part = PART_TAGS
1027
1028                 part_indent = line_indent
1029
1030                 if in_part != PART_TAGS:
1031                     column = result.start('tag_name') + column_offset
1032                     marker = ' '*column + '^'
1033                     message.warn("'%s:' tag unexpected at this location:\n%s\n%s" %
1034                                  (tag_name, original_line, marker),
1035                                  position)
1036
1037                 if tag_name.lower() in [TAG_RETURNS, TAG_RETURNVALUE]:
1038                     if not returns_seen:
1039                         returns_seen = True
1040                     else:
1041                         message.warn("encountered multiple 'Returns' parameters or tags for "
1042                                      "'%s'." % (comment_block.name),
1043                                      position)
1044
1045                     tag = DocTag(comment_block, TAG_RETURNS)
1046                     tag.position = position
1047                     tag.comment = tag_description
1048                     if tag_annotations:
1049                         tag.options = self.parse_options(tag, tag_annotations)
1050                     comment_block.tags[TAG_RETURNS] = tag
1051                     current_tag = tag
1052                     continue
1053                 else:
1054                     if tag_name.lower() in comment_block.tags.keys():
1055                         column = result.start('tag_name') + column_offset
1056                         marker = ' '*column + '^'
1057                         message.warn("multiple '%s:' tags for identifier '%s':\n%s\n%s" %
1058                                      (tag_name, comment_block.name, original_line, marker),
1059                                      position)
1060
1061                     tag = DocTag(comment_block, tag_name.lower())
1062                     tag.position = position
1063                     tag.value = tag_description
1064                     if tag_annotations:
1065                         if tag_name.lower() == TAG_ATTRIBUTES:
1066                             tag.options = self.parse_options(tag, tag_annotations)
1067                         else:
1068                             message.warn("annotations not supported for tag '%s:'." %
1069                                          (tag_name),
1070                                          position)
1071                     comment_block.tags[tag_name.lower()] = tag
1072                     current_tag = tag
1073                     continue
1074
1075             ####################################################################
1076             # If we get here, we must be in the middle of a multiline
1077             # comment block, parameter or tag description.
1078             ####################################################################
1079             if in_part in [PART_IDENTIFIER, PART_DESCRIPTION]:
1080                 if not comment_block.comment:
1081                     comment_block.comment = line
1082                 else:
1083                     comment_block.comment += '\n' + line
1084                 continue
1085             elif in_part == PART_PARAMETERS:
1086                 self._validate_multiline_annotation_continuation(line, original_line,
1087                                                                  column_offset, position)
1088
1089                 # Append to parameter description.
1090                 current_param.comment += ' ' + line.strip()
1091             elif in_part == PART_TAGS:
1092                 self._validate_multiline_annotation_continuation(line, original_line,
1093                                                                  column_offset, position)
1094
1095                 # Append to tag description.
1096                 if current_tag.name.lower() in [TAG_RETURNS, TAG_RETURNVALUE]:
1097                     current_tag.comment += ' ' + line.strip()
1098                 else:
1099                     current_tag.value += ' ' + line.strip()
1100
1101         ########################################################################
1102         # Finished parsing this comment block.
1103         ########################################################################
1104         # We have picked up a couple of \n characters that where not
1105         # intended. Strip those.
1106         if comment_block.comment:
1107             comment_block.comment = comment_block.comment.strip()
1108         else:
1109             comment_block.comment = ''
1110
1111         for tag in comment_block.tags.values():
1112             self._clean_comment_block_part(tag)
1113
1114         for param in comment_block.params.values():
1115             self._clean_comment_block_part(param)
1116
1117         # Validate and store block.
1118         comment_block.validate()
1119         return comment_block
1120
1121     def _clean_comment_block_part(self, part):
1122         if part.comment:
1123             part.comment = part.comment.strip()
1124         else:
1125             part.comment = None
1126
1127         if part.value:
1128             part.value = part.value.strip()
1129         else:
1130             part.value = ''
1131
1132     def _validate_multiline_annotation_continuation(self, line, original_line,
1133                                                           column_offset, position):
1134         '''
1135         Validate parameters and tags (except the first line) and generate
1136         warnings about invalid annotations spanning multiple lines.
1137
1138         :param line: line to validate, stripped from ' * ' at start of the line.
1139         :param original_line: original line to validate (used in warning messages)
1140         :param column_offset: column width of ' * ' at the time it was stripped from `line`
1141         :param position: position of `line` in the source file
1142         '''
1143
1144         result = MULTILINE_ANNOTATION_CONTINUATION_RE.match(line)
1145         if result:
1146             column = result.start('annotations') + column_offset
1147             marker = ' '*column + '^'
1148             message.warn('ignoring invalid multiline annotation continuation:\n'
1149                          '%s\n%s' % (original_line, marker),
1150                          position)
1151
1152     @classmethod
1153     def parse_options(cls, tag, value):
1154         # (annotation)
1155         # (annotation opt1 opt2 ...)
1156         # (annotation opt1=value1 opt2=value2 ...)
1157         opened = -1
1158         options = DocOptions()
1159         options.position = tag.position
1160
1161         for i, c in enumerate(value):
1162             if c == '(' and opened == -1:
1163                 opened = i+1
1164             if c == ')' and opened != -1:
1165                 segment = value[opened:i]
1166                 parts = segment.split(' ', 1)
1167                 if len(parts) == 2:
1168                     name, option = parts
1169                 elif len(parts) == 1:
1170                     name = parts[0]
1171                     option = None
1172                 else:
1173                     raise AssertionError
1174                 if option is not None:
1175                     option = DocOption(tag, option)
1176                 options.add(name, option)
1177                 opened = -1
1178
1179         return options