Split parameter and tag storage in annotationparser parse tree
[platform/upstream/gobject-introspection.git] / giscanner / annotationparser.py
1 # -*- Mode: Python -*-
2 # GObject-Introspection - a framework for introspecting GObject libraries
3 # Copyright (C) 2008-2010 Johan Dahlin
4 # Copyright (C) 2012 Dieter Verfaillie <dieterv@optionexplicit.be>
5 #
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 # GNU General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20 #
21
22
23 # AnnotationParser - extract annotations from gtk-doc comments
24
25
26 import re
27
28 from . import message
29 from .annotationpatterns import (COMMENT_START_RE, COMMENT_END_RE,
30                                  COMMENT_STAR_RE, EMPTY_LINE_RE,
31                                  SECTION_RE, SYMBOL_RE, PROPERTY_RE, SIGNAL_RE,
32                                  PARAMETER_RE, DESCRIPTION_TAG_RE, TAG_RE,
33                                  MULTILINE_ANNOTATION_CONTINUATION_RE)
34 from .odict import odict
35
36
# GTK-Doc comment block parts
#
# AnnotationParser._parse_comment_block() stores one of these values in its
# `in_part` state variable to remember which part of the comment block the
# line being parsed belongs to.
PART_IDENTIFIER = 'identifier'
PART_PARAMETERS = 'parameters'
PART_DESCRIPTION = 'description'
PART_TAGS = 'tags'

# Identifiers
#
# Kind of identifier found on the first line of a comment block, chosen by
# whichever of SECTION_RE, SYMBOL_RE, PROPERTY_RE or SIGNAL_RE matched.
IDENTIFIER_SECTION = 'section'
IDENTIFIER_SYMBOL = 'symbol'
IDENTIFIER_PROPERTY = 'property'
IDENTIFIER_SIGNAL = 'signal'

# Tags - annotations applied to comment blocks
#
# All names are lower case: the parser compares `tag_name.lower()` against
# them. TAG_RETURNVALUE is an alternative spelling that the parser stores
# under TAG_RETURNS.
TAG_VFUNC = 'virtual'
TAG_SINCE = 'since'
TAG_STABILITY = 'stability'
TAG_DEPRECATED = 'deprecated'
TAG_RETURNS = 'returns'
TAG_RETURNVALUE = 'return value'
TAG_ATTRIBUTES = 'attributes'
TAG_RENAME_TO = 'rename to'
TAG_TYPE = 'type'
TAG_UNREF_FUNC = 'unref func'
TAG_REF_FUNC = 'ref func'
TAG_SET_VALUE_FUNC = 'set value func'
TAG_GET_VALUE_FUNC = 'get value func'
TAG_TRANSFER = 'transfer'
TAG_VALUE = 'value'
# Every tag name defined above.
# NOTE(review): presumably used to tell known tags from unknown ones; the
# consumer is not visible in this part of the file - confirm.
_ALL_TAGS = [TAG_VFUNC,
             TAG_SINCE,
             TAG_STABILITY,
             TAG_DEPRECATED,
             TAG_RETURNS,
             TAG_RETURNVALUE,
             TAG_ATTRIBUTES,
             TAG_RENAME_TO,
             TAG_TYPE,
             TAG_UNREF_FUNC,
             TAG_REF_FUNC,
             TAG_SET_VALUE_FUNC,
             TAG_GET_VALUE_FUNC,
             TAG_TRANSFER,
             TAG_VALUE]

# Options - annotations for parameters and return values
#
# These are the option names DocTag.validate() knows how to check.
# OPT_INOUT_ALT is an alternative spelling which is validated exactly like
# OPT_INOUT.
OPT_ALLOW_NONE = 'allow-none'
OPT_ARRAY = 'array'
OPT_ATTRIBUTE = 'attribute'
OPT_CLOSURE = 'closure'
OPT_DESTROY = 'destroy'
OPT_ELEMENT_TYPE = 'element-type'
OPT_FOREIGN = 'foreign'
OPT_IN = 'in'
OPT_INOUT = 'inout'
OPT_INOUT_ALT = 'in-out'
OPT_OUT = 'out'
OPT_SCOPE = 'scope'
OPT_TRANSFER = 'transfer'
OPT_TYPE = 'type'
OPT_SKIP = 'skip'
OPT_CONSTRUCTOR = 'constructor'
OPT_METHOD = 'method'

# Every option name defined above.
ALL_OPTIONS = [
    OPT_ALLOW_NONE,
    OPT_ARRAY,
    OPT_ATTRIBUTE,
    OPT_CLOSURE,
    OPT_DESTROY,
    OPT_ELEMENT_TYPE,
    OPT_FOREIGN,
    OPT_IN,
    OPT_INOUT,
    OPT_INOUT_ALT,
    OPT_OUT,
    OPT_SCOPE,
    OPT_TRANSFER,
    OPT_TYPE,
    OPT_SKIP,
    OPT_CONSTRUCTOR,
    OPT_METHOD]

# Array options - array specific annotations
#
# Keys accepted inside an (array ...) annotation. DocTag.validate() requires
# an integer value for 'fixed-size' and 'zero-terminated' and any value for
# 'length'.
OPT_ARRAY_FIXED_SIZE = 'fixed-size'
OPT_ARRAY_LENGTH = 'length'
OPT_ARRAY_ZERO_TERMINATED = 'zero-terminated'

# Out options
#
# The only values DocTag.validate() accepts for an (out ...) annotation.
OPT_OUT_CALLER_ALLOCATES = 'caller-allocates'
OPT_OUT_CALLEE_ALLOCATES = 'callee-allocates'

# Scope options
#
# The only values DocTag.validate() accepts for a (scope ...) annotation.
OPT_SCOPE_ASYNC = 'async'
OPT_SCOPE_CALL = 'call'
OPT_SCOPE_NOTIFIED = 'notified'

# Transfer options
#
# The only values DocTag.validate() accepts for a (transfer ...) annotation.
OPT_TRANSFER_NONE = 'none'
OPT_TRANSFER_CONTAINER = 'container'
OPT_TRANSFER_FULL = 'full'
OPT_TRANSFER_FLOATING = 'floating'
138
139
class DocBlock(object):
    """
    Parse tree node representing one complete GTK-Doc comment block.

    Attributes:

    - ``name``: identifier name the block documents
    - ``options``: :class:`DocOptions` holding the identifier's annotations
    - ``value``: initialised to ``None``; not used by the methods of this class
    - ``tags``: ordered mapping of tag name to :class:`DocTag`
    - ``comment``: free form description text or ``None`` when there is none
    - ``params``: ordered mapping of parameter name to :class:`DocTag`
    - ``position``: source position of the block, see :meth:`set_position`
    """

    def __init__(self, name):
        """
        :param name: identifier name the block documents
        """
        self.name = name
        self.options = DocOptions()
        self.value = None
        self.tags = odict()
        self.comment = None
        self.params = odict()
        self.position = None

    def __cmp__(self, other):
        # Blocks are ordered by the name of the identifier they document.
        return cmp(self.name, other.name)

    def __repr__(self):
        return '<DocBlock %r %r>' % (self.name, self.options)

    def set_position(self, position):
        """
        Records where the block was found.

        The position is also stored on the block's options.

        :param position: position object to store
        """
        self.position = position
        self.options.position = position

    def get_tag(self, name):
        """
        :param name: tag name to look up
        :returns: the matching :class:`DocTag` or ``None``
        """
        return self.tags.get(name)

    def get_param(self, name):
        """
        :param name: parameter name to look up
        :returns: the matching :class:`DocTag` or ``None``
        """
        return self.params.get(name)

    def to_gtk_doc(self):
        """
        Serializes the block back into GTK-Doc comment block syntax.

        :returns: a string starting with ``/**`` and ending with `` */``
                  followed by a newline character
        """
        options = ''
        if self.options:
            options += ' '
            options += ' '.join('(%s)' % o for o in self.options)

        # The identifier line gets a trailing colon, except for names
        # containing 'SECTION'.
        lines = [self.name]
        if 'SECTION' not in self.name:
            lines[0] += ':'
        lines[0] += options

        for param in self.params.values():
            lines.append(param.to_gtk_doc_param())

        # An empty line separates the parameters from the description.
        lines.append('')
        # self.comment is None until a description has been stored, in which
        # case there simply is no description to emit.
        if self.comment is not None:
            lines.extend(self.comment.split('\n'))

        if self.tags:
            lines.append('')
            for tag in self.tags.values():
                lines.append(tag.to_gtk_doc_tag())

        chunks = ['/**\n']
        for line in lines:
            line = line.rstrip()
            if line:
                chunks.append(' * %s\n' % (line, ))
            else:
                # Avoid trailing whitespace on otherwise empty lines.
                chunks.append(' *\n')
        chunks.append(' */\n')
        return ''.join(chunks)

    def validate(self):
        """
        Validates the annotations of all parameters and tags of the block.
        """
        for param in self.params.values():
            param.validate()

        for tag in self.tags.values():
            tag.validate()
203
204
class DocTag(object):
    """
    Parse tree node for one parameter or one tag of a GTK-Doc comment block.

    The parser stores instances both in ``DocBlock.params`` (``@name:``
    parameters) and in ``DocBlock.tags`` (``Name:`` tags).

    Attributes:

    - ``block``: the block passed to the constructor
    - ``name``: parameter or tag name
    - ``options``: :class:`DocOptions` holding the annotations
    - ``comment``: description text or ``None``
    - ``value``: string emitted by the serializers when there are no options
    - ``position``: source position, see :meth:`set_position`
    """

    def __init__(self, block, name):
        """
        :param block: the block this parameter or tag belongs to
        :param name: parameter or tag name
        """
        self.block = block
        self.name = name
        self.options = DocOptions()
        self.comment = None
        self.value = ''
        self.position = None

    def __repr__(self):
        return '<DocTag %r %r>' % (self.name, self.options)

    def _validate_option(self, name, value, required=False,
                         n_params=None, choices=None):
        """
        Emits a warning when an annotation's value breaks the given rules.

        Checking stops at the first rule that is violated.

        :param name: annotation name used in the warning text
        :param value: object providing ``length()`` and ``one()`` (for
                      example a :class:`DocOption`) or ``None``
        :param required: warn when ``value`` is ``None``
        :param n_params: exact number of values expected, ``None`` disables
                         the check
        :param choices: allowed values for a single valued annotation,
                        ``None`` disables the check
        """
        if required and value is None:
            message.warn('%s annotation needs a value' % (
                name, ), self.position)
            return

        if n_params is not None:
            if n_params == 0:
                s = 'no value'
            elif n_params == 1:
                s = 'one value'
            else:
                s = '%d values' % (n_params, )
            wrong_count = (
                (n_params > 0 and (value is None or value.length() != n_params))
                or (n_params == 0 and value is not None))
            if wrong_count:
                if value is None:
                    length = 0
                else:
                    length = value.length()
                message.warn('%s annotation needs %s, not %d' % (
                    name, s, length), self.position)
                return

        if choices is not None:
            valuestr = value.one()
            if valuestr not in choices:
                message.warn('invalid %s annotation value: %r' % (
                    name, valuestr, ), self.position)
                return

    def set_position(self, position):
        """
        Records where the parameter or tag was found.

        The position is also stored on the options.

        :param position: position object to store
        """
        self.position = position
        self.options.position = position

    def _get_gtk_doc_value(self):
        """
        Serializes the annotations into GTK-Doc syntax.

        :returns: the annotations followed by ``': '`` when there are any,
                  ``self.value`` otherwise
        """
        def serialize_one(option, value, fmt, fmt2):
            # fmt is used for options that carry a value, fmt2 for bare ones.
            if value:
                if not isinstance(value, str):
                    # value.all() returns a plain dict: items() works on
                    # Python 2 as well as on Python 3.
                    value = ' '.join((serialize_one(k, v, '%s=%s', '%s')
                                      for k, v in value.all().items()))
                return fmt % (option, value)
            else:
                return fmt2 % (option, )

        annotations = []
        for option, value in self.options.iteritems():
            annotations.append(
                serialize_one(option, value, '(%s %s)', '(%s)'))
        if annotations:
            return ' '.join(annotations) + ': '
        else:
            return self.value

    def to_gtk_doc_param(self):
        """
        :returns: the ``@name: [annotations:] [description]`` line
        """
        # self.comment can be None, never emit the text 'None' for it.
        return '@%s: %s%s' % (self.name,
                              self._get_gtk_doc_value(),
                              self.comment or '')

    def to_gtk_doc_tag(self):
        """
        :returns: the ``Name: [annotations:] [description]`` line
        """
        return '%s: %s%s' % (self.name.capitalize(),
                             self._get_gtk_doc_value(),
                             self.comment or '')

    def validate(self):
        """
        Emits a warning for every annotation that is unknown or malformed.
        """
        if self.name == TAG_ATTRIBUTES:
            # The 'Attributes:' tag allows free form annotations so the
            # validation below is most certainly going to fail.
            return

        for option in self.options:
            value = self.options[option]
            if option == OPT_ALLOW_NONE:
                self._validate_option('allow-none', value, n_params=0)
            elif option == OPT_ARRAY:
                # A bare (array) annotation is valid.
                if value is None:
                    continue
                # value.all() returns a plain dict: items() works on
                # Python 2 as well as on Python 3.
                for name, v in value.all().items():
                    if name in [OPT_ARRAY_ZERO_TERMINATED, OPT_ARRAY_FIXED_SIZE]:
                        try:
                            int(v)
                        except (TypeError, ValueError):
                            if v is None:
                                message.warn(
                                    'array option %s needs a value' % (
                                    name, ),
                                    positions=self.position)
                            else:
                                message.warn(
                                    'invalid array %s option value %r, '
                                    'must be an integer' % (name, v, ),
                                    positions=self.position)
                            continue
                    elif name == OPT_ARRAY_LENGTH:
                        if v is None:
                            message.warn(
                                'array option length needs a value',
                                positions=self.position)
                            continue
                    else:
                        message.warn(
                            'invalid array annotation value: %r' % (
                            name, ), self.position)

            elif option == OPT_ATTRIBUTE:
                self._validate_option('attribute', value, n_params=2)
            elif option == OPT_CLOSURE:
                # (closure) takes an optional single value.
                if value is not None and value.length() > 1:
                    message.warn(
                        'closure takes at maximium 1 value, %d given' % (
                        value.length()), self.position)
                    continue
            elif option == OPT_DESTROY:
                self._validate_option('destroy', value, n_params=1)
            elif option == OPT_ELEMENT_TYPE:
                self._validate_option('element-type', value, required=True)
                if value is None:
                    message.warn(
                        'element-type takes at least one value, none given',
                        self.position)
                    continue
                if value.length() > 2:
                    message.warn(
                        'element-type takes at maximium 2 values, %d given' % (
                        value.length()), self.position)
                    continue
            elif option == OPT_FOREIGN:
                self._validate_option('foreign', value, n_params=0)
            elif option == OPT_IN:
                self._validate_option('in', value, n_params=0)
            elif option in [OPT_INOUT, OPT_INOUT_ALT]:
                self._validate_option('inout', value, n_params=0)
            elif option == OPT_OUT:
                # A bare (out) annotation is valid.
                if value is None:
                    continue
                if value.length() > 1:
                    message.warn(
                        'out annotation takes at maximium 1 value, %d given' % (
                        value.length()), self.position)
                    continue
                value_str = value.one()
                if value_str not in [OPT_OUT_CALLEE_ALLOCATES,
                                     OPT_OUT_CALLER_ALLOCATES]:
                    message.warn("out annotation value is invalid: %r" % (
                        value_str), self.position)
                    continue
            elif option == OPT_SCOPE:
                self._validate_option(
                    'scope', value, required=True,
                    n_params=1,
                    choices=[OPT_SCOPE_ASYNC,
                             OPT_SCOPE_CALL,
                             OPT_SCOPE_NOTIFIED])
            elif option == OPT_SKIP:
                self._validate_option('skip', value, n_params=0)
            elif option == OPT_TRANSFER:
                self._validate_option(
                    'transfer', value, required=True,
                    n_params=1,
                    choices=[OPT_TRANSFER_FULL,
                             OPT_TRANSFER_CONTAINER,
                             OPT_TRANSFER_NONE,
                             OPT_TRANSFER_FLOATING])
            elif option == OPT_TYPE:
                self._validate_option('type', value, required=True,
                                      n_params=1)
            elif option == OPT_CONSTRUCTOR:
                self._validate_option('constructor', value, n_params=0)
            elif option == OPT_METHOD:
                self._validate_option('method', value, n_params=0)
            else:
                message.warn('invalid annotation option: %s' % (option, ),
                             self.position)
388
389
class DocOptions(object):
    """
    Ordered collection of ``(name, value)`` annotation pairs.

    Names are kept in insertion order and the same name can be added more
    than once. Lookups by name return the value of the first matching pair.
    """

    def __init__(self):
        # List of (name, value) tuples in the order they were added.
        self.values = []

    def __repr__(self):
        return '<DocOptions %r>' % (self.values, )

    def __getitem__(self, item):
        """
        :param item: annotation name
        :returns: the value of the first pair named ``item``
        :raises KeyError: when no pair is named ``item``
        """
        for key, value in self.values:
            if key == item:
                return value
        raise KeyError(item)

    def __nonzero__(self):
        # Empty collections are false, just like the builtin containers.
        return bool(self.values)

    # Python 3 looks up __bool__ instead of __nonzero__; without this alias
    # an empty instance would be considered true there.
    __bool__ = __nonzero__

    def __iter__(self):
        # Iterates over the names only, duplicates included.
        return (k for k, v in self.values)

    def add(self, name, value):
        """
        Appends a pair, existing pairs with the same name are kept.

        :param name: annotation name
        :param value: annotation value
        """
        self.values.append((name, value))

    def get(self, item, default=None):
        """
        :param item: annotation name
        :param default: value to return when no pair is named ``item``
        :returns: the value of the first pair named ``item`` or ``default``
        """
        for key, value in self.values:
            if key == item:
                return value
        return default

    def getall(self, item):
        """
        :param item: annotation name
        :returns: a generator yielding the value of every pair named ``item``
        """
        for key, value in self.values:
            if key == item:
                yield value

    def iteritems(self):
        """
        :returns: an iterator over the ``(name, value)`` pairs
        """
        return iter(self.values)
425
426
class DocOption(object):
    """
    Value of a single annotation, for example the ``length=2 fixed-size=3``
    part of ``(array length=2 fixed-size=3)``.

    The text is split on single space characters. Each piece is either a
    bare ``name`` or a ``name=value`` pair.
    """

    def __init__(self, tag, option):
        """
        :param tag: object the annotation belongs to, stored as ``self.tag``
        :param option: annotation value text to split up
        """
        self.tag = tag
        # Pieces in source order: 'name' or ('name', 'value').
        self._array = []
        # name -> value, None for bare names.
        self._dict = {}
        for piece in option.split(' '):
            key, separator, val = piece.partition('=')
            if separator:
                self._array.append((key, val))
            else:
                val = None
                self._array.append(key)
            self._dict[key] = val

    def __repr__(self):
        return '<DocOption %r>' % (self._array, )

    def length(self):
        """
        :returns: the number of pieces
        """
        return len(self._array)

    def one(self):
        """
        :returns: the only piece, there has to be exactly one
        """
        assert len(self._array) == 1
        return self._array[0]

    def flat(self):
        """
        :returns: the list of pieces in source order
        """
        return self._array

    def all(self):
        """
        :returns: the dict mapping names to values
        """
        return self._dict
460         return self._dict
461
462
463 class AnnotationParser(object):
464     """
465     GTK-Doc comment block parser.
466
    Parses GTK-Doc comment blocks into a parse tree built out of :class:`DocBlock`,
468     :class:`DocTag`, :class:`DocOptions` and :class:`DocOption` objects. This
469     parser tries to accept malformed input whenever possible and does not emit
470     syntax errors. However, it does emit warnings at the slightest indication
471     of malformed input when possible.
472
473     A GTK-Doc comment block can be constructed out of multiple parts that can
474     be combined to write different types of documentation.
    See GTK-Doc's `types of documentation`_ to learn more about possible valid combinations.
476     Each part can be further divided into fields which are separated by `:` characters.
477
478     Possible parts and the fields they are constructed from look like the
479     following (optional fields are enclosed in square brackets):
480
481     .. code-block:: c
482         /**
483          * identifier_name: [annotations]
484          * @parameter_name: [annotations:] [description]
485          *
486          * description
487          * tag_name: [annotations:] [description]
488          */
489
490     - Parts and fields cannot span multiple lines, except for parameter descriptions,
491       tag descriptions and comment block descriptions.
492     - There has to be exactly 1 `identifier` part on the first line of the
493       comment block which consists of:
494           * an `identifier_name` field
495           * an optional `annotations` field
496     - There can be 0 or more `parameter` parts following the `identifier` part,
497       each consisting of:
          * a `parameter_name` field
499           * an optional `annotations` field
500           * an optional `description` field
    - An empty line signals the end of the `parameter` parts and the beginning
      of the (free form) comment block `description` part.
    - There can be 0 or 1 `description` parts following the `parameter` parts.
504     - There can be 0 or more `tag` parts following the `description` part,
505       each consisting of:
506           * a `tag_name` field
507           * an optional `annotations` field
508           * an optional `description` field
509
510     .. NOTE:: :class:`AnnotationParser` functionality is heavily based on gtkdoc-mkdb's
511         `ScanSourceFile()`_ function and is currently in sync with gtk-doc
512         commit `b41641b`_.
513
514     .. _types of documentation:
515             http://developer.gnome.org/gtk-doc-manual/1.18/documenting.html.en
516     .. _ScanSourceFile():
517             http://git.gnome.org/browse/gtk-doc/tree/gtkdoc-mkdb.in#n3722
518     .. _b41641b: b41641bd75f870afff7561ceed8a08456da57565
519     """
520
521     def parse(self, comments):
522         """
523         Parses multiple GTK-Doc comment blocks.
524
525         :param comments: a list of (comment, filename, lineno) tuples
526         :returns: a list of :class:`DocBlock` or ``None`` objects
527         """
528
529         comment_blocks = {}
530
531         for comment in comments:
532             comment_block = self.parse_comment_block(comment)
533
534             if comment_block is not None:
535                 if comment_block.name in comment_blocks:
536                     message.warn("multiple comment blocks documenting '%s:' identifier." %
537                                  (comment_block.name),
538                                  comment_block.position)
539
540                 # Always store the block even if it's a duplicate for
541                 # backward compatibility...
542                 comment_blocks[comment_block.name] = comment_block
543
544         return comment_blocks
545
546     def parse_comment_block(self, comment):
547         """
548         Parses a single GTK-Doc comment block.
549
550         :param comment: a (comment, filename, lineno) tuple
551         :returns: a :class:`DocBlock` object or ``None``
552         """
553
554         comment, filename, lineno = comment
555         comment_lines = list(enumerate(comment.split('\n')))
556
557         # Check for the start the comment block.
558         if COMMENT_START_RE.search(comment_lines[0][1]):
559             del comment_lines[0]
560         else:
561             # Not a GTK-Doc comment block.
562             return None
563
564         # Check for the end the comment block.
565         if COMMENT_END_RE.search(comment_lines[-1][1]):
566             del comment_lines[-1]
567
568         # If we get this far, we are inside a GTK-Doc comment block.
569         return self._parse_comment_block(comment_lines, filename, lineno)
570
571     def _parse_comment_block(self, comment_lines, filename, lineno):
572         """
        Parses a single GTK-Doc comment block stripped from its
        comment start (/**) and comment end (*/) marker lines.
575
        :param comment_lines: GTK-Doc comment block stripped from its comment
                              start (/**) and comment end (*/) marker lines
578         :param filename: source file name where the comment block originated from
579         :param lineno:  line in the source file where the comment block starts
580         :returns: a :class:`DocBlock` object or ``None``
581
582         .. NOTE:: If you are tempted to refactor this method and split it
583             further up (for example into _parse_identifier(), _parse_parameters(),
584             _parse_description(), _parse_tags() methods) then please resist the
585             urge. It is considered important that this method should be more or
586             less easily comparable with gtkdoc-mkdb's `ScanSourceFile()`_ function.
587
588             The different parsing steps are marked with a comment surrounded
589             by `#` characters in an attempt to make it clear what is going on.
590
591         .. _ScanSourceFile():
592                 http://git.gnome.org/browse/gtk-doc/tree/gtkdoc-mkdb.in#n3722
593         """
594         comment_block = None
595         in_part = None
596         identifier = None
597         current_param = None
598         current_tag = None
599         returns_seen = False
600
601         for line_offset, line in comment_lines:
602             position = message.Position(filename, line_offset + lineno)
603
604             result = COMMENT_STAR_RE.match(line)
605             if result:
606                 # Store the original line (without \n) and column offset
607                 # so we can generate meaningful warnings later on.
608                 original_line = line
609                 column_offset = result.end(0)
610
611                 # Get rid of ' * ' at start of the line.
612                 line = line[result.end(0):]
613
614             ####################################################################
615             # Check for GTK-Doc comment block identifier.
616             ####################################################################
617             if not comment_block:
618                 # The correct identifier name would have the colon at the end
619                 # but maintransformer.py does not expect us to do that. So
620                 # make sure to compute an identifier_name without the colon and
621                 # a real_identifier_name with the colon.
622
623                 if not identifier:
624                     result = SECTION_RE.search(line)
625                     if result:
626                         identifier = IDENTIFIER_SECTION
627                         real_identifier_name = 'SECTION:%s' % (result.group('section_name'))
628                         identifier_name = real_identifier_name
629                         column = result.start('section_name') + column_offset
630
631                 if not identifier:
632                     result = SYMBOL_RE.search(line)
633                     if result:
634                         identifier = IDENTIFIER_SYMBOL
635                         real_identifier_name = '%s:' % (result.group('symbol_name'))
636                         identifier_name = '%s' % (result.group('symbol_name'))
637                         column = result.start('symbol_name') + column_offset
638
639                 if not identifier:
640                     result = PROPERTY_RE.search(line)
641                     if result:
642                         identifier = IDENTIFIER_PROPERTY
643                         real_identifier_name = '%s:%s:' % (result.group('class_name'),
644                                                            result.group('property_name'))
645                         identifier_name = '%s:%s' % (result.group('class_name'),
646                                                      result.group('property_name'))
647                         column = result.start('property_name') + column_offset
648
649                 if not identifier:
650                     result = SIGNAL_RE.search(line)
651                     if result:
652                         identifier = IDENTIFIER_SIGNAL
653                         real_identifier_name = '%s::%s:' % (result.group('class_name'),
654                                                             result.group('signal_name'))
655                         identifier_name = '%s::%s' % (result.group('class_name'),
656                                                       result.group('signal_name'))
657                         column = result.start('signal_name') + column_offset
658
659                 if identifier:
660                     in_part = PART_IDENTIFIER
661
662                     comment_block = DocBlock(identifier_name)
663                     comment_block.set_position(position)
664
665                     if 'annotations' in result.groupdict():
666                         comment_block.options = self.parse_options(comment_block,
667                                                                    result.group('annotations'))
668
669                     if 'colon' in result.groupdict() and result.group('colon') != ':':
670                         colon_start = result.start('colon')
671                         colon_column = column_offset + colon_start
672                         marker = ' '*colon_column + '^'
673                         message.warn("missing ':' at column %s:\n%s\n%s" %
674                                      (colon_start, original_line, marker),
675                                      position)
676                     continue
677                 else:
678                     # If we get here, the identifier was not recognized, so
679                     # ignore the rest of the block just like the old annotation
680                     # parser did. Doing this is a bit more strict than
681                     # gtkdoc-mkdb (which continues to search for the identifier
682                     # until either it is found or the end of the block is
683                     # reached). In practice, however, ignoring the block is the
684                     # right thing to do because sooner or later some long
685                     # descriptions will contain something matching an identifier
686                     # pattern by accident.
687                     marker = ' '*column_offset + '^'
688                     message.warn('ignoring unrecognized GTK-Doc comment block, identifier not '
689                                  'found:\n%s\n%s' % (original_line, marker),
690                                  position)
691
692                     return None
693
694             ####################################################################
695             # Check for comment block parameters.
696             ####################################################################
697             result = PARAMETER_RE.search(line)
698             if result:
699                 param_name = result.group('parameter_name')
700                 param_annotations = result.group('annotations')
701                 param_description = result.group('description')
702
703                 if in_part == PART_IDENTIFIER:
704                     in_part = PART_PARAMETERS
705
706                 if in_part != PART_PARAMETERS:
707                     column = result.start('parameter_name') + column_offset
708                     marker = ' '*column + '^'
709                     message.warn("'@%s' parameter unexpected at this location:\n%s\n%s" %
710                                  (param_name, original_line, marker),
711                                  position)
712
713                 # Old style GTK-Doc allowed return values to be specified as
714                 # parameters instead of tags.
715                 if param_name.lower() == TAG_RETURNS:
716                     param_name = TAG_RETURNS
717
718                     if not returns_seen:
719                         returns_seen = True
720                     else:
721                         message.warn("encountered multiple 'Returns' parameters or tags for "
722                                      "'%s'." % (comment_block.name),
723                                      position)
724                 elif param_name in comment_block.params.keys():
725                     column = result.start('parameter_name') + column_offset
726                     marker = ' '*column + '^'
727                     message.warn("multiple '@%s' parameters for identifier '%s':\n%s\n%s" %
728                                  (param_name, comment_block.name, original_line, marker),
729                                  position)
730
731                 tag = DocTag(comment_block, param_name)
732                 tag.set_position(position)
733                 tag.comment = param_description
734                 if param_annotations:
735                     tag.options = self.parse_options(tag, param_annotations)
736                 if param_name == TAG_RETURNS:
737                     comment_block.tags[param_name] = tag
738                 else:
739                     comment_block.params[param_name] = tag
740                 current_param = tag
741                 continue
742
743             ####################################################################
744             # Check for comment block description.
745             #
746             # When we are parsing comment block parameters or the comment block
747             # identifier (when there are no parameters) and encounter an empty
748             # line, we must be parsing the comment block description
749             ####################################################################
750             if (EMPTY_LINE_RE.search(line)
751             and (in_part == PART_IDENTIFIER or in_part == PART_PARAMETERS)):
752                 in_part = PART_DESCRIPTION
753                 continue
754
755             ####################################################################
756             # Check for GTK-Doc comment block tags.
757             ####################################################################
758             result = TAG_RE.search(line)
759             if result:
760                 tag_name = result.group('tag_name')
761                 tag_annotations = result.group('annotations')
762                 tag_description = result.group('description')
763
764                 if in_part == PART_DESCRIPTION:
765                     in_part = PART_TAGS
766
767                 if in_part != PART_TAGS:
768                     column = result.start('tag_name') + column_offset
769                     marker = ' '*column + '^'
770                     message.warn("'%s:' tag unexpected at this location:\n%s\n%s" %
771                                  (tag_name, original_line, marker),
772                                  position)
773
774                 if tag_name.lower() in [TAG_RETURNS, TAG_RETURNVALUE]:
775                     if not returns_seen:
776                         returns_seen = True
777                     else:
778                         message.warn("encountered multiple 'Returns' parameters or tags for "
779                                      "'%s'." % (comment_block.name),
780                                      position)
781
782                     tag = DocTag(comment_block, TAG_RETURNS)
783                     tag.position = position
784                     tag.comment = tag_description
785                     if tag_annotations:
786                         tag.options = self.parse_options(tag, tag_annotations)
787                     comment_block.tags[TAG_RETURNS] = tag
788                     current_tag = tag
789                     continue
790                 else:
791                     if tag_name.lower() in comment_block.tags.keys():
792                         column = result.start('tag_name') + column_offset
793                         marker = ' '*column + '^'
794                         message.warn("multiple '%s:' tags for identifier '%s':\n%s\n%s" %
795                                      (tag_name, comment_block.name, original_line, marker),
796                                      position)
797
798                     tag = DocTag(comment_block, tag_name.lower())
799                     tag.position = position
800                     tag.value = tag_description
801                     if tag_annotations:
802                         if tag_name.lower() == TAG_ATTRIBUTES:
803                             tag.options = self.parse_options(tag, tag_annotations)
804                         else:
805                             message.warn("annotations not supported for tag '%s'." %
806                                          (tag_name),
807                                          position)
808                     comment_block.tags[tag_name.lower()] = tag
809                     current_tag = tag
810                     continue
811
812             ####################################################################
813             # If we get here, we must be in the middle of a multiline
814             # comment block, parameter or tag description.
815             ####################################################################
816             if in_part == PART_DESCRIPTION:
817                 if not comment_block.comment:
818                     # Backwards compatibility with old style GTK-Doc
819                     # comment blocks where Description used to be a comment
820                     # block tag. Simply get rid of 'Description:'.
821                     line = re.sub(DESCRIPTION_TAG_RE, '', line)
822                     comment_block.comment = line
823                 else:
824                     comment_block.comment += '\n' + line
825                 continue
826             elif in_part == PART_PARAMETERS:
827                 if not current_param:
828                     message.warn('parameter expected:\n%s' %
829                                  (line),
830                                  position)
831                 else:
832                     self._validate_multiline_annotation_continuation(line, original_line,
833                                                                      column_offset, position)
834
835                     # Append to parameter description.
836                     current_param.comment += ' ' + line.strip()
837             elif in_part == PART_TAGS:
838                 if not current_tag:
839                     message.warn('tag expected:\n%s' %
840                                  (line),
841                                  position)
842                 else:
843                     self._validate_multiline_annotation_continuation(line, original_line,
844                                                                      column_offset, position)
845
846                     # Append to tag description.
847                     if current_tag.name.lower() in [TAG_RETURNS, TAG_RETURNVALUE]:
848                         current_tag.comment += ' ' + line.strip()
849                     else:
850                         current_tag.value += ' ' + line.strip()
851
852         ########################################################################
853         # Finished parsing this comment block.
854         ########################################################################
855         # We have picked up a couple of \n characters that were not
856         # intended. Strip those.
857         if comment_block.comment:
858             comment_block.comment = comment_block.comment.strip()
859         else:
860             comment_block.comment = ''
861
862         for tag in comment_block.tags.itervalues():
863             self._clean_comment_block_part(tag)
864
865         for param in comment_block.params.itervalues():
866             self._clean_comment_block_part(param)
867
868         # Validate and store block.
869         comment_block.validate()
870         return comment_block
871
872     def _clean_comment_block_part(self, part):
873         if part.comment:
874             part.comment = part.comment.strip()
875         else:
876             part.comment = None
877
878         if part.value:
879             part.value = part.value.strip()
880         else:
881             part.value = ''
882
883     def _validate_multiline_annotation_continuation(self, line, original_line,
884                                                           column_offset, position):
885         '''
886         Validate parameters and tags (except the first line) and generate
887         warnings about invalid annotations spanning multiple lines.
888
889         :param line: line to validate, stripped from ' * ' at start of the line.
890         :param original_line: original line to validate (used in warning messages)
891         :param column_offset: column width of ' * ' at the time it was stripped from `line`
892         :param position: position of `line` in the source file
893         '''
894
895         result = MULTILINE_ANNOTATION_CONTINUATION_RE.search(line)
896         if result:
897             line = result.group('description')
898             column = result.start('annotations') + column_offset
899             marker = ' '*column + '^'
900             message.warn('ignoring invalid multiline annotation continuation:\n'
901                          '%s\n%s' % (original_line, marker),
902                          position)
903
904     @classmethod
905     def parse_options(cls, tag, value):
906         # (foo)
907         # (bar opt1 opt2 ...)
908         opened = -1
909         options = DocOptions()
910         options.position = tag.position
911
912         for i, c in enumerate(value):
913             if c == '(' and opened == -1:
914                 opened = i+1
915             if c == ')' and opened != -1:
916                 segment = value[opened:i]
917                 parts = segment.split(' ', 1)
918                 if len(parts) == 2:
919                     name, option = parts
920                 elif len(parts) == 1:
921                     name = parts[0]
922                     option = None
923                 else:
924                     raise AssertionError
925                 if option is not None:
926                     option = DocOption(tag, option)
927                 options.add(name, option)
928                 opened = -1
929
930         return options