2 # GObject-Introspection - a framework for introspecting GObject libraries
3 # Copyright (C) 2008-2010 Johan Dahlin
4 # Copyright (C) 2012 Dieter Verfaillie <dieterv@optionexplicit.be>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 # AnnotationParser - extract annotations from gtk-doc comments
29 from .annotationpatterns import (COMMENT_START_RE, COMMENT_END_RE,
30 COMMENT_STAR_RE, EMPTY_LINE_RE,
31 SECTION_RE, SYMBOL_RE, PROPERTY_RE, SIGNAL_RE,
32 PARAMETER_RE, DESCRIPTION_TAG_RE, TAG_RE,
33 MULTILINE_ANNOTATION_CONTINUATION_RE)
34 from .odict import odict
37 # GTK-Doc comment block parts
38 PART_IDENTIFIER = 'identifier'
39 PART_PARAMETERS = 'parameters'
40 PART_DESCRIPTION = 'description'
44 IDENTIFIER_SECTION = 'section'
45 IDENTIFIER_SYMBOL = 'symbol'
46 IDENTIFIER_PROPERTY = 'property'
47 IDENTIFIER_SIGNAL = 'signal'
49 # Tags - annotations applied to comment blocks
52 TAG_STABILITY = 'stability'
53 TAG_DEPRECATED = 'deprecated'
54 TAG_RETURNS = 'returns'
55 TAG_RETURNVALUE = 'return value'
56 TAG_ATTRIBUTES = 'attributes'
57 TAG_RENAME_TO = 'rename to'
59 TAG_UNREF_FUNC = 'unref func'
60 TAG_REF_FUNC = 'ref func'
61 TAG_SET_VALUE_FUNC = 'set value func'
62 TAG_GET_VALUE_FUNC = 'get value func'
63 TAG_TRANSFER = 'transfer'
65 _ALL_TAGS = [TAG_VFUNC,
81 # Options - annotations for parameters and return values
82 OPT_ALLOW_NONE = 'allow-none'
84 OPT_ATTRIBUTE = 'attribute'
85 OPT_CLOSURE = 'closure'
86 OPT_DESTROY = 'destroy'
87 OPT_ELEMENT_TYPE = 'element-type'
88 OPT_FOREIGN = 'foreign'
91 OPT_INOUT_ALT = 'in-out'
94 OPT_TRANSFER = 'transfer'
97 OPT_CONSTRUCTOR = 'constructor'
119 # Array options - array specific annotations
120 OPT_ARRAY_FIXED_SIZE = 'fixed-size'
121 OPT_ARRAY_LENGTH = 'length'
122 OPT_ARRAY_ZERO_TERMINATED = 'zero-terminated'
125 OPT_OUT_CALLER_ALLOCATES = 'caller-allocates'
126 OPT_OUT_CALLEE_ALLOCATES = 'callee-allocates'
129 OPT_SCOPE_ASYNC = 'async'
130 OPT_SCOPE_CALL = 'call'
131 OPT_SCOPE_NOTIFIED = 'notified'
134 OPT_TRANSFER_NONE = 'none'
135 OPT_TRANSFER_CONTAINER = 'container'
136 OPT_TRANSFER_FULL = 'full'
137 OPT_TRANSFER_FLOATING = 'floating'
140 class DocBlock(object):
142 def __init__(self, name):
144 self.options = DocOptions()
148 self.params = odict()
def __cmp__(self, other):
    """Order two blocks by their ``name`` attribute (Python 2 comparison hook).

    :param other: object exposing a ``name`` attribute to compare against
    :returns: ``-1``, ``0`` or ``1`` when ``self.name`` is respectively
              smaller than, equal to or greater than ``other.name``
    """
    # Spelled out instead of calling ``cmp()``: that builtin only exists on
    # Python 2, while this expression yields the same -1/0/1 result everywhere.
    return (self.name > other.name) - (self.name < other.name)
155 return '<DocBlock %r %r>' % (self.name, self.options)
def set_position(self, position):
    """Attach a source position to this block and to its options.

    The very same ``position`` object is stored on ``self`` and on
    ``self.options``, so both report the same location.

    :param position: position object to store (kept as given, not copied)
    """
    # Chained assignment binds left to right: the block first, then its options.
    self.position = self.options.position = position
def get_tag(self, name):
    """Look up one of this block's tags by name.

    :param name: key to look up in ``self.tags``
    :returns: whatever ``self.tags.get(name)`` yields for that key
    """
    tags = self.tags
    return tags.get(name)
def get_param(self, name):
    """Look up one of this block's parameters by name.

    :param name: key to look up in ``self.params``
    :returns: whatever ``self.params.get(name)`` yields for that key
    """
    params = self.params
    return params.get(name)
167 def to_gtk_doc(self):
171 options += ' '.join('(%s)' % o for o in self.options)
173 if 'SECTION' not in self.name:
176 for param in self.params.values():
177 lines.append(param.to_gtk_doc_param())
179 for l in self.comment.split('\n'):
183 for tag in self.tags.values():
184 lines.append(tag.to_gtk_doc_tag())
191 comment += ' * %s\n' % (line, )
198 for param in self.params.values():
201 for tag in self.tags.values():
205 class DocTag(object):
207 def __init__(self, block, name):
210 self.options = DocOptions()
216 return '<DocTag %r %r>' % (self.name, self.options)
218 def _validate_option(self, name, value, required=False,
219 n_params=None, choices=None):
220 if required and value is None:
221 message.warn('%s annotation needs a value' % (
222 name, ), self.position)
225 if n_params is not None:
231 s = '%d values' % (n_params, )
232 if ((n_params > 0 and (value is None or value.length() != n_params)) or
233 n_params == 0 and value is not None):
237 length = value.length()
238 message.warn('%s annotation needs %s, not %d' % (
239 name, s, length), self.position)
242 if choices is not None:
243 valuestr = value.one()
244 if valuestr not in choices:
245 message.warn('invalid %s annotation value: %r' % (
246 name, valuestr, ), self.position)
def set_position(self, position):
    """Attach a source position to this tag and to its options.

    The very same ``position`` object is stored on ``self`` and on
    ``self.options``, so both report the same location.

    :param position: position object to store (kept as given, not copied)
    """
    # Chained assignment binds left to right: the tag first, then its options.
    self.position = self.options.position = position
253 def _get_gtk_doc_value(self):
254 def serialize_one(option, value, fmt, fmt2):
256 if type(value) != str:
257 value = ' '.join((serialize_one(k, v, '%s=%s', '%s')
258 for k, v in value.all().iteritems()))
259 return fmt % (option, value)
261 return fmt2 % (option, )
263 for option, value in self.options.iteritems():
265 serialize_one(option, value, '(%s %s)', '(%s)'))
267 return ' '.join(annotations) + ': '
def to_gtk_doc_param(self):
    """Render this tag as a GTK-Doc ``@name:`` parameter line.

    :returns: the string ``'@<name>: '`` followed by the text returned by
              ``self._get_gtk_doc_value()`` and then by ``self.comment``
    """
    pieces = (self.name, self._get_gtk_doc_value(), self.comment)
    return '@%s: %s%s' % pieces
274 def to_gtk_doc_tag(self):
275 return '%s: %s%s' % (self.name.capitalize(),
276 self._get_gtk_doc_value(),
280 if self.name == TAG_ATTRIBUTES:
281 # The 'Attributes:' tag allows free form annotations so the
282 # validation below is most certainly going to fail.
285 for option in self.options:
286 value = self.options[option]
287 if option == OPT_ALLOW_NONE:
288 self._validate_option('allow-none', value, n_params=0)
289 elif option == OPT_ARRAY:
292 for name, v in value.all().iteritems():
293 if name in [OPT_ARRAY_ZERO_TERMINATED, OPT_ARRAY_FIXED_SIZE]:
296 except (TypeError, ValueError):
299 'array option %s needs a value' % (
301 positions=self.position)
304 'invalid array %s option value %r, '
305 'must be an integer' % (name, v, ),
306 positions=self.position)
308 elif name == OPT_ARRAY_LENGTH:
311 'array option length needs a value',
312 positions=self.position)
316 'invalid array annotation value: %r' % (
317 name, ), self.position)
319 elif option == OPT_ATTRIBUTE:
320 self._validate_option('attribute', value, n_params=2)
321 elif option == OPT_CLOSURE:
322 if value is not None and value.length() > 1:
324 'closure takes at maximium 1 value, %d given' % (
325 value.length()), self.position)
327 elif option == OPT_DESTROY:
328 self._validate_option('destroy', value, n_params=1)
329 elif option == OPT_ELEMENT_TYPE:
330 self._validate_option('element-type', value, required=True)
333 'element-type takes at least one value, none given',
336 if value.length() > 2:
338 'element-type takes at maximium 2 values, %d given' % (
339 value.length()), self.position)
341 elif option == OPT_FOREIGN:
342 self._validate_option('foreign', value, n_params=0)
343 elif option == OPT_IN:
344 self._validate_option('in', value, n_params=0)
345 elif option in [OPT_INOUT, OPT_INOUT_ALT]:
346 self._validate_option('inout', value, n_params=0)
347 elif option == OPT_OUT:
350 if value.length() > 1:
352 'out annotation takes at maximium 1 value, %d given' % (
353 value.length()), self.position)
355 value_str = value.one()
356 if value_str not in [OPT_OUT_CALLEE_ALLOCATES,
357 OPT_OUT_CALLER_ALLOCATES]:
358 message.warn("out annotation value is invalid: %r" % (
359 value_str), self.position)
361 elif option == OPT_SCOPE:
362 self._validate_option(
363 'scope', value, required=True,
365 choices=[OPT_SCOPE_ASYNC,
368 elif option == OPT_SKIP:
369 self._validate_option('skip', value, n_params=0)
370 elif option == OPT_TRANSFER:
371 self._validate_option(
372 'transfer', value, required=True,
374 choices=[OPT_TRANSFER_FULL,
375 OPT_TRANSFER_CONTAINER,
377 OPT_TRANSFER_FLOATING])
378 elif option == OPT_TYPE:
379 self._validate_option('type', value, required=True,
381 elif option == OPT_CONSTRUCTOR:
382 self._validate_option('constructor', value, n_params=0)
383 elif option == OPT_METHOD:
384 self._validate_option('method', value, n_params=0)
386 message.warn('invalid annotation option: %s' % (option, ),
390 class DocOptions(object):
395 return '<DocOptions %r>' % (self.values, )
397 def __getitem__(self, item):
398 for key, value in self.values:
def __nonzero__(self):
    """Python 2 truth-testing hook.

    :returns: ``True`` when ``self.values`` is truthy, ``False`` otherwise
    """
    return True if self.values else False
407 return (k for k, v in self.values)
def add(self, name, value):
    """Append a ``(name, value)`` pair to ``self.values``.

    The pair is always appended; an earlier entry with the same name is
    neither replaced nor removed by this call.

    :param name: option name
    :param value: value to store alongside ``name``
    """
    entry = (name, value)
    self.values.append(entry)
412 def get(self, item, default=None):
413 for key, value in self.values:
418 def getall(self, item):
419 for key, value in self.values:
424 return iter(self.values)
427 class DocOption(object):
429 def __init__(self, tag, option):
433 # (annotation option1=value1 option2=value2) etc
434 for p in option.split(' '):
436 name, value = p.split('=', 1)
440 self._dict[name] = value
442 self._array.append(name)
444 self._array.append((name, value))
447 return '<DocOption %r>' % (self._array, )
450 return len(self._array)
453 assert len(self._array) == 1
454 return self._array[0]
463 class AnnotationParser(object):
465 GTK-Doc comment block parser.
467 Parses GTK-Doc comment blocks into a parse tree built out of :class:`DocBlock`,
468 :class:`DocTag`, :class:`DocOptions` and :class:`DocOption` objects. This
469 parser tries to accept malformed input whenever possible and does not emit
470 syntax errors. However, it does emit warnings at the slightest indication
471 of malformed input when possible.
473 A GTK-Doc comment block can be constructed out of multiple parts that can
474 be combined to write different types of documentation.
475 See `GTK-Doc's documentation`_ to learn more about possible valid combinations.
476 Each part can be further divided into fields which are separated by `:` characters.
478 Possible parts and the fields they are constructed from look like the
479 following (optional fields are enclosed in square brackets):
483 * identifier_name: [annotations]
484 * @parameter_name: [annotations:] [description]
487 * tag_name: [annotations:] [description]
490 - Parts and fields cannot span multiple lines, except for parameter descriptions,
491 tag descriptions and comment block descriptions.
492 - There has to be exactly 1 `identifier` part on the first line of the
493 comment block which consists of:
494 * an `identifier_name` field
495 * an optional `annotations` field
496 - There can be 0 or more `parameter` parts following the `identifier` part,
498 * a `parameter_name` field
499 * an optional `annotations` field
500 * an optional `description` field
501 - An empty line signals the end of the `parameter` parts and the beginning
502 of the (free form) comment block `description` part.
503 - There can be 0 or 1 `description` parts following the `identifier` or `parameter` parts.
504 - There can be 0 or more `tag` parts following the `description` part,
507 * an optional `annotations` field
508 * an optional `description` field
510 .. NOTE:: :class:`AnnotationParser` functionality is heavily based on gtkdoc-mkdb's
511 `ScanSourceFile()`_ function and is currently in sync with gtk-doc
514 .. _types of documentation:
515 http://developer.gnome.org/gtk-doc-manual/1.18/documenting.html.en
516 .. _ScanSourceFile():
517 http://git.gnome.org/browse/gtk-doc/tree/gtkdoc-mkdb.in#n3722
518 .. _b41641b: b41641bd75f870afff7561ceed8a08456da57565
521 def parse(self, comments):
523 Parses multiple GTK-Doc comment blocks.
525 :param comments: a list of (comment, filename, lineno) tuples
526 :returns: a mapping of block names to :class:`DocBlock` objects
531 for comment in comments:
532 comment_block = self.parse_comment_block(comment)
534 if comment_block is not None:
535 if comment_block.name in comment_blocks:
536 message.warn("multiple comment blocks documenting '%s:' identifier." %
537 (comment_block.name),
538 comment_block.position)
540 # Always store the block even if it's a duplicate for
541 # backward compatibility...
542 comment_blocks[comment_block.name] = comment_block
544 return comment_blocks
546 def parse_comment_block(self, comment):
548 Parses a single GTK-Doc comment block.
550 :param comment: a (comment, filename, lineno) tuple
551 :returns: a :class:`DocBlock` object or ``None``
554 comment, filename, lineno = comment
555 comment_lines = list(enumerate(comment.split('\n')))
557 # Check for the start of the comment block.
558 if COMMENT_START_RE.search(comment_lines[0][1]):
561 # Not a GTK-Doc comment block.
564 # Check for the end of the comment block.
565 if COMMENT_END_RE.search(comment_lines[-1][1]):
566 del comment_lines[-1]
568 # If we get this far, we are inside a GTK-Doc comment block.
569 return self._parse_comment_block(comment_lines, filename, lineno)
571 def _parse_comment_block(self, comment_lines, filename, lineno):
573 Parses a single GTK-Doc comment block stripped of its
574 comment start (/**) and comment end (*/) marker lines.
576 :param comment_lines: GTK-Doc comment block stripped of its comment
577 start (/**) and comment end (*/) marker lines
578 :param filename: source file name where the comment block originated from
579 :param lineno: line in the source file where the comment block starts
580 :returns: a :class:`DocBlock` object or ``None``
582 .. NOTE:: If you are tempted to refactor this method and split it
583 further up (for example into _parse_identifier(), _parse_parameters(),
584 _parse_description(), _parse_tags() methods) then please resist the
585 urge. It is considered important that this method should be more or
586 less easily comparable with gtkdoc-mkdb's `ScanSourceFile()`_ function.
588 The different parsing steps are marked with a comment surrounded
589 by `#` characters in an attempt to make it clear what is going on.
591 .. _ScanSourceFile():
592 http://git.gnome.org/browse/gtk-doc/tree/gtkdoc-mkdb.in#n3722
601 for line_offset, line in comment_lines:
602 position = message.Position(filename, line_offset + lineno)
604 result = COMMENT_STAR_RE.match(line)
606 # Store the original line (without \n) and column offset
607 # so we can generate meaningful warnings later on.
609 column_offset = result.end(0)
611 # Get rid of ' * ' at start of the line.
612 line = line[result.end(0):]
614 ####################################################################
615 # Check for GTK-Doc comment block identifier.
616 ####################################################################
617 if not comment_block:
618 # The correct identifier name would have the colon at the end
619 # but maintransformer.py does not expect us to do that. So
620 # make sure to compute an identifier_name without the colon and
621 # a real_identifier_name with the colon.
624 result = SECTION_RE.search(line)
626 identifier = IDENTIFIER_SECTION
627 real_identifier_name = 'SECTION:%s' % (result.group('section_name'))
628 identifier_name = real_identifier_name
629 column = result.start('section_name') + column_offset
632 result = SYMBOL_RE.search(line)
634 identifier = IDENTIFIER_SYMBOL
635 real_identifier_name = '%s:' % (result.group('symbol_name'))
636 identifier_name = '%s' % (result.group('symbol_name'))
637 column = result.start('symbol_name') + column_offset
640 result = PROPERTY_RE.search(line)
642 identifier = IDENTIFIER_PROPERTY
643 real_identifier_name = '%s:%s:' % (result.group('class_name'),
644 result.group('property_name'))
645 identifier_name = '%s:%s' % (result.group('class_name'),
646 result.group('property_name'))
647 column = result.start('property_name') + column_offset
650 result = SIGNAL_RE.search(line)
652 identifier = IDENTIFIER_SIGNAL
653 real_identifier_name = '%s::%s:' % (result.group('class_name'),
654 result.group('signal_name'))
655 identifier_name = '%s::%s' % (result.group('class_name'),
656 result.group('signal_name'))
657 column = result.start('signal_name') + column_offset
660 in_part = PART_IDENTIFIER
662 comment_block = DocBlock(identifier_name)
663 comment_block.set_position(position)
665 if 'annotations' in result.groupdict():
666 comment_block.options = self.parse_options(comment_block,
667 result.group('annotations'))
669 if 'colon' in result.groupdict() and result.group('colon') != ':':
670 colon_start = result.start('colon')
671 colon_column = column_offset + colon_start
672 marker = ' '*colon_column + '^'
673 message.warn("missing ':' at column %s:\n%s\n%s" %
674 (colon_start, original_line, marker),
678 # If we get here, the identifier was not recognized, so
679 # ignore the rest of the block just like the old annotation
680 # parser did. Doing this is a bit more strict than
681 # gtkdoc-mkdb (which continues to search for the identifier
682 # until either it is found or the end of the block is
683 # reached). In practice, however, ignoring the block is the
684 # right thing to do because sooner or later some long
685 # descriptions will contain something matching an identifier
686 # pattern by accident.
687 marker = ' '*column_offset + '^'
688 message.warn('ignoring unrecognized GTK-Doc comment block, identifier not '
689 'found:\n%s\n%s' % (original_line, marker),
694 ####################################################################
695 # Check for comment block parameters.
696 ####################################################################
697 result = PARAMETER_RE.search(line)
699 param_name = result.group('parameter_name')
700 param_annotations = result.group('annotations')
701 param_description = result.group('description')
703 if in_part == PART_IDENTIFIER:
704 in_part = PART_PARAMETERS
706 if in_part != PART_PARAMETERS:
707 column = result.start('parameter_name') + column_offset
708 marker = ' '*column + '^'
709 message.warn("'@%s' parameter unexpected at this location:\n%s\n%s" %
710 (param_name, original_line, marker),
713 # Old style GTK-Doc allowed return values to be specified as
714 # parameters instead of tags.
715 if param_name.lower() == TAG_RETURNS:
716 param_name = TAG_RETURNS
721 message.warn("encountered multiple 'Returns' parameters or tags for "
722 "'%s'." % (comment_block.name),
724 elif param_name in comment_block.params.keys():
725 column = result.start('parameter_name') + column_offset
726 marker = ' '*column + '^'
727 message.warn("multiple '@%s' parameters for identifier '%s':\n%s\n%s" %
728 (param_name, comment_block.name, original_line, marker),
731 tag = DocTag(comment_block, param_name)
732 tag.set_position(position)
733 tag.comment = param_description
734 if param_annotations:
735 tag.options = self.parse_options(tag, param_annotations)
736 if param_name == TAG_RETURNS:
737 comment_block.tags[param_name] = tag
739 comment_block.params[param_name] = tag
743 ####################################################################
744 # Check for comment block description.
746 # When we are parsing comment block parameters or the comment block
747 # identifier (when there are no parameters) and encounter an empty
748 # line, we must be parsing the comment block description
749 ####################################################################
750 if (EMPTY_LINE_RE.search(line)
751 and (in_part == PART_IDENTIFIER or in_part == PART_PARAMETERS)):
752 in_part = PART_DESCRIPTION
755 ####################################################################
756 # Check for GTK-Doc comment block tags.
757 ####################################################################
758 result = TAG_RE.search(line)
760 tag_name = result.group('tag_name')
761 tag_annotations = result.group('annotations')
762 tag_description = result.group('description')
764 if in_part == PART_DESCRIPTION:
767 if in_part != PART_TAGS:
768 column = result.start('tag_name') + column_offset
769 marker = ' '*column + '^'
770 message.warn("'%s:' tag unexpected at this location:\n%s\n%s" %
771 (tag_name, original_line, marker),
774 if tag_name.lower() in [TAG_RETURNS, TAG_RETURNVALUE]:
778 message.warn("encountered multiple 'Returns' parameters or tags for "
779 "'%s'." % (comment_block.name),
782 tag = DocTag(comment_block, TAG_RETURNS)
783 tag.position = position
784 tag.comment = tag_description
786 tag.options = self.parse_options(tag, tag_annotations)
787 comment_block.tags[TAG_RETURNS] = tag
791 if tag_name.lower() in comment_block.tags.keys():
792 column = result.start('tag_name') + column_offset
793 marker = ' '*column + '^'
794 message.warn("multiple '%s:' tags for identifier '%s':\n%s\n%s" %
795 (tag_name, comment_block.name, original_line, marker),
798 tag = DocTag(comment_block, tag_name.lower())
799 tag.position = position
800 tag.value = tag_description
802 if tag_name.lower() == TAG_ATTRIBUTES:
803 tag.options = self.parse_options(tag, tag_annotations)
805 message.warn("annotations not supported for tag '%s'." %
808 comment_block.tags[tag_name.lower()] = tag
812 ####################################################################
813 # If we get here, we must be in the middle of a multiline
814 # comment block, parameter or tag description.
815 ####################################################################
816 if in_part == PART_DESCRIPTION:
817 if not comment_block.comment:
818 # Backwards compatibility with old style GTK-Doc
819 # comment blocks where Description used to be a comment
820 # block tag. Simply get rid of 'Description:'.
821 line = re.sub(DESCRIPTION_TAG_RE, '', line)
822 comment_block.comment = line
824 comment_block.comment += '\n' + line
826 elif in_part == PART_PARAMETERS:
827 if not current_param:
828 message.warn('parameter expected:\n%s' %
832 self._validate_multiline_annotation_continuation(line, original_line,
833 column_offset, position)
835 # Append to parameter description.
836 current_param.comment += ' ' + line.strip()
837 elif in_part == PART_TAGS:
839 message.warn('tag expected:\n%s' %
843 self._validate_multiline_annotation_continuation(line, original_line,
844 column_offset, position)
846 # Append to tag description.
847 if current_tag.name.lower() in [TAG_RETURNS, TAG_RETURNVALUE]:
848 current_tag.comment += ' ' + line.strip()
850 current_tag.value += ' ' + line.strip()
852 ########################################################################
853 # Finished parsing this comment block.
854 ########################################################################
855 # We have picked up a couple of \n characters that were not
856 # intended. Strip those.
857 if comment_block.comment:
858 comment_block.comment = comment_block.comment.strip()
860 comment_block.comment = ''
862 for tag in comment_block.tags.itervalues():
863 self._clean_comment_block_part(tag)
865 for param in comment_block.params.itervalues():
866 self._clean_comment_block_part(param)
868 # Validate and store block.
869 comment_block.validate()
872 def _clean_comment_block_part(self, part):
874 part.comment = part.comment.strip()
879 part.value = part.value.strip()
883 def _validate_multiline_annotation_continuation(self, line, original_line,
884 column_offset, position):
886 Validate parameters and tags (except the first line) and generate
887 warnings about invalid annotations spanning multiple lines.
889 :param line: line to validate, stripped from ' * ' at start of the line.
890 :param original_line: original line to validate (used in warning messages)
891 :param column_offset: column width of ' * ' at the time it was stripped from `line`
892 :param position: position of `line` in the source file
895 result = MULTILINE_ANNOTATION_CONTINUATION_RE.search(line)
897 line = result.group('description')
898 column = result.start('annotations') + column_offset
899 marker = ' '*column + '^'
900 message.warn('ignoring invalid multiline annotation continuation:\n'
901 '%s\n%s' % (original_line, marker),
905 def parse_options(cls, tag, value):
907 # (bar opt1 opt2 ...)
909 options = DocOptions()
910 options.position = tag.position
912 for i, c in enumerate(value):
913 if c == '(' and opened == -1:
915 if c == ')' and opened != -1:
916 segment = value[opened:i]
917 parts = segment.split(' ', 1)
920 elif len(parts) == 1:
925 if option is not None:
926 option = DocOption(tag, option)
927 options.add(name, option)