2 # GObject-Introspection - a framework for introspecting GObject libraries
3 # Copyright (C) 2008-2010 Johan Dahlin
4 # Copyright (C) 2012 Dieter Verfaillie <dieterv@optionexplicit.be>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 # AnnotationParser - extract annotations from GTK-Doc comment blocks
29 from .annotationpatterns import (COMMENT_START_RE, COMMENT_END_RE,
30 COMMENT_ASTERISK_RE, EMPTY_LINE_RE,
31 SECTION_RE, SYMBOL_RE, PROPERTY_RE, SIGNAL_RE,
32 PARAMETER_RE, DESCRIPTION_TAG_RE, TAG_RE,
33 MULTILINE_ANNOTATION_CONTINUATION_RE)
34 from .odict import odict
37 # GTK-Doc comment block parts
38 PART_IDENTIFIER = 'identifier'
39 PART_PARAMETERS = 'parameters'
40 PART_DESCRIPTION = 'description'
44 IDENTIFIER_SECTION = 'section'
45 IDENTIFIER_SYMBOL = 'symbol'
46 IDENTIFIER_PROPERTY = 'property'
47 IDENTIFIER_SIGNAL = 'signal'
49 # Tags - annotations applied to comment blocks
52 TAG_STABILITY = 'stability'
53 TAG_DEPRECATED = 'deprecated'
54 TAG_RETURNS = 'returns'
55 TAG_RETURNVALUE = 'return value'
56 TAG_ATTRIBUTES = 'attributes'
57 TAG_RENAME_TO = 'rename to'
59 TAG_UNREF_FUNC = 'unref func'
60 TAG_REF_FUNC = 'ref func'
61 TAG_SET_VALUE_FUNC = 'set value func'
62 TAG_GET_VALUE_FUNC = 'get value func'
63 TAG_TRANSFER = 'transfer'
65 _ALL_TAGS = [TAG_VFUNC,
81 # Options - annotations for parameters and return values
82 OPT_ALLOW_NONE = 'allow-none'
84 OPT_ATTRIBUTE = 'attribute'
85 OPT_CLOSURE = 'closure'
86 OPT_DESTROY = 'destroy'
87 OPT_ELEMENT_TYPE = 'element-type'
88 OPT_FOREIGN = 'foreign'
91 OPT_INOUT_ALT = 'in-out'
94 OPT_TRANSFER = 'transfer'
97 OPT_CONSTRUCTOR = 'constructor'
119 # Array options - array specific annotations
120 OPT_ARRAY_FIXED_SIZE = 'fixed-size'
121 OPT_ARRAY_LENGTH = 'length'
122 OPT_ARRAY_ZERO_TERMINATED = 'zero-terminated'
125 OPT_OUT_CALLER_ALLOCATES = 'caller-allocates'
126 OPT_OUT_CALLEE_ALLOCATES = 'callee-allocates'
129 OPT_SCOPE_ASYNC = 'async'
130 OPT_SCOPE_CALL = 'call'
131 OPT_SCOPE_NOTIFIED = 'notified'
134 OPT_TRANSFER_NONE = 'none'
135 OPT_TRANSFER_CONTAINER = 'container'
136 OPT_TRANSFER_FULL = 'full'
137 OPT_TRANSFER_FLOATING = 'floating'
140 class DocBlock(object):
142 def __init__(self, name):
144 self.options = DocOptions()
148 self.params = odict()
def __cmp__(self, other):
    """
    Three-way comparison of two blocks based on their ``name`` attribute.

    :param other: object exposing a ``name`` attribute to compare against
    :returns: ``-1``, ``0`` or ``1`` when ``self.name`` is respectively
              less than, equal to or greater than ``other.name``
    """
    # The cmp() builtin only exists on Python 2. The expression below is
    # the documented replacement and gives the same sign on both major
    # Python versions.
    return (self.name > other.name) - (self.name < other.name)
155 return '<DocBlock %r %r>' % (self.name, self.options)
def get_tag(self, name):
    """
    Look up a tag attached to this block.

    :param name: key to look up in ``self.tags``
    :returns: whatever ``self.tags.get(name)`` yields for that key
    """
    tags = self.tags
    return tags.get(name)
def get_param(self, name):
    """
    Look up a parameter attached to this block.

    :param name: key to look up in ``self.params``
    :returns: whatever ``self.params.get(name)`` yields for that key
    """
    lookup = self.params.get
    return lookup(name)
163 def to_gtk_doc(self):
167 options += ' '.join('(%s)' % o for o in self.options)
169 if 'SECTION' not in self.name:
172 for param in self.params.values():
173 lines.append(param.to_gtk_doc_param())
175 for l in self.comment.split('\n'):
179 for tag in self.tags.values():
180 lines.append(tag.to_gtk_doc_tag())
187 comment += ' * %s\n' % (line, )
194 for param in self.params.values():
197 for tag in self.tags.values():
201 class DocTag(object):
203 def __init__(self, block, name):
206 self.options = DocOptions()
212 return '<DocTag %r %r>' % (self.name, self.options)
214 def _validate_option(self, name, value, required=False,
215 n_params=None, choices=None):
216 if required and value is None:
217 message.warn('%s annotation needs a value' % (
218 name, ), self.position)
221 if n_params is not None:
227 s = '%d values' % (n_params, )
228 if ((n_params > 0 and (value is None or value.length() != n_params)) or
229 n_params == 0 and value is not None):
233 length = value.length()
234 message.warn('%s annotation needs %s, not %d' % (
235 name, s, length), self.position)
238 if choices is not None:
239 valuestr = value.one()
240 if valuestr not in choices:
241 message.warn('invalid %s annotation value: %r' % (
242 name, valuestr, ), self.position)
245 def _validate_array(self, option, value):
249 for name, v in value.all().items():
250 if name in [OPT_ARRAY_ZERO_TERMINATED, OPT_ARRAY_FIXED_SIZE]:
253 except (TypeError, ValueError):
256 'array option %s needs a value' % (
258 positions=self.position)
261 'invalid array %s option value %r, '
262 'must be an integer' % (name, v, ),
263 positions=self.position)
264 elif name == OPT_ARRAY_LENGTH:
267 'array option length needs a value',
268 positions=self.position)
271 'invalid array annotation value: %r' % (
272 name, ), self.position)
274 def _validate_closure(self, option, value):
275 if value is not None and value.length() > 1:
277 'closure takes at most 1 value, %d given' % (
278 value.length()), self.position)
280 def _validate_element_type(self, option, value):
281 self._validate_option(option, value, required=True)
284 'element-type takes at least one value, none given',
287 if value.length() > 2:
289 'element-type takes at most 2 values, %d given' % (
290 value.length()), self.position)
293 def _validate_out(self, option, value):
296 if value.length() > 1:
298 'out annotation takes at most 1 value, %d given' % (
299 value.length()), self.position)
301 value_str = value.one()
302 if value_str not in [OPT_OUT_CALLEE_ALLOCATES,
303 OPT_OUT_CALLER_ALLOCATES]:
304 message.warn("out annotation value is invalid: %r" % (
305 value_str), self.position)
308 def _get_gtk_doc_value(self):
309 def serialize_one(option, value, fmt, fmt2):
311 if type(value) != str:
312 value = ' '.join((serialize_one(k, v, '%s=%s', '%s')
313 for k, v in value.all().items()))
314 return fmt % (option, value)
316 return fmt2 % (option, )
318 for option, value in self.options.items():
320 serialize_one(option, value, '(%s %s)', '(%s)'))
322 return ' '.join(annotations) + ': '
def to_gtk_doc_param(self):
    """
    Render this object as a GTK-Doc parameter line.

    :returns: ``'@<name>: '`` followed by the result of
              ``self._get_gtk_doc_value()`` and then ``self.comment``
    """
    # Build the tuple in the same left-to-right order the format string
    # consumes it: name, serialized annotations, description.
    fields = (self.name, self._get_gtk_doc_value(), self.comment)
    return '@%s: %s%s' % fields
329 def to_gtk_doc_tag(self):
330 return '%s: %s%s' % (self.name.capitalize(),
331 self._get_gtk_doc_value(),
335 if self.name == TAG_ATTRIBUTES:
336 # The 'Attributes:' tag allows free form annotations so the
337 # validation below is most certainly going to fail.
340 for option, value in self.options.items():
341 if option == OPT_ALLOW_NONE:
342 self._validate_option(option, value, n_params=0)
343 elif option == OPT_ARRAY:
344 self._validate_array(option, value)
345 elif option == OPT_ATTRIBUTE:
346 self._validate_option(option, value, n_params=2)
347 elif option == OPT_CLOSURE:
348 self._validate_closure(option, value)
349 elif option == OPT_DESTROY:
350 self._validate_option(option, value, n_params=1)
351 elif option == OPT_ELEMENT_TYPE:
352 self._validate_element_type(option, value)
353 elif option == OPT_FOREIGN:
354 self._validate_option(option, value, n_params=0)
355 elif option == OPT_IN:
356 self._validate_option(option, value, n_params=0)
357 elif option in [OPT_INOUT, OPT_INOUT_ALT]:
358 self._validate_option(option, value, n_params=0)
359 elif option == OPT_OUT:
360 self._validate_out(option, value)
361 elif option == OPT_SCOPE:
362 self._validate_option(
363 option, value, required=True,
365 choices=[OPT_SCOPE_ASYNC,
368 elif option == OPT_SKIP:
369 self._validate_option(option, value, n_params=0)
370 elif option == OPT_TRANSFER:
371 self._validate_option(
372 option, value, required=True,
374 choices=[OPT_TRANSFER_FULL,
375 OPT_TRANSFER_CONTAINER,
377 OPT_TRANSFER_FLOATING])
378 elif option == OPT_TYPE:
379 self._validate_option(option, value, required=True,
381 elif option == OPT_CONSTRUCTOR:
382 self._validate_option(option, value, n_params=0)
383 elif option == OPT_METHOD:
384 self._validate_option(option, value, n_params=0)
386 message.warn('invalid annotation option: %s' % (option, ),
390 class DocOptions(object):
396 return '<DocOptions %r>' % (self.values, )
398 def __getitem__(self, item):
399 for key, value in self.values:
def __nonzero__(self):
    """
    Truth value of this object (Python 2 protocol).

    :returns: ``bool(self.values)``
    """
    return bool(self.values)

# Python 3 looks up __bool__ instead of __nonzero__; alias it so truth
# testing gives the same answer on both major Python versions.
__bool__ = __nonzero__
408 return (k for k, v in self.values)
def add(self, name, value):
    """
    Append a new ``(name, value)`` pair to ``self.values``.

    :param name: first element of the stored pair
    :param value: second element of the stored pair
    """
    entry = (name, value)
    self.values.append(entry)
413 def get(self, item, default=None):
414 for key, value in self.values:
419 def getall(self, item):
420 for key, value in self.values:
425 return iter(self.values)
428 class DocOption(object):
430 def __init__(self, tag, option):
434 # (annotation option1=value1 option2=value2) etc
435 for p in option.split(' '):
437 name, value = p.split('=', 1)
441 self._dict[name] = value
443 self._array.append(name)
445 self._array.append((name, value))
448 return '<DocOption %r>' % (self._array, )
451 return len(self._array)
454 assert len(self._array) == 1
455 return self._array[0]
464 class AnnotationParser(object):
466 GTK-Doc comment block parser.
468 Parses GTK-Doc comment blocks into a parse tree built out of :class:`DocBlock`,
469 :class:`DocTag`, :class:`DocOptions` and :class:`DocOption` objects. This
470 parser tries to accept malformed input whenever possible and does not emit
471 syntax errors. However, it does emit warnings at the slightest indication
472 of malformed input when possible. It is usually a good idea to heed these
473 warnings as malformed input is known to result in invalid GTK-Doc output.
475 A GTK-Doc comment block can be constructed out of multiple parts that can
476 be combined to write different types of documentation.
477 See `GTK-Doc's documentation`_ to learn more about possible valid combinations.
478 Each part can be further divided into fields which are separated by `:` characters.
480 Possible parts and the fields they are constructed from look like the
481 following (optional fields are enclosed in square brackets):
485 * identifier_name [:annotations]
486 * @parameter_name [:annotations] [:description]
488 * comment_block_description
489 * tag_name [:annotations] [:description]
492 The order in which the different parts have to be specified is important::
494 - There has to be exactly 1 `identifier` part on the first line of the
495 comment block which consists of:
496 * an `identifier_name` field
497 * an optional `annotations` field
498 - Followed by 0 or more `parameters` parts, each consisting of:
499 * a `parameter_name` field
500 * an optional `annotations` field
501 * an optional `description` field
502 - Followed by at least 1 empty line signaling the beginning of
503 the `comment_block_description` part
504 - Followed by an optional `comment block description` part.
505 - Followed by 0 or more `tag` parts, each consisting of:
507 * an optional `annotations` field
508 * an optional `description` field
510 Additionally, the following restrictions are in effect::
512 - Parts can optionally be separated by an empty line, except between
513 the `parameter` parts and the `comment block description` part where
514 an empty line is required (see above).
515 - Parts and fields cannot span multiple lines, except for
516 `parameter descriptions`, `tag descriptions` and the
517 `comment_block_description` fields.
518 - `parameter descriptions` fields can not span multiple paragraphs.
519 - `tag descriptions` and `comment block description` fields can
520 span multiple paragraphs.
522 .. NOTE:: :class:`AnnotationParser` functionality is heavily based on gtkdoc-mkdb's
523 `ScanSourceFile()`_ function and is currently in sync with GTK-Doc
526 .. _GTK-Doc's documentation:
527 http://developer.gnome.org/gtk-doc-manual/1.18/documenting.html.en
528 .. _ScanSourceFile():
529 http://git.gnome.org/browse/gtk-doc/tree/gtkdoc-mkdb.in#n3722
530 .. _b41641b: b41641bd75f870afff7561ceed8a08456da57565
533 def parse(self, comments):
535 Parses multiple GTK-Doc comment blocks.
537 :param comments: a list of (comment, filename, lineno) tuples
538 :returns: a dictionary mapping identifier names to :class:`DocBlock` objects
543 for comment in comments:
544 comment_block = self.parse_comment_block(comment)
546 if comment_block is not None:
547 # Note: previous versions of this parser did not check
548 # if an identifier was already stored in comment_blocks,
549 # so when multiple comment blocks were encountered documenting
550 # the same identifier the last one seen "wins".
551 # Keep this behavior for backwards compatibility, but
553 if comment_block.name in comment_blocks:
554 message.warn("multiple comment blocks documenting '%s:' identifier." %
555 (comment_block.name),
556 comment_block.position)
558 comment_blocks[comment_block.name] = comment_block
560 return comment_blocks
562 def parse_comment_block(self, comment):
564 Parses a single GTK-Doc comment block.
566 :param comment: a (comment, filename, lineno) tuple
567 :returns: a :class:`DocBlock` object or ``None``
570 comment, filename, lineno = comment
572 # Assign line numbers to each line of the comment block,
573 # which will later be used as the offset to calculate the
574 # real line number in the source file
575 comment_lines = list(enumerate(comment.split('\n')))
577 # Check for the start of the comment block.
578 if COMMENT_START_RE.match(comment_lines[0][1]):
581 # Not a GTK-Doc comment block.
584 # Check for the end of the comment block.
585 if COMMENT_END_RE.match(comment_lines[-1][1]):
586 del comment_lines[-1]
588 # If we get this far, we are inside a GTK-Doc comment block.
589 return self._parse_comment_block(comment_lines, filename, lineno)
591 def _parse_comment_block(self, comment_lines, filename, lineno):
593 Parses a single GTK-Doc comment block already stripped from its
594 comment start (/**) and comment end (*/) marker lines.
596 :param comment_lines: list of (line_offset, line) tuples representing a
597 GTK-Doc comment block already stripped from its
598 start (/**) and end (*/) marker lines
599 :param filename: source file name where the comment block originated from
600 :param lineno: line in the source file where the comment block starts
601 :returns: a :class:`DocBlock` object or ``None``
603 .. NOTE:: If you are tempted to refactor this method and split it
604 further up (for example into _parse_identifier(), _parse_parameters(),
605 _parse_description(), _parse_tags() methods) then please resist the
606 urge. It is considered important that this method should be more or
607 less easily comparable with gtkdoc-mkdb's `ScanSourceFile()`_ function.
609 The different parsing steps are marked with a comment surrounded
610 by `#` characters in an attempt to make it clear what is going on.
612 .. _ScanSourceFile():
613 http://git.gnome.org/browse/gtk-doc/tree/gtkdoc-mkdb.in#n3722
622 for line_offset, line in comment_lines:
623 position = message.Position(filename, line_offset + lineno)
625 # Store the original line (without \n) and column offset
626 # so we can generate meaningful warnings later on.
630 # Get rid of ' * ' at start of the line.
631 result = COMMENT_ASTERISK_RE.match(line)
633 column_offset = result.end(0)
634 line = line[result.end(0):]
636 ####################################################################
637 # Check for GTK-Doc comment block identifier.
638 ####################################################################
639 if not comment_block:
641 result = SECTION_RE.match(line)
643 identifier = IDENTIFIER_SECTION
644 identifier_name = 'SECTION:%s' % (result.group('section_name'))
645 column = result.start('section_name') + column_offset
648 result = SYMBOL_RE.match(line)
650 identifier = IDENTIFIER_SYMBOL
651 identifier_name = '%s' % (result.group('symbol_name'))
652 column = result.start('symbol_name') + column_offset
655 result = PROPERTY_RE.match(line)
657 identifier = IDENTIFIER_PROPERTY
658 identifier_name = '%s:%s' % (result.group('class_name'),
659 result.group('property_name'))
660 column = result.start('property_name') + column_offset
663 result = SIGNAL_RE.match(line)
665 identifier = IDENTIFIER_SIGNAL
666 identifier_name = '%s::%s' % (result.group('class_name'),
667 result.group('signal_name'))
668 column = result.start('signal_name') + column_offset
671 in_part = PART_IDENTIFIER
673 comment_block = DocBlock(identifier_name)
674 comment_block.position = position
676 if 'colon' in result.groupdict() and result.group('colon') != ':':
677 colon_start = result.start('colon')
678 colon_column = column_offset + colon_start
679 marker = ' '*colon_column + '^'
680 message.warn("missing ':' at column %s:\n%s\n%s" %
681 (colon_column + 1, original_line, marker),
684 if 'annotations' in result.groupdict():
685 comment_block.options = self.parse_options(comment_block,
686 result.group('annotations'))
690 # If we get here, the identifier was not recognized, so
691 # ignore the rest of the block just like the old annotation
692 # parser did. Doing this is a bit more strict than
693 # gtkdoc-mkdb (which continues to search for the identifier
694 # until either it is found or the end of the block is
695 # reached). In practice, however, ignoring the block is the
696 # right thing to do because sooner or later some long
697 # descriptions will contain something matching an identifier
698 # pattern by accident.
699 marker = ' '*column_offset + '^'
700 message.warn('ignoring unrecognized GTK-Doc comment block, identifier not '
701 'found:\n%s\n%s' % (original_line, marker),
706 ####################################################################
707 # Check for comment block parameters.
708 ####################################################################
709 result = PARAMETER_RE.match(line)
711 param_name = result.group('parameter_name')
712 param_annotations = result.group('annotations')
713 param_description = result.group('description')
715 if in_part == PART_IDENTIFIER:
716 in_part = PART_PARAMETERS
718 if in_part != PART_PARAMETERS:
719 column = result.start('parameter_name') + column_offset
720 marker = ' '*column + '^'
721 message.warn("'@%s' parameter unexpected at this location:\n%s\n%s" %
722 (param_name, original_line, marker),
725 # Old style GTK-Doc allowed return values to be specified as
726 # parameters instead of tags.
727 if param_name.lower() == TAG_RETURNS:
728 param_name = TAG_RETURNS
733 message.warn("encountered multiple 'Returns' parameters or tags for "
734 "'%s'." % (comment_block.name),
736 elif param_name in comment_block.params.keys():
737 column = result.start('parameter_name') + column_offset
738 marker = ' '*column + '^'
739 message.warn("multiple '@%s' parameters for identifier '%s':\n%s\n%s" %
740 (param_name, comment_block.name, original_line, marker),
743 tag = DocTag(comment_block, param_name)
744 tag.position = position
745 tag.comment = param_description
746 if param_annotations:
747 tag.options = self.parse_options(tag, param_annotations)
748 if param_name == TAG_RETURNS:
749 comment_block.tags[param_name] = tag
751 comment_block.params[param_name] = tag
755 ####################################################################
756 # Check for comment block description.
758 # When we are parsing comment block parameters or the comment block
759 # identifier (when there are no parameters) and encounter an empty
760 # line, we must be parsing the comment block description.
761 ####################################################################
762 if (EMPTY_LINE_RE.match(line)
763 and in_part in [PART_IDENTIFIER, PART_PARAMETERS]):
764 in_part = PART_DESCRIPTION
767 ####################################################################
768 # Check for GTK-Doc comment block tags.
769 ####################################################################
770 result = TAG_RE.match(line)
772 tag_name = result.group('tag_name')
773 tag_annotations = result.group('annotations')
774 tag_description = result.group('description')
776 if in_part == PART_DESCRIPTION:
779 if in_part != PART_TAGS:
780 column = result.start('tag_name') + column_offset
781 marker = ' '*column + '^'
782 message.warn("'%s:' tag unexpected at this location:\n%s\n%s" %
783 (tag_name, original_line, marker),
786 if tag_name.lower() in [TAG_RETURNS, TAG_RETURNVALUE]:
790 message.warn("encountered multiple 'Returns' parameters or tags for "
791 "'%s'." % (comment_block.name),
794 tag = DocTag(comment_block, TAG_RETURNS)
795 tag.position = position
796 tag.comment = tag_description
798 tag.options = self.parse_options(tag, tag_annotations)
799 comment_block.tags[TAG_RETURNS] = tag
803 if tag_name.lower() in comment_block.tags.keys():
804 column = result.start('tag_name') + column_offset
805 marker = ' '*column + '^'
806 message.warn("multiple '%s:' tags for identifier '%s':\n%s\n%s" %
807 (tag_name, comment_block.name, original_line, marker),
810 tag = DocTag(comment_block, tag_name.lower())
811 tag.position = position
812 tag.value = tag_description
814 if tag_name.lower() == TAG_ATTRIBUTES:
815 tag.options = self.parse_options(tag, tag_annotations)
817 message.warn("annotations not supported for tag '%s:'." %
820 comment_block.tags[tag_name.lower()] = tag
824 ####################################################################
825 # If we get here, we must be in the middle of a multiline
826 # comment block, parameter or tag description.
827 ####################################################################
828 if in_part in [PART_IDENTIFIER, PART_DESCRIPTION]:
829 if not comment_block.comment:
830 # Backwards compatibility with old style GTK-Doc
831 # comment blocks where Description used to be a comment
832 # block tag. Simply get rid of 'Description:'.
833 line = re.sub(DESCRIPTION_TAG_RE, '', line)
834 comment_block.comment = line
836 comment_block.comment += '\n' + line
838 elif in_part == PART_PARAMETERS:
839 self._validate_multiline_annotation_continuation(line, original_line,
840 column_offset, position)
842 # Append to parameter description.
843 current_param.comment += ' ' + line.strip()
844 elif in_part == PART_TAGS:
845 self._validate_multiline_annotation_continuation(line, original_line,
846 column_offset, position)
848 # Append to tag description.
849 if current_tag.name.lower() in [TAG_RETURNS, TAG_RETURNVALUE]:
850 current_tag.comment += ' ' + line.strip()
852 current_tag.value += ' ' + line.strip()
854 ########################################################################
855 # Finished parsing this comment block.
856 ########################################################################
857 # We have picked up a couple of \n characters that were not
858 # intended. Strip those.
859 if comment_block.comment:
860 comment_block.comment = comment_block.comment.strip()
862 comment_block.comment = ''
864 for tag in comment_block.tags.values():
865 self._clean_comment_block_part(tag)
867 for param in comment_block.params.values():
868 self._clean_comment_block_part(param)
870 # Validate and store block.
871 comment_block.validate()
874 def _clean_comment_block_part(self, part):
876 part.comment = part.comment.strip()
881 part.value = part.value.strip()
885 def _validate_multiline_annotation_continuation(self, line, original_line,
886 column_offset, position):
888 Validate parameters and tags (except the first line) and generate
889 warnings about invalid annotations spanning multiple lines.
891 :param line: line to validate, stripped from ' * ' at start of the line.
892 :param original_line: original line to validate (used in warning messages)
893 :param column_offset: column width of ' * ' at the time it was stripped from `line`
894 :param position: position of `line` in the source file
897 result = MULTILINE_ANNOTATION_CONTINUATION_RE.match(line)
899 column = result.start('annotations') + column_offset
900 marker = ' '*column + '^'
901 message.warn('ignoring invalid multiline annotation continuation:\n'
902 '%s\n%s' % (original_line, marker),
906 def parse_options(cls, tag, value):
908 # (annotation opt1 opt2 ...)
909 # (annotation opt1=value1 opt2=value2 ...)
911 options = DocOptions()
912 options.position = tag.position
914 for i, c in enumerate(value):
915 if c == '(' and opened == -1:
917 if c == ')' and opened != -1:
918 segment = value[opened:i]
919 parts = segment.split(' ', 1)
922 elif len(parts) == 1:
927 if option is not None:
928 option = DocOption(tag, option)
929 options.add(name, option)