codegen/docextract_to_xml.py

   1 #!/usr/bin/env python
   2 # -*- Mode: Python; py-indent-offset: 4 -*-
   3 #
   4 # This little script outputs the C doc comments to an XML format.
   5 # So far it's only used by gtkmm (The C++ bindings). Murray Cumming.
   6 # Usage example:
   7 # # ./docextract_to_xml.py -s /gnome/head/cvs/gtk+/gtk/ -s /gnome/head/cvs/gtk+/docs/reference/gtk/tmpl/ > gtk_docs.xml
   8
   9 import getopt
  10 import re
  11 import string
  12 import sys
  13
  14 import docextract
  15
  16 def usage():
  17     sys.stderr.write('usage: docextract_to_xml.py ' +
  18         '[-s /src/dir | --source-dir=/src/dir] ' +
  19         '[-a | --with-annotations] [-p | --with-properties] ' +
  20         '[-i | --with-signals ]\n')
  21     sys.exit(1)
  22
  23 # Translates special texts to &... HTML acceptable format.  Also replace
  24 # occurrences of '/*' and '*/' with '/ *' and '* /' respectively to avoid
  25 # comment errors (note the spaces).  Some function descriptions include C++
  26 # multi-line comments which cause errors when the description is included in a
  27 # C++ Doxygen comment block.
  28 def escape_text(unescaped_text):
  29     # Escape every "&" not part of an entity reference
  30     escaped_text = re.sub(r'&(?![A-Za-z]+;)', '&amp;', unescaped_text)
  31
  32     # These weird entities turn up in the output...
  33     escaped_text = string.replace(escaped_text, '&mdash;', '&#8212;')
  34     escaped_text = string.replace(escaped_text, '&ast;', '*')
  35     escaped_text = string.replace(escaped_text, '&percnt;', '%')
  36     escaped_text = string.replace(escaped_text, '&commat;', '@')
  37     escaped_text = string.replace(escaped_text, '&num;', '&#35;')
  38     escaped_text = string.replace(escaped_text, '&nbsp;', '&#160;')
  39     # This represents a '/' before or after an '*' so replace with slash but
  40     # with spaces.
  41     escaped_text = string.replace(escaped_text, '&sol;', ' / ')
  42
  43     # Escape for both tag contents and attribute values
  44     escaped_text = string.replace(escaped_text, '<', '&lt;')
  45     escaped_text = string.replace(escaped_text, '>', '&gt;')
  46     escaped_text = string.replace(escaped_text, '"', '&quot;')
  47
  48     # Replace C++ comment begin and ends to ones that don't affect Doxygen.
  49     escaped_text = string.replace(escaped_text, '/*', '/ *')
  50     escaped_text = string.replace(escaped_text, '*/', '* /')
  51
  52     return escaped_text
  53
  54 def print_annotations(annotations):
  55     for annotation in annotations:
  56         print "<annotation name=" + annotation[0] +  ">" + \
  57                 escape_text(annotation[1]) + "</annotation>"
  58
  59 if __name__ == '__main__':
  60     try:
  61         opts, args = getopt.getopt(sys.argv[1:], "d:s:o:api",
  62                                    ["source-dir=", "with-annotations",
  63                                      "with-properties", "with-signals"])
  64     except getopt.error, e:
  65         sys.stderr.write('docextract_to_xml.py: %s\n' % e)
  66         usage()
  67     source_dirs = []
  68     with_annotations = False
  69     with_signals = False
  70     with_properties = False
  71     for opt, arg in opts:
  72         if opt in ('-s', '--source-dir'):
  73             source_dirs.append(arg)
  74         if opt in ('-a', '--with-annotations'):
  75             with_annotations = True
  76         if opt in ('-p', '--with-properties'):
  77             with_properties = True
  78         if opt in ('-i', '--with-signals'):
  79             with_signals = True
  80     if len(args) != 0:
  81         usage()
  82
  83     docs = docextract.extract(source_dirs);
  84     docextract.extract_tmpl(source_dirs, docs); #Try the tmpl sgml files too.
  85
  86     # print d.docs
  87
  88     if docs:
  89
  90         print "<root>"
  91
  92         for name, value in sorted(docs.items()):
  93             # Get the type of comment block ('function', 'signal' or
  94             # 'property') (the value is a GtkDoc).
  95             block_type = value.get_type()
  96
  97             # Skip signals if the option was not specified.
  98             if block_type == 'signal' and not with_signals:
  99                 continue
 100             # Likewise for properties.
 101             elif block_type == 'property' and not with_properties:
 102                 continue
 103
 104             print "<" + block_type + " name=\"" + escape_text(name) + "\">"
 105
 106             print "<description>"
 107             print escape_text(value.get_description())
 108             print "</description>"
 109
 110             # Loop through the parameters if not dealing with a property:
 111             if block_type != 'property':
 112                 print "<parameters>"
 113                 for name, description, annotations in value.params:
 114                         print "<parameter name=\"" + escape_text(name) + "\">"
 115                         print "<parameter_description>" + escape_text(description) + "</parameter_description>"
 116
 117                         if with_annotations:
 118                             print_annotations(annotations)
 119
 120                         print "</parameter>"
 121
 122                 print "</parameters>"
 123
 124                 # Show the return-type (also if not dealing with a property):
 125                 if with_annotations:
 126                     print "<return>"
 127                     print "<return_description>" + escape_text(value.ret[0]) + \
 128                             "</return_description>"
 129                     print_annotations(value.ret[1])
 130                     print "</return>"
 131                 else:
 132                     print "<return>" + escape_text(value.ret[0]) + "</return>"
 133
 134             if with_annotations:
 135                 print_annotations(value.get_annotations())
 136
 137             print "</" + block_type + ">\n"
 138
 139         print "</root>"