1 # -*- Mode: Python; py-indent-offset: 4 -*-
2 '''Simple module for extracting GNOME style doc comments from C
3 sources, so I can use them for other purposes.'''
5 import sys, os, string, re
7 # Used to tell if the "Since: ..." portion of the gtkdoc function description
8 # should be omitted. This is useful for some C++ modules such as gstreamermm
9 # that wrap C API which is still unstable and including this information would
11 # This variable is modified from docextract_to_xml based on the --no-since
12 # option being specified.
20 self.block_type = '' # The block type ('function', 'signal', 'property')
24 self.ret = ('', []) # (return, annotations)
25 def set_name(self, name):
def set_type(self, block_type):
    '''Record what kind of comment block this is.

    block_type is stored unchanged in self.block_type; the values used
    by this module are 'function', 'signal' and 'property'.
    '''
    self.block_type = block_type
30 return self.block_type
31 def add_param(self, name, description, annotations=[]):
34 self.params.append((name, description, annotations))
35 def append_to_last_param(self, extra):
36 self.params[-1] = (self.params[-1][0], self.params[-1][1] + extra,
38 def append_to_named_param(self, name, extra):
39 for i in range(len(self.params)):
40 if self.params[i][0] == name:
41 self.params[i] = (name, self.params[i][1] + extra,
44 # fall through to adding extra parameter ...
45 self.add_param(name, extra)
def add_annotation(self, annotation):
    '''Append one annotation to the ones already held by this block.

    The annotation object is stored as given (callers in this module pass
    (name, value) tuples).
    '''
    collected = self.annotations
    collected.append(annotation)
def get_annotations(self):
    '''Return the annotations collected so far (the stored object itself,
    not a copy).'''
    collected = self.annotations
    return collected
def append_to_description(self, extra):
    '''Concatenate extra onto the end of the block's description.'''
    combined = self.description + extra
    self.description = combined
def get_description(self):
    '''Return the description text accumulated for this block.'''
    text = self.description
    return text
def add_return(self, first_line, annotations=None):
    '''Set the return section of this block, replacing any previous one.

    first_line  -- the text that starts the return description.
    annotations -- annotations attached to the return section; when
                   omitted (or None) a new empty list is used.

    The result is stored in self.ret as (first_line, annotations).
    '''
    # A literal [] default would be created once and then shared by every
    # block that has no return annotations, so changing one block's list
    # would silently change all the others.  Build a fresh list per call.
    if annotations is None:
        annotations = []
    self.ret = (first_line, annotations)
def append_to_return(self, extra):
    '''Extend the return description with extra, leaving the return
    annotations untouched.'''
    current = self.ret
    self.ret = (current[0] + extra, current[1])
# Structure of a comment block: the opening '/**' line, the closing '*/'
# line, the ' * ' decoration that leads an interior line, and an interior
# line that holds nothing except that decoration.
comment_start_pattern = re.compile(r'^\s*/\*\*\s')
comment_end_pattern = re.compile(r'^\s*\*+/')
comment_line_lead_pattern = re.compile(r'^\s*\*\s*')
comment_empty_line_pattern = re.compile(r'^\s*\**\s*$')

# Identifier lines.  Group 1 is the identifier itself; it may be followed by
# an optional ':' and by up to two parenthesised annotation groups.
function_name_pattern = re.compile(
    r'^([a-z]\w*)\s*:?(\s*\(.*\)\s*){0,2}\s*$')
signal_name_pattern = re.compile(
    r'^([A-Z]\w+::[a-z0-9-]+)\s*:?(\s*\(.*\)\s*){0,2}\s*$')
property_name_pattern = re.compile(
    r'^([A-Z]\w+:[a-z0-9-]+)\s*:?(\s*\(.*\)\s*){0,2}\s*$')

# Section leads, matched case-insensitively against a line whose ' * '
# decoration has already been removed.
return_pattern = re.compile(
    r'^@?(returns:|return\s+value:)(.*\n?)$', re.I)
deprecated_pattern = re.compile(r'^(deprecated\s*:\s*.*\n?)$', re.I)
rename_to_pattern = re.compile(r'^(rename\s+to)\s*:\s*(.*\n?)$', re.I)

# A '@name: text' parameter line: group 1 is the name, group 2 the rest.
param_pattern = re.compile(r'^@(\S+)\s*:(.*\n?)$')

# Picks the '(annotation) (annotation) ...:' prefix out of the text that
# param_pattern / return_pattern captured after the name.
annotations_pattern = re.compile(r'^(?:(\s*\(.*\)\s*)*:)')

# Matches a single leading '(...)' group; used to peel annotations off one at
# a time when building the annotation lists.
annotation_lead_pattern = re.compile(r'^\s*\(\s*(.*?)\s*\)\s*')

# Patterns tried, in this order, to work out which kind of identifier a
# comment block starts with (see skip_to_identifier).  The function pattern is
# deliberately kept last.
identifier_patterns = [
    signal_name_pattern,
    property_name_pattern,
    function_name_pattern,
]

# Return sections written without the ':' after 'Returns' / 'Return value'.
# This pattern is kept out of final_section_patterns because many
# descriptions themselves start with 'Returns ...', and
# process_description() would otherwise stop on that very first line.
no_colon_return_pattern = re.compile(
    r'^@?(returns|return\s+value)\s*(.*\n?)$', re.I)

since_pattern = re.compile(r'^(since\s*:\s*.*\n?)$', re.I)

# Sections that normally follow the description.  Their relative order is not
# predictable, so this list is what the section processing checks to decide
# where one section stops and the next one starts.
final_section_patterns = [
    return_pattern,
    since_pattern,
    deprecated_pattern,
    rename_to_pattern,
]
96 def parse_file(fp, doc_dict):
100 line = skip_to_comment_block(fp, line)
101 line = skip_to_identifier(fp, line, cur_doc)
102 # See if the identifier is found (stored in the current GtkDoc by
103 # skip_to_identifier). If so, continue reading the rest of the comment
106 line = process_params(fp, line, cur_doc)
107 line = process_description(fp, line, cur_doc)
108 line = process_final_sections(fp, line, cur_doc)
109 # Add the current doc block to the dictionary of doc blocks.
110 doc_dict[cur_doc.name] = cur_doc
112 # Given a list of annotations as string of the form
113 # '(annotation1) (annotation2) ...' return a list of annotations of the form
114 # [ (name1, value1), (name2, value2) ... ]. Not all annotations have values so
115 # the values in the list of tuples could be empty ('').
116 def get_annotation_list(annotations):
119 match = annotation_lead_pattern.match(annotations)
121 annotation_contents = match.group(1)
122 name, split, value = annotation_contents.strip().partition(' ')
123 annotation_list.append((name, value))
124 # Remove first occurrence to continue processing.
125 annotations = annotation_lead_pattern.sub('', annotations)
128 return annotation_list
130 # Given a currently read line, test that line and continue reading until the
131 # beginning of a comment block is found or eof is reached. Return the last
133 def skip_to_comment_block(fp, line):
135 if comment_start_pattern.match(line):
140 # Given the current line in a comment block, continue skipping lines until a
141 # non-blank line in the comment block is found or until the end of the block
142 # (or eof) is reached. Returns the line where reading stopped.
143 def skip_to_nonblank(fp, line):
145 if not comment_empty_line_pattern.match(line):
148 # Stop processing if eof or end of comment block is reached.
149 if not line or comment_end_pattern.match(line):
153 # Given the first line of a comment block (the '/**'), see if the next
154 # non-blank line is the identifier of the comment block. Stop processing if
155 # the end of the block or eof is reached. Store the identifier (if there is
156 # one) and its type ('function', 'signal' or 'property') in the given GtkDoc.
157 # Return the line where the identifier is found or the line that stops the
158 # processing (if eof or the end of the comment block is found first).
159 def skip_to_identifier(fp, line, cur_doc):
160 # Skip the initial comment block line ('/**') if not eof.
161 if line: line = fp.readline()
163 # Now skip empty lines.
164 line = skip_to_nonblank(fp, line)
166 # See if the first non-blank line is the identifier.
167 if line and not comment_end_pattern.match(line):
168 # Remove the initial ' * ' in comment block line and see if there is an
170 line = comment_line_lead_pattern.sub('', line)
171 for pattern in identifier_patterns:
172 match = pattern.match(line)
174 # Set the GtkDoc name.
175 cur_doc.set_name(match.group(1))
176 # Get annotations and add them to the GtkDoc.
177 annotations = get_annotation_list(match.group(2))
178 for annotation in annotations:
179 cur_doc.add_annotation(annotation)
180 # Set the GtkDoc type.
181 if pattern == signal_name_pattern:
182 cur_doc.set_type('signal')
183 elif pattern == property_name_pattern:
184 cur_doc.set_type('property')
185 elif pattern == function_name_pattern:
186 cur_doc.set_type('function')
190 # Given a currently read line (presumably the identifier line), read the next
191 # lines, testing to see if the lines are part of parameter descriptions. If
192 # so, store the parameter descriptions in the given doc block. Stop on eof and
193 # return the last line that stops the processing.
194 def process_params(fp, line, cur_doc):
195 # Skip the identifier line if not eof. Also skip any blank lines in the
196 # comment block. Return if eof or the end of the comment block are
198 if line: line = fp.readline()
199 line = skip_to_nonblank(fp, line)
200 if not line or comment_end_pattern.match(line):
203 # Remove initial ' * ' in first non-empty comment block line.
204 line = comment_line_lead_pattern.sub('', line)
206 # Now process possible parameters as long as no eof or the end of the
207 # param section is not reached (which could be triggered by anything that
208 # doesn't match a '@param:..." line, even the end of the comment block).
209 match = param_pattern.match(line)
210 while line and match:
211 description = match.group(2)
213 # First extract the annotations from the description and save them.
215 annotation_match = annotations_pattern.match(description)
217 annotations = get_annotation_list(annotation_match.group(1))
218 # Remove the annotations from the description
219 description = annotations_pattern.sub('', description)
221 # Default to appending lines to current parameter.
222 append_func = cur_doc.append_to_last_param
224 # See if the return has been included as part of the parameter
225 # section and make sure that lines are added to the GtkDoc return if
227 if match.group(1).lower() == "returns":
228 cur_doc.add_return(description, annotations)
229 append_func = cur_doc.append_to_return
230 # If not, just add it as a regular parameter.
232 cur_doc.add_param(match.group(1), description, annotations)
234 # Now read lines and append them until next parameter, beginning of
235 # description (an empty line), the end of the comment block or eof.
238 # Stop processing if end of comment block or a blank comment line
240 if comment_empty_line_pattern.match(line) or \
241 comment_end_pattern.match(line):
244 # Remove initial ' * ' in comment block line.
245 line = comment_line_lead_pattern.sub('', line)
247 # Break from current param processing if a new one is
249 if param_pattern.match(line): break;
251 # Otherwise, just append the current line and get the next line.
255 # Re-evaluate match for while condition
256 match = param_pattern.match(line)
258 # End by returning the current line.
261 # Having processed parameters, read the following lines into the description of
262 # the current doc block until the end of the comment block, the end of file or
263 # a return section is encountered.
264 def process_description(fp, line, cur_doc):
265 # First skip empty lines returning on eof or end of comment block.
266 line = skip_to_nonblank(fp, line)
267 if not line or comment_end_pattern.match(line):
270 # Remove initial ' * ' in non-empty comment block line.
271 line = comment_line_lead_pattern.sub('', line)
273 # Also remove possible 'Description:' prefix.
274 if line[:12] == 'Description:': line = line[12:]
276 # Used to tell if the previous line was blank and a return section
277 # uncommonly marked with 'Returns ...' instead of 'Returns: ...' has
278 # started (assume it is non-empty to begin with).
279 prev_line = 'non-empty'
281 # Now read lines until a new section (like a return or a since section) is
284 # See if the description section has ended (if the line begins with
285 # 'Returns ...' and the previous line was empty -- this loop replaces
286 # empty lines with a newline).
287 if no_colon_return_pattern.match(line) and prev_line == '\n':
289 # Or if one of the patterns of the final sections match
290 for pattern in final_section_patterns:
291 if pattern.match(line):
294 # If not, append lines to description in the doc comment block.
295 cur_doc.append_to_description(line)
300 # Stop processing on eof or at the end of comment block.
301 if not line or comment_end_pattern.match(line):
304 # Remove initial ' * ' in line so that the text can be appended to the
305 # description of the comment block and make sure that if the line is
306 # empty it be interpreted as a newline.
307 line = comment_line_lead_pattern.sub('', line)
308 if not line: line = '\n'
310 # Given the line that ended the description (the first line of one of the final
311 # sections) process the final sections ('Returns:', 'Since:', etc.) until the
312 # end of the comment block or eof. Return the line that ends the processing.
313 def process_final_sections(fp, line, cur_doc):
314 while line and not comment_end_pattern.match(line):
315 # Remove leading ' * ' from current non-empty comment line.
316 line = comment_line_lead_pattern.sub('', line)
317 # Temporarily append the no colon return pattern to the final section
318 # patterns now that the description has been processed. It will be
319 # removed after the for loop below executes so that future descriptions
320 # that begin with 'Returns ...' are not interpreted as a return
322 final_section_patterns.append(no_colon_return_pattern)
323 for pattern in final_section_patterns:
324 match = pattern.match(line)
326 if pattern == return_pattern or \
327 pattern == no_colon_return_pattern:
328 # Dealing with a 'Returns:' so first extract the
329 # annotations from the description and save them.
330 description = match.group(2)
333 annotations_pattern.match(description)
336 get_annotation_list(annotation_match.group(1))
337 # Remove the annotations from the description
338 description = annotations_pattern.sub('', description)
340 # Now add the return.
341 cur_doc.add_return(description, annotations)
342 # In case more lines need to be appended.
343 append_func = cur_doc.append_to_return
344 elif pattern == rename_to_pattern:
345 # Dealing with a 'Rename to:' section (GObjectIntrospection
346 # annotation) so no further lines will be appended but this
347 # single one (and only to the annotations).
349 cur_doc.add_annotation((match.group(1),
352 # For all others ('Since:' and 'Deprecated:') just append
353 # the line to the description for now.
354 # But if --no-since is specified, don't append it.
355 if no_since and pattern == since_pattern:
358 cur_doc.append_to_description(line)
360 # In case more lines need to be appended.
361 append_func = cur_doc.append_to_description
363 # Stop final section pattern matching for loop since a match
364 # has already been found.
367 # Remove the no colon return pattern (which was temporarily added in
368 # the just executed loop) from the list of final section patterns.
369 final_section_patterns.pop()
373 # Now continue appending lines to current section until a new one is
374 # found or an eof or the end of the comment block is encountered.
376 while not finished and line and \
377 not comment_end_pattern.match(line):
378 # Remove leading ' * ' from line and make sure that if it is empty,
379 # it be interpreted as a newline.
380 line = comment_line_lead_pattern.sub('', line)
381 if not line: line = '\n'
383 for pattern in final_section_patterns:
384 if pattern.match(line):
388 # Break out of loop if a new section is found (determined in above
392 # Now it's safe to append line.
393 if append_func: append_func(line)
395 # Get the next line to continue processing.
def parse_dir(dir, doc_dict):
    '''Walk dir recursively and parse every '.c' file found in it.

    dir      -- path of the directory to scan.
    doc_dict -- dictionary handed on to parse_file() for each source file
                and to the recursive calls for each sub-directory.

    A "Processing <path>" line is written to stderr for each file parsed.
    Errors from os.listdir() or open() are not caught here.
    '''
    for entry in os.listdir(dir):
        if entry in ('.', '..'): continue
        path = os.path.join(dir, entry)
        if os.path.isdir(path):
            parse_dir(path, doc_dict)
        # 'elif', so that a directory whose name happens to end in '.c' is
        # only recursed into and never handed to open().
        elif len(entry) > 2 and entry.endswith('.c'):
            sys.stderr.write("Processing " + path + '\n')
            # Close each source file as soon as it has been parsed instead
            # of leaving the handle open until it is garbage collected.
            with open(path, 'r') as fp:
                parse_file(fp, doc_dict)
410 def extract(dirs, doc_dict=None):
411 if not doc_dict: doc_dict = {}
413 parse_dir(dir, doc_dict)
# Section marker line of a template file, e.g.
# '<!-- ##### FUNCTION some_name ##### -->'.  Group 1 is the section type
# and group 2 the section name.
tmpl_section_pattern = re.compile(
    r'^<!-- ##### (\w+) (\w+) ##### -->$')
417 def parse_tmpl(fp, doc_dict):
422 match = tmpl_section_pattern.match(line)
424 cur_doc = None # new input shouldn't affect the old doc dict
425 sect_type = match.group(1)
426 sect_name = match.group(2)
428 if sect_type == 'FUNCTION':
429 cur_doc = doc_dict.get(sect_name)
432 cur_doc.set_name(sect_name)
433 doc_dict[sect_name] = cur_doc
434 elif line == '<!-- # Unused Parameters # -->\n':
435 cur_doc = None # don't worry about unused params.
437 if line[:10] == '@Returns: ':
438 if string.strip(line[10:]):
439 cur_doc.append_to_return(line[10:])
441 pos = string.find(line, ':')
443 cur_doc.append_to_named_param(line[1:pos], line[pos+1:])
445 cur_doc.append_to_description(line)
447 cur_doc.append_to_description(line)
451 def extract_tmpl(dirs, doc_dict=None):
452 if not doc_dict: doc_dict = {}
454 for file in os.listdir(dir):
455 if file in ('.', '..'): continue
456 path = os.path.join(dir, file)
457 if os.path.isdir(path):
459 if len(file) > 2 and file[-2:] == '.sgml':
460 parse_tmpl(open(path, 'r'), doc_dict)