1 # -*- Mode: Python; py-indent-offset: 4 -*-
2 '''Simple module for extracting GNOME style doc comments from C
3 sources, so I can use them for other purposes.'''
5 import sys, os, string, re
class GtkDoc:
    """Container for the pieces of one gtk-doc comment block.

    Attributes:
        name: identifier of the documented symbol (None until set_name()).
        block_type: '' until set_type() is called.
        params: list of (name, description, annotations) tuples.
        annotations: annotations attached to the block itself.
        description: description text accumulated so far.
        ret: (return description, annotations) tuple.
    """

    def __init__(self):
        self.name = None
        self.block_type = ''  # The block type ('function', 'signal', 'property')
        self.params = []
        self.annotations = []
        self.description = ''
        self.ret = ('', [])  # (return, annotations)

    def set_name(self, name):
        """Store the identifier this block documents."""
        self.name = name

    def set_type(self, block_type):
        """Store the block type string."""
        self.block_type = block_type

    def get_type(self):
        """Return the block type string."""
        return self.block_type

    def add_param(self, name, description, annotations=None):
        """Append a new (name, description, annotations) parameter entry.

        When no annotations are passed a fresh empty list is stored, so
        entries never share one default list object.
        """
        # NOTE(review): the numbering of the reviewed extract skips two lines
        # between this signature and the append below -- restore them here if
        # they carried logic.
        if annotations is None:
            annotations = []
        self.params.append((name, description, annotations))

    def append_to_last_param(self, extra):
        """Append text to the description of the most recently added param.

        Raises IndexError when no parameter has been added yet.
        """
        name, description, annotations = self.params[-1]
        self.params[-1] = (name, description + extra, annotations)

    def append_to_named_param(self, name, extra):
        """Append text to the first parameter called `name`.

        If no such parameter exists, a new one is added with `extra` as its
        description.
        """
        for index, (param_name, description, annotations) in \
                enumerate(self.params):
            if param_name == name:
                self.params[index] = (name, description + extra, annotations)
                return
        # fall through to adding extra parameter ...
        self.add_param(name, extra)

    def add_annotation(self, annotation):
        """Attach one annotation to the block itself."""
        self.annotations.append(annotation)

    def get_annotations(self):
        """Return the list of block-level annotations."""
        return self.annotations

    def append_to_description(self, extra):
        """Append text to the block description."""
        self.description = self.description + extra

    def get_description(self):
        """Return the description accumulated so far."""
        return self.description

    def add_return(self, first_line, annotations=None):
        """Replace the return entry with (first_line, annotations).

        A fresh empty list is stored when no annotations are passed.
        """
        if annotations is None:
            annotations = []
        self.ret = (first_line, annotations)

    def append_to_return(self, extra):
        """Append text to the return description, keeping its annotations."""
        self.ret = (self.ret[0] + extra, self.ret[1])
# Framing of a gtk-doc comment block: the opening '/**', the closing '*/',
# the ' * ' lead of every interior line, and an interior line with no text.
comment_start_pattern = re.compile(r'^\s*/\*\*\s')
comment_end_pattern = re.compile(r'^\s*\*+/')
comment_line_lead_pattern = re.compile(r'^\s*\*\s*')
comment_empty_line_pattern = re.compile(r'^\s*\**\s*$')

# Identifier line of a block.  Group 1 is the identifier; group 2 holds the
# last parenthesised annotation group, if any were matched.
signal_name_pattern = re.compile(r'^([A-Z]\w+::[a-z0-9-]+)\s*:?(\s*\(.*\)\s*){0,2}\s*$')
property_name_pattern = re.compile(r'^([A-Z]\w+:[a-z0-9-]+)\s*:?(\s*\(.*\)\s*){0,2}\s*$')
function_name_pattern = re.compile(r'^([a-z]\w*)\s*:?(\s*\(.*\)\s*){0,2}\s*$')

# Order in which skip_to_identifier tries the identifier patterns; the
# function pattern is deliberately kept last.
identifier_patterns = [
    signal_name_pattern,
    property_name_pattern,
    function_name_pattern,
]

# '@name: text' parameter lines.
param_pattern = re.compile(r'^@(\S+)\s*:(.*\n?)$')

# Leading '(...) (...):' annotations inside a text already captured by the
# parameter or return patterns ...
annotations_pattern = re.compile(r'^(?:(\s*\(.*\)\s*)*:)')
# ... and a single leading '(...)' group, used to build annotation lists.
annotation_lead_pattern = re.compile(r'^\s*\(\s*(.*?)\s*\)\s*')

# First lines of the sections that may follow the description.
return_pattern = re.compile(r'^@?(returns:|return\s+value:)(.*\n?)$', re.I)
since_pattern = re.compile(r'^(since\s*:\s*.*\n?)$', re.I)
deprecated_pattern = re.compile(r'^(deprecated\s*:\s*.*\n?)$', re.I)
rename_to_pattern = re.compile(r'^(rename\s+to)\s*:\s*(.*\n?)$', re.I)

# A return section written without the colon ('Returns ...').  It is kept
# out of final_section_patterns because many descriptions themselves start
# with 'Returns ...', and process_description() would stop on that first line.
no_colon_return_pattern = re.compile(r'^@?(returns|return\s+value)\s*(.*\n?)$', re.I)

# The sections that follow the description come in no fixed order, so this
# list is used to detect where one section ends and the next one begins.
final_section_patterns = [
    return_pattern,
    since_pattern,
    deprecated_pattern,
    rename_to_pattern,
]
def parse_file(fp, doc_dict):
    """Collect the gtk-doc comment blocks read from `fp` into `doc_dict`.

    Every block whose identifier line is recognised is stored in `doc_dict`
    under that identifier; blocks without a recognised identifier are
    dropped.  Reading goes through fp.readline() until it returns an empty
    string.
    """
    current = fp.readline()
    while current:
        doc = GtkDoc()
        current = skip_to_comment_block(fp, current)
        current = skip_to_identifier(fp, current, doc)
        # skip_to_identifier stores the name in the GtkDoc only when an
        # identifier was found; without one the block is not read further.
        if not doc.name:
            continue
        current = process_params(fp, current, doc)
        current = process_description(fp, current, doc)
        current = process_final_sections(fp, current, doc)
        # Register the finished block under its identifier.
        doc_dict[doc.name] = doc
def get_annotation_list(annotations):
    """Split '(annotation1) (annotation2) ...' into (name, value) tuples.

    The name is the text up to the first space inside the parentheses and
    the value is whatever follows it; annotations without a value get ''.
    Parsing stops at the first position that does not start with a
    parenthesised group.  A false `annotations` argument yields [].
    """
    parsed = []
    remaining = annotations
    while remaining:
        lead = annotation_lead_pattern.match(remaining)
        if lead is None:
            break
        name, _separator, value = lead.group(1).strip().partition(' ')
        parsed.append((name, value))
        # The pattern is anchored at the start of the string, so dropping the
        # matched prefix leaves exactly the text still to be processed.
        remaining = remaining[lead.end():]
    return parsed
def skip_to_comment_block(fp, line):
    """Advance to the line that opens the next comment block.

    Starting with the already-read `line`, lines are read from `fp` until
    one matches comment_start_pattern or eof is reached.  Returns that line,
    or the empty value that signalled eof.
    """
    while line and not comment_start_pattern.match(line):
        line = fp.readline()
    return line
def skip_to_nonblank(fp, line):
    """Skip blank comment lines, starting with the already-read `line`.

    Reading stops at the first line that is not an empty comment line, at
    the line closing the comment block, or at eof.  Returns the line where
    reading stopped.
    """
    while line and comment_empty_line_pattern.match(line):
        line = fp.readline()
        # Give up as soon as eof or the end of the comment block shows up.
        if not line or comment_end_pattern.match(line):
            break
    return line
def skip_to_identifier(fp, line, cur_doc):
    """Look for the identifier line that follows the opening '/**'.

    `line` is the first line of the comment block.  If the next non-blank
    line matches one of identifier_patterns, the identifier, its annotations
    and its type ('signal', 'property' or 'function') are stored in
    `cur_doc`.

    Returns the candidate identifier line with its ' * ' lead removed,
    whether or not it matched, or the line that stopped the search (eof or
    the end of the comment block).
    """
    # Move past the '/**' line itself, unless eof was already reached.
    if line:
        line = fp.readline()

    # Ignore blank lines before the identifier.
    line = skip_to_nonblank(fp, line)

    if not line or comment_end_pattern.match(line):
        return line

    # Drop the ' * ' lead so the identifier patterns can be applied.
    line = comment_line_lead_pattern.sub('', line)
    type_by_pattern = (
        (signal_name_pattern, 'signal'),
        (property_name_pattern, 'property'),
        (function_name_pattern, 'function'),
    )
    for pattern in identifier_patterns:
        found = pattern.match(line)
        if not found:
            continue
        # Name first, then the annotations that followed it on the line.
        cur_doc.set_name(found.group(1))
        for annotation in get_annotation_list(found.group(2)):
            cur_doc.add_annotation(annotation)
        # The type follows from which pattern matched.
        for known_pattern, block_type in type_by_pattern:
            if pattern == known_pattern:
                cur_doc.set_type(block_type)
                break
        return line
    return line
def process_params(fp, line, cur_doc):
    """Read the '@param: ...' section that follows the identifier line.

    `line` is the line read so far (presumably the identifier line).  Each
    parameter, with its leading annotations split off, is stored in
    `cur_doc`; an '@returns:' entry found among the parameters is stored as
    the return instead.  Continuation lines are appended to whichever entry
    was stored last.

    Returns the line that stopped the processing: eof, the end of the
    comment block, or the first line that is not part of a parameter.
    """
    # Step over the identifier line (unless at eof) and any blank lines.
    if line:
        line = fp.readline()
    line = skip_to_nonblank(fp, line)
    if not line or comment_end_pattern.match(line):
        return line

    # Drop the ' * ' lead of the first non-empty line.
    line = comment_line_lead_pattern.sub('', line)

    # Anything that is not an '@param: ...' line ends the section, and that
    # includes eof and the end of the comment block.
    param = param_pattern.match(line)
    while line and param:
        name = param.group(1)
        text = param.group(2)

        # Split leading annotations off the description.
        annotations = []
        leading = annotations_pattern.match(text)
        if leading:
            annotations = get_annotation_list(leading.group(1))
            text = annotations_pattern.sub('', text)

        # A return written in the parameter section goes to the GtkDoc
        # return, and so do its continuation lines; everything else is a
        # regular parameter.
        if name.lower() == "returns":
            cur_doc.add_return(text, annotations)
            append_func = cur_doc.append_to_return
        else:
            cur_doc.add_param(name, text, annotations)
            append_func = cur_doc.append_to_last_param

        # Append continuation lines until the next parameter, a blank
        # comment line (start of the description), the end of the comment
        # block or eof.
        line = fp.readline()
        while line and not (comment_empty_line_pattern.match(line) or
                            comment_end_pattern.match(line)):
            line = comment_line_lead_pattern.sub('', line)
            if param_pattern.match(line):
                break
            append_func(line)
            line = fp.readline()

        # Decide whether another parameter follows.
        param = param_pattern.match(line)

    return line
def process_description(fp, line, cur_doc):
    """Read the description that follows the parameter section.

    Lines are appended to the description of `cur_doc` until eof, the end of
    the comment block, or the first line of one of the final sections is
    met.  That stopping line is returned.
    """
    # Blank lines before the description are ignored.
    line = skip_to_nonblank(fp, line)
    if not line or comment_end_pattern.match(line):
        return line

    # Drop the ' * ' lead, then a possible 'Description:' prefix.
    line = comment_line_lead_pattern.sub('', line)
    if line.startswith('Description:'):
        line = line[len('Description:'):]

    # A colon-less 'Returns ...' line only starts a return section when the
    # line before it was blank (blank lines are stored as '\n' below), so the
    # previous line is tracked; it starts out as non-empty.
    prev_line = 'non-empty'

    while line:
        if prev_line == '\n' and no_colon_return_pattern.match(line):
            return line
        # One of the final sections starts on this line.
        if any(pattern.match(line) for pattern in final_section_patterns):
            return line

        # Still part of the description.
        cur_doc.append_to_description(line)

        prev_line = line
        line = fp.readline()

        # Eof or the end of the comment block ends the description.
        if not line or comment_end_pattern.match(line):
            return line

        # Drop the ' * ' lead; a line left empty stands for a newline.
        line = comment_line_lead_pattern.sub('', line) or '\n'
def process_final_sections(fp, line, cur_doc):
    """Read the sections after the description ('Returns:', 'Since:', ...).

    `line` is the line that ended the description, i.e. the first line of
    one of the final sections.  A return section is stored as the return of
    `cur_doc` with its leading annotations split off, a 'Rename to:' line is
    stored as an annotation, and every other section is appended to the
    description.  Continuation lines go to wherever their section went.

    Returns the line that ends the processing (eof or the end of the comment
    block).
    """
    # Now that the description is over, the colon-less 'Returns ...' form
    # also counts as the start of a section.  A local list is used so the
    # module-level final_section_patterns is never modified: descriptions
    # beginning with 'Returns ...' must not be taken for a return section,
    # even if an exception interrupts this function.
    section_start_patterns = final_section_patterns + [no_colon_return_pattern]

    # Where continuation lines go; stays None until a section is recognised.
    append_func = None

    while line and not comment_end_pattern.match(line):
        # Remove leading ' * ' from current non-empty comment line.
        line = comment_line_lead_pattern.sub('', line)

        for pattern in section_start_patterns:
            match = pattern.match(line)
            if not match:
                continue

            if pattern == return_pattern or \
                    pattern == no_colon_return_pattern:
                # A return section: split the leading annotations off the
                # description before storing it.
                description = match.group(2)
                annotations = []
                annotation_match = annotations_pattern.match(description)
                if annotation_match:
                    annotations = \
                        get_annotation_list(annotation_match.group(1))
                    description = annotations_pattern.sub('', description)
                cur_doc.add_return(description, annotations)
                append_func = cur_doc.append_to_return
            elif pattern == rename_to_pattern:
                # 'Rename to:' is a single-line GObjectIntrospection
                # annotation; nothing may be appended after it.
                append_func = None
                cur_doc.add_annotation((match.group(1), match.group(2)))
            else:
                # 'Since:' and 'Deprecated:' are kept as part of the
                # description for now.
                cur_doc.append_to_description(line)
                append_func = cur_doc.append_to_description

            # Only the first matching pattern is used.
            break

        line = fp.readline()

        # Append the following lines to the current section until a new
        # section starts, or eof or the end of the comment block is met.
        while line and not comment_end_pattern.match(line):
            # Drop the ' * ' lead; a line left empty stands for a newline.
            line = comment_line_lead_pattern.sub('', line)
            if not line:
                line = '\n'

            # A new section starts here: let the outer loop handle it.
            if any(pattern.match(line) for pattern in final_section_patterns):
                break

            if append_func:
                append_func(line)

            line = fp.readline()

    return line
def parse_dir(dir, doc_dict):
    """Recursively parse every '*.c' file below `dir` into `doc_dict`.

    Sub-directories are descended into.  Each regular entry whose name ends
    in '.c' (and is longer than the suffix alone) is announced on stderr and
    handed to parse_file().
    """
    for entry in os.listdir(dir):
        if entry in ('.', '..'):
            continue
        path = os.path.join(dir, entry)
        if os.path.isdir(path):
            parse_dir(path, doc_dict)
        # 'elif': a directory is only recursed into, never opened as a
        # source file, even when its name happens to end in '.c'.
        elif len(entry) > 2 and entry.endswith('.c'):
            sys.stderr.write("Processing " + path + '\n')
            # Close the file as soon as it has been parsed.
            with open(path, 'r') as fp:
                parse_file(fp, doc_dict)
def extract(dirs, doc_dict=None):
    """Parse the C sources below each directory in `dirs`.

    The doc blocks are added to `doc_dict`, which is also returned.  A new
    dictionary is created only when none is passed, so a caller-supplied
    dictionary -- even an empty one -- is filled in place.
    """
    if doc_dict is None:
        doc_dict = {}
    for directory in dirs:
        parse_dir(directory, doc_dict)
    return doc_dict
# Section header of a template file; group 1 is the section type and group 2
# the section name.
tmpl_section_pattern = re.compile(r'^<!-- ##### (\w+) (\w+) ##### -->$')


def parse_tmpl(fp, doc_dict):
    """Merge the documentation of one template file into `doc_dict`.

    Only FUNCTION sections are used.  Their text is added to the doc block
    already stored under the section name, or to a new GtkDoc registered
    under that name.  '@Returns: ' lines go to the return, other '@name:'
    lines to the named parameter, and all remaining lines to the
    description.  Everything after the 'Unused Parameters' marker is ignored
    until the next section header.
    """
    cur_doc = None

    line = fp.readline()
    while line:
        match = tmpl_section_pattern.match(line)
        if match:
            cur_doc = None  # new input shouldn't affect the old doc dict
            sect_type = match.group(1)
            sect_name = match.group(2)

            if sect_type == 'FUNCTION':
                cur_doc = doc_dict.get(sect_name)
                if not cur_doc:
                    cur_doc = GtkDoc()
                    cur_doc.set_name(sect_name)
                    doc_dict[sect_name] = cur_doc
        elif line == '<!-- # Unused Parameters # -->\n':
            cur_doc = None  # don't worry about unused params.
        elif cur_doc:
            if line[:10] == '@Returns: ':
                # str methods replace the string-module functions, which
                # exist only in Python 2.
                if line[10:].strip():
                    cur_doc.append_to_return(line[10:])
            elif line[0] == '@':
                pos = line.find(':')
                if pos >= 0:
                    cur_doc.append_to_named_param(line[1:pos], line[pos+1:])
                else:
                    cur_doc.append_to_description(line)
            else:
                cur_doc.append_to_description(line)

        line = fp.readline()
def extract_tmpl(dirs, doc_dict=None):
    """Parse the '*.sgml' template files found directly in `dirs`.

    Sub-directories are not descended into.  The documentation is merged
    into `doc_dict`, which is also returned.  A new dictionary is created
    only when none is passed, so a caller-supplied dictionary -- even an
    empty one -- is filled in place.
    """
    if doc_dict is None:
        doc_dict = {}
    suffix = '.sgml'
    for directory in dirs:
        for entry in os.listdir(directory):
            if entry in ('.', '..'):
                continue
            path = os.path.join(directory, entry)
            if os.path.isdir(path):
                continue
            # Compare the whole suffix: a two-character slice can never be
            # equal to '.sgml', which would leave every template unparsed.
            if len(entry) > len(suffix) and entry.endswith(suffix):
                # Close the file as soon as it has been parsed.
                with open(path, 'r') as fp:
                    parse_tmpl(fp, doc_dict)
    return doc_dict