1 # Content (c) 2002, 2004, 2006, 2007, 2008, 2009
2 # David Turner <david@freetype.org>
4 # This file contains routines used to parse the content of documentation
5 # comment blocks and build more structured objects out of them.
13 # this regular expression is used to detect code sequences. these
14 # are simply code fragments embedded in '{' and '}' like in:
# note that the indentation of the opening and closing braces must be
# exactly the same. the code sequence can contain braces at greater
# group 1 captures the leading whitespace, i.e. the indentation of the
# opening (resp. closing) brace, which must be alone on its line
re_code_start = re.compile( r"(\s*){\s*$" )
re_code_end   = re.compile( r"(\s*)}\s*$" )
32 # this regular expression is used to isolate identifiers from
# group 1 is the (possibly empty) run of word characters found at the
# position where the match starts
re_identifier = re.compile( r'(\w*)' )
38 # we collect macros ending in `_H'; while outputting the object data, we use
39 # this info together with the object's file location to emit the appropriate
40 # header file macro and name before the object itself
# group 1 is the macro name (it must end in `_H'), group 2 is the text
# found between the angle brackets
re_header_macro = re.compile( r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>' )
45 #############################################################################
47 # The DocCode class is used to store source code lines.
49 # 'self.lines' contains a set of source code lines that will be dumped as
50 # HTML in a <PRE> tag.
52 # The object is filled line by line by the parser; it strips the leading
53 # "margin" space from each input line before storing it in 'self.lines'.
57 def __init__( self, margin, lines ):
61 # remove margin spaces
63 if string.strip( l[:margin] ) == "":
65 self.lines.append( l )
67 def dump( self, prefix = "", width = 60 ):
68 lines = self.dump_lines( 0, width )
72 def dump_lines( self, margin = 0, width = 60 ):
75 result.append( " " * margin + l )
80 #############################################################################
# The DocPara class is used to store "normal" text paragraphs.
84 # 'self.words' contains the list of words that make up the paragraph
88 def __init__( self, lines ):
93 self.words.extend( string.split( l ) )
95 def dump( self, prefix = "", width = 60 ):
96 lines = self.dump_lines( 0, width )
100 def dump_lines( self, margin = 0, width = 60 ):
101 cur = "" # current line
102 col = 0 # current width
105 for word in self.words:
111 result.append( " " * margin + cur )
121 result.append( " " * margin + cur )
127 #############################################################################
129 # The DocField class is used to store a list containing either DocPara or
130 # DocCode objects. Each DocField also has an optional "name" which is used
131 # when the object corresponds to a field or value definition
135 def __init__( self, name, lines ):
136 self.name = name # can be None for normal paragraphs/sources
137 self.items = [] # list of items
139 mode_none = 0 # start parsing mode
140 mode_code = 1 # parsing code sequences
141 mode_para = 3 # parsing normal paragraph
143 margin = -1 # current code sequence indentation
146 # now analyze the markup lines to see if they contain paragraphs,
147 # code sequences or fields definitions
153 # are we parsing a code sequence ?
154 if mode == mode_code:
155 m = re_code_end.match( l )
156 if m and len( m.group( 1 ) ) <= margin:
157 # that's it, we finished the code sequence
158 code = DocCode( 0, cur_lines )
159 self.items.append( code )
164 # nope, continue the code sequence
165 cur_lines.append( l[margin:] )
167 # start of code sequence ?
168 m = re_code_start.match( l )
172 para = DocPara( cur_lines )
173 self.items.append( para )
176 # switch to code extraction mode
177 margin = len( m.group( 1 ) )
180 if not string.split( l ) and cur_lines:
181 # if the line is empty, we end the current paragraph,
183 para = DocPara( cur_lines )
184 self.items.append( para )
187 # otherwise, simply add the line to the current
189 cur_lines.append( l )
191 if mode == mode_code:
192 # unexpected end of code sequence
193 code = DocCode( margin, cur_lines )
194 self.items.append( code )
196 para = DocPara( cur_lines )
197 self.items.append( para )
199 def dump( self, prefix = "" ):
201 print prefix + self.field + " ::"
202 prefix = prefix + "----"
211 def dump_lines( self, margin = 0, width = 60 ):
219 result.extend( p.dump_lines( margin, width ) )
226 # this regular expression is used to detect field definitions
# group 1 is the field name that precedes `::'; it is either a
# (possibly empty) plain word or a dotted name such as `a.b'
re_field = re.compile( r"\s*(\w*|\w(\w|\.)*\w)\s*::" )
234 def __init__( self, tag, lines ):
235 self.tag = string.lower( tag )
243 m = re_field.match( l )
245 # we detected the start of a new field definition
247 # first, save the current one
249 f = DocField( field, cur_lines )
250 self.fields.append( f )
254 field = m.group( 1 ) # record field name
255 ln = len( m.group( 0 ) )
256 l = " " * ln + l[ln:]
259 cur_lines.append( l )
261 if field or cur_lines:
262 f = DocField( field, cur_lines )
263 self.fields.append( f )
265 def get_name( self ):
267 return self.fields[0].items[0].words[0]
271 def get_start( self ):
274 for word in self.fields[0].items[0].words:
275 result = result + " " + word
280 def dump( self, margin ):
281 print " " * margin + "<" + self.tag + ">"
282 for f in self.fields:
284 print " " * margin + "</" + self.tag + ">"
290 def __init__( self, block ):
294 self.name = block.name
295 self.title = block.get_markup_words( "title" )
296 self.order = block.get_markup_words( "sections" )
299 self.title = string.split( "Miscellaneous" )
306 def __init__( self, name = "Other" ):
309 self.block_names = [] # ordered block names in section
312 self.description = ""
def add_def( self, block ):
    """append `block' to the list of definition blocks kept in
       `self.defs'"""
    self.defs.append( block )
def add_block( self, block ):
    """register `block' under its name: the name is appended to
       `self.block_names' and the block itself is stored in
       `self.blocks', keyed by that name"""
    name = block.name
    self.block_names.append( name )
    self.blocks[name] = block
325 # look up one block that contains a valid section description
326 for block in self.defs:
327 title = block.get_markup_text( "title" )
330 self.abstract = block.get_markup_words( "abstract" )
331 self.description = block.get_markup_items( "description" )
332 self.order = block.get_markup_words( "order" )
336 self.block_names = sort_order_list( self.block_names, self.order )
340 class ContentProcessor:
342 def __init__( self ):
343 """initialize a block content processor"""
346 self.sections = {} # dictionary of documentation sections
347 self.section = None # current documentation section
349 self.chapters = [] # list of chapters
351 self.headers = {} # dictionary of header macros
def set_section( self, section_name ):
    """set current section during parsing"""
    # create the section on first use; the `in' operator replaces
    # dict.has_key(), which no longer exists in Python 3
    if section_name not in self.sections:
        self.sections[section_name] = DocSection( section_name )

    self.section = self.sections[section_name]
def add_chapter( self, block ):
    """wrap `block' in a new DocChapter object and append it to
       `self.chapters'"""
    self.chapters.append( DocChapter( block ) )
368 """reset the content processor for a new block"""
371 self.markup_lines = []
373 def add_markup( self ):
374 """add a new markup section"""
375 if self.markup and self.markup_lines:
377 # get rid of last line of markup if it's empty
378 marks = self.markup_lines
379 if len( marks ) > 0 and not string.strip( marks[-1] ):
380 self.markup_lines = marks[:-1]
382 m = DocMarkup( self.markup, self.markup_lines )
384 self.markups.append( m )
387 self.markup_lines = []
389 def process_content( self, content ):
390 """process a block content and return a list of DocMarkup objects
391 corresponding to it"""
398 for t in re_markup_tags:
401 found = string.lower( m.group( 1 ) )
402 prefix = len( m.group( 0 ) )
403 line = " " * prefix + line[prefix:] # remove markup from line
406 # is it the start of a new markup section ?
409 self.add_markup() # add current markup content
411 if len( string.strip( line ) ) > 0:
412 self.markup_lines.append( line )
414 self.markup_lines.append( line )
420 def parse_sources( self, source_processor ):
421 blocks = source_processor.blocks
422 count = len( blocks )
424 for n in range( count ):
427 # this is a documentation comment, we need to catch
428 # all following normal blocks in the "follow" list
432 while m < count and not blocks[m].content:
433 follow.append( blocks[m] )
436 doc_block = DocBlock( source, follow, self )
439 # process all sections to extract their abstract, description
440 # and ordered list of items
442 for sec in self.sections.values():
445 # process chapters to check that all sections are correctly
447 for chap in self.chapters:
448 for sec in chap.order:
449 if self.sections.has_key( sec ):
450 section = self.sections[sec]
451 section.chapter = chap
453 chap.sections.append( section )
455 sys.stderr.write( "WARNING: chapter '" + \
456 chap.name + "' in " + chap.block.location() + \
457 " lists unknown section '" + sec + "'\n" )
459 # check that all sections are in a chapter
462 for sec in self.sections.values():
466 # create a new special chapter for all remaining sections
470 chap = DocChapter( None )
471 chap.sections = others
472 self.chapters.append( chap )
478 def __init__( self, source, follow, processor ):
483 self.type = "ERRTYPE"
484 self.name = "ERRNAME"
485 self.section = processor.section
486 self.markups = processor.process_content( source.content )
488 # compute block type from first markup tag
490 self.type = self.markups[0].tag
494 # compute block name from first markup paragraph
496 markup = self.markups[0]
497 para = markup.fields[0].items[0]
499 m = re_identifier.match( name )
506 if self.type == "section":
507 # detect new section starts
508 processor.set_section( self.name )
509 processor.section.add_def( self )
510 elif self.type == "chapter":
512 processor.add_chapter( self )
514 processor.section.add_block( self )
516 # now, compute the source lines relevant to this documentation
517 # block. We keep normal comments in for obvious reasons (??)
523 # collect header macro definitions
524 m = re_header_macro.match( l )
526 processor.headers[m.group( 2 )] = m.group( 1 );
528 # we use "/* */" as a separator
529 if re_source_sep.match( l ):
533 # now strip the leading and trailing empty lines from the sources
535 end = len( source ) - 1
537 while start < end and not string.strip( source[start] ):
540 while start < end and not string.strip( source[end] ):
543 if start == end and not string.strip( source[start] ):
546 self.code = source[start:end + 1]
def location( self ):
    """return the location of this block, as reported by its source
       (this simply delegates to `self.source.location()')"""
    return self.source.location()
551 def get_markup( self, tag_name ):
552 """return the DocMarkup corresponding to a given tag in a block"""
553 for m in self.markups:
554 if m.tag == string.lower( tag_name ):
558 def get_markup_name( self, tag_name ):
559 """return the name of a given primary markup in a block"""
561 m = self.get_markup( tag_name )
566 def get_markup_words( self, tag_name ):
568 m = self.get_markup( tag_name )
569 return m.fields[0].items[0].words
def get_markup_text( self, tag_name ):
    """return the words of markup `tag_name', as given by
       get_markup_words(), joined into a single space-separated string"""
    # str.join is available in both Python 2 and Python 3, unlike the
    # string.join() function, which was removed in Python 3
    return " ".join( self.get_markup_words( tag_name ) )
577 def get_markup_items( self, tag_name ):
579 m = self.get_markup( tag_name )
580 return m.fields[0].items