src/tools/docmaker/sources.py

   1 #  Sources (c) 2002, 2003, 2004, 2006, 2007, 2008, 2009
   2 #    David Turner <david@freetype.org>
   3 #
   4 #
# this file contains definitions of classes needed to decompose
# C source files into a series of multi-line "blocks". There are
# two kinds of blocks:
   8 #
   9 #   - normal blocks, which contain source code or ordinary comments
  10 #
#   - documentation blocks, which have restricted formatting, and
#     whose text always starts with a documentation markup tag like
#     "<Function>", "<Type>", etc.
  14 #
  15 # the routines used to process the content of documentation blocks
  16 # are not contained here, but in "content.py"
  17 #
  18 # the classes and methods found here only deal with text parsing
  19 # and basic documentation block extraction
  20 #
  21
  22 import fileinput, re, sys, os, string
  23
  24
  25
  26 ################################################################
  27 ##
  28 ##  BLOCK FORMAT PATTERN
  29 ##
  30 ##   A simple class containing compiled regular expressions used
  31 ##   to detect potential documentation format block comments within
  32 ##   C source code
  33 ##
  34 ##   note that the 'column' pattern must contain a group that will
  35 ##   be used to "unbox" the content of documentation comment blocks
  36 ##
  37 class  SourceBlockFormat:
  38
  39     def  __init__( self, id, start, column, end ):
  40         """create a block pattern, used to recognize special documentation blocks"""
  41         self.id     = id
  42         self.start  = re.compile( start, re.VERBOSE )
  43         self.column = re.compile( column, re.VERBOSE )
  44         self.end    = re.compile( end, re.VERBOSE )
  45
  46
  47
  48 #
  49 # format 1 documentation comment blocks look like the following:
  50 #
  51 #    /************************************/
  52 #    /*                                  */
  53 #    /*                                  */
  54 #    /*                                  */
  55 #    /************************************/
  56 #
  57 # we define a few regular expressions here to detect them
  58 #
  59
  60 start = r'''
  61   \s*      # any number of whitespace
  62   /\*{2,}/ # followed by '/' and at least two asterisks then '/'
  63   \s*$     # probably followed by whitespace
  64 '''
  65
  66 column = r'''
  67   \s*      # any number of whitespace
  68   /\*{1}   # followed by '/' and precisely one asterisk
  69   ([^*].*) # followed by anything (group 1)
  70   \*{1}/   # followed by one asterisk and a '/'
  71   \s*$     # probably followed by whitespace
  72 '''
  73
  74 re_source_block_format1 = SourceBlockFormat( 1, start, column, start )
  75
  76
  77 #
  78 # format 2 documentation comment blocks look like the following:
  79 #
  80 #    /************************************ (at least 2 asterisks)
  81 #     *
  82 #     *
  83 #     *
  84 #     *
  85 #     **/       (1 or more asterisks at the end)
  86 #
  87 # we define a few regular expressions here to detect them
  88 #
  89 start = r'''
  90   \s*     # any number of whitespace
  91   /\*{2,} # followed by '/' and at least two asterisks
  92   \s*$    # probably followed by whitespace
  93 '''
  94
  95 column = r'''
  96   \s*        # any number of whitespace
  97   \*{1}(?!/) # followed by precisely one asterisk not followed by `/'
  98   (.*)       # then anything (group1)
  99 '''
 100
 101 end = r'''
 102   \s*  # any number of whitespace
 103   \*+/ # followed by at least one asterisk, then '/'
 104 '''
 105
 106 re_source_block_format2 = SourceBlockFormat( 2, start, column, end )
 107
 108
 109 #
 110 # the list of supported documentation block formats, we could add new ones
 111 # relatively easily
 112 #
 113 re_source_block_formats = [re_source_block_format1, re_source_block_format2]
 114
 115
 116 #
# the following regular expressions correspond to markup tags
# within the documentation comment blocks. they're equivalent
# despite their different syntax
 120 #
 121 # notice how each markup tag _must_ begin a new line
 122 #
 123 re_markup_tag1 = re.compile( r'''\s*<(\w*)>''' )  # <xxxx> format
 124 re_markup_tag2 = re.compile( r'''\s*@(\w*):''' )  # @xxxx: format
 125
 126 #
 127 # the list of supported markup tags, we could add new ones relatively
 128 # easily
 129 #
 130 re_markup_tags = [re_markup_tag1, re_markup_tag2]
 131
 132 #
 133 # used to detect a cross-reference, after markup tags have been stripped
 134 #
 135 re_crossref = re.compile( r'@(\w*)(.*)' )
 136
 137 #
 138 # used to detect italic and bold styles in paragraph text
 139 #
 140 re_italic = re.compile( r"_(\w(\w|')*)_(.*)" )     #  _italic_
 141 re_bold   = re.compile( r"\*(\w(\w|')*)\*(.*)" )   #  *bold*
 142
 143 #
 144 # used to detect the end of commented source lines
 145 #
 146 re_source_sep = re.compile( r'\s*/\*\s*\*/' )
 147
 148 #
 149 # used to perform cross-reference within source output
 150 #
 151 re_source_crossref = re.compile( r'(\W*)(\w*)' )
 152
 153 #
 154 # a list of reserved source keywords
 155 #
 156 re_source_keywords = re.compile( '''\\b ( typedef   |
 157                                           struct    |
 158                                           enum      |
 159                                           union     |
 160                                           const     |
 161                                           char      |
 162                                           int       |
 163                                           short     |
 164                                           long      |
 165                                           void      |
 166                                           signed    |
 167                                           unsigned  |
 168                                           \#include |
 169                                           \#define  |
 170                                           \#undef   |
 171                                           \#if      |
 172                                           \#ifdef   |
 173                                           \#ifndef  |
 174                                           \#else    |
 175                                           \#endif   ) \\b''', re.VERBOSE )
 176
 177
 178 ################################################################
 179 ##
 180 ##  SOURCE BLOCK CLASS
 181 ##
 182 ##   A SourceProcessor is in charge of reading a C source file
 183 ##   and decomposing it into a series of different "SourceBlocks".
 184 ##   each one of these blocks can be made of the following data:
 185 ##
 186 ##   - A documentation comment block that starts with "/**" and
 187 ##     whose exact format will be discussed later
 188 ##
 189 ##   - normal sources lines, including comments
 190 ##
 191 ##   the important fields in a text block are the following ones:
 192 ##
 193 ##     self.lines   : a list of text lines for the corresponding block
 194 ##
##     self.content : for documentation comment blocks only, this is the
##                    block content that has been "unboxed" from its
##                    decoration. This is an empty list for all other
##                    blocks (i.e. sources or ordinary comments with no
##                    starting markup tag)
 200 ##
 201 class  SourceBlock:
 202
 203     def  __init__( self, processor, filename, lineno, lines ):
 204         self.processor = processor
 205         self.filename  = filename
 206         self.lineno    = lineno
 207         self.lines     = lines[:]
 208         self.format    = processor.format
 209         self.content   = []
 210
 211         if self.format == None:
 212             return
 213
 214         words = []
 215
 216         # extract comment lines
 217         lines = []
 218
 219         for line0 in self.lines:
 220             m = self.format.column.match( line0 )
 221             if m:
 222                 lines.append( m.group( 1 ) )
 223
 224         # now, look for a markup tag
 225         for l in lines:
 226             l = string.strip( l )
 227             if len( l ) > 0:
 228                 for tag in re_markup_tags:
 229                     if tag.match( l ):
 230                         self.content = lines
 231                         return
 232
 233     def  location( self ):
 234         return "(" + self.filename + ":" + repr( self.lineno ) + ")"
 235
 236     # debugging only - not used in normal operations
 237     def  dump( self ):
 238         if self.content:
 239             print "{{{content start---"
 240             for l in self.content:
 241                 print l
 242             print "---content end}}}"
 243             return
 244
 245         fmt = ""
 246         if self.format:
 247             fmt = repr( self.format.id ) + " "
 248
 249         for line in self.lines:
 250             print line
 251
 252
 253
 254 ################################################################
 255 ##
 256 ##  SOURCE PROCESSOR CLASS
 257 ##
 258 ##   The SourceProcessor is in charge of reading a C source file
 259 ##   and decomposing it into a series of different "SourceBlock"
 260 ##   objects.
 261 ##
 262 ##   each one of these blocks can be made of the following data:
 263 ##
 264 ##   - A documentation comment block that starts with "/**" and
 265 ##     whose exact format will be discussed later
 266 ##
 267 ##   - normal sources lines, include comments
 268 ##
 269 ##
 270 class  SourceProcessor:
 271
 272     def  __init__( self ):
 273         """initialize a source processor"""
 274         self.blocks   = []
 275         self.filename = None
 276         self.format   = None
 277         self.lines    = []
 278
 279     def  reset( self ):
 280         """reset a block processor, clean all its blocks"""
 281         self.blocks = []
 282         self.format = None
 283
 284     def  parse_file( self, filename ):
 285         """parse a C source file, and add its blocks to the processor's list"""
 286         self.reset()
 287
 288         self.filename = filename
 289
 290         fileinput.close()
 291         self.format = None
 292         self.lineno = 0
 293         self.lines  = []
 294
 295         for line in fileinput.input( filename ):
 296             # strip trailing newlines, important on Windows machines!
 297             if line[-1] == '\012':
 298                 line = line[0:-1]
 299
 300             if self.format == None:
 301                 self.process_normal_line( line )
 302             else:
 303                 if self.format.end.match( line ):
 304                     # that's a normal block end, add it to 'lines' and
 305                     # create a new block
 306                     self.lines.append( line )
 307                     self.add_block_lines()
 308                 elif self.format.column.match( line ):
 309                     # that's a normal column line, add it to 'lines'
 310                     self.lines.append( line )
 311                 else:
 312                     # humm.. this is an unexpected block end,
 313                     # create a new block, but don't process the line
 314                     self.add_block_lines()
 315
 316                     # we need to process the line again
 317                     self.process_normal_line( line )
 318
 319         # record the last lines
 320         self.add_block_lines()
 321
 322     def  process_normal_line( self, line ):
 323         """process a normal line and check whether it is the start of a new block"""
 324         for f in re_source_block_formats:
 325             if f.start.match( line ):
 326                 self.add_block_lines()
 327                 self.format = f
 328                 self.lineno = fileinput.filelineno()
 329
 330         self.lines.append( line )
 331
 332     def  add_block_lines( self ):
 333         """add the current accumulated lines and create a new block"""
 334         if self.lines != []:
 335             block = SourceBlock( self, self.filename, self.lineno, self.lines )
 336
 337             self.blocks.append( block )
 338             self.format = None
 339             self.lines  = []
 340
 341     # debugging only, not used in normal operations
 342     def  dump( self ):
 343         """print all blocks in a processor"""
 344         for b in self.blocks:
 345             b.dump()
 346
 347 # eof