src/tools/docmaker/sources.py

   1 #
   2 #  sources.py
   3 #
   4 #    Convert source code comments to multi-line blocks (library file).
   5 #
   6 #  Copyright 2002-2004, 2006-2009, 2012-2014 by
   7 #  David Turner.
   8 #
   9 #  This file is part of the FreeType project, and may only be used,
  10 #  modified, and distributed under the terms of the FreeType project
  11 #  license, LICENSE.TXT.  By continuing to use, modify, or distribute
  12 #  this file you indicate that you have read the license and
  13 #  understand and accept it fully.
  14
  15 #
  16 # This library file contains definitions of classes needed to decompose C
  17 # source code files into a series of multi-line `blocks'.  There are two
  18 # kinds of blocks.
  19 #
  20 #   - Normal blocks, which contain source code or ordinary comments.
  21 #
  22 #   - Documentation blocks, which have restricted formatting, and whose text
  23 #     always start with a documentation markup tag like `<Function>',
  24 #     `<Type>', etc.
  25 #
  26 # The routines to process the content of documentation blocks are contained
  27 # in file `content.py'; the classes and methods found here only deal with
  28 # text parsing and basic documentation block extraction.
  29 #
  30
  31
  32 import fileinput, re, sys, os, string
  33
  34
  35 ################################################################
  36 ##
  37 ##  SOURCE BLOCK FORMAT CLASS
  38 ##
  39 ##  A simple class containing compiled regular expressions to detect
  40 ##  potential documentation format block comments within C source code.
  41 ##
  42 ##  The `column' pattern must contain a group to `unbox' the content of
  43 ##  documentation comment blocks.
  44 ##
  45 ##  Later on, paragraphs are converted to long lines, which simplifies the
  46 ##  regular expressions that act upon the text.
  47 ##
  48 class  SourceBlockFormat:
  49
  50     def  __init__( self, id, start, column, end ):
  51         """Create a block pattern, used to recognize special documentation
  52            blocks."""
  53         self.id     = id
  54         self.start  = re.compile( start, re.VERBOSE )
  55         self.column = re.compile( column, re.VERBOSE )
  56         self.end    = re.compile( end, re.VERBOSE )
  57
  58
#
# Format 1 documentation comment blocks.
#
#    /************************************/ (at least 2 asterisks)
#    /*                                  */
#    /*                                  */
#    /*                                  */
#    /************************************/ (at least 2 asterisks)
#
# The opening and the closing line of such a block look the same, which is
# why the `start' pattern is handed to `SourceBlockFormat' twice below, once
# as the start and once as the end pattern.
#
# Group 1 of the `column' pattern is the text between the leading `/*' and
# the trailing `*/'; its first character must not be an asterisk.
#
start = r'''
  \s*      # any number of whitespace
  /\*{2,}/ # followed by '/' and at least two asterisks then '/'
  \s*$     # probably followed by whitespace
'''

column = r'''
  \s*      # any number of whitespace
  /\*{1}   # followed by '/' and precisely one asterisk
  ([^*].*) # followed by anything (group 1)
  \*{1}/   # followed by one asterisk and a '/'
  \s*$     # probably followed by whitespace
'''

re_source_block_format1 = SourceBlockFormat( 1, start, column, start )
  83
  84
#
# Format 2 documentation comment blocks.
#
#    /************************************ (at least 2 asterisks)
#     *
#     *                                    (1 asterisk)
#     *
#     */                                   (1 or more asterisks)
#
# Note that the module-level names `start' and `column' are rebound here;
# after this point they hold the format 2 pattern sources.
#
# Group 1 of the `column' pattern is everything that follows the single
# leading asterisk.  The `end' pattern only describes the beginning of the
# closing line; whatever follows the `/' is not constrained.
#
start = r'''
  \s*     # any number of whitespace
  /\*{2,} # followed by '/' and at least two asterisks
  \s*$    # probably followed by whitespace
'''

column = r'''
  \s*           # any number of whitespace
  \*{1}(?![*/]) # followed by precisely one asterisk not followed by `/'
  (.*)          # then anything (group1)
'''

end = r'''
  \s*  # any number of whitespace
  \*+/ # followed by at least one asterisk, then '/'
'''

re_source_block_format2 = SourceBlockFormat( 2, start, column, end )


#
# The list of supported documentation block formats.  We could add new ones
# quite easily.
#
re_source_block_formats = [re_source_block_format1, re_source_block_format2]
 119
 120
 121 #
 122 # The following regular expressions correspond to markup tags within the
 123 # documentation comment blocks.  They are equivalent despite their different
 124 # syntax.
 125 #
 126 # A markup tag consists of letters or character `-', to be found in group 1.
 127 #
 128 # Notice that a markup tag _must_ begin a new paragraph.
 129 #
 130 re_markup_tag1 = re.compile( r'''\s*<((?:\w|-)*)>''' )  # <xxxx> format
 131 re_markup_tag2 = re.compile( r'''\s*@((?:\w|-)*):''' )  # @xxxx: format
 132
 133 #
 134 # The list of supported markup tags.  We could add new ones quite easily.
 135 #
 136 re_markup_tags = [re_markup_tag1, re_markup_tag2]
 137
 138
 139 #
 140 # A regular expression to detect a cross reference, after markup tags have
 141 # been stripped off.  Group 1 is the reference, group 2 the rest of the
 142 # line.
 143 #
 144 # A cross reference consists of letters, digits, or characters `-' and `_'.
 145 #
 146 re_crossref = re.compile( r'@((?:\w|-)*)(.*)' )    #  @foo
 147
 148 #
 149 # Two regular expressions to detect italic and bold markup, respectively.
 150 # Group 1 is the markup, group 2 the rest of the line.
 151 #
 152 # Note that the markup is limited to words consisting of letters, digits,
 153 # the character `_', or an apostrophe (but not as the first character).
 154 #
 155 re_italic = re.compile( r"_(\w(?:\w|')*)_(.*)" )     #  _italic_
 156 re_bold   = re.compile( r"\*(\w(?:\w|')*)\*(.*)" )   #  *bold*
 157
 158 #
 159 # This regular expression code to identify an URL has been taken from
 160 #
 161 #   http://mail.python.org/pipermail/tutor/2002-September/017228.html
 162 #
 163 # (with slight modifications).
 164 #
 165 urls = r'(?:https?|telnet|gopher|file|wais|ftp)'
 166 ltrs = r'\w'
 167 gunk = r'/#~:.?+=&%@!\-'
 168 punc = r'.:?\-'
 169 any  = "%(ltrs)s%(gunk)s%(punc)s" % { 'ltrs' : ltrs,
 170                                       'gunk' : gunk,
 171                                       'punc' : punc }
 172 url  = r"""
 173          (
 174            \b                    # start at word boundary
 175            %(urls)s :            # need resource and a colon
 176            [%(any)s] +?          # followed by one or more of any valid
 177                                  # character, but be conservative and
 178                                  # take only what you need to...
 179            (?=                   # [look-ahead non-consumptive assertion]
 180              [%(punc)s]*         # either 0 or more punctuation
 181              (?:                 # [non-grouping parentheses]
 182                [^%(any)s] | $    # followed by a non-url char
 183                                  # or end of the string
 184              )
 185            )
 186          )
 187         """ % {'urls' : urls,
 188                'any'  : any,
 189                'punc' : punc }
 190
 191 re_url = re.compile( url, re.VERBOSE | re.MULTILINE )
 192
#
# A regular expression that stops collection of comments for the current
# block.  It describes an empty C comment, optionally preceded by
# whitespace and with optional whitespace between `/*' and `*/'.
#
re_source_sep = re.compile( r'\s*/\*\s*\*/' )   #  /* */

#
# A regular expression to find possible C identifiers while outputting
# source code verbatim, covering things like `*foo' or `(bar'.  Group 1 is
# the prefix, group 2 the identifier -- since we scan lines from left to
# right, sequentially splitting the source code into prefix and identifier
# is fully sufficient for our purposes.
#
# Both groups may be empty, so this expression also matches the empty
# string.
#
re_source_crossref = re.compile( r'(\W*)(\w*)' )
 207
 208 #
 209 # A regular expression that matches a list of reserved C source keywords.
 210 #
 211 re_source_keywords = re.compile( '''\\b ( typedef   |
 212                                           struct    |
 213                                           enum      |
 214                                           union     |
 215                                           const     |
 216                                           char      |
 217                                           int       |
 218                                           short     |
 219                                           long      |
 220                                           void      |
 221                                           signed    |
 222                                           unsigned  |
 223                                           \#include |
 224                                           \#define  |
 225                                           \#undef   |
 226                                           \#if      |
 227                                           \#ifdef   |
 228                                           \#ifndef  |
 229                                           \#else    |
 230                                           \#endif   ) \\b''', re.VERBOSE )
 231
 232
 233 ################################################################
 234 ##
 235 ##  SOURCE BLOCK CLASS
 236 ##
 237 ##  There are two important fields in a `SourceBlock' object.
 238 ##
 239 ##    self.lines
 240 ##      A list of text lines for the corresponding block.
 241 ##
 242 ##    self.content
  243 ##      For documentation comment blocks only, this is the block content
  244 ##      that has been `unboxed' from its decoration.  This is an empty list
  245 ##      for all other blocks (i.e., sources or ordinary comments with no
  246 ##      starting markup tag).
 247 ##
 248 class  SourceBlock:
 249
 250     def  __init__( self, processor, filename, lineno, lines ):
 251         self.processor = processor
 252         self.filename  = filename
 253         self.lineno    = lineno
 254         self.lines     = lines[:]
 255         self.format    = processor.format
 256         self.content   = []
 257
 258         if self.format == None:
 259             return
 260
 261         words = []
 262
 263         # extract comment lines
 264         lines = []
 265
 266         for line0 in self.lines:
 267             m = self.format.column.match( line0 )
 268             if m:
 269                 lines.append( m.group( 1 ) )
 270
 271         # now, look for a markup tag
 272         for l in lines:
 273             l = string.strip( l )
 274             if len( l ) > 0:
 275                 for tag in re_markup_tags:
 276                     if tag.match( l ):
 277                         self.content = lines
 278                         return
 279
 280     def  location( self ):
 281         return "(" + self.filename + ":" + repr( self.lineno ) + ")"
 282
 283     # debugging only -- not used in normal operations
 284     def  dump( self ):
 285         if self.content:
 286             print "{{{content start---"
 287             for l in self.content:
 288                 print l
 289             print "---content end}}}"
 290             return
 291
 292         fmt = ""
 293         if self.format:
 294             fmt = repr( self.format.id ) + " "
 295
 296         for line in self.lines:
 297             print line
 298
 299
 300 ################################################################
 301 ##
 302 ##  SOURCE PROCESSOR CLASS
 303 ##
 304 ##  The `SourceProcessor' is in charge of reading a C source file and
 305 ##  decomposing it into a series of different `SourceBlock' objects.
 306 ##
 307 ##  A SourceBlock object consists of the following data.
 308 ##
 309 ##    - A documentation comment block using one of the layouts above.  Its
 310 ##      exact format will be discussed later.
 311 ##
 312 ##    - Normal sources lines, including comments.
 313 ##
 314 ##
 315 class  SourceProcessor:
 316
 317     def  __init__( self ):
 318         """Initialize a source processor."""
 319         self.blocks   = []
 320         self.filename = None
 321         self.format   = None
 322         self.lines    = []
 323
 324     def  reset( self ):
 325         """Reset a block processor and clean up all its blocks."""
 326         self.blocks = []
 327         self.format = None
 328
 329     def  parse_file( self, filename ):
 330         """Parse a C source file and add its blocks to the processor's
 331            list."""
 332         self.reset()
 333
 334         self.filename = filename
 335
 336         fileinput.close()
 337         self.format = None
 338         self.lineno = 0
 339         self.lines  = []
 340
 341         for line in fileinput.input( filename ):
 342             # strip trailing newlines, important on Windows machines!
 343             if line[-1] == '\012':
 344                 line = line[0:-1]
 345
 346             if self.format == None:
 347                 self.process_normal_line( line )
 348             else:
 349                 if self.format.end.match( line ):
 350                     # A normal block end.  Add it to `lines' and create a
 351                     # new block
 352                     self.lines.append( line )
 353                     self.add_block_lines()
 354                 elif self.format.column.match( line ):
 355                     # A normal column line.  Add it to `lines'.
 356                     self.lines.append( line )
 357                 else:
 358                     # An unexpected block end.  Create a new block, but
 359                     # don't process the line.
 360                     self.add_block_lines()
 361
 362                     # we need to process the line again
 363                     self.process_normal_line( line )
 364
 365         # record the last lines
 366         self.add_block_lines()
 367
 368     def  process_normal_line( self, line ):
 369         """Process a normal line and check whether it is the start of a new
 370            block."""
 371         for f in re_source_block_formats:
 372             if f.start.match( line ):
 373                 self.add_block_lines()
 374                 self.format = f
 375                 self.lineno = fileinput.filelineno()
 376
 377         self.lines.append( line )
 378
 379     def  add_block_lines( self ):
 380         """Add the current accumulated lines and create a new block."""
 381         if self.lines != []:
 382             block = SourceBlock( self,
 383                                  self.filename,
 384                                  self.lineno,
 385                                  self.lines )
 386
 387             self.blocks.append( block )
 388             self.format = None
 389             self.lines  = []
 390
 391     # debugging only, not used in normal operations
 392     def  dump( self ):
 393         """Print all blocks in a processor."""
 394         for b in self.blocks:
 395             b.dump()
 396
 397 # eof