src/tools/docmaker/sources.py

   1 #
   2 #  sources.py
   3 #
   4 #    Convert source code comments to multi-line blocks (library file).
   5 #
   6 #  Copyright 2002-2018 by
   7 #  David Turner.
   8 #
   9 #  This file is part of the FreeType project, and may only be used,
  10 #  modified, and distributed under the terms of the FreeType project
  11 #  license, LICENSE.TXT.  By continuing to use, modify, or distribute
  12 #  this file you indicate that you have read the license and
  13 #  understand and accept it fully.
  14
  15 #
  16 # This library file contains definitions of classes needed to decompose C
  17 # source code files into a series of multi-line `blocks'.  There are two
  18 # kinds of blocks.
  19 #
  20 #   - Normal blocks, which contain source code or ordinary comments.
  21 #
  22 #   - Documentation blocks, which have restricted formatting, and whose text
   23 #     always starts with a documentation markup tag like `<Function>',
  24 #     `<Type>', etc.
  25 #
  26 # The routines to process the content of documentation blocks are contained
  27 # in file `content.py'; the classes and methods found here only deal with
  28 # text parsing and basic documentation block extraction.
  29 #
  30
  31
  32 import fileinput, re, sys, os, string
  33
  34
  35 ################################################################
  36 ##
  37 ##  SOURCE BLOCK FORMAT CLASS
  38 ##
  39 ##  A simple class containing compiled regular expressions to detect
  40 ##  potential documentation format block comments within C source code.
  41 ##
  42 ##  The `column' pattern must contain a group to `unbox' the content of
  43 ##  documentation comment blocks.
  44 ##
  45 ##  Later on, paragraphs are converted to long lines, which simplifies the
  46 ##  regular expressions that act upon the text.
  47 ##
  48 class  SourceBlockFormat:
  49
  50     def  __init__( self, id, start, column, end ):
  51         """Create a block pattern, used to recognize special documentation
  52            blocks."""
  53         self.id     = id
  54         self.start  = re.compile( start, re.VERBOSE )
  55         self.column = re.compile( column, re.VERBOSE )
  56         self.end    = re.compile( end, re.VERBOSE )
  57
  58
  59 #
  60 # Format 1 documentation comment blocks.
  61 #
  62 #    /************************************/ (at least 2 asterisks)
  63 #    /*                                  */
  64 #    /*                                  */
  65 #    /*                                  */
  66 #    /************************************/ (at least 2 asterisks)
  67 #
  68 start = r'''
  69   \s*      # any number of whitespace
  70   /\*{2,}/ # followed by '/' and at least two asterisks then '/'
  71   \s*$     # probably followed by whitespace
  72 '''
  73
  74 column = r'''
  75   \s*      # any number of whitespace
  76   /\*{1}   # followed by '/' and precisely one asterisk
  77   ([^*].*) # followed by anything (group 1)
  78   \*{1}/   # followed by one asterisk and a '/'
  79   \s*$     # probably followed by whitespace
  80 '''
  81
  82 re_source_block_format1 = SourceBlockFormat( 1, start, column, start )
  83
  84
  85 #
  86 # Format 2 documentation comment blocks.
  87 #
  88 #    /************************************ (at least 2 asterisks)
  89 #     *
  90 #     *                                    (1 asterisk)
  91 #     *
  92 #     */                                   (1 or more asterisks)
  93 #
  94 start = r'''
  95   \s*     # any number of whitespace
  96   /\*{2,} # followed by '/' and at least two asterisks
  97   \s*$    # probably followed by whitespace
  98 '''
  99
 100 column = r'''
 101   \s*           # any number of whitespace
 102   \*{1}(?![*/]) # followed by precisely one asterisk not followed by `/'
 103   (.*)          # then anything (group1)
 104 '''
 105
 106 end = r'''
 107   \s*  # any number of whitespace
 108   \*+/ # followed by at least one asterisk, then '/'
 109 '''
 110
 111 re_source_block_format2 = SourceBlockFormat( 2, start, column, end )
 112
 113
 114 #
 115 # The list of supported documentation block formats.  We could add new ones
 116 # quite easily.
 117 #
 118 re_source_block_formats = [re_source_block_format1, re_source_block_format2]
 119
 120
 121 #
 122 # The following regular expressions correspond to markup tags within the
 123 # documentation comment blocks.  They are equivalent despite their different
 124 # syntax.
 125 #
 126 # A markup tag consists of letters or character `-', to be found in group 1.
 127 #
 128 # Notice that a markup tag _must_ begin a new paragraph.
 129 #
 130 re_markup_tag1 = re.compile( r'''\s*<((?:\w|-)*)>''' )  # <xxxx> format
 131 re_markup_tag2 = re.compile( r'''\s*@((?:\w|-)*):''' )  # @xxxx: format
 132
 133 #
 134 # The list of supported markup tags.  We could add new ones quite easily.
 135 #
 136 re_markup_tags = [re_markup_tag1, re_markup_tag2]
 137
 138
 139 #
 140 # A regular expression to detect a cross reference, after markup tags have
 141 # been stripped off.
 142 #
 143 # Two syntax forms are supported:
 144 #
 145 #   @<name>
 146 #   @<name>[<id>]
 147 #
 148 # where both `<name>' and `<id>' consist of alphanumeric characters, `_',
 149 # and `-'.  Use `<id>' if there are multiple, valid `<name>' entries.
 150 #
 151 # Example: @foo[bar]
 152 #
 153 re_crossref = re.compile( r"""
 154                             @
 155                             (?P<name>(?:\w|-)+
 156                                      (?:\[(?:\w|-)+\])?)
 157                             (?P<rest>.*)
 158                           """, re.VERBOSE )
 159
 160 #
 161 # Two regular expressions to detect italic and bold markup, respectively.
 162 # Group 1 is the markup, group 2 the rest of the line.
 163 #
 164 # Note that the markup is limited to words consisting of letters, digits,
 165 # the characters `_' and `-', or an apostrophe (but not as the first
 166 # character).
 167 #
 168 re_italic = re.compile( r"_((?:\w|-)(?:\w|'|-)*)_(.*)" )     #  _italic_
 169 re_bold   = re.compile( r"\*((?:\w|-)(?:\w|'|-)*)\*(.*)" )   #  *bold*
 170
 171 #
  172 # This regular expression code to identify a URL has been taken from
 173 #
 174 #   https://mail.python.org/pipermail/tutor/2002-September/017228.html
 175 #
 176 # (with slight modifications).
 177 #
 178 urls = r'(?:https?|telnet|gopher|file|wais|ftp)'
 179 ltrs = r'\w'
 180 gunk = r'/#~:.?+=&%@!\-'
 181 punc = r'.:?\-'
 182 any  = "%(ltrs)s%(gunk)s%(punc)s" % { 'ltrs' : ltrs,
 183                                       'gunk' : gunk,
 184                                       'punc' : punc }
 185 url  = r"""
 186          (
 187            \b                    # start at word boundary
 188            %(urls)s :            # need resource and a colon
 189            [%(any)s] +?          # followed by one or more of any valid
 190                                  # character, but be conservative and
 191                                  # take only what you need to...
 192            (?=                   # [look-ahead non-consumptive assertion]
 193              [%(punc)s]*         # either 0 or more punctuation
 194              (?:                 # [non-grouping parentheses]
 195                [^%(any)s] | $    # followed by a non-url char
 196                                  # or end of the string
 197              )
 198            )
 199          )
 200         """ % {'urls' : urls,
 201                'any'  : any,
 202                'punc' : punc }
 203
 204 re_url = re.compile( url, re.VERBOSE | re.MULTILINE )
 205
 206 #
 207 # A regular expression that stops collection of comments for the current
 208 # block.
 209 #
 210 re_source_sep = re.compile( r'\s*/\*\s*\*/' )   #  /* */
 211
 212 #
 213 # A regular expression to find possible C identifiers while outputting
 214 # source code verbatim, covering things like `*foo' or `(bar'.  Group 1 is
 215 # the prefix, group 2 the identifier -- since we scan lines from left to
 216 # right, sequentially splitting the source code into prefix and identifier
 217 # is fully sufficient for our purposes.
 218 #
 219 re_source_crossref = re.compile( r'(\W*)(\w*)' )
 220
 221 #
 222 # A regular expression that matches a list of reserved C source keywords.
 223 #
 224 re_source_keywords = re.compile( '''\\b ( typedef   |
 225                                           struct    |
 226                                           enum      |
 227                                           union     |
 228                                           const     |
 229                                           char      |
 230                                           int       |
 231                                           short     |
 232                                           long      |
 233                                           void      |
 234                                           signed    |
 235                                           unsigned  |
 236                                           \#include |
 237                                           \#define  |
 238                                           \#undef   |
 239                                           \#if      |
 240                                           \#ifdef   |
 241                                           \#ifndef  |
 242                                           \#else    |
 243                                           \#endif   ) \\b''', re.VERBOSE )
 244
 245
 246 ################################################################
 247 ##
 248 ##  SOURCE BLOCK CLASS
 249 ##
 250 ##  There are two important fields in a `SourceBlock' object.
 251 ##
 252 ##    self.lines
 253 ##      A list of text lines for the corresponding block.
 254 ##
 255 ##    self.content
  256 ##      For documentation comment blocks only, this is the block content
  257 ##      that has been `unboxed' from its decoration.  This is an empty
  258 ##      list for all other blocks (i.e., sources or ordinary comments with
  259 ##      no starting markup tag).
 260 ##
 261 class  SourceBlock:
 262
 263     def  __init__( self, processor, filename, lineno, lines ):
 264         self.processor = processor
 265         self.filename  = filename
 266         self.lineno    = lineno
 267         self.lines     = lines[:]
 268         self.format    = processor.format
 269         self.content   = []
 270
 271         if self.format == None:
 272             return
 273
 274         words = []
 275
 276         # extract comment lines
 277         lines = []
 278
 279         for line0 in self.lines:
 280             m = self.format.column.match( line0 )
 281             if m:
 282                 lines.append( m.group( 1 ) )
 283
 284         # now, look for a markup tag
 285         for l in lines:
 286             l = string.strip( l )
 287             if len( l ) > 0:
 288                 for tag in re_markup_tags:
 289                     if tag.match( l ):
 290                         self.content = lines
 291                         return
 292
 293     def  location( self ):
 294         return "(" + self.filename + ":" + repr( self.lineno ) + ")"
 295
 296     # debugging only -- not used in normal operations
 297     def  dump( self ):
 298         if self.content:
 299             print "{{{content start---"
 300             for l in self.content:
 301                 print l
 302             print "---content end}}}"
 303             return
 304
 305         fmt = ""
 306         if self.format:
 307             fmt = repr( self.format.id ) + " "
 308
 309         for line in self.lines:
 310             print line
 311
 312
 313 ################################################################
 314 ##
 315 ##  SOURCE PROCESSOR CLASS
 316 ##
 317 ##  The `SourceProcessor' is in charge of reading a C source file and
 318 ##  decomposing it into a series of different `SourceBlock' objects.
 319 ##
 320 ##  A SourceBlock object consists of the following data.
 321 ##
 322 ##    - A documentation comment block using one of the layouts above.  Its
 323 ##      exact format will be discussed later.
 324 ##
 325 ##    - Normal sources lines, including comments.
 326 ##
 327 ##
 328 class  SourceProcessor:
 329
 330     def  __init__( self ):
 331         """Initialize a source processor."""
 332         self.blocks   = []
 333         self.filename = None
 334         self.format   = None
 335         self.lines    = []
 336
 337     def  reset( self ):
 338         """Reset a block processor and clean up all its blocks."""
 339         self.blocks = []
 340         self.format = None
 341
 342     def  parse_file( self, filename ):
 343         """Parse a C source file and add its blocks to the processor's
 344            list."""
 345         self.reset()
 346
 347         self.filename = filename
 348
 349         fileinput.close()
 350         self.format = None
 351         self.lineno = 0
 352         self.lines  = []
 353
 354         for line in fileinput.input( filename ):
 355             # strip trailing newlines, important on Windows machines!
 356             if line[-1] == '\012':
 357                 line = line[0:-1]
 358
 359             if self.format == None:
 360                 self.process_normal_line( line )
 361             else:
 362                 if self.format.end.match( line ):
 363                     # A normal block end.  Add it to `lines' and create a
 364                     # new block
 365                     self.lines.append( line )
 366                     self.add_block_lines()
 367                 elif self.format.column.match( line ):
 368                     # A normal column line.  Add it to `lines'.
 369                     self.lines.append( line )
 370                 else:
 371                     # An unexpected block end.  Create a new block, but
 372                     # don't process the line.
 373                     self.add_block_lines()
 374
 375                     # we need to process the line again
 376                     self.process_normal_line( line )
 377
 378         # record the last lines
 379         self.add_block_lines()
 380
 381     def  process_normal_line( self, line ):
 382         """Process a normal line and check whether it is the start of a new
 383            block."""
 384         for f in re_source_block_formats:
 385             if f.start.match( line ):
 386                 self.add_block_lines()
 387                 self.format = f
 388                 self.lineno = fileinput.filelineno()
 389
 390         self.lines.append( line )
 391
 392     def  add_block_lines( self ):
 393         """Add the current accumulated lines and create a new block."""
 394         if self.lines != []:
 395             block = SourceBlock( self,
 396                                  self.filename,
 397                                  self.lineno,
 398                                  self.lines )
 399
 400             self.blocks.append( block )
 401             self.format = None
 402             self.lines  = []
 403
 404     # debugging only, not used in normal operations
 405     def  dump( self ):
 406         """Print all blocks in a processor."""
 407         for b in self.blocks:
 408             b.dump()
 409
 410 # eof