src/tools/docmaker/content.py

   1 #
   2 #  content.py
   3 #
   4 #    Parse comment blocks to build content blocks (library file).
   5 #
   6 #  Copyright 2002, 2004, 2006-2009, 2012-2014 by
   7 #  David Turner.
   8 #
   9 #  This file is part of the FreeType project, and may only be used,
  10 #  modified, and distributed under the terms of the FreeType project
  11 #  license, LICENSE.TXT.  By continuing to use, modify, or distribute
  12 #  this file you indicate that you have read the license and
  13 #  understand and accept it fully.
  14
  15 #
  16 # This file contains routines to parse documentation comment blocks,
  17 # building more structured objects out of them.
  18 #
  19
  20
  21 from sources import *
  22 from utils   import *
  23
  24 import string, re
  25
  26
  27 #
  28 # Regular expressions to detect code sequences.  `Code sequences' are simply
  29 # code fragments embedded in '{' and '}', as demonstrated in the following
  30 # example.
  31 #
  32 #   {
  33 #     x = y + z;
  34 #     if ( zookoo == 2 )
  35 #     {
  36 #       foobar();
  37 #     }
  38 #   }
  39 #
  40 # Note that the indentation of the first opening brace and the last closing
  41 # brace must be exactly the same.  The code sequence itself should have a
  42 # larger indentation than the surrounding braces.
  43 #
# Both patterns capture the leading whitespace of the brace line in group 1
# so that the indentation of the opening and closing braces can be compared.
re_code_start = re.compile( r"(\s*){\s*$" )
re_code_end   = re.compile( r"(\s*)}\s*$" )
  46
  47
  48 #
  49 # A regular expression to isolate identifiers from other text.
  50 #
# Group 1 is the (possibly empty) run of word characters and hyphens found
# at the position where the match starts.
re_identifier = re.compile( r'((?:\w|-)*)' )
  52
  53
  54 #
  55 # We collect macro names ending in `_H' (group 1), as defined in
  56 # `config/ftheader.h'.  While outputting the object data, we use this info
  57 # together with the object's file location (group 2) to emit the appropriate
  58 # header file macro and its associated file name before the object itself.
  59 #
  60 # Example:
  61 #
  62 #   #define FT_FREETYPE_H <freetype.h>
  63 #
# Group 1 is the macro name (it must end in `_H'), group 2 is the text
# between the angle brackets.
re_header_macro = re.compile( r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>' )
  65
  66
  67 ################################################################
  68 ##
  69 ##  DOC CODE CLASS
  70 ##
  71 ##  The `DocCode' class is used to store source code lines.
  72 ##
  73 ##  `self.lines' contains a set of source code lines that will be dumped as
  74 ##  HTML in a <PRE> tag.
  75 ##
  76 ##  The object is filled line by line by the parser; it strips the leading
  77 ##  `margin' space from each input line before storing it in `self.lines'.
  78 ##
  79 class  DocCode:
  80
  81     def  __init__( self, margin, lines ):
  82         self.lines = []
  83         self.words = None
  84
  85         # remove margin spaces
  86         for l in lines:
  87             if string.strip( l[:margin] ) == "":
  88                 l = l[margin:]
  89             self.lines.append( l )
  90
  91     def  dump( self, prefix = "", width = 60 ):
  92         lines = self.dump_lines( 0, width )
  93         for l in lines:
  94             print prefix + l
  95
  96     def  dump_lines( self, margin = 0, width = 60 ):
  97         result = []
  98         for l in self.lines:
  99             result.append( " " * margin + l )
 100         return result
 101
 102
 103
 104 ################################################################
 105 ##
 106 ##  DOC PARA CLASS
 107 ##
 108 ##  `Normal' text paragraphs are stored in the `DocPara' class.
 109 ##
 110 ##  `self.words' contains the list of words that make up the paragraph.
 111 ##
 112 class  DocPara:
 113
 114     def  __init__( self, lines ):
 115         self.lines = None
 116         self.words = []
 117         for l in lines:
 118             l = string.strip( l )
 119             self.words.extend( string.split( l ) )
 120
 121     def  dump( self, prefix = "", width = 60 ):
 122         lines = self.dump_lines( 0, width )
 123         for l in lines:
 124             print prefix + l
 125
 126     def  dump_lines( self, margin = 0, width = 60 ):
 127         cur    = ""  # current line
 128         col    = 0   # current width
 129         result = []
 130
 131         for word in self.words:
 132             ln = len( word )
 133             if col > 0:
 134                 ln = ln + 1
 135
 136             if col + ln > width:
 137                 result.append( " " * margin + cur )
 138                 cur = word
 139                 col = len( word )
 140             else:
 141                 if col > 0:
 142                     cur = cur + " "
 143                 cur = cur + word
 144                 col = col + ln
 145
 146         if col > 0:
 147             result.append( " " * margin + cur )
 148
 149         return result
 150
 151
 152 ################################################################
 153 ##
 154 ##  DOC FIELD CLASS
 155 ##
 156 ##  The `DocField' class stores a list containing either `DocPara' or
 157 ##  `DocCode' objects.  Each DocField object also has an optional `name'
 158 ##  that is used when the object corresponds to a field or value definition.
 159 ##
 160 class  DocField:
 161
 162     def  __init__( self, name, lines ):
 163         self.name  = name  # can be `None' for normal paragraphs/sources
 164         self.items = []    # list of items
 165
 166         mode_none  = 0     # start parsing mode
 167         mode_code  = 1     # parsing code sequences
 168         mode_para  = 3     # parsing normal paragraph
 169
 170         margin     = -1    # current code sequence indentation
 171         cur_lines  = []
 172
 173         # analyze the markup lines to check whether they contain paragraphs,
 174         # code sequences, or fields definitions
 175         #
 176         start = 0
 177         mode  = mode_none
 178
 179         for l in lines:
 180             # are we parsing a code sequence?
 181             if mode == mode_code:
 182                 m = re_code_end.match( l )
 183                 if m and len( m.group( 1 ) ) <= margin:
 184                     # that's it, we finished the code sequence
 185                     code = DocCode( 0, cur_lines )
 186                     self.items.append( code )
 187                     margin    = -1
 188                     cur_lines = []
 189                     mode      = mode_none
 190                 else:
 191                     # otherwise continue the code sequence
 192                     cur_lines.append( l[margin:] )
 193             else:
 194                 # start of code sequence?
 195                 m = re_code_start.match( l )
 196                 if m:
 197                     # save current lines
 198                     if cur_lines:
 199                         para = DocPara( cur_lines )
 200                         self.items.append( para )
 201                         cur_lines = []
 202
 203                     # switch to code extraction mode
 204                     margin = len( m.group( 1 ) )
 205                     mode   = mode_code
 206                 else:
 207                     if not string.split( l ) and cur_lines:
 208                         # if the line is empty, we end the current paragraph,
 209                         # if any
 210                         para = DocPara( cur_lines )
 211                         self.items.append( para )
 212                         cur_lines = []
 213                     else:
 214                         # otherwise, simply add the line to the current
 215                         # paragraph
 216                         cur_lines.append( l )
 217
 218         if mode == mode_code:
 219             # unexpected end of code sequence
 220             code = DocCode( margin, cur_lines )
 221             self.items.append( code )
 222         elif cur_lines:
 223             para = DocPara( cur_lines )
 224             self.items.append( para )
 225
 226     def  dump( self, prefix = "" ):
 227         if self.field:
 228             print prefix + self.field + " ::"
 229             prefix = prefix + "----"
 230
 231         first = 1
 232         for p in self.items:
 233             if not first:
 234                 print ""
 235             p.dump( prefix )
 236             first = 0
 237
 238     def  dump_lines( self, margin = 0, width = 60 ):
 239         result = []
 240         nl     = None
 241
 242         for p in self.items:
 243             if nl:
 244                 result.append( "" )
 245
 246             result.extend( p.dump_lines( margin, width ) )
 247             nl = 1
 248
 249         return result
 250
 251
 252 #
 253 # A regular expression to detect field definitions.
 254 #
 255 # Examples:
 256 #
 257 #   foo     ::
 258 #   foo.bar ::
 259 #
# Group 1 holds the field name.  Since the first alternative, `\w*', can
# match the empty string, group 1 may be empty.
re_field = re.compile( r"""
                         \s*
                           (
                             \w*
                           |
                             \w (\w | \.)* \w
                           )
                         \s* ::
                       """, re.VERBOSE )
 269
 270
 271 ################################################################
 272 ##
 273 ##  DOC MARKUP CLASS
 274 ##
 275 class  DocMarkup:
 276
 277     def  __init__( self, tag, lines ):
 278         self.tag    = string.lower( tag )
 279         self.fields = []
 280
 281         cur_lines = []
 282         field     = None
 283         mode      = 0
 284
 285         for l in lines:
 286             m = re_field.match( l )
 287             if m:
 288                 # We detected the start of a new field definition.
 289
 290                 # first, save the current one
 291                 if cur_lines:
 292                     f = DocField( field, cur_lines )
 293                     self.fields.append( f )
 294                     cur_lines = []
 295                     field     = None
 296
 297                 field     = m.group( 1 )   # record field name
 298                 ln        = len( m.group( 0 ) )
 299                 l         = " " * ln + l[ln:]
 300                 cur_lines = [l]
 301             else:
 302                 cur_lines.append( l )
 303
 304         if field or cur_lines:
 305             f = DocField( field, cur_lines )
 306             self.fields.append( f )
 307
 308     def  get_name( self ):
 309         try:
 310             return self.fields[0].items[0].words[0]
 311         except:
 312             return None
 313
 314     def  dump( self, margin ):
 315         print " " * margin + "<" + self.tag + ">"
 316         for f in self.fields:
 317             f.dump( "  " )
 318         print " " * margin + "</" + self.tag + ">"
 319
 320
 321 ################################################################
 322 ##
 323 ##  DOC CHAPTER CLASS
 324 ##
 325 class  DocChapter:
 326
 327     def  __init__( self, block ):
 328         self.block    = block
 329         self.sections = []
 330         if block:
 331             self.name  = block.name
 332             self.title = block.get_markup_words( "title" )
 333             self.order = block.get_markup_words( "sections" )
 334         else:
 335             self.name  = "Other"
 336             self.title = string.split( "Miscellaneous" )
 337             self.order = []
 338
 339
 340 ################################################################
 341 ##
 342 ##  DOC SECTION CLASS
 343 ##
 344 class  DocSection:
 345
 346     def  __init__( self, name = "Other" ):
 347         self.name        = name
 348         self.blocks      = {}
 349         self.block_names = []  # ordered block names in section
 350         self.defs        = []
 351         self.abstract    = ""
 352         self.description = ""
 353         self.order       = []
 354         self.title       = "ERROR"
 355         self.chapter     = None
 356
 357     def  add_def( self, block ):
 358         self.defs.append( block )
 359
 360     def  add_block( self, block ):
 361         self.block_names.append( block.name )
 362         self.blocks[block.name] = block
 363
 364     def  process( self ):
 365         # look up one block that contains a valid section description
 366         for block in self.defs:
 367             title = block.get_markup_text( "title" )
 368             if title:
 369                 self.title       = title
 370                 self.abstract    = block.get_markup_words( "abstract" )
 371                 self.description = block.get_markup_items( "description" )
 372                 self.order       = block.get_markup_words_all( "order" )
 373                 return
 374
 375     def  reorder( self ):
 376         self.block_names = sort_order_list( self.block_names, self.order )
 377
 378
 379 ################################################################
 380 ##
 381 ##  CONTENT PROCESSOR CLASS
 382 ##
 383 class  ContentProcessor:
 384
 385     def  __init__( self ):
 386         """Initialize a block content processor."""
 387         self.reset()
 388
 389         self.sections = {}    # dictionary of documentation sections
 390         self.section  = None  # current documentation section
 391
 392         self.chapters = []    # list of chapters
 393
 394         self.headers  = {}    # dictionary of header macros
 395
 396     def  set_section( self, section_name ):
 397         """Set current section during parsing."""
 398         if not section_name in self.sections:
 399             section = DocSection( section_name )
 400             self.sections[section_name] = section
 401             self.section                = section
 402         else:
 403             self.section = self.sections[section_name]
 404
 405     def  add_chapter( self, block ):
 406         chapter = DocChapter( block )
 407         self.chapters.append( chapter )
 408
 409     def  reset( self ):
 410         """Reset the content processor for a new block."""
 411         self.markups      = []
 412         self.markup       = None
 413         self.markup_lines = []
 414
 415     def  add_markup( self ):
 416         """Add a new markup section."""
 417         if self.markup and self.markup_lines:
 418
 419             # get rid of last line of markup if it's empty
 420             marks = self.markup_lines
 421             if len( marks ) > 0 and not string.strip( marks[-1] ):
 422                 self.markup_lines = marks[:-1]
 423
 424             m = DocMarkup( self.markup, self.markup_lines )
 425
 426             self.markups.append( m )
 427
 428             self.markup       = None
 429             self.markup_lines = []
 430
 431     def  process_content( self, content ):
 432         """Process a block content and return a list of DocMarkup objects
 433            corresponding to it."""
 434         markup       = None
 435         markup_lines = []
 436         first        = 1
 437
 438         for line in content:
 439             found = None
 440             for t in re_markup_tags:
 441                 m = t.match( line )
 442                 if m:
 443                     found  = string.lower( m.group( 1 ) )
 444                     prefix = len( m.group( 0 ) )
 445                     line   = " " * prefix + line[prefix:]   # remove markup from line
 446                     break
 447
 448             # is it the start of a new markup section ?
 449             if found:
 450                 first = 0
 451                 self.add_markup()  # add current markup content
 452                 self.markup = found
 453                 if len( string.strip( line ) ) > 0:
 454                     self.markup_lines.append( line )
 455             elif first == 0:
 456                 self.markup_lines.append( line )
 457
 458         self.add_markup()
 459
 460         return self.markups
 461
 462     def  parse_sources( self, source_processor ):
 463         blocks = source_processor.blocks
 464         count  = len( blocks )
 465
 466         for n in range( count ):
 467             source = blocks[n]
 468             if source.content:
 469                 # this is a documentation comment, we need to catch
 470                 # all following normal blocks in the "follow" list
 471                 #
 472                 follow = []
 473                 m = n + 1
 474                 while m < count and not blocks[m].content:
 475                     follow.append( blocks[m] )
 476                     m = m + 1
 477
 478                 doc_block = DocBlock( source, follow, self )
 479
 480     def  finish( self ):
 481         # process all sections to extract their abstract, description
 482         # and ordered list of items
 483         #
 484         for sec in self.sections.values():
 485             sec.process()
 486
 487         # process chapters to check that all sections are correctly
 488         # listed there
 489         for chap in self.chapters:
 490             for sec in chap.order:
 491                 if sec in self.sections:
 492                     section = self.sections[sec]
 493                     section.chapter = chap
 494                     section.reorder()
 495                     chap.sections.append( section )
 496                 else:
 497                     sys.stderr.write( "WARNING: chapter '" +          \
 498                         chap.name + "' in " + chap.block.location() + \
 499                         " lists unknown section '" + sec + "'\n" )
 500
 501         # check that all sections are in a chapter
 502         #
 503         others = []
 504         for sec in self.sections.values():
 505             if not sec.chapter:
 506                 sec.reorder()
 507                 others.append( sec )
 508
 509         # create a new special chapter for all remaining sections
 510         # when necessary
 511         #
 512         if others:
 513             chap = DocChapter( None )
 514             chap.sections = others
 515             self.chapters.append( chap )
 516
 517
 518 ################################################################
 519 ##
 520 ##  DOC BLOCK CLASS
 521 ##
 522 class  DocBlock:
 523
 524     def  __init__( self, source, follow, processor ):
 525         processor.reset()
 526
 527         self.source  = source
 528         self.code    = []
 529         self.type    = "ERRTYPE"
 530         self.name    = "ERRNAME"
 531         self.section = processor.section
 532         self.markups = processor.process_content( source.content )
 533
 534         # compute block type from first markup tag
 535         try:
 536             self.type = self.markups[0].tag
 537         except:
 538             pass
 539
 540         # compute block name from first markup paragraph
 541         try:
 542             markup = self.markups[0]
 543             para   = markup.fields[0].items[0]
 544             name   = para.words[0]
 545             m = re_identifier.match( name )
 546             if m:
 547                 name = m.group( 1 )
 548             self.name = name
 549         except:
 550             pass
 551
 552         if self.type == "section":
 553             # detect new section starts
 554             processor.set_section( self.name )
 555             processor.section.add_def( self )
 556         elif self.type == "chapter":
 557             # detect new chapter
 558             processor.add_chapter( self )
 559         else:
 560             processor.section.add_block( self )
 561
 562         # now, compute the source lines relevant to this documentation
 563         # block. We keep normal comments in for obvious reasons (??)
 564         source = []
 565         for b in follow:
 566             if b.format:
 567                 break
 568             for l in b.lines:
 569                 # collect header macro definitions
 570                 m = re_header_macro.match( l )
 571                 if m:
 572                     processor.headers[m.group( 2 )] = m.group( 1 );
 573
 574                 # we use "/* */" as a separator
 575                 if re_source_sep.match( l ):
 576                     break
 577                 source.append( l )
 578
 579         # now strip the leading and trailing empty lines from the sources
 580         start = 0
 581         end   = len( source ) - 1
 582
 583         while start < end and not string.strip( source[start] ):
 584             start = start + 1
 585
 586         while start < end and not string.strip( source[end] ):
 587             end = end - 1
 588
 589         if start == end and not string.strip( source[start] ):
 590             self.code = []
 591         else:
 592             self.code = source[start:end + 1]
 593
 594     def  location( self ):
 595         return self.source.location()
 596
 597     def  get_markup( self, tag_name ):
 598         """Return the DocMarkup corresponding to a given tag in a block."""
 599         for m in self.markups:
 600             if m.tag == string.lower( tag_name ):
 601                 return m
 602         return None
 603
 604     def  get_markup_words( self, tag_name ):
 605         try:
 606             m = self.get_markup( tag_name )
 607             return m.fields[0].items[0].words
 608         except:
 609             return []
 610
 611     def  get_markup_words_all( self, tag_name ):
 612         try:
 613             m = self.get_markup( tag_name )
 614             words = []
 615             for item in m.fields[0].items:
 616                 # We honour empty lines in an `<Order>' section element by
 617                 # adding the sentinel `/empty/'.  The formatter should then
 618                 # convert it to an appropriate representation in the
 619                 # `section_enter' function.
 620                 words += item.words
 621                 words.append( "/empty/" )
 622             return words
 623         except:
 624             return []
 625
 626     def  get_markup_text( self, tag_name ):
 627         result = self.get_markup_words( tag_name )
 628         return string.join( result )
 629
 630     def  get_markup_items( self, tag_name ):
 631         try:
 632             m = self.get_markup( tag_name )
 633             return m.fields[0].items
 634         except:
 635             return None
 636
 637 # eof