src/tools/docmaker/content.py

   1 #
   2 #  content.py
   3 #
   4 #    Parse comment blocks to build content blocks (library file).
   5 #
   6 #  Copyright 2002-2016 by
   7 #  David Turner.
   8 #
   9 #  This file is part of the FreeType project, and may only be used,
  10 #  modified, and distributed under the terms of the FreeType project
  11 #  license, LICENSE.TXT.  By continuing to use, modify, or distribute
  12 #  this file you indicate that you have read the license and
  13 #  understand and accept it fully.
  14
  15 #
  16 # This file contains routines to parse documentation comment blocks,
  17 # building more structured objects out of them.
  18 #
  19
  20
  21 from sources import *
  22 from utils   import *
  23
  24 import string, re
  25
  26
  27 #
  28 # Regular expressions to detect code sequences.  `Code sequences' are simply
  29 # code fragments embedded in '{' and '}', as demonstrated in the following
  30 # example.
  31 #
  32 #   {
  33 #     x = y + z;
  34 #     if ( zookoo == 2 )
  35 #     {
  36 #       foobar();
  37 #     }
  38 #   }
  39 #
  40 # Note that the indentation of the first opening brace and the last closing
  41 # brace must be exactly the same.  The code sequence itself should have a
  42 # larger indentation than the surrounding braces.
  43 #
  44 re_code_start = re.compile( r"(\s*){\s*$" )
  45 re_code_end   = re.compile( r"(\s*)}\s*$" )
  46
  47
  48 #
  49 # A regular expression to isolate identifiers from other text.  Two syntax
  50 # forms are supported:
  51 #
  52 #   <name>
  53 #   <name>[<id>]
  54 #
  55 # where both `<name>' and `<id>' consist of alphanumeric characters, `_',
  56 # and `-'.  Use `<id>' if there are multiple, valid `<name>' entries; in the
  57 # index, `<id>' will be appended in parentheses.
  58 #
  59 # For example,
  60 #
  61 #   stem_darkening[autofit]
  62 #
  63 # becomes `stem_darkening (autofit)' in the index.
  64 #
  65 re_identifier = re.compile( r"""
  66                               ((?:\w|-)+
  67                                (?:\[(?:\w|-)+\])?)
  68                             """, re.VERBOSE )
  69
  70
  71 #
  72 # We collect macro names ending in `_H' (group 1), as defined in
  73 # `freetype/config/ftheader.h'.  While outputting the object data, we use
  74 # this info together with the object's file location (group 2) to emit the
  75 # appropriate header file macro and its associated file name before the
  76 # object itself.
  77 #
  78 # Example:
  79 #
  80 #   #define FT_FREETYPE_H <freetype.h>
  81 #
  82 re_header_macro = re.compile( r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>' )
  83
  84
  85 ################################################################
  86 ##
  87 ##  DOC CODE CLASS
  88 ##
  89 ##  The `DocCode' class is used to store source code lines.
  90 ##
  91 ##  `self.lines' contains a set of source code lines that will be dumped as
  92 ##  HTML in a <PRE> tag.
  93 ##
  94 ##  The object is filled line by line by the parser; it strips the leading
  95 ##  `margin' space from each input line before storing it in `self.lines'.
  96 ##
  97 class  DocCode:
  98
  99     def  __init__( self, margin, lines ):
 100         self.lines = []
 101         self.words = None
 102
 103         # remove margin spaces
 104         for l in lines:
 105             if string.strip( l[:margin] ) == "":
 106                 l = l[margin:]
 107             self.lines.append( l )
 108
 109     def  dump( self, prefix = "", width = 60 ):
 110         lines = self.dump_lines( 0, width )
 111         for l in lines:
 112             print prefix + l
 113
 114     def  dump_lines( self, margin = 0, width = 60 ):
 115         result = []
 116         for l in self.lines:
 117             result.append( " " * margin + l )
 118         return result
 119
 120
 121
 122 ################################################################
 123 ##
 124 ##  DOC PARA CLASS
 125 ##
 126 ##  `Normal' text paragraphs are stored in the `DocPara' class.
 127 ##
 128 ##  `self.words' contains the list of words that make up the paragraph.
 129 ##
 130 class  DocPara:
 131
 132     def  __init__( self, lines ):
 133         self.lines = None
 134         self.words = []
 135         for l in lines:
 136             l = string.strip( l )
 137             self.words.extend( string.split( l ) )
 138
 139     def  dump( self, prefix = "", width = 60 ):
 140         lines = self.dump_lines( 0, width )
 141         for l in lines:
 142             print prefix + l
 143
 144     def  dump_lines( self, margin = 0, width = 60 ):
 145         cur    = ""  # current line
 146         col    = 0   # current width
 147         result = []
 148
 149         for word in self.words:
 150             ln = len( word )
 151             if col > 0:
 152                 ln = ln + 1
 153
 154             if col + ln > width:
 155                 result.append( " " * margin + cur )
 156                 cur = word
 157                 col = len( word )
 158             else:
 159                 if col > 0:
 160                     cur = cur + " "
 161                 cur = cur + word
 162                 col = col + ln
 163
 164         if col > 0:
 165             result.append( " " * margin + cur )
 166
 167         return result
 168
 169
 170 ################################################################
 171 ##
 172 ##  DOC FIELD CLASS
 173 ##
 174 ##  The `DocField' class stores a list containing either `DocPara' or
 175 ##  `DocCode' objects.  Each DocField object also has an optional `name'
 176 ##  that is used when the object corresponds to a field or value definition.
 177 ##
 178 class  DocField:
 179
 180     def  __init__( self, name, lines ):
 181         self.name  = name  # can be `None' for normal paragraphs/sources
 182         self.items = []    # list of items
 183
 184         mode_none  = 0     # start parsing mode
 185         mode_code  = 1     # parsing code sequences
 186         mode_para  = 3     # parsing normal paragraph
 187
 188         margin     = -1    # current code sequence indentation
 189         cur_lines  = []
 190
 191         # analyze the markup lines to check whether they contain paragraphs,
 192         # code sequences, or fields definitions
 193         #
 194         start = 0
 195         mode  = mode_none
 196
 197         for l in lines:
 198             # are we parsing a code sequence?
 199             if mode == mode_code:
 200                 m = re_code_end.match( l )
 201                 if m and len( m.group( 1 ) ) <= margin:
 202                     # that's it, we finished the code sequence
 203                     code = DocCode( 0, cur_lines )
 204                     self.items.append( code )
 205                     margin    = -1
 206                     cur_lines = []
 207                     mode      = mode_none
 208                 else:
 209                     # otherwise continue the code sequence
 210                     cur_lines.append( l[margin:] )
 211             else:
 212                 # start of code sequence?
 213                 m = re_code_start.match( l )
 214                 if m:
 215                     # save current lines
 216                     if cur_lines:
 217                         para = DocPara( cur_lines )
 218                         self.items.append( para )
 219                         cur_lines = []
 220
 221                     # switch to code extraction mode
 222                     margin = len( m.group( 1 ) )
 223                     mode   = mode_code
 224                 else:
 225                     if not string.split( l ) and cur_lines:
 226                         # if the line is empty, we end the current paragraph,
 227                         # if any
 228                         para = DocPara( cur_lines )
 229                         self.items.append( para )
 230                         cur_lines = []
 231                     else:
 232                         # otherwise, simply add the line to the current
 233                         # paragraph
 234                         cur_lines.append( l )
 235
 236         if mode == mode_code:
 237             # unexpected end of code sequence
 238             code = DocCode( margin, cur_lines )
 239             self.items.append( code )
 240         elif cur_lines:
 241             para = DocPara( cur_lines )
 242             self.items.append( para )
 243
 244     def  dump( self, prefix = "" ):
 245         if self.field:
 246             print prefix + self.field + " ::"
 247             prefix = prefix + "----"
 248
 249         first = 1
 250         for p in self.items:
 251             if not first:
 252                 print ""
 253             p.dump( prefix )
 254             first = 0
 255
 256     def  dump_lines( self, margin = 0, width = 60 ):
 257         result = []
 258         nl     = None
 259
 260         for p in self.items:
 261             if nl:
 262                 result.append( "" )
 263
 264             result.extend( p.dump_lines( margin, width ) )
 265             nl = 1
 266
 267         return result
 268
 269
 270 #
 271 # A regular expression to detect field definitions.
 272 #
 273 # Examples:
 274 #
 275 #   foo     ::
 276 #   foo.bar ::
 277 #
 278 re_field = re.compile( r"""
 279                          \s*
 280                            (
 281                              \w*
 282                            |
 283                              \w (\w | \.)* \w
 284                            )
 285                          \s* ::
 286                        """, re.VERBOSE )
 287
 288
 289 ################################################################
 290 ##
 291 ##  DOC MARKUP CLASS
 292 ##
 293 class  DocMarkup:
 294
 295     def  __init__( self, tag, lines ):
 296         self.tag    = string.lower( tag )
 297         self.fields = []
 298
 299         cur_lines = []
 300         field     = None
 301         mode      = 0
 302
 303         for l in lines:
 304             m = re_field.match( l )
 305             if m:
 306                 # We detected the start of a new field definition.
 307
 308                 # first, save the current one
 309                 if cur_lines:
 310                     f = DocField( field, cur_lines )
 311                     self.fields.append( f )
 312                     cur_lines = []
 313                     field     = None
 314
 315                 field     = m.group( 1 )   # record field name
 316                 ln        = len( m.group( 0 ) )
 317                 l         = " " * ln + l[ln:]
 318                 cur_lines = [l]
 319             else:
 320                 cur_lines.append( l )
 321
 322         if field or cur_lines:
 323             f = DocField( field, cur_lines )
 324             self.fields.append( f )
 325
 326     def  get_name( self ):
 327         try:
 328             return self.fields[0].items[0].words[0]
 329         except:
 330             return None
 331
 332     def  dump( self, margin ):
 333         print " " * margin + "<" + self.tag + ">"
 334         for f in self.fields:
 335             f.dump( "  " )
 336         print " " * margin + "</" + self.tag + ">"
 337
 338
 339 ################################################################
 340 ##
 341 ##  DOC CHAPTER CLASS
 342 ##
 343 class  DocChapter:
 344
 345     def  __init__( self, block ):
 346         self.block    = block
 347         self.sections = []
 348         if block:
 349             self.name  = block.name
 350             self.title = block.get_markup_words( "title" )
 351             self.order = block.get_markup_words( "sections" )
 352         else:
 353             self.name  = "Other"
 354             self.title = string.split( "Miscellaneous" )
 355             self.order = []
 356
 357
 358 ################################################################
 359 ##
 360 ##  DOC SECTION CLASS
 361 ##
 362 class  DocSection:
 363
 364     def  __init__( self, name = "Other" ):
 365         self.name        = name
 366         self.blocks      = {}
 367         self.block_names = []  # ordered block names in section
 368         self.defs        = []
 369         self.abstract    = ""
 370         self.description = ""
 371         self.order       = []
 372         self.title       = "ERROR"
 373         self.chapter     = None
 374
 375     def  add_def( self, block ):
 376         self.defs.append( block )
 377
 378     def  add_block( self, block ):
 379         self.block_names.append( block.name )
 380         self.blocks[block.name] = block
 381
 382     def  process( self ):
 383         # look up one block that contains a valid section description
 384         for block in self.defs:
 385             title = block.get_markup_text( "title" )
 386             if title:
 387                 self.title       = title
 388                 self.abstract    = block.get_markup_words( "abstract" )
 389                 self.description = block.get_markup_items( "description" )
 390                 self.order       = block.get_markup_words_all( "order" )
 391                 return
 392
 393     def  reorder( self ):
 394         self.block_names = sort_order_list( self.block_names, self.order )
 395
 396
 397 ################################################################
 398 ##
 399 ##  CONTENT PROCESSOR CLASS
 400 ##
 401 class  ContentProcessor:
 402
 403     def  __init__( self ):
 404         """Initialize a block content processor."""
 405         self.reset()
 406
 407         self.sections = {}    # dictionary of documentation sections
 408         self.section  = None  # current documentation section
 409
 410         self.chapters = []    # list of chapters
 411
 412         self.headers  = {}    # dictionary of header macros
 413
 414     def  set_section( self, section_name ):
 415         """Set current section during parsing."""
 416         if not section_name in self.sections:
 417             section = DocSection( section_name )
 418             self.sections[section_name] = section
 419             self.section                = section
 420         else:
 421             self.section = self.sections[section_name]
 422
 423     def  add_chapter( self, block ):
 424         chapter = DocChapter( block )
 425         self.chapters.append( chapter )
 426
 427     def  reset( self ):
 428         """Reset the content processor for a new block."""
 429         self.markups      = []
 430         self.markup       = None
 431         self.markup_lines = []
 432
 433     def  add_markup( self ):
 434         """Add a new markup section."""
 435         if self.markup and self.markup_lines:
 436
 437             # get rid of last line of markup if it's empty
 438             marks = self.markup_lines
 439             if len( marks ) > 0 and not string.strip( marks[-1] ):
 440                 self.markup_lines = marks[:-1]
 441
 442             m = DocMarkup( self.markup, self.markup_lines )
 443
 444             self.markups.append( m )
 445
 446             self.markup       = None
 447             self.markup_lines = []
 448
 449     def  process_content( self, content ):
 450         """Process a block content and return a list of DocMarkup objects
 451            corresponding to it."""
 452         markup       = None
 453         markup_lines = []
 454         first        = 1
 455
 456         for line in content:
 457             found = None
 458             for t in re_markup_tags:
 459                 m = t.match( line )
 460                 if m:
 461                     found  = string.lower( m.group( 1 ) )
 462                     prefix = len( m.group( 0 ) )
 463                     line   = " " * prefix + line[prefix:]   # remove markup from line
 464                     break
 465
 466             # is it the start of a new markup section ?
 467             if found:
 468                 first = 0
 469                 self.add_markup()  # add current markup content
 470                 self.markup = found
 471                 if len( string.strip( line ) ) > 0:
 472                     self.markup_lines.append( line )
 473             elif first == 0:
 474                 self.markup_lines.append( line )
 475
 476         self.add_markup()
 477
 478         return self.markups
 479
 480     def  parse_sources( self, source_processor ):
 481         blocks = source_processor.blocks
 482         count  = len( blocks )
 483
 484         for n in range( count ):
 485             source = blocks[n]
 486             if source.content:
 487                 # this is a documentation comment, we need to catch
 488                 # all following normal blocks in the "follow" list
 489                 #
 490                 follow = []
 491                 m = n + 1
 492                 while m < count and not blocks[m].content:
 493                     follow.append( blocks[m] )
 494                     m = m + 1
 495
 496                 doc_block = DocBlock( source, follow, self )
 497
 498     def  finish( self ):
 499         # process all sections to extract their abstract, description
 500         # and ordered list of items
 501         #
 502         for sec in self.sections.values():
 503             sec.process()
 504
 505         # process chapters to check that all sections are correctly
 506         # listed there
 507         for chap in self.chapters:
 508             for sec in chap.order:
 509                 if sec in self.sections:
 510                     section = self.sections[sec]
 511                     section.chapter = chap
 512                     section.reorder()
 513                     chap.sections.append( section )
 514                 else:
 515                     sys.stderr.write( "WARNING: chapter '" +          \
 516                         chap.name + "' in " + chap.block.location() + \
 517                         " lists unknown section '" + sec + "'\n" )
 518
 519         # check that all sections are in a chapter
 520         #
 521         others = []
 522         for sec in self.sections.values():
 523             if not sec.chapter:
 524                 sec.reorder()
 525                 others.append( sec )
 526
 527         # create a new special chapter for all remaining sections
 528         # when necessary
 529         #
 530         if others:
 531             chap = DocChapter( None )
 532             chap.sections = others
 533             self.chapters.append( chap )
 534
 535
 536 ################################################################
 537 ##
 538 ##  DOC BLOCK CLASS
 539 ##
 540 class  DocBlock:
 541
 542     def  __init__( self, source, follow, processor ):
 543         processor.reset()
 544
 545         self.source  = source
 546         self.code    = []
 547         self.type    = "ERRTYPE"
 548         self.name    = "ERRNAME"
 549         self.section = processor.section
 550         self.markups = processor.process_content( source.content )
 551
 552         # compute block type from first markup tag
 553         try:
 554             self.type = self.markups[0].tag
 555         except:
 556             pass
 557
 558         # compute block name from first markup paragraph
 559         try:
 560             markup = self.markups[0]
 561             para   = markup.fields[0].items[0]
 562             name   = para.words[0]
 563             m = re_identifier.match( name )
 564             if m:
 565                 name = m.group( 1 )
 566             self.name = name
 567         except:
 568             pass
 569
 570         if self.type == "section":
 571             # detect new section starts
 572             processor.set_section( self.name )
 573             processor.section.add_def( self )
 574         elif self.type == "chapter":
 575             # detect new chapter
 576             processor.add_chapter( self )
 577         else:
 578             processor.section.add_block( self )
 579
 580         # now, compute the source lines relevant to this documentation
 581         # block. We keep normal comments in for obvious reasons (??)
 582         source = []
 583         for b in follow:
 584             if b.format:
 585                 break
 586             for l in b.lines:
 587                 # collect header macro definitions
 588                 m = re_header_macro.match( l )
 589                 if m:
 590                     processor.headers[m.group( 2 )] = m.group( 1 );
 591
 592                 # we use "/* */" as a separator
 593                 if re_source_sep.match( l ):
 594                     break
 595                 source.append( l )
 596
 597         # now strip the leading and trailing empty lines from the sources
 598         start = 0
 599         end   = len( source ) - 1
 600
 601         while start < end and not string.strip( source[start] ):
 602             start = start + 1
 603
 604         while start < end and not string.strip( source[end] ):
 605             end = end - 1
 606
 607         if start == end and not string.strip( source[start] ):
 608             self.code = []
 609         else:
 610             self.code = source[start:end + 1]
 611
 612     def  location( self ):
 613         return self.source.location()
 614
 615     def  get_markup( self, tag_name ):
 616         """Return the DocMarkup corresponding to a given tag in a block."""
 617         for m in self.markups:
 618             if m.tag == string.lower( tag_name ):
 619                 return m
 620         return None
 621
 622     def  get_markup_words( self, tag_name ):
 623         try:
 624             m = self.get_markup( tag_name )
 625             return m.fields[0].items[0].words
 626         except:
 627             return []
 628
 629     def  get_markup_words_all( self, tag_name ):
 630         try:
 631             m = self.get_markup( tag_name )
 632             words = []
 633             for item in m.fields[0].items:
 634                 # We honour empty lines in an `<Order>' section element by
 635                 # adding the sentinel `/empty/'.  The formatter should then
 636                 # convert it to an appropriate representation in the
 637                 # `section_enter' function.
 638                 words += item.words
 639                 words.append( "/empty/" )
 640             return words
 641         except:
 642             return []
 643
 644     def  get_markup_text( self, tag_name ):
 645         result = self.get_markup_words( tag_name )
 646         return string.join( result )
 647
 648     def  get_markup_items( self, tag_name ):
 649         try:
 650             m = self.get_markup( tag_name )
 651             return m.fields[0].items
 652         except:
 653             return None
 654
 655 # eof