src/tools/docmaker/content.py

   1 #  Content (c) 2002, 2004, 2006, 2007, 2008, 2009
   2 #    David Turner <david@freetype.org>
   3 #
   4 #  This file contains routines used to parse the content of documentation
   5 #  comment blocks and build more structured objects out of them.
   6 #
   7
   8 from sources import *
   9 from utils import *
  10 import string, re
  11
  12
# These regular expressions detect the start and the end of a code
# sequence, i.e. a code fragment enclosed between a line holding only
# '{' and a line holding only '}', like in:
#
#  {
#    x = y + z;
#    if ( zookoo == 2 )
#    {
#      foobar();
#    }
#  }
#
# Group 1 captures the leading whitespace of the brace line; its length
# gives the indentation of the brace.  DocField ends a code sequence at the
# first closing brace that is indented no deeper than the opening one, so
# the sequence itself may contain braces at a greater indentation.
#
re_code_start = re.compile( r"(\s*){\s*$" )
re_code_end   = re.compile( r"(\s*)}\s*$" )
  30
  31
# This regular expression is used to isolate an identifier from the text
# that follows it: when matched against a string, group 1 holds the
# (possibly empty) run of word characters found at its very beginning.
#
re_identifier = re.compile( r'(\w*)' )
  36
  37
# We collect macros whose name ends in `_H' and which are defined as
# `<...>'.  Group 1 is the macro name, group 2 the text found between the
# angle brackets.  While outputting the object data, this info is used
# together with the object's file location to emit the appropriate header
# file macro and name before the object itself.
#
re_header_macro = re.compile( r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>' )
  43
  44
  45 #############################################################################
  46 #
  47 # The DocCode class is used to store source code lines.
  48 #
  49 #   'self.lines' contains a set of source code lines that will be dumped as
  50 #   HTML in a <PRE> tag.
  51 #
  52 #   The object is filled line by line by the parser; it strips the leading
  53 #   "margin" space from each input line before storing it in 'self.lines'.
  54 #
  55 class  DocCode:
  56
  57     def  __init__( self, margin, lines ):
  58         self.lines = []
  59         self.words = None
  60
  61         # remove margin spaces
  62         for l in lines:
  63             if string.strip( l[:margin] ) == "":
  64                 l = l[margin:]
  65             self.lines.append( l )
  66
  67     def  dump( self, prefix = "", width = 60 ):
  68         lines = self.dump_lines( 0, width )
  69         for l in lines:
  70             print prefix + l
  71
  72     def  dump_lines( self, margin = 0, width = 60 ):
  73         result = []
  74         for l in self.lines:
  75             result.append( " " * margin + l )
  76         return result
  77
  78
  79
  80 #############################################################################
  81 #
  82 # The DocPara class is used to store "normal" text paragraph.
  83 #
  84 #   'self.words' contains the list of words that make up the paragraph
  85 #
  86 class  DocPara:
  87
  88     def  __init__( self, lines ):
  89         self.lines = None
  90         self.words = []
  91         for l in lines:
  92             l = string.strip( l )
  93             self.words.extend( string.split( l ) )
  94
  95     def  dump( self, prefix = "", width = 60 ):
  96         lines = self.dump_lines( 0, width )
  97         for l in lines:
  98             print prefix + l
  99
 100     def  dump_lines( self, margin = 0, width = 60 ):
 101         cur    = ""  # current line
 102         col    = 0   # current width
 103         result = []
 104
 105         for word in self.words:
 106             ln = len( word )
 107             if col > 0:
 108                 ln = ln + 1
 109
 110             if col + ln > width:
 111                 result.append( " " * margin + cur )
 112                 cur = word
 113                 col = len( word )
 114             else:
 115                 if col > 0:
 116                     cur = cur + " "
 117                 cur = cur + word
 118                 col = col + ln
 119
 120         if col > 0:
 121             result.append( " " * margin + cur )
 122
 123         return result
 124
 125
 126
 127 #############################################################################
 128 #
 129 #  The DocField class is used to store a list containing either DocPara or
 130 #  DocCode objects. Each DocField also has an optional "name" which is used
 131 #  when the object corresponds to a field or value definition
 132 #
 133 class  DocField:
 134
 135     def  __init__( self, name, lines ):
 136         self.name  = name  # can be None for normal paragraphs/sources
 137         self.items = []    # list of items
 138
 139         mode_none  = 0     # start parsing mode
 140         mode_code  = 1     # parsing code sequences
 141         mode_para  = 3     # parsing normal paragraph
 142
 143         margin     = -1    # current code sequence indentation
 144         cur_lines  = []
 145
 146         # now analyze the markup lines to see if they contain paragraphs,
 147         # code sequences or fields definitions
 148         #
 149         start = 0
 150         mode  = mode_none
 151
 152         for l in lines:
 153             # are we parsing a code sequence ?
 154             if mode == mode_code:
 155                 m = re_code_end.match( l )
 156                 if m and len( m.group( 1 ) ) <= margin:
 157                     # that's it, we finished the code sequence
 158                     code = DocCode( 0, cur_lines )
 159                     self.items.append( code )
 160                     margin    = -1
 161                     cur_lines = []
 162                     mode      = mode_none
 163                 else:
 164                     # nope, continue the code sequence
 165                     cur_lines.append( l[margin:] )
 166             else:
 167                 # start of code sequence ?
 168                 m = re_code_start.match( l )
 169                 if m:
 170                     # save current lines
 171                     if cur_lines:
 172                         para = DocPara( cur_lines )
 173                         self.items.append( para )
 174                         cur_lines = []
 175
 176                     # switch to code extraction mode
 177                     margin = len( m.group( 1 ) )
 178                     mode   = mode_code
 179                 else:
 180                     if not string.split( l ) and cur_lines:
 181                         # if the line is empty, we end the current paragraph,
 182                         # if any
 183                         para = DocPara( cur_lines )
 184                         self.items.append( para )
 185                         cur_lines = []
 186                     else:
 187                         # otherwise, simply add the line to the current
 188                         # paragraph
 189                         cur_lines.append( l )
 190
 191         if mode == mode_code:
 192             # unexpected end of code sequence
 193             code = DocCode( margin, cur_lines )
 194             self.items.append( code )
 195         elif cur_lines:
 196             para = DocPara( cur_lines )
 197             self.items.append( para )
 198
 199     def  dump( self, prefix = "" ):
 200         if self.field:
 201             print prefix + self.field + " ::"
 202             prefix = prefix + "----"
 203
 204         first = 1
 205         for p in self.items:
 206             if not first:
 207                 print ""
 208             p.dump( prefix )
 209             first = 0
 210
 211     def  dump_lines( self, margin = 0, width = 60 ):
 212         result = []
 213         nl     = None
 214
 215         for p in self.items:
 216             if nl:
 217                 result.append( "" )
 218
 219             result.extend( p.dump_lines( margin, width ) )
 220             nl = 1
 221
 222         return result
 223
 224
 225
# This regular expression is used to detect field definitions, i.e. lines
# that start with a field name followed by `::'.  Group 1 holds the name,
# made of word characters with optional dots in between; it can be empty.
#
re_field = re.compile( r"\s*(\w*|\w(\w|\.)*\w)\s*::" )
 229
 230
 231
 232 class  DocMarkup:
 233
 234     def  __init__( self, tag, lines ):
 235         self.tag    = string.lower( tag )
 236         self.fields = []
 237
 238         cur_lines = []
 239         field     = None
 240         mode      = 0
 241
 242         for l in lines:
 243             m = re_field.match( l )
 244             if m:
 245                 # we detected the start of a new field definition
 246
 247                 # first, save the current one
 248                 if cur_lines:
 249                     f = DocField( field, cur_lines )
 250                     self.fields.append( f )
 251                     cur_lines = []
 252                     field     = None
 253
 254                 field     = m.group( 1 )   # record field name
 255                 ln        = len( m.group( 0 ) )
 256                 l         = " " * ln + l[ln:]
 257                 cur_lines = [l]
 258             else:
 259                 cur_lines.append( l )
 260
 261         if field or cur_lines:
 262             f = DocField( field, cur_lines )
 263             self.fields.append( f )
 264
 265     def  get_name( self ):
 266         try:
 267             return self.fields[0].items[0].words[0]
 268         except:
 269             return None
 270
 271     def  get_start( self ):
 272         try:
 273             result = ""
 274             for word in self.fields[0].items[0].words:
 275                 result = result + " " + word
 276             return result[1:]
 277         except:
 278             return "ERROR"
 279
 280     def  dump( self, margin ):
 281         print " " * margin + "<" + self.tag + ">"
 282         for f in self.fields:
 283             f.dump( "  " )
 284         print " " * margin + "</" + self.tag + ">"
 285
 286
 287
 288 class  DocChapter:
 289
 290     def  __init__( self, block ):
 291         self.block    = block
 292         self.sections = []
 293         if block:
 294             self.name  = block.name
 295             self.title = block.get_markup_words( "title" )
 296             self.order = block.get_markup_words( "sections" )
 297         else:
 298             self.name  = "Other"
 299             self.title = string.split( "Miscellaneous" )
 300             self.order = []
 301
 302
 303
 304 class  DocSection:
 305
 306     def  __init__( self, name = "Other" ):
 307         self.name        = name
 308         self.blocks      = {}
 309         self.block_names = []  # ordered block names in section
 310         self.defs        = []
 311         self.abstract    = ""
 312         self.description = ""
 313         self.order       = []
 314         self.title       = "ERROR"
 315         self.chapter     = None
 316
 317     def  add_def( self, block ):
 318         self.defs.append( block )
 319
 320     def  add_block( self, block ):
 321         self.block_names.append( block.name )
 322         self.blocks[block.name] = block
 323
 324     def  process( self ):
 325         # look up one block that contains a valid section description
 326         for block in self.defs:
 327             title = block.get_markup_text( "title" )
 328             if title:
 329                 self.title       = title
 330                 self.abstract    = block.get_markup_words( "abstract" )
 331                 self.description = block.get_markup_items( "description" )
 332                 self.order       = block.get_markup_words( "order" )
 333                 return
 334
 335     def  reorder( self ):
 336         self.block_names = sort_order_list( self.block_names, self.order )
 337
 338
 339
 340 class  ContentProcessor:
 341
 342     def  __init__( self ):
 343         """initialize a block content processor"""
 344         self.reset()
 345
 346         self.sections = {}    # dictionary of documentation sections
 347         self.section  = None  # current documentation section
 348
 349         self.chapters = []    # list of chapters
 350
 351         self.headers  = {}    # dictionary of header macros
 352
 353     def  set_section( self, section_name ):
 354         """set current section during parsing"""
 355         if not self.sections.has_key( section_name ):
 356             section = DocSection( section_name )
 357             self.sections[section_name] = section
 358             self.section                = section
 359         else:
 360             self.section = self.sections[section_name]
 361
 362     def  add_chapter( self, block ):
 363         chapter = DocChapter( block )
 364         self.chapters.append( chapter )
 365
 366
 367     def  reset( self ):
 368         """reset the content processor for a new block"""
 369         self.markups      = []
 370         self.markup       = None
 371         self.markup_lines = []
 372
 373     def  add_markup( self ):
 374         """add a new markup section"""
 375         if self.markup and self.markup_lines:
 376
 377             # get rid of last line of markup if it's empty
 378             marks = self.markup_lines
 379             if len( marks ) > 0 and not string.strip( marks[-1] ):
 380                 self.markup_lines = marks[:-1]
 381
 382             m = DocMarkup( self.markup, self.markup_lines )
 383
 384             self.markups.append( m )
 385
 386             self.markup       = None
 387             self.markup_lines = []
 388
 389     def  process_content( self, content ):
 390         """process a block content and return a list of DocMarkup objects
 391            corresponding to it"""
 392         markup       = None
 393         markup_lines = []
 394         first        = 1
 395
 396         for line in content:
 397             found = None
 398             for t in re_markup_tags:
 399                 m = t.match( line )
 400                 if m:
 401                     found  = string.lower( m.group( 1 ) )
 402                     prefix = len( m.group( 0 ) )
 403                     line   = " " * prefix + line[prefix:]   # remove markup from line
 404                     break
 405
 406             # is it the start of a new markup section ?
 407             if found:
 408                 first = 0
 409                 self.add_markup()  # add current markup content
 410                 self.markup = found
 411                 if len( string.strip( line ) ) > 0:
 412                     self.markup_lines.append( line )
 413             elif first == 0:
 414                 self.markup_lines.append( line )
 415
 416         self.add_markup()
 417
 418         return self.markups
 419
 420     def  parse_sources( self, source_processor ):
 421         blocks = source_processor.blocks
 422         count  = len( blocks )
 423
 424         for n in range( count ):
 425             source = blocks[n]
 426             if source.content:
 427                 # this is a documentation comment, we need to catch
 428                 # all following normal blocks in the "follow" list
 429                 #
 430                 follow = []
 431                 m = n + 1
 432                 while m < count and not blocks[m].content:
 433                     follow.append( blocks[m] )
 434                     m = m + 1
 435
 436                 doc_block = DocBlock( source, follow, self )
 437
 438     def  finish( self ):
 439         # process all sections to extract their abstract, description
 440         # and ordered list of items
 441         #
 442         for sec in self.sections.values():
 443             sec.process()
 444
 445         # process chapters to check that all sections are correctly
 446         # listed there
 447         for chap in self.chapters:
 448             for sec in chap.order:
 449                 if self.sections.has_key( sec ):
 450                     section = self.sections[sec]
 451                     section.chapter = chap
 452                     section.reorder()
 453                     chap.sections.append( section )
 454                 else:
 455                     sys.stderr.write( "WARNING: chapter '" +          \
 456                         chap.name + "' in " + chap.block.location() + \
 457                         " lists unknown section '" + sec + "'\n" )
 458
 459         # check that all sections are in a chapter
 460         #
 461         others = []
 462         for sec in self.sections.values():
 463             if not sec.chapter:
 464                 others.append( sec )
 465
 466         # create a new special chapter for all remaining sections
 467         # when necessary
 468         #
 469         if others:
 470             chap = DocChapter( None )
 471             chap.sections = others
 472             self.chapters.append( chap )
 473
 474
 475
 476 class  DocBlock:
 477
 478     def  __init__( self, source, follow, processor ):
 479         processor.reset()
 480
 481         self.source  = source
 482         self.code    = []
 483         self.type    = "ERRTYPE"
 484         self.name    = "ERRNAME"
 485         self.section = processor.section
 486         self.markups = processor.process_content( source.content )
 487
 488         # compute block type from first markup tag
 489         try:
 490             self.type = self.markups[0].tag
 491         except:
 492             pass
 493
 494         # compute block name from first markup paragraph
 495         try:
 496             markup = self.markups[0]
 497             para   = markup.fields[0].items[0]
 498             name   = para.words[0]
 499             m = re_identifier.match( name )
 500             if m:
 501                 name = m.group( 1 )
 502             self.name = name
 503         except:
 504             pass
 505
 506         if self.type == "section":
 507             # detect new section starts
 508             processor.set_section( self.name )
 509             processor.section.add_def( self )
 510         elif self.type == "chapter":
 511             # detect new chapter
 512             processor.add_chapter( self )
 513         else:
 514             processor.section.add_block( self )
 515
 516         # now, compute the source lines relevant to this documentation
 517         # block. We keep normal comments in for obvious reasons (??)
 518         source = []
 519         for b in follow:
 520             if b.format:
 521                 break
 522             for l in b.lines:
 523                 # collect header macro definitions
 524                 m = re_header_macro.match( l )
 525                 if m:
 526                     processor.headers[m.group( 2 )] = m.group( 1 );
 527
 528                 # we use "/* */" as a separator
 529                 if re_source_sep.match( l ):
 530                     break
 531                 source.append( l )
 532
 533         # now strip the leading and trailing empty lines from the sources
 534         start = 0
 535         end   = len( source ) - 1
 536
 537         while start < end and not string.strip( source[start] ):
 538             start = start + 1
 539
 540         while start < end and not string.strip( source[end] ):
 541             end = end - 1
 542
 543         if start == end and not string.strip( source[start] ):
 544             self.code = []
 545         else:
 546             self.code = source[start:end + 1]
 547
 548     def  location( self ):
 549         return self.source.location()
 550
 551     def  get_markup( self, tag_name ):
 552         """return the DocMarkup corresponding to a given tag in a block"""
 553         for m in self.markups:
 554             if m.tag == string.lower( tag_name ):
 555                 return m
 556         return None
 557
 558     def  get_markup_name( self, tag_name ):
 559         """return the name of a given primary markup in a block"""
 560         try:
 561             m = self.get_markup( tag_name )
 562             return m.get_name()
 563         except:
 564             return None
 565
 566     def  get_markup_words( self, tag_name ):
 567         try:
 568             m = self.get_markup( tag_name )
 569             return m.fields[0].items[0].words
 570         except:
 571             return []
 572
 573     def  get_markup_text( self, tag_name ):
 574         result = self.get_markup_words( tag_name )
 575         return string.join( result )
 576
 577     def  get_markup_items( self, tag_name ):
 578         try:
 579             m = self.get_markup( tag_name )
 580             return m.fields[0].items
 581         except:
 582             return None
 583
 584 # eof