4 # Convert source code comments to multi-line blocks (library file).
6 # Copyright 2002-2004, 2006-2009, 2012-2014 by
9 # This file is part of the FreeType project, and may only be used,
10 # modified, and distributed under the terms of the FreeType project
11 # license, LICENSE.TXT. By continuing to use, modify, or distribute
12 # this file you indicate that you have read the license and
13 # understand and accept it fully.
16 # This library file contains definitions of classes needed to decompose C
17 # source code files into a series of multi-line `blocks'. There are two
20 # - Normal blocks, which contain source code or ordinary comments.
22 # - Documentation blocks, which have restricted formatting, and whose text
23 # always starts with a documentation markup tag like `<Function>',
26 # The routines to process the content of documentation blocks are contained
27 # in file `content.py'; the classes and methods found here only deal with
28 # text parsing and basic documentation block extraction.
32 import fileinput, re, sys, os, string
35 ################################################################
37 ## SOURCE BLOCK FORMAT CLASS
39 ## A simple class containing compiled regular expressions to detect
40 ## potential documentation format block comments within C source code.
42 ## The `column' pattern must contain a group to `unbox' the content of
43 ## documentation comment blocks.
45 ## Later on, paragraphs are converted to long lines, which simplifies the
46 ## regular expressions that act upon the text.
48 class SourceBlockFormat:
50 def __init__( self, id, start, column, end ):
51 """Create a block pattern, used to recognize special documentation
54 self.start = re.compile( start, re.VERBOSE )
55 self.column = re.compile( column, re.VERBOSE )
56 self.end = re.compile( end, re.VERBOSE )
60 # Format 1 documentation comment blocks.
62 # /************************************/ (at least 2 asterisks)
66 # /************************************/ (at least 2 asterisks)
69 \s* # any number of whitespace
70 /\*{2,}/ # followed by '/' and at least two asterisks then '/'
71 \s*$ # probably followed by whitespace
75 \s* # any number of whitespace
76 /\*{1} # followed by '/' and precisely one asterisk
77 ([^*].*) # followed by anything (group 1)
78 \*{1}/ # followed by one asterisk and a '/'
79 \s*$ # probably followed by whitespace
82 re_source_block_format1 = SourceBlockFormat( 1, start, column, start )
86 # Format 2 documentation comment blocks.
88 # /************************************ (at least 2 asterisks)
92 # */ (1 or more asterisks)
95 \s* # any number of whitespace
96 /\*{2,} # followed by '/' and at least two asterisks
97 \s*$ # probably followed by whitespace
101 \s* # any number of whitespace
102 \*{1}(?![*/]) # followed by precisely one asterisk not followed by `/'
103 (.*) # then anything (group1)
107 \s* # any number of whitespace
108 \*+/ # followed by at least one asterisk, then '/'
111 re_source_block_format2 = SourceBlockFormat( 2, start, column, end )
115 # The list of supported documentation block formats. We could add new ones
118 re_source_block_formats = [re_source_block_format1, re_source_block_format2]
122 # The following regular expressions correspond to markup tags within the
123 # documentation comment blocks. They are equivalent despite their different
126 # A markup tag consists of letters, digits, `_', or `-', to be found in group 1.
128 # Notice that a markup tag _must_ begin a new paragraph.
130 re_markup_tag1 = re.compile( r'''\s*<((?:\w|-)*)>''' ) # <xxxx> format
131 re_markup_tag2 = re.compile( r'''\s*@((?:\w|-)*):''' ) # @xxxx: format
134 # The list of supported markup tags. We could add new ones quite easily.
136 re_markup_tags = [re_markup_tag1, re_markup_tag2]
140 # A regular expression to detect a cross reference, after markup tags have
141 # been stripped off. Group 1 is the reference, group 2 the rest of the
144 # A cross reference consists of letters, digits, or characters `-' and `_'.
146 re_crossref = re.compile( r'@((?:\w|-)*)(.*)' ) # @foo
149 # Two regular expressions to detect italic and bold markup, respectively.
150 # Group 1 is the markup, group 2 the rest of the line.
152 # Note that the markup is limited to words consisting of letters, digits,
153 # the character `_', or an apostrophe (but not as the first character).
155 re_italic = re.compile( r"_(\w(?:\w|')*)_(.*)" ) # _italic_
156 re_bold = re.compile( r"\*(\w(?:\w|')*)\*(.*)" ) # *bold*
159 # This regular expression code to identify a URL has been taken from
161 # http://mail.python.org/pipermail/tutor/2002-September/017228.html
163 # (with slight modifications).
165 urls = r'(?:https?|telnet|gopher|file|wais|ftp)'
167 gunk = r'/#~:.?+=&%@!\-'
169 any = "%(ltrs)s%(gunk)s%(punc)s" % { 'ltrs' : ltrs,
174 \b # start at word boundary
175 %(urls)s : # need resource and a colon
176 [%(any)s] +? # followed by one or more of any valid
177 # character, but be conservative and
178 # take only what you need to...
179 (?= # [look-ahead non-consumptive assertion]
180 [%(punc)s]* # either 0 or more punctuation
181 (?: # [non-grouping parentheses]
182 [^%(any)s] | $ # followed by a non-url char
183 # or end of the string
187 """ % {'urls' : urls,
191 re_url = re.compile( url, re.VERBOSE | re.MULTILINE )
194 # A regular expression that stops collection of comments for the current
197 re_source_sep = re.compile( r'\s*/\*\s*\*/' ) # /* */
200 # A regular expression to find possible C identifiers while outputting
201 # source code verbatim, covering things like `*foo' or `(bar'. Group 1 is
202 # the prefix, group 2 the identifier -- since we scan lines from left to
203 # right, sequentially splitting the source code into prefix and identifier
204 # is fully sufficient for our purposes.
206 re_source_crossref = re.compile( r'(\W*)(\w*)' )
209 # A regular expression that matches a list of reserved C source keywords.
211 re_source_keywords = re.compile( '''\\b ( typedef |
230 \#endif ) \\b''', re.VERBOSE )
233 ################################################################
235 ## SOURCE BLOCK CLASS
237 ## There are two important fields in a `SourceBlock' object.
240 ## A list of text lines for the corresponding block.
243 ## For documentation comment blocks only, this is the block content
244 ## that has been `unboxed' from its decoration. This is `None' for all
245 ## other blocks (i.e., sources or ordinary comments with no starting
250 def __init__( self, processor, filename, lineno, lines ):
251 self.processor = processor
252 self.filename = filename
254 self.lines = lines[:]
255 self.format = processor.format
258 if self.format == None:
263 # extract comment lines
266 for line0 in self.lines:
267 m = self.format.column.match( line0 )
269 lines.append( m.group( 1 ) )
271 # now, look for a markup tag
273 l = string.strip( l )
275 for tag in re_markup_tags:
280 def location( self ):
281 return "(" + self.filename + ":" + repr( self.lineno ) + ")"
283 # debugging only -- not used in normal operations
286 print "{{{content start---"
287 for l in self.content:
289 print "---content end}}}"
294 fmt = repr( self.format.id ) + " "
296 for line in self.lines:
300 ################################################################
302 ## SOURCE PROCESSOR CLASS
304 ## The `SourceProcessor' is in charge of reading a C source file and
305 ## decomposing it into a series of different `SourceBlock' objects.
307 ## A SourceBlock object consists of the following data.
309 ## - A documentation comment block using one of the layouts above. Its
310 ## exact format will be discussed later.
312 ## - Normal source lines, including comments.
315 class SourceProcessor:
317 def __init__( self ):
318 """Initialize a source processor."""
325 """Reset a block processor and clean up all its blocks."""
329 def parse_file( self, filename ):
330 """Parse a C source file and add its blocks to the processor's
334 self.filename = filename
341 for line in fileinput.input( filename ):
342 # strip trailing newlines, important on Windows machines!
343 if line[-1] == '\012':
346 if self.format == None:
347 self.process_normal_line( line )
349 if self.format.end.match( line ):
350 # A normal block end. Add it to `lines' and create a
352 self.lines.append( line )
353 self.add_block_lines()
354 elif self.format.column.match( line ):
355 # A normal column line. Add it to `lines'.
356 self.lines.append( line )
358 # An unexpected block end. Create a new block, but
359 # don't process the line.
360 self.add_block_lines()
362 # we need to process the line again
363 self.process_normal_line( line )
365 # record the last lines
366 self.add_block_lines()
368 def process_normal_line( self, line ):
369 """Process a normal line and check whether it is the start of a new
371 for f in re_source_block_formats:
372 if f.start.match( line ):
373 self.add_block_lines()
375 self.lineno = fileinput.filelineno()
377 self.lines.append( line )
379 def add_block_lines( self ):
380 """Add the current accumulated lines and create a new block."""
382 block = SourceBlock( self,
387 self.blocks.append( block )
391 # debugging only, not used in normal operations
393 """Print all blocks in a processor."""
394 for b in self.blocks: