4 # Convert source code comments to multi-line blocks (library file).
6 # Copyright 2002-2018 by
9 # This file is part of the FreeType project, and may only be used,
10 # modified, and distributed under the terms of the FreeType project
11 # license, LICENSE.TXT. By continuing to use, modify, or distribute
12 # this file you indicate that you have read the license and
13 # understand and accept it fully.
16 # This library file contains definitions of classes needed to decompose C
17 # source code files into a series of multi-line `blocks'. There are two
20 # - Normal blocks, which contain source code or ordinary comments.
22 # - Documentation blocks, which have restricted formatting, and whose text
23 # always starts with a documentation markup tag like `<Function>',
26 # The routines to process the content of documentation blocks are contained
27 # in file `content.py'; the classes and methods found here only deal with
28 # text parsing and basic documentation block extraction.
32 import fileinput, re, sys, os, string
35 ################################################################
37 ## SOURCE BLOCK FORMAT CLASS
39 ## A simple class containing compiled regular expressions to detect
40 ## potential documentation format block comments within C source code.
42 ## The `column' pattern must contain a group to `unbox' the content of
43 ## documentation comment blocks.
45 ## Later on, paragraphs are converted to long lines, which simplifies the
46 ## regular expressions that act upon the text.
48 class SourceBlockFormat:
50 def __init__( self, id, start, column, end ):
51 """Create a block pattern, used to recognize special documentation
54 self.start = re.compile( start, re.VERBOSE )
55 self.column = re.compile( column, re.VERBOSE )
56 self.end = re.compile( end, re.VERBOSE )
60 # Format 1 documentation comment blocks.
62 # /************************************/ (at least 2 asterisks)
66 # /************************************/ (at least 2 asterisks)
69 \s* # any number of whitespace
70 /\*{2,}/ # followed by '/' and at least two asterisks then '/'
71 \s*$ # probably followed by whitespace
75 \s* # any number of whitespace
76 /\*{1} # followed by '/' and precisely one asterisk
77 ([^*].*) # followed by anything (group 1)
78 \*{1}/ # followed by one asterisk and a '/'
79 \s*$ # probably followed by whitespace
82 re_source_block_format1 = SourceBlockFormat( 1, start, column, start )
86 # Format 2 documentation comment blocks.
88 # /************************************ (at least 2 asterisks)
92 # */ (1 or more asterisks)
95 \s* # any number of whitespace
96 /\*{2,} # followed by '/' and at least two asterisks
97 \s*$ # probably followed by whitespace
101 \s* # any number of whitespace
102 \*{1}(?![*/]) # followed by precisely one asterisk not followed by `/'
103 (.*) # then anything (group1)
107 \s* # any number of whitespace
108 \*+/ # followed by at least one asterisk, then '/'
111 re_source_block_format2 = SourceBlockFormat( 2, start, column, end )
115 # The list of supported documentation block formats. We could add new ones
118 re_source_block_formats = [re_source_block_format1, re_source_block_format2]
122 # The following regular expressions correspond to markup tags within the
123 # documentation comment blocks. They are equivalent despite their different
126 # A markup tag consists of letters or character `-', to be found in group 1.
128 # Notice that a markup tag _must_ begin a new paragraph.
130 re_markup_tag1 = re.compile( r'''\s*<((?:\w|-)*)>''' ) # <xxxx> format
131 re_markup_tag2 = re.compile( r'''\s*@((?:\w|-)*):''' ) # @xxxx: format
134 # The list of supported markup tags. We could add new ones quite easily.
136 re_markup_tags = [re_markup_tag1, re_markup_tag2]
140 # A regular expression to detect a cross reference, after markup tags have
143 # Two syntax forms are supported:
148 # where both `<name>' and `<id>' consist of alphanumeric characters, `_',
149 # and `-'. Use `<id>' if there are multiple, valid `<name>' entries.
153 re_crossref = re.compile( r"""
161 # Two regular expressions to detect italic and bold markup, respectively.
162 # Group 1 is the markup, group 2 the rest of the line.
164 # Note that the markup is limited to words consisting of letters, digits,
165 # the characters `_' and `-', or an apostrophe (but not as the first
168 re_italic = re.compile( r"_((?:\w|-)(?:\w|'|-)*)_(.*)" ) # _italic_
169 re_bold = re.compile( r"\*((?:\w|-)(?:\w|'|-)*)\*(.*)" ) # *bold*
172 # This regular expression code to identify a URL has been taken from
174 # https://mail.python.org/pipermail/tutor/2002-September/017228.html
176 # (with slight modifications).
178 urls = r'(?:https?|telnet|gopher|file|wais|ftp)'
180 gunk = r'/#~:.?+=&%@!\-'
182 any = "%(ltrs)s%(gunk)s%(punc)s" % { 'ltrs' : ltrs,
187 \b # start at word boundary
188 %(urls)s : # need resource and a colon
189 [%(any)s] +? # followed by one or more of any valid
190 # character, but be conservative and
191 # take only what you need to...
192 (?= # [look-ahead non-consumptive assertion]
193 [%(punc)s]* # either 0 or more punctuation
194 (?: # [non-grouping parentheses]
195 [^%(any)s] | $ # followed by a non-url char
196 # or end of the string
200 """ % {'urls' : urls,
204 re_url = re.compile( url, re.VERBOSE | re.MULTILINE )
207 # A regular expression that stops collection of comments for the current
210 re_source_sep = re.compile( r'\s*/\*\s*\*/' ) # /* */
213 # A regular expression to find possible C identifiers while outputting
214 # source code verbatim, covering things like `*foo' or `(bar'. Group 1 is
215 # the prefix, group 2 the identifier -- since we scan lines from left to
216 # right, sequentially splitting the source code into prefix and identifier
217 # is fully sufficient for our purposes.
219 re_source_crossref = re.compile( r'(\W*)(\w*)' )
222 # A regular expression that matches a list of reserved C source keywords.
224 re_source_keywords = re.compile( '''\\b ( typedef |
243 \#endif ) \\b''', re.VERBOSE )
246 ################################################################
248 ## SOURCE BLOCK CLASS
250 ## There are two important fields in a `SourceBlock' object.
253 ## A list of text lines for the corresponding block.
256 ## For documentation comment blocks only, this is the block content
257 ## that has been `unboxed' from its decoration. This is `None' for all
258 ## other blocks (i.e., sources or ordinary comments with no starting
263 def __init__( self, processor, filename, lineno, lines ):
264 self.processor = processor
265 self.filename = filename
267 self.lines = lines[:]
268 self.format = processor.format
271 if self.format == None:
276 # extract comment lines
279 for line0 in self.lines:
280 m = self.format.column.match( line0 )
282 lines.append( m.group( 1 ) )
284 # now, look for a markup tag
286 l = string.strip( l )
288 for tag in re_markup_tags:
def location( self ):
    """Return the block's source position as the string `(filename:lineno)'."""
    # Only the line number goes through `repr'; the file name is
    # concatenated unchanged.
    position = self.filename + ":" + repr( self.lineno )
    return "(" + position + ")"
296 # debugging only -- not used in normal operations
299 print "{{{content start---"
300 for l in self.content:
302 print "---content end}}}"
307 fmt = repr( self.format.id ) + " "
309 for line in self.lines:
313 ################################################################
315 ## SOURCE PROCESSOR CLASS
317 ## The `SourceProcessor' is in charge of reading a C source file and
318 ## decomposing it into a series of different `SourceBlock' objects.
320 ## A SourceBlock object consists of the following data.
322 ## - A documentation comment block using one of the layouts above. Its
323 ## exact format will be discussed later.
325 ## - Normal source lines, including comments.
328 class SourceProcessor:
330 def __init__( self ):
331 """Initialize a source processor."""
338 """Reset a block processor and clean up all its blocks."""
342 def parse_file( self, filename ):
343 """Parse a C source file and add its blocks to the processor's
347 self.filename = filename
354 for line in fileinput.input( filename ):
355 # strip trailing newlines, important on Windows machines!
356 if line[-1] == '\012':
359 if self.format == None:
360 self.process_normal_line( line )
362 if self.format.end.match( line ):
363 # A normal block end. Add it to `lines' and create a
365 self.lines.append( line )
366 self.add_block_lines()
367 elif self.format.column.match( line ):
368 # A normal column line. Add it to `lines'.
369 self.lines.append( line )
371 # An unexpected block end. Create a new block, but
372 # don't process the line.
373 self.add_block_lines()
375 # we need to process the line again
376 self.process_normal_line( line )
378 # record the last lines
379 self.add_block_lines()
381 def process_normal_line( self, line ):
382 """Process a normal line and check whether it is the start of a new
384 for f in re_source_block_formats:
385 if f.start.match( line ):
386 self.add_block_lines()
388 self.lineno = fileinput.filelineno()
390 self.lines.append( line )
392 def add_block_lines( self ):
393 """Add the current accumulated lines and create a new block."""
395 block = SourceBlock( self,
400 self.blocks.append( block )
404 # debugging only, not used in normal operations
406 """Print all blocks in a processor."""
407 for b in self.blocks: