# Copyright 2008, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""pump v0.2.0 - Pretty Useful for Meta Programming.

A tool for preprocessor meta programming.  Useful for generating
repetitive boilerplate code.  Especially useful for writing C++
classes, functions, macros, and templates that need to work with
various number of arguments.

USAGE:
       pump.py SOURCE_FILE

EXAMPLES:
       pump.py foo.cc.pump
         Converts foo.cc.pump to foo.cc.

GRAMMAR:
       CODE ::= ATOMIC_CODE*
       ATOMIC_CODE ::= $var ID = EXPRESSION
           | $var ID = [[ CODE ]]
           | $range ID EXPRESSION..EXPRESSION
           | $for ID SEPARATOR [[ CODE ]]
           | $($)
           | $ID
           | $(EXPRESSION)
           | $if EXPRESSION [[ CODE ]] ELSE_BRANCH
           | [[ CODE ]]
           | RAW_CODE
       SEPARATOR ::= RAW_CODE | EMPTY
       ELSE_BRANCH ::= $else [[ CODE ]]
           | $elif EXPRESSION [[ CODE ]] ELSE_BRANCH
           | EMPTY
       EXPRESSION has Python syntax.
"""
65 __author__ = 'wan@google.com (Zhanyong Wan)'
# Lexer table: (compiled pattern, token type) pairs searched in every line.
# FindFirstInLine keeps the leftmost match and, when two patterns match at
# the same column, the one listed first -- so the keyword patterns must stay
# ahead of the generic '$id' and bare '$' entries.
TOKEN_TABLE = [
    (re.compile(r'\$var\s+'), '$var'),
    (re.compile(r'\$elif\s+'), '$elif'),
    (re.compile(r'\$else\s+'), '$else'),
    (re.compile(r'\$for\s+'), '$for'),
    (re.compile(r'\$if\s+'), '$if'),
    (re.compile(r'\$range\s+'), '$range'),
    (re.compile(r'\$[_A-Za-z]\w*'), '$id'),
    (re.compile(r'\$\(\$\)'), '$($)'),
    (re.compile(r'\$'), '$'),
    # The bracket patterns swallow one directly following newline.
    (re.compile(r'\[\[\n?'), '[['),
    (re.compile(r'\]\]\n?'), ']]'),
]
class Cursor:
    """Represents a position (line and column) in a text file.

    Lines are stored zero-based (they are printed one-based); the default
    position (-1, -1) is the value Eof() returns.
    """

    def __init__(self, line=-1, column=-1):
        self.line = line
        self.column = column

    def __eq__(self, rhs):
        return self.line == rhs.line and self.column == rhs.column

    def __ne__(self, rhs):
        return not self == rhs

    def __lt__(self, rhs):
        # Order by line first, then by column within the same line.
        return self.line < rhs.line or (
            self.line == rhs.line and self.column < rhs.column)

    def __le__(self, rhs):
        return self < rhs or self == rhs

    def __gt__(self, rhs):
        return rhs < self

    def __ge__(self, rhs):
        return rhs <= self

    def __str__(self):
        if self == Eof():
            return 'EOF'
        return '%s(%s)' % (self.line + 1, self.column)

    def __add__(self, offset):
        """Returns a cursor moved `offset` columns right on the same line."""
        return Cursor(self.line, self.column + offset)

    def __sub__(self, offset):
        """Returns a cursor moved `offset` columns left on the same line."""
        return Cursor(self.line, self.column - offset)

    def Clone(self):
        """Returns a copy of self."""

        return Cursor(self.line, self.column)
# Special cursor to indicate the end-of-file.
def Eof():
    """Returns the special cursor to denote the end-of-file."""
    return Cursor(-1, -1)
class Token:
    """Represents a token in a Pump source file.

    Attributes are `start` and `end` (cursors delimiting the token),
    `value` (the token text) and `token_type` (a string tag).
    """

    def __init__(self, start=None, end=None, value=None, token_type=None):
        # A missing boundary defaults to the end-of-file cursor.
        self.start = Eof() if start is None else start
        self.end = Eof() if end is None else end
        self.value = value
        self.token_type = token_type

    def __str__(self):
        return 'Token @%s: \'%s\' type=%s' % (
            self.start, self.value, self.token_type)

    def Clone(self):
        """Returns a copy of self."""

        return Token(self.start.Clone(), self.end.Clone(), self.value,
                     self.token_type)
def StartsWith(lines, pos, string):
    """Returns True iff the text at position `pos` in `lines` begins with
    `string`.

    Only the remainder of the line `pos` points into is examined.
    """
    rest_of_line = lines[pos.line][pos.column:]
    return rest_of_line.startswith(string)
def FindFirstInLine(line, token_table):
    """Finds the leftmost match of any pattern in `token_table` within `line`.

    Args:
      line: the string to search.
      token_table: iterable of (compiled regex, token type) pairs.

    Returns:
      A (start column, match length, token type) tuple for the leftmost
      match, or None if no pattern matches.  When several patterns match at
      the same column, the one listed first in the table wins.
    """
    best = None
    for (regex, token_type) in token_table:
        m = regex.search(line)
        # Strict '<' keeps the earlier table entry on a tie.
        if m and (best is None or m.start() < best[0]):
            best = (m.start(), m.end() - m.start(), token_type)
    return best
def FindFirst(lines, token_table, cursor):
    """Finds the first occurrence of any token pattern at or after `cursor`.

    Args:
      lines: list of source lines.
      token_table: iterable of (compiled regex, token type) pairs.
      cursor: position to start searching from.

    Returns:
      A Token for the first match, or None if nothing matches in the rest
      of `lines`.
    """

    start = cursor.Clone()
    for line_number, line in enumerate(lines[start.line:], start.line):
        on_start_line = line_number == start.line
        if on_start_line:
            # Only the part of the first line at or after the cursor counts.
            line = line[start.column:]
        m = FindFirstInLine(line, token_table)
        if m:
            # We found a regex in line.
            (start_column, length, token_type) = m
            if on_start_line:
                # Translate back to a column in the full line.
                start_column += start.column
            found_start = Cursor(line_number, start_column)
            found_end = found_start + length
            return MakeToken(lines, found_start, found_end, token_type)

    # We failed to find any pattern in lines.
    return None
def SubString(lines, start, end):
    """Returns the text of `lines` between cursors `start` and `end`.

    `start` is inclusive and `end` is exclusive.  An `end` equal to Eof()
    means "up to the end of the last line".  An empty string is returned
    when `start` is not before `end`.
    """

    if end == Eof():
        end = Cursor(len(lines) - 1, len(lines[-1]))

    if start >= end:
        return ''

    if start.line == end.line:
        return lines[start.line][start.column:end.column]

    # Tail of the first line, all lines strictly in between, head of the last.
    result_lines = ([lines[start.line][start.column:]] +
                    lines[start.line + 1:end.line] +
                    [lines[end.line][:end.column]])
    return ''.join(result_lines)
def StripMetaComments(str):
    """Strip meta comments ('$$' to end of line) from the given string.

    Two passes are made.  The first drops a meta-comment-only line together
    with its trailing newline; its pattern is anchored with '^' and compiled
    without MULTILINE, so it applies only at the very start of the text.
    The second removes every remaining meta comment along with the
    whitespace before it.
    """
    without_leading_comment_line = re.sub(r'^\s*\$\$.*\n', '', str)
    return re.sub(r'\s*\$\$.*', '', without_leading_comment_line)
def MakeToken(lines, start, end, token_type):
    """Creates a new Token spanning `start`..`end`, with the covered text
    of `lines` as its value."""
    text = SubString(lines, start, end)
    return Token(start, end, text, token_type)
def ParseToken(lines, pos, regex, token_type):
    """Parses a token that must start exactly at `pos`.

    Args:
      lines: list of source lines.
      pos: cursor where the token is expected to begin.
      regex: compiled pattern describing the token.
      token_type: type tag for the resulting token (also used in the error).

    Returns:
      The Token matched at `pos`.

    Raises:
      SystemExit: with status 1, after printing an error, when `regex` does
        not match at `pos`.
    """
    line = lines[pos.line][pos.column:]
    m = regex.match(line)
    if m:
        return MakeToken(lines, pos, pos + m.end(), token_type)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('ERROR: %s expected at %s.' % (token_type, pos))
    raise SystemExit(1)
# Patterns used by the tokenizer once a directive has been recognised.

# A meta-variable identifier.
ID_REGEX = re.compile(r'[_A-Za-z]\w*')
# The '=' of a $var definition.
EQ_REGEX = re.compile(r'=')
# Everything up to the end of the line or the next '$$' meta comment.
REST_OF_LINE_REGEX = re.compile(r'.*?(?=$|\$\$)')
# Possibly empty run of whitespace.
OPTIONAL_WHITE_SPACES_REGEX = re.compile(r'\s*')
# Exactly one whitespace character.
WHITE_SPACE_REGEX = re.compile(r'\s')
# The '..' separating the two bounds of a $range.
DOT_DOT_REGEX = re.compile(r'\.\.')
def Skip(lines, pos, regex):
    """Skips text matching `regex` if it starts exactly at `pos`.

    Args:
      lines: list of source lines.
      pos: cursor to start from.
      regex: pattern string or compiled pattern.

    Returns:
      The cursor just past the match, or `pos` itself when `regex` does not
      match at `pos`.
    """
    line = lines[pos.line][pos.column:]
    m = re.match(regex, line)
    if m:
        return pos + m.end()
    return pos
def SkipUntil(lines, pos, regex, token_type):
    """Advances to the first match of `regex` on the current line.

    Args:
      lines: list of source lines.
      pos: cursor to start searching from.
      regex: pattern string or compiled pattern.
      token_type: description of the expected token, used in the error.

    Returns:
      The cursor at the start of the first match at or after `pos`.

    Raises:
      SystemExit: with status 1, after printing an error, when the rest of
        the line contains no match.
    """
    line = lines[pos.line][pos.column:]
    m = re.search(regex, line)
    if m:
        return pos + m.start()

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('ERROR: %s expected on line %s after column %s.' %
          (token_type, pos.line + 1, pos.column))
    raise SystemExit(1)
def ParseExpTokenInParens(lines, pos):
    """Parses a parenthesised expression starting at `pos`.

    Nested parentheses are balanced.  Only the line `pos` is on is scanned,
    because Skip and SkipUntil work on a single line.

    Returns:
      An 'exp' Token covering everything from `pos` up to and including the
      closing parenthesis.
    """
    def ParseInParens(pos):
        pos = Skip(lines, pos, OPTIONAL_WHITE_SPACES_REGEX)
        pos = Skip(lines, pos, r'\(')
        pos = Parse(pos)
        pos = Skip(lines, pos, r'\)')
        return pos

    def Parse(pos):
        # Stop at the next parenthesis of either kind.
        pos = SkipUntil(lines, pos, r'\(|\)', ')')
        if SubString(lines, pos, pos + 1) == '(':
            # Nested group: consume it and its ')', then keep scanning.
            pos = Parse(pos + 1)
            pos = Skip(lines, pos, r'\)')
            return Parse(pos)
        return pos

    start = pos.Clone()
    pos = ParseInParens(pos)
    return MakeToken(lines, start, pos, 'exp')
def RStripNewLineFromToken(token):
    """Returns `token` without a single trailing newline in its value.

    A new Token with the same start, end and type is returned when the value
    ends with a newline; otherwise `token` itself is returned unchanged.
    """
    if token.value.endswith('\n'):
        return Token(token.start, token.end, token.value[:-1],
                     token.token_type)
    return token
def TokenizeLines(lines, pos):
    """A generator that yields the tokens found in `lines` from `pos` on.

    Raw text between directives is yielded as 'code' tokens.  Before a
    directive that occupies its own line ($var, $for, $range, $if, $elif,
    $else, ]]) the preceding code token loses its trailing newline.
    """
    while True:
        found = FindFirst(lines, TOKEN_TABLE, pos)
        if not found:
            # No more directives: the rest of the input is raw code.
            yield MakeToken(lines, pos, Eof(), 'code')
            return

        if found.start == pos:
            prev_token = None
            prev_token_rstripped = None
        else:
            prev_token = MakeToken(lines, pos, found.start, 'code')
            prev_token_rstripped = RStripNewLineFromToken(prev_token)

        if found.token_type == '$var':
            if prev_token_rstripped:
                yield prev_token_rstripped
            yield found
            id_token = ParseToken(lines, found.end, ID_REGEX, 'id')
            yield id_token
            pos = Skip(lines, id_token.end, OPTIONAL_WHITE_SPACES_REGEX)

            eq_token = ParseToken(lines, pos, EQ_REGEX, '=')
            yield eq_token
            pos = Skip(lines, eq_token.end, r'\s*')

            # '$var x = [[ ... ]]' is left for the next iteration to
            # tokenize; otherwise the rest of the line is the expression.
            if SubString(lines, pos, pos + 2) != '[[':
                exp_token = ParseToken(lines, pos, REST_OF_LINE_REGEX, 'exp')
                yield exp_token
                pos = Cursor(exp_token.end.line + 1, 0)
        elif found.token_type == '$for':
            if prev_token_rstripped:
                yield prev_token_rstripped
            yield found
            id_token = ParseToken(lines, found.end, ID_REGEX, 'id')
            yield id_token
            pos = Skip(lines, id_token.end, WHITE_SPACE_REGEX)
        elif found.token_type == '$range':
            if prev_token_rstripped:
                yield prev_token_rstripped
            yield found
            id_token = ParseToken(lines, found.end, ID_REGEX, 'id')
            yield id_token
            pos = Skip(lines, id_token.end, OPTIONAL_WHITE_SPACES_REGEX)

            dots_pos = SkipUntil(lines, pos, DOT_DOT_REGEX, '..')
            yield MakeToken(lines, pos, dots_pos, 'exp')
            yield MakeToken(lines, dots_pos, dots_pos + 2, '..')
            pos = dots_pos + 2
            # The upper bound runs to the end of the line.
            new_pos = Cursor(pos.line + 1, 0)
            yield MakeToken(lines, pos, new_pos, 'exp')
            pos = new_pos
        elif found.token_type == '$':
            if prev_token:
                yield prev_token
            yield found
            exp_token = ParseExpTokenInParens(lines, found.end)
            yield exp_token
            pos = exp_token.end
        elif (found.token_type == ']]' or found.token_type == '$if' or
              found.token_type == '$elif' or found.token_type == '$else'):
            if prev_token_rstripped:
                yield prev_token_rstripped
            yield found
            pos = found.end
        else:
            if prev_token:
                yield prev_token
            yield found
            pos = found.end
def Tokenize(s):
    """A generator that yields the tokens in the given string.

    Nothing is yielded for an empty string.
    """
    if s != '':
        # Keep line endings so token values reproduce the source text.
        lines = s.splitlines(True)
        for token in TokenizeLines(lines, Cursor(0, 0)):
            yield token
class CodeNode:
    """AST node holding a sequence of atomic code nodes."""

    def __init__(self, atomic_code_list=None):
        self.atomic_code = atomic_code_list
class VarNode:
    """AST node for '$var ID = ...': the identifier token and the node that
    produces its value."""

    def __init__(self, identifier=None, atomic_code=None):
        self.identifier = identifier
        self.atomic_code = atomic_code
class RangeNode:
    """AST node for '$range ID exp1..exp2': the identifier token and the
    lower and upper bound expressions."""

    def __init__(self, identifier=None, exp1=None, exp2=None):
        self.identifier = identifier
        self.exp1 = exp1
        self.exp2 = exp2
class ForNode:
    """AST node for '$for ID SEPARATOR [[ CODE ]]': the identifier token,
    the optional separator token and the loop body."""

    def __init__(self, identifier=None, sep=None, code=None):
        self.identifier = identifier
        self.sep = sep
        self.code = code
416 def __init__(self, else_branch=None):
417 self.else_branch = else_branch
class IfNode:
    """AST node for '$if': the condition expression, the branch to run when
    it is true and the optional else branch."""

    def __init__(self, exp=None, then_branch=None, else_branch=None):
        self.exp = exp
        self.then_branch = then_branch
        self.else_branch = else_branch
class RawCodeNode:
    """AST node for literal text copied to the output; `raw_code` is the
    'code' token carrying that text."""

    def __init__(self, token=None):
        self.raw_code = token
class LiteralDollarNode:
    """AST node for the '$($)' escape; keeps the token it was parsed from."""

    def __init__(self, token):
        self.token = token
class ExpNode:
    """AST node for a meta expression.

    `token` is the source token (used for error positions) and `python_exp`
    is the Python source text that gets evaluated.
    """

    def __init__(self, token, python_exp):
        self.token = token
        self.python_exp = python_exp
def PopFront(a_list):
    """Removes and returns the first element of `a_list` (in place).

    Raises:
      IndexError: if `a_list` is empty.
    """
    return a_list.pop(0)
def PushFront(a_list, elem):
    """Inserts `elem` at the front of `a_list` (in place)."""
    a_list.insert(0, elem)
def PopToken(a_list, token_type=None):
    """Removes and returns the first token of `a_list`.

    Args:
      a_list: list of tokens, modified in place.
      token_type: if not None, the type the popped token must have.

    Raises:
      SystemExit: with status 1, after printing an error, when the popped
        token is not of the requested type.
    """
    token = PopFront(a_list)
    if token_type is not None and token.token_type != token_type:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('ERROR: %s expected at %s' % (token_type, token.start))
        print('ERROR: %s found instead' % (token,))
        raise SystemExit(1)

    return token
def PeekToken(a_list):
    """Returns the first token of `a_list` without removing it, or None if
    the list is empty."""
    if not a_list:
        return None

    return a_list[0]
def ParseExpNode(token):
    """Builds an ExpNode from an expression token.

    Every identifier in the token text is rewritten into a
    self.GetValue("<name>") call, so the resulting Python source looks meta
    variables up when it is evaluated.
    """
    rewritten = re.sub(r'([_A-Za-z]\w*)', r'self.GetValue("\1")', token.value)
    return ExpNode(token, rewritten)
def ParseElseNode(tokens):
    """Parses the optional else branch that follows an '$if' body.

    Returns:
      The CodeNode of an '$else' block; a CodeNode wrapping a nested IfNode
      for an '$elif'; or None when no else branch follows.  Whitespace-only
      code tokens before the branch are consumed.
    """
    def Pop(token_type=None):
        return PopToken(tokens, token_type)

    next_token = PeekToken(tokens)
    if not next_token:
        return None
    if next_token.token_type == '$else':
        Pop('$else')
        Pop('[[')
        code_node = ParseCodeNode(tokens)
        Pop(']]')
        return code_node
    elif next_token.token_type == '$elif':
        Pop('$elif')
        exp = Pop('code')
        Pop('[[')
        code_node = ParseCodeNode(tokens)
        Pop(']]')
        inner_else_node = ParseElseNode(tokens)
        return CodeNode([IfNode(ParseExpNode(exp), code_node, inner_else_node)])
    elif not next_token.value.strip():
        # Blank filler between ']]' and '$else'/'$elif': drop it and retry.
        Pop('code')
        return ParseElseNode(tokens)
    else:
        return None
def ParseAtomicCodeNode(tokens):
    """Parses one ATOMIC_CODE construct from the front of `tokens`.

    Returns:
      The AST node for the construct, or None when the next token cannot
      start one (that token is pushed back onto `tokens`).
    """
    def Pop(token_type=None):
        return PopToken(tokens, token_type)

    head = PopFront(tokens)
    t = head.token_type
    if t == 'code':
        return RawCodeNode(head)
    elif t == '$var':
        id_token = Pop('id')
        Pop('=')
        next_token = PeekToken(tokens)
        if next_token.token_type == 'exp':
            exp_token = Pop()
            return VarNode(id_token, ParseExpNode(exp_token))
        Pop('[[')
        code_node = ParseCodeNode(tokens)
        Pop(']]')
        return VarNode(id_token, code_node)
    elif t == '$for':
        id_token = Pop('id')
        next_token = PeekToken(tokens)
        if next_token.token_type == 'code':
            # Raw text between the identifier and '[[' is the separator.
            sep_token = next_token
            Pop('code')
        else:
            sep_token = None
        Pop('[[')
        code_node = ParseCodeNode(tokens)
        Pop(']]')
        return ForNode(id_token, sep_token, code_node)
    elif t == '$if':
        exp_token = Pop('code')
        Pop('[[')
        code_node = ParseCodeNode(tokens)
        Pop(']]')
        else_node = ParseElseNode(tokens)
        return IfNode(ParseExpNode(exp_token), code_node, else_node)
    elif t == '$range':
        id_token = Pop('id')
        exp1_token = Pop('exp')
        Pop('..')
        exp2_token = Pop('exp')
        return RangeNode(id_token, ParseExpNode(exp1_token),
                         ParseExpNode(exp2_token))
    elif t == '$id':
        # Drop the leading '$' so only the identifier is evaluated.
        return ParseExpNode(Token(head.start + 1, head.end, head.value[1:], 'id'))
    elif t == '$($)':
        return LiteralDollarNode(head)
    elif t == '$':
        exp_token = Pop('exp')
        return ParseExpNode(exp_token)
    elif t == '[[':
        code_node = ParseCodeNode(tokens)
        Pop(']]')
        return code_node
    else:
        PushFront(tokens, head)
        return None
def ParseCodeNode(tokens):
    """Parses consecutive atomic code constructs into one CodeNode.

    Parsing stops when `tokens` is exhausted or when the next token cannot
    start an atomic construct.
    """
    atomic_code_list = []
    while tokens:
        atomic_code_node = ParseAtomicCodeNode(tokens)
        if not atomic_code_node:
            break
        atomic_code_list.append(atomic_code_node)
    return CodeNode(atomic_code_list)
def ParseToAST(pump_src_text):
    """Convert the given Pump source text into an AST.

    Returns:
      The CodeNode at the root of the tree.
    """
    tokens = list(Tokenize(pump_src_text))
    return ParseCodeNode(tokens)
class Env:
    """Evaluation environment: stacks of meta variables and ranges.

    The most recently pushed binding sits at the front of each list, so it
    shadows older bindings of the same name.
    """

    def __init__(self):
        self.variables = []
        self.ranges = []

    def Clone(self):
        """Returns a copy whose binding lists can change independently."""
        clone = Env()
        clone.variables = self.variables[:]
        clone.ranges = self.ranges[:]
        return clone

    def PushVariable(self, var, value):
        """Binds `var` to `value`, shadowing any earlier binding."""
        # If value looks like an int, store it as an int.
        try:
            int_value = int(value)
        except (TypeError, ValueError):
            pass
        else:
            # Only convert when the text round-trips exactly ('03' stays a
            # string).
            if ('%s' % int_value) == value:
                value = int_value
        self.variables[:0] = [(var, value)]

    def PopVariable(self):
        """Drops the most recently pushed variable binding."""
        self.variables[:1] = []

    def PushRange(self, var, lower, upper):
        """Binds `var` to the range lower..upper."""
        self.ranges[:0] = [(var, lower, upper)]

    def PopRange(self):
        """Drops the most recently pushed range binding."""
        self.ranges[:1] = []

    def GetValue(self, identifier):
        """Returns the innermost value bound to `identifier`.

        Raises:
          SystemExit: with status 1, after printing an error, if the meta
            variable is undefined.
        """
        for (var, value) in self.variables:
            if identifier == var:
                return value

        print('ERROR: meta variable %s is undefined.' % (identifier,))
        raise SystemExit(1)

    def EvalExp(self, exp):
        """Evaluates the Python source in `exp.python_exp`.

        Raises:
          SystemExit: with status 1, after printing an error, if evaluation
            raises.
        """
        try:
            # NOTE(review): eval() runs arbitrary Python taken from the
            # template; only process trusted .pump sources.  The expression
            # text refers to the local name `self`.
            result = eval(exp.python_exp)
        except Exception as e:
            print('ERROR: caught exception %s: %s' % (e.__class__.__name__, e))
            print('ERROR: failed to evaluate meta expression %s at %s' %
                  (exp.python_exp, exp.token.start))
            raise SystemExit(1)
        return result

    def GetRange(self, identifier):
        """Returns the innermost (lower, upper) bound to `identifier`.

        Raises:
          SystemExit: with status 1, after printing an error, if the range
            is undefined.
        """
        for (var, lower, upper) in self.ranges:
            if identifier == var:
                return (lower, upper)

        print('ERROR: range %s is undefined.' % (identifier,))
        raise SystemExit(1)
class Output:
    """Accumulates generated text in the `string` attribute."""

    def __init__(self):
        self.string = ''

    def GetLastLine(self):
        """Returns the text after the last newline, or '' if the buffer
        contains no newline."""
        index = self.string.rfind('\n')
        if index < 0:
            return ''

        return self.string[index + 1:]

    def Append(self, s):
        """Appends `s` to the accumulated text."""
        self.string += s
def RunAtomicCode(env, node, output):
    """Executes one AST node, appending generated text to `output`.

    Args:
      env: the Env holding the current variable and range bindings; $var and
        $range nodes add bindings to it.
      node: the AST node to execute.
      output: the Output receiving generated text.

    Raises:
      SystemExit: with status 1 if `node` is of an unknown type.
    """
    if isinstance(node, VarNode):
        identifier = node.identifier.value.strip()
        # Run the defining code into a scratch buffer to obtain the value.
        result = Output()
        RunAtomicCode(env.Clone(), node.atomic_code, result)
        value = result.string
        env.PushVariable(identifier, value)
    elif isinstance(node, RangeNode):
        identifier = node.identifier.value.strip()
        lower = int(env.EvalExp(node.exp1))
        upper = int(env.EvalExp(node.exp2))
        env.PushRange(identifier, lower, upper)
    elif isinstance(node, ForNode):
        identifier = node.identifier.value.strip()
        if node.sep is None:
            sep = ''
        else:
            sep = node.sep.value
        (lower, upper) = env.GetRange(identifier)
        # The range is inclusive on both ends.
        for i in range(lower, upper + 1):
            new_env = env.Clone()
            new_env.PushVariable(identifier, i)
            RunCode(new_env, node.code, output)
            if i != upper:
                output.Append(sep)
    elif isinstance(node, RawCodeNode):
        output.Append(node.raw_code.value)
    elif isinstance(node, IfNode):
        cond = env.EvalExp(node.exp)
        if cond:
            RunCode(env.Clone(), node.then_branch, output)
        elif node.else_branch is not None:
            RunCode(env.Clone(), node.else_branch, output)
    elif isinstance(node, ExpNode):
        value = env.EvalExp(node)
        output.Append('%s' % (value,))
    elif isinstance(node, LiteralDollarNode):
        output.Append('$')
    elif isinstance(node, CodeNode):
        # Nested blocks run in their own scope.
        RunCode(env.Clone(), node, output)
    else:
        print('BAD')
        print(node)
        raise SystemExit(1)
def RunCode(env, code_node, output):
    """Executes each atomic node of `code_node` in order, sharing `env` and
    `output` across all of them."""
    for child in code_node.atomic_code:
        RunAtomicCode(env, child, output)
def IsSingleLineComment(cur_line):
    """Returns True iff `cur_line` contains '//' anywhere."""
    return cur_line.find('//') != -1
def IsInPreprocessorDirective(prev_lines, cur_line):
    """Tells whether `cur_line` belongs to a preprocessor directive.

    Args:
      prev_lines: lines already emitted before `cur_line`.
      cur_line: the line being classified.

    Returns:
      True if `cur_line` starts with '#' after leading whitespace; otherwise
      a truthy value iff the last of `prev_lines` ends with a backslash
      (a falsy `prev_lines` is returned as is).
    """
    if cur_line.lstrip().startswith('#'):
        return True
    return prev_lines and prev_lines[-1].endswith('\\')
def WrapComment(line, output):
    """Wraps a line containing a '//' comment onto lines of limited width.

    Code before the comment, if any, is emitted on its own line first.  The
    comment text is then re-flowed at word boundaries; each emitted comment
    line starts with the chosen indent followed by '// '.

    Args:
      line: the line to wrap; must contain '//'.
      output: list the resulting lines are appended to.
    """
    loc = line.find('//')
    before_comment = line[:loc].rstrip()
    if before_comment == '':
        # Comment-only line: keep the comment at its current column.
        indent = loc
    else:
        output.append(before_comment)
        indent = len(before_comment) - len(before_comment.lstrip())
    prefix = indent * ' ' + '// '
    max_len = 80 - len(prefix)
    comment = line[loc + 2:].strip()
    # Each segment is a word plus the non-word characters following it.
    segs = [seg for seg in re.split(r'(\w+\W*)', comment) if seg != '']
    cur_line = ''
    for seg in segs:
        if len((cur_line + seg).rstrip()) < max_len:
            cur_line += seg
        else:
            if cur_line.strip() != '':
                output.append(prefix + cur_line.rstrip())
            cur_line = seg.lstrip()
    if cur_line.strip() != '':
        output.append(prefix + cur_line.strip())
def WrapCode(line, line_concat, output):
    """Wraps a long line of code, appending the pieces to `output`.

    Breaks are preferred after ',' or ';'; a segment that is still too long
    is broken at a space.  Continuation lines are indented four extra
    columns.  A segment that is too long and contains no space to break at
    is emitted unbroken.

    Args:
      line: the line to wrap.
      line_concat: text appended to every line that is continued (e.g.
        ' \\' inside a preprocessor directive, '' for plain code).
      output: list the resulting lines are appended to.
    """
    indent = len(line) - len(line.lstrip())
    prefix = indent * ' '  # Prefix of the current line
    max_len = 80 - indent - len(line_concat)  # Maximum length of the current line
    new_prefix = prefix + 4 * ' '  # Prefix of a continuation line
    new_max_len = max_len - 4  # Maximum length of a continuation line
    # Prefers to wrap a line after a ',' or ';'.
    segs = [seg for seg in re.split(r'([^,;]+[,;]?)', line.strip()) if seg != '']
    cur_line = ''  # The current line without leading spaces.
    for seg in segs:
        # If the line is still too long, wrap at a space.
        while cur_line == '' and len(seg.strip()) > max_len:
            seg = seg.lstrip()
            split_at = seg.rfind(' ', 0, max_len)
            if split_at < 0:
                # No space to break at.  Without this check the segment
                # never shrinks and the loop never terminates.
                break
            output.append(prefix + seg[:split_at].strip() + line_concat)
            seg = seg[split_at + 1:]
            prefix = new_prefix
            max_len = new_max_len

        if cur_line == '' and len(seg.strip()) > max_len:
            # Unbreakable over-long segment: carry it as the current line
            # so it is flushed as is.
            cur_line = seg.lstrip()
            continue

        if len((cur_line + seg).rstrip()) < max_len:
            cur_line = (cur_line + seg).lstrip()
        else:
            output.append(prefix + cur_line.rstrip() + line_concat)
            prefix = new_prefix
            max_len = new_max_len
            cur_line = seg.lstrip()
    if cur_line.strip() != '':
        output.append(prefix + cur_line.strip())
def WrapPreprocessorDirective(line, output):
    """Wraps a long preprocessor line, ending every continued line with a
    space and a backslash."""
    WrapCode(line, line_concat=' \\', output=output)
def WrapPlainCode(line, output):
    """Wraps a long line of ordinary code; continued lines get no
    continuation marker."""
    WrapCode(line, line_concat='', output=output)
def IsMultiLineIWYUPragma(line):
    """Returns a match object if `line` contains '/* IWYU pragma: ', else
    None."""
    pragma = re.search(r'/\* IWYU pragma: ', line)
    return pragma
def IsHeaderGuardIncludeOrOneLineIWYUPragma(line):
    """Tells whether `line` is one of the lines that must never be wrapped.

    Returns:
      A match object (truthy) if `line` is a header-guard line, an #include
      line or contains a '// IWYU pragma: ' comment; None otherwise.
    """
    header_guard = re.match(r'^#(ifndef|define|endif\s*//)\s*[\w_]+\s*$', line)
    if header_guard:
        return header_guard

    include = re.match(r'^#include\s', line)
    if include:
        return include

    # Don't break IWYU pragmas, either; that causes iwyu.py problems.
    return re.search(r'// IWYU pragma: ', line)
def WrapLongLine(line, output):
    """Appends `line` to `output`, wrapping it if it exceeds 80 columns.

    Trailing whitespace is removed first.  Lines that already fit, header
    guards, #include lines and IWYU pragmas are emitted unchanged; other
    long lines go to the comment, preprocessor or plain-code wrapper.

    Args:
      line: the line to emit.
      output: list of lines emitted so far; results are appended to it.
    """
    line = line.rstrip()
    if len(line) <= 80:
        output.append(line)
    elif IsSingleLineComment(line):
        if IsHeaderGuardIncludeOrOneLineIWYUPragma(line):
            # The style guide made an exception to allow long header guard
            # lines, includes and IWYU pragmas.
            output.append(line)
        else:
            WrapComment(line, output)
    elif IsInPreprocessorDirective(output, line):
        if IsHeaderGuardIncludeOrOneLineIWYUPragma(line):
            # The style guide made an exception to allow long header guard
            # lines, includes and IWYU pragmas.
            output.append(line)
        else:
            WrapPreprocessorDirective(line, output)
    elif IsMultiLineIWYUPragma(line):
        output.append(line)
    else:
        WrapPlainCode(line, output)
def BeautifyCode(string):
    """Re-flows the generated text so that long lines are wrapped.

    Returns:
      The wrapped lines with trailing whitespace removed, joined by newlines
      and terminated by a final newline.
    """
    output = []
    for line in string.splitlines():
        WrapLongLine(line, output)
    output2 = [line.rstrip() for line in output]
    return '\n'.join(output2) + '\n'
def ConvertFromPumpSource(src_text):
    """Return the text generated from the given Pump source text.

    Meta comments are stripped, the source is parsed into an AST, the AST is
    executed in a fresh environment and the result is line-wrapped.
    """
    ast = ParseToAST(StripMetaComments(src_text))
    output = Output()
    RunCode(Env(), ast, output)
    return BeautifyCode(output.string)
837 output_str = ConvertFromPumpSource(file(file_path, 'r').read())
838 if file_path.endswith('.pump'):
839 output_file_path = file_path[:-5]
841 output_file_path = '-'
842 if output_file_path == '-':
845 output_file = file(output_file_path, 'w')
846 output_file.write('// This file was GENERATED by command:\n')
847 output_file.write('// %s %s\n' %
848 (os.path.basename(__file__), os.path.basename(file_path)))
849 output_file.write('// DO NOT EDIT BY HAND!!!\n\n')
850 output_file.write(output_str)
854 if __name__ == '__main__':