84fee5b2410e7fef1cb9f3dde8c6b7c5c0a75eeb
[profile/ivi/python.git] / Lib / lib2to3 / patcomp.py
1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
3
4 """Pattern compiler.
5
6 The grammer is taken from PatternGrammar.txt.
7
8 The compiler compiles a pattern to a pytree.*Pattern instance.
9 """
10
11 __author__ = "Guido van Rossum <guido@python.org>"
12
13 # Python imports
14 import os
15
16 # Fairly local imports
17 from .pgen2 import driver, literals, token, tokenize, parse, grammar
18
19 # Really local imports
20 from . import pytree
21 from . import pygram
22
23 # The pattern grammar file
24 _PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
25                                      "PatternGrammar.txt")
26
27
28 class PatternSyntaxError(Exception):
29     pass
30
31
32 def tokenize_wrapper(input):
33     """Tokenizes a string suppressing significant whitespace."""
34     skip = set((token.NEWLINE, token.INDENT, token.DEDENT))
35     tokens = tokenize.generate_tokens(driver.generate_lines(input).next)
36     for quintuple in tokens:
37         type, value, start, end, line_text = quintuple
38         if type not in skip:
39             yield quintuple
40
41
42 class PatternCompiler(object):
43
44     def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE):
45         """Initializer.
46
47         Takes an optional alternative filename for the pattern grammar.
48         """
49         self.grammar = driver.load_grammar(grammar_file)
50         self.syms = pygram.Symbols(self.grammar)
51         self.pygrammar = pygram.python_grammar
52         self.pysyms = pygram.python_symbols
53         self.driver = driver.Driver(self.grammar, convert=pattern_convert)
54
55     def compile_pattern(self, input, debug=False, with_tree=False):
56         """Compiles a pattern string to a nested pytree.*Pattern object."""
57         tokens = tokenize_wrapper(input)
58         try:
59             root = self.driver.parse_tokens(tokens, debug=debug)
60         except parse.ParseError as e:
61             raise PatternSyntaxError(str(e))
62         if with_tree:
63             return self.compile_node(root), root
64         else:
65             return self.compile_node(root)
66
67     def compile_node(self, node):
68         """Compiles a node, recursively.
69
70         This is one big switch on the node type.
71         """
72         # XXX Optimize certain Wildcard-containing-Wildcard patterns
73         # that can be merged
74         if node.type == self.syms.Matcher:
75             node = node.children[0] # Avoid unneeded recursion
76
77         if node.type == self.syms.Alternatives:
78             # Skip the odd children since they are just '|' tokens
79             alts = [self.compile_node(ch) for ch in node.children[::2]]
80             if len(alts) == 1:
81                 return alts[0]
82             p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
83             return p.optimize()
84
85         if node.type == self.syms.Alternative:
86             units = [self.compile_node(ch) for ch in node.children]
87             if len(units) == 1:
88                 return units[0]
89             p = pytree.WildcardPattern([units], min=1, max=1)
90             return p.optimize()
91
92         if node.type == self.syms.NegatedUnit:
93             pattern = self.compile_basic(node.children[1:])
94             p = pytree.NegatedPattern(pattern)
95             return p.optimize()
96
97         assert node.type == self.syms.Unit
98
99         name = None
100         nodes = node.children
101         if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
102             name = nodes[0].value
103             nodes = nodes[2:]
104         repeat = None
105         if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
106             repeat = nodes[-1]
107             nodes = nodes[:-1]
108
109         # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
110         pattern = self.compile_basic(nodes, repeat)
111
112         if repeat is not None:
113             assert repeat.type == self.syms.Repeater
114             children = repeat.children
115             child = children[0]
116             if child.type == token.STAR:
117                 min = 0
118                 max = pytree.HUGE
119             elif child.type == token.PLUS:
120                 min = 1
121                 max = pytree.HUGE
122             elif child.type == token.LBRACE:
123                 assert children[-1].type == token.RBRACE
124                 assert  len(children) in (3, 5)
125                 min = max = self.get_int(children[1])
126                 if len(children) == 5:
127                     max = self.get_int(children[3])
128             else:
129                 assert False
130             if min != 1 or max != 1:
131                 pattern = pattern.optimize()
132                 pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
133
134         if name is not None:
135             pattern.name = name
136         return pattern.optimize()
137
138     def compile_basic(self, nodes, repeat=None):
139         # Compile STRING | NAME [Details] | (...) | [...]
140         assert len(nodes) >= 1
141         node = nodes[0]
142         if node.type == token.STRING:
143             value = unicode(literals.evalString(node.value))
144             return pytree.LeafPattern(_type_of_literal(value), value)
145         elif node.type == token.NAME:
146             value = node.value
147             if value.isupper():
148                 if value not in TOKEN_MAP:
149                     raise PatternSyntaxError("Invalid token: %r" % value)
150                 if nodes[1:]:
151                     raise PatternSyntaxError("Can't have details for token")
152                 return pytree.LeafPattern(TOKEN_MAP[value])
153             else:
154                 if value == "any":
155                     type = None
156                 elif not value.startswith("_"):
157                     type = getattr(self.pysyms, value, None)
158                     if type is None:
159                         raise PatternSyntaxError("Invalid symbol: %r" % value)
160                 if nodes[1:]: # Details present
161                     content = [self.compile_node(nodes[1].children[1])]
162                 else:
163                     content = None
164                 return pytree.NodePattern(type, content)
165         elif node.value == "(":
166             return self.compile_node(nodes[1])
167         elif node.value == "[":
168             assert repeat is None
169             subpattern = self.compile_node(nodes[1])
170             return pytree.WildcardPattern([[subpattern]], min=0, max=1)
171         assert False, node
172
173     def get_int(self, node):
174         assert node.type == token.NUMBER
175         return int(node.value)
176
177
178 # Map named tokens to the type value for a LeafPattern
179 TOKEN_MAP = {"NAME": token.NAME,
180              "STRING": token.STRING,
181              "NUMBER": token.NUMBER,
182              "TOKEN": None}
183
184
185 def _type_of_literal(value):
186     if value[0].isalpha():
187         return token.NAME
188     elif value in grammar.opmap:
189         return grammar.opmap[value]
190     else:
191         return None
192
193
194 def pattern_convert(grammar, raw_node_info):
195     """Converts raw node information to a Node or Leaf instance."""
196     type, value, context, children = raw_node_info
197     if children or type in grammar.number2symbol:
198         return pytree.Node(type, children, context=context)
199     else:
200         return pytree.Leaf(type, value, context=context)
201
202
203 def compile_pattern(pattern):
204     return PatternCompiler().compile_pattern(pattern)