1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
5 Python parse tree definitions.
7 This is a very concrete parse tree; we need to keep every token and
8 even the comments and whitespace between tokens.
10 There's also a pattern matching implementation here.
13 __author__ = "Guido van Rossum <guido@python.org>"
17 from StringIO import StringIO
19 HUGE = 0x7FFFFFFF # maximum repeat count, default max
22 def type_repr(type_num):
25 from .pygram import python_symbols
26 # printing tokens is possible but not as useful
27 # from .pgen2 import token // token.__dict__.items():
28 for name, val in python_symbols.__dict__.items():
29 if type(val) == int: _type_reprs[val] = name
30 return _type_reprs.setdefault(type_num, type_num)
35 Abstract base class for Node and Leaf.
37 This provides some default functionality and boilerplate using the
40 A node may be a subnode of at most one parent.
43 # Default values for instance variables
44 type = None # int: token number (< 256) or symbol number (>= 256)
45 parent = None # Parent node pointer, or None
46 children = () # Tuple of subnodes
50 def __new__(cls, *args, **kwds):
51 """Constructor that prevents Base from being instantiated."""
52 assert cls is not Base, "Cannot instantiate Base"
53 return object.__new__(cls)
55 def __eq__(self, other):
57 Compare two nodes for equality.
59 This calls the method _eq().
61 if self.__class__ is not other.__class__:
63 return self._eq(other)
65 __hash__ = None # For Py3 compatibility.
67 def __ne__(self, other):
69 Compare two nodes for inequality.
71 This calls the method _eq().
73 if self.__class__ is not other.__class__:
75 return not self._eq(other)
79 Compare two nodes for equality.
81 This is called by __eq__ and __ne__. It is only called if the two nodes
82 have the same type. This must be implemented by the concrete subclass.
83 Nodes should be considered equal if they have the same structure,
84 ignoring the prefix string and other context information.
86 raise NotImplementedError
90 Return a cloned (deep) copy of self.
92 This must be implemented by the concrete subclass.
94 raise NotImplementedError
98 Return a post-order iterator for the tree.
100 This must be implemented by the concrete subclass.
102 raise NotImplementedError
106 Return a pre-order iterator for the tree.
108 This must be implemented by the concrete subclass.
110 raise NotImplementedError
112 def set_prefix(self, prefix):
114 Set the prefix for the node (see Leaf class).
116 DEPRECATED; use the prefix property directly.
118 warnings.warn("set_prefix() is deprecated; use the prefix property",
119 DeprecationWarning, stacklevel=2)
122 def get_prefix(self):
124 Return the prefix for the node (see Leaf class).
126 DEPRECATED; use the prefix property directly.
128 warnings.warn("get_prefix() is deprecated; use the prefix property",
129 DeprecationWarning, stacklevel=2)
132 def replace(self, new):
133 """Replace this node with a new one in the parent."""
134 assert self.parent is not None, str(self)
135 assert new is not None
136 if not isinstance(new, list):
140 for ch in self.parent.children:
142 assert not found, (self.parent.children, self, new)
144 l_children.extend(new)
147 l_children.append(ch)
148 assert found, (self.children, self, new)
149 self.parent.changed()
150 self.parent.children = l_children
152 x.parent = self.parent
155 def get_lineno(self):
156 """Return the line number which generated the invocant node."""
158 while not isinstance(node, Leaf):
159 if not node.children:
161 node = node.children[0]
166 self.parent.changed()
167 self.was_changed = True
171 Remove the node from the tree. Returns the position of the node in its
172 parent's children before it was removed.
175 for i, node in enumerate(self.parent.children):
177 self.parent.changed()
178 del self.parent.children[i]
183 def next_sibling(self):
185 The node immediately following the invocant in their parent's children
186 list. If the invocant does not have a next sibling, it is None
188 if self.parent is None:
191 # Can't use index(); we need to test by identity
192 for i, child in enumerate(self.parent.children):
195 return self.parent.children[i+1]
200 def prev_sibling(self):
202 The node immediately preceding the invocant in their parent's children
203 list. If the invocant does not have a previous sibling, it is None.
205 if self.parent is None:
208 # Can't use index(); we need to test by identity
209 for i, child in enumerate(self.parent.children):
213 return self.parent.children[i-1]
216 for child in self.children:
217 for x in child.leaves():
221 if self.parent is None:
223 return 1 + self.parent.depth()
225 def get_suffix(self):
227 Return the string immediately following the invocant node. This is
228 effectively equivalent to node.next_sibling.prefix
230 next_sib = self.next_sibling
233 return next_sib.prefix
235 if sys.version_info < (3, 0):
237 return unicode(self).encode("ascii")
241 """Concrete implementation for interior nodes."""
243 def __init__(self,type, children,
246 fixers_applied=None):
250 Takes a type constant (a symbol number >= 256), a sequence of
251 child nodes, and an optional context keyword argument.
253 As a side effect, the parent pointers of the children are updated.
255 assert type >= 256, type
257 self.children = list(children)
258 for ch in self.children:
259 assert ch.parent is None, repr(ch)
261 if prefix is not None:
264 self.fixers_applied = fixers_applied[:]
266 self.fixers_applied = None
269 """Return a canonical string representation."""
270 return "%s(%s, %r)" % (self.__class__.__name__,
271 type_repr(self.type),
274 def __unicode__(self):
276 Return a pretty string representation.
278 This reproduces the input source exactly.
280 return u"".join(map(unicode, self.children))
282 if sys.version_info > (3, 0):
283 __str__ = __unicode__
285 def _eq(self, other):
286 """Compare two nodes for equality."""
287 return (self.type, self.children) == (other.type, other.children)
290 """Return a cloned (deep) copy of self."""
291 return Node(self.type, [ch.clone() for ch in self.children],
292 fixers_applied=self.fixers_applied)
294 def post_order(self):
295 """Return a post-order iterator for the tree."""
296 for child in self.children:
297 for node in child.post_order():
302 """Return a pre-order iterator for the tree."""
304 for child in self.children:
305 for node in child.pre_order():
308 def _prefix_getter(self):
310 The whitespace and comments preceding this node in the input.
312 if not self.children:
314 return self.children[0].prefix
316 def _prefix_setter(self, prefix):
318 self.children[0].prefix = prefix
320 prefix = property(_prefix_getter, _prefix_setter)
322 def set_child(self, i, child):
324 Equivalent to 'node.children[i] = child'. This method also sets the
325 child's parent attribute appropriately.
328 self.children[i].parent = None
329 self.children[i] = child
332 def insert_child(self, i, child):
334 Equivalent to 'node.children.insert(i, child)'. This method also sets
335 the child's parent attribute appropriately.
338 self.children.insert(i, child)
341 def append_child(self, child):
343 Equivalent to 'node.children.append(child)'. This method also sets the
344 child's parent attribute appropriately.
347 self.children.append(child)
353 """Concrete implementation for leaf nodes."""
355 # Default values for instance variables
356 _prefix = "" # Whitespace and comments preceding this token in the input
357 lineno = 0 # Line where this token starts in the input
358 column = 0 # Column where this token tarts in the input
360 def __init__(self, type, value,
367 Takes a type constant (a token number < 256), a string value, and an
368 optional context keyword argument.
370 assert 0 <= type < 256, type
371 if context is not None:
372 self._prefix, (self.lineno, self.column) = context
375 if prefix is not None:
376 self._prefix = prefix
377 self.fixers_applied = fixers_applied[:]
380 """Return a canonical string representation."""
381 return "%s(%r, %r)" % (self.__class__.__name__,
385 def __unicode__(self):
387 Return a pretty string representation.
389 This reproduces the input source exactly.
391 return self.prefix + unicode(self.value)
393 if sys.version_info > (3, 0):
394 __str__ = __unicode__
396 def _eq(self, other):
397 """Compare two nodes for equality."""
398 return (self.type, self.value) == (other.type, other.value)
401 """Return a cloned (deep) copy of self."""
402 return Leaf(self.type, self.value,
403 (self.prefix, (self.lineno, self.column)),
404 fixers_applied=self.fixers_applied)
409 def post_order(self):
410 """Return a post-order iterator for the tree."""
414 """Return a pre-order iterator for the tree."""
417 def _prefix_getter(self):
419 The whitespace and comments preceding this token in the input.
423 def _prefix_setter(self, prefix):
425 self._prefix = prefix
427 prefix = property(_prefix_getter, _prefix_setter)
429 def convert(gr, raw_node):
431 Convert raw node information to a Node or Leaf instance.
433 This is passed to the parser driver which calls it whenever a reduction of a
434 grammar rule produces a new complete node, so that the tree is build
437 type, value, context, children = raw_node
438 if children or type in gr.number2symbol:
439 # If there's exactly one child, return that child instead of
440 # creating a new node.
441 if len(children) == 1:
443 return Node(type, children, context=context)
445 return Leaf(type, value, context=context)
448 class BasePattern(object):
451 A pattern is a tree matching pattern.
453 It looks for a specific node type (token or symbol), and
454 optionally for a specific content.
456 This is an abstract base class. There are three concrete
459 - LeafPattern matches a single leaf node;
460 - NodePattern matches a single node (usually non-leaf);
461 - WildcardPattern matches a sequence of nodes of variable length.
464 # Defaults for instance variables
465 type = None # Node type (token if < 256, symbol if >= 256)
466 content = None # Optional content matching pattern
467 name = None # Optional name used to store match in results dict
469 def __new__(cls, *args, **kwds):
470 """Constructor that prevents BasePattern from being instantiated."""
471 assert cls is not BasePattern, "Cannot instantiate BasePattern"
472 return object.__new__(cls)
475 args = [type_repr(self.type), self.content, self.name]
476 while args and args[-1] is None:
478 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
482 A subclass can define this as a hook for optimizations.
484 Returns either self or another node with the same effect.
488 def match(self, node, results=None):
490 Does this pattern exactly match a node?
492 Returns True if it matches, False if not.
494 If results is not None, it must be a dict which will be
495 updated with the nodes matching named subpatterns.
497 Default implementation for non-wildcard patterns.
499 if self.type is not None and node.type != self.type:
501 if self.content is not None:
503 if results is not None:
505 if not self._submatch(node, r):
509 if results is not None and self.name:
510 results[self.name] = node
513 def match_seq(self, nodes, results=None):
515 Does this pattern exactly match a sequence of nodes?
517 Default implementation for non-wildcard patterns.
521 return self.match(nodes[0], results)
523 def generate_matches(self, nodes):
525 Generator yielding all matches for this pattern.
527 Default implementation for non-wildcard patterns.
530 if nodes and self.match(nodes[0], r):
534 class LeafPattern(BasePattern):
536 def __init__(self, type=None, content=None, name=None):
538 Initializer. Takes optional type, content, and name.
540 The type, if given must be a token type (< 256). If not given,
541 this matches any *leaf* node; the content may still be required.
543 The content, if given, must be a string.
545 If a name is given, the matching node is stored in the results
549 assert 0 <= type < 256, type
550 if content is not None:
551 assert isinstance(content, basestring), repr(content)
553 self.content = content
556 def match(self, node, results=None):
557 """Override match() to insist on a leaf node."""
558 if not isinstance(node, Leaf):
560 return BasePattern.match(self, node, results)
562 def _submatch(self, node, results=None):
564 Match the pattern's content to the node's children.
566 This assumes the node type matches and self.content is not None.
568 Returns True if it matches, False if not.
570 If results is not None, it must be a dict which will be
571 updated with the nodes matching named subpatterns.
573 When returning False, the results dict may still be updated.
575 return self.content == node.value
578 class NodePattern(BasePattern):
582 def __init__(self, type=None, content=None, name=None):
584 Initializer. Takes optional type, content, and name.
586 The type, if given, must be a symbol type (>= 256). If the
587 type is None this matches *any* single node (leaf or not),
588 except if content is not None, in which it only matches
589 non-leaf nodes that also match the content pattern.
591 The content, if not None, must be a sequence of Patterns that
592 must match the node's children exactly. If the content is
593 given, the type must not be None.
595 If a name is given, the matching node is stored in the results
599 assert type >= 256, type
600 if content is not None:
601 assert not isinstance(content, basestring), repr(content)
602 content = list(content)
603 for i, item in enumerate(content):
604 assert isinstance(item, BasePattern), (i, item)
605 if isinstance(item, WildcardPattern):
606 self.wildcards = True
608 self.content = content
611 def _submatch(self, node, results=None):
613 Match the pattern's content to the node's children.
615 This assumes the node type matches and self.content is not None.
617 Returns True if it matches, False if not.
619 If results is not None, it must be a dict which will be
620 updated with the nodes matching named subpatterns.
622 When returning False, the results dict may still be updated.
625 for c, r in generate_matches(self.content, node.children):
626 if c == len(node.children):
627 if results is not None:
631 if len(self.content) != len(node.children):
633 for subpattern, child in zip(self.content, node.children):
634 if not subpattern.match(child, results):
639 class WildcardPattern(BasePattern):
642 A wildcard pattern can match zero or more nodes.
644 This has all the flexibility needed to implement patterns like:
648 (...)* (...)+ (...)? (...){m,n}
650 except it always uses non-greedy matching.
653 def __init__(self, content=None, min=0, max=HUGE, name=None):
658 content: optional sequence of subsequences of patterns;
659 if absent, matches one node;
660 if present, each subsequence is an alternative [*]
661 min: optinal minumum number of times to match, default 0
662 max: optional maximum number of times tro match, default HUGE
663 name: optional name assigned to this match
665 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
666 equivalent to (a b c | d e | f g h); if content is None,
667 this is equivalent to '.' in regular expression terms.
668 The min and max parameters work as follows:
669 min=0, max=maxint: .*
670 min=1, max=maxint: .+
673 If content is not None, replace the dot with the parenthesized
674 list of alternatives, e.g. (a b c | d e | f g h)*
676 assert 0 <= min <= max <= HUGE, (min, max)
677 if content is not None:
678 content = tuple(map(tuple, content)) # Protect against alterations
679 # Check sanity of alternatives
680 assert len(content), repr(content) # Can't have zero alternatives
682 assert len(alt), repr(alt) # Can have empty alternatives
683 self.content = content
689 """Optimize certain stacked wildcard patterns."""
691 if (self.content is not None and
692 len(self.content) == 1 and len(self.content[0]) == 1):
693 subpattern = self.content[0][0]
694 if self.min == 1 and self.max == 1:
695 if self.content is None:
696 return NodePattern(name=self.name)
697 if subpattern is not None and self.name == subpattern.name:
698 return subpattern.optimize()
699 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
700 subpattern.min <= 1 and self.name == subpattern.name):
701 return WildcardPattern(subpattern.content,
702 self.min*subpattern.min,
703 self.max*subpattern.max,
707 def match(self, node, results=None):
708 """Does this pattern exactly match a node?"""
709 return self.match_seq([node], results)
711 def match_seq(self, nodes, results=None):
712 """Does this pattern exactly match a sequence of nodes?"""
713 for c, r in self.generate_matches(nodes):
715 if results is not None:
718 results[self.name] = list(nodes)
722 def generate_matches(self, nodes):
724 Generator yielding matches for a sequence of nodes.
727 nodes: sequence of nodes
730 (count, results) tuples where:
731 count: the match comprises nodes[:count];
732 results: dict containing named submatches.
734 if self.content is None:
735 # Shortcut for special case (see __init__.__doc__)
736 for count in xrange(self.min, 1 + min(len(nodes), self.max)):
739 r[self.name] = nodes[:count]
741 elif self.name == "bare_name":
742 yield self._bare_name_matches(nodes)
744 # The reason for this is that hitting the recursion limit usually
745 # results in some ugly messages about how RuntimeErrors are being
747 save_stderr = sys.stderr
748 sys.stderr = StringIO()
750 for count, r in self._recursive_matches(nodes, 0):
752 r[self.name] = nodes[:count]
755 # We fall back to the iterative pattern matching scheme if the recursive
756 # scheme hits the recursion limit.
757 for count, r in self._iterative_matches(nodes):
759 r[self.name] = nodes[:count]
762 sys.stderr = save_stderr
764 def _iterative_matches(self, nodes):
765 """Helper to iteratively yield the matches."""
771 # generate matches that use just one alt from self.content
772 for alt in self.content:
773 for c, r in generate_matches(alt, nodes):
775 results.append((c, r))
777 # for each match, iterate down the nodes
780 for c0, r0 in results:
781 # stop if the entire set of nodes has been matched
782 if c0 < nodelen and c0 <= self.max:
783 for alt in self.content:
784 for c1, r1 in generate_matches(alt, nodes[c0:]):
790 new_results.append((c0 + c1, r))
791 results = new_results
793 def _bare_name_matches(self, nodes):
794 """Special optimized matcher for bare_name."""
799 while not done and count < max:
801 for leaf in self.content:
802 if leaf[0].match(nodes[count], r):
806 r[self.name] = nodes[:count]
809 def _recursive_matches(self, nodes, count):
810 """Helper to recursively yield the matches."""
811 assert self.content is not None
812 if count >= self.min:
815 for alt in self.content:
816 for c0, r0 in generate_matches(alt, nodes):
817 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
824 class NegatedPattern(BasePattern):
826 def __init__(self, content=None):
830 The argument is either a pattern or None. If it is None, this
831 only matches an empty sequence (effectively '$' in regex
832 lingo). If it is not None, this matches whenever the argument
833 pattern doesn't have any matches.
835 if content is not None:
836 assert isinstance(content, BasePattern), repr(content)
837 self.content = content
839 def match(self, node):
840 # We never match a node in its entirety
843 def match_seq(self, nodes):
844 # We only match an empty sequence of nodes in its entirety
845 return len(nodes) == 0
847 def generate_matches(self, nodes):
848 if self.content is None:
849 # Return a match if there is an empty sequence
853 # Return a match if the argument pattern has no matches
854 for c, r in self.content.generate_matches(nodes):
859 def generate_matches(patterns, nodes):
861 Generator yielding matches for a sequence of patterns and nodes.
864 patterns: a sequence of patterns
865 nodes: a sequence of nodes
868 (count, results) tuples where:
869 count: the entire sequence of patterns matches nodes[:count];
870 results: dict containing named submatches.
875 p, rest = patterns[0], patterns[1:]
876 for c0, r0 in p.generate_matches(nodes):
880 for c1, r1 in generate_matches(rest, nodes[c0:]):