1 #=======================================================================
3 # Python Lexical Analyser
5 # Traditional Regular Expression Syntax
7 #=======================================================================
9 from Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char
10 from Errors import PlexError
12 class RegexpSyntaxError(PlexError):
17 Convert traditional string representation of regular expression |s|
18 into Plex representation.
20 return REParser(s).parse_re()
22 class REParser(object):
24 def __init__(self, s):
33 self.error("Unexpected %s" % repr(self.c))
37 """Parse a set of alternative regexps."""
43 re_list.append(self.parse_seq())
48 """Parse a sequence of regexps."""
50 while not self.end and not self.c in "|)":
51 re_list.append(self.parse_mod())
55 """Parse a primitive regexp followed by *, +, ? modifiers."""
56 re = self.parse_prim()
57 while not self.end and self.c in "*+?":
68 """Parse a primitive regexp."""
80 re = self.parse_charset()
88 def parse_charset(self):
89 """Parse a charset. Does not include the surrounding []."""
98 while not self.end and self.c != ']':
100 if self.c == '-' and self.lookahead(1) != ']':
103 for a in xrange(ord(c1), ord(c2) + 1):
104 char_list.append(chr(a))
107 chars = ''.join(char_list)
114 """Advance to the next char."""
116 i = self.i = self.i + 1
125 self.error("Premature end of string")
130 def lookahead(self, n):
131 """Look ahead n chars."""
140 Expect to find character |c| at current position.
141 Raises an exception otherwise.
146 self.error("Missing %s" % repr(c))
def error(self, mess):
    """Signal a syntax error in the regexp being parsed.

    Builds a message from the repr of the regexp string (self.s), the
    current position (self.i) and the explanatory text |mess|, then
    raises RegexpSyntaxError with it. Never returns normally.
    """
    # Collect the format arguments first so the raise stays on one line.
    details = (repr(self.s), self.i, mess)
    raise RegexpSyntaxError(
        "Syntax error in regexp %s at position %d: %s" % details)