src/third_party/jinja2/lexer.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     jinja2.lexer
   4     ~~~~~~~~~~~~
   5
   6     This module implements a Jinja / Python combination lexer. The
   7     `Lexer` class provided by this module is used to do some preprocessing
   8     for Jinja.
   9
  10     On the one hand it filters out invalid operators like the bitshift
  11     operators we don't allow in templates. On the other hand it separates
  12     template code and python code in expressions.
  13
  14     :copyright: (c) 2010 by the Jinja Team.
  15     :license: BSD, see LICENSE for more details.
  16 """
  17 import re
  18
  19 from operator import itemgetter
  20 from collections import deque
  21 from jinja2.exceptions import TemplateSyntaxError
  22 from jinja2.utils import LRUCache
  23 from jinja2._compat import next, iteritems, implements_iterator, text_type, \
  24      intern
  25
  26
  27 # cache for the lexers. Exists in order to be able to have multiple
  28 # environments with the same lexer
  29 _lexer_cache = LRUCache(50)
  30
  31 # static regular expressions
  32 whitespace_re = re.compile(r'\s+', re.U)
  33 string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
  34                        r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S)
  35 integer_re = re.compile(r'\d+')
  36
  37 # we use the unicode identifier rule if this python version is able
  38 # to handle unicode identifiers, otherwise the standard ASCII one.
  39 try:
  40     compile('föö', '<unknown>', 'eval')
  41 except SyntaxError:
  42     name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
  43 else:
  44     from jinja2 import _stringdefs
  45     name_re = re.compile(r'[%s][%s]*' % (_stringdefs.xid_start,
  46                                          _stringdefs.xid_continue))
  47
  48 float_re = re.compile(r'(?<!\.)\d+\.\d+')
  49 newline_re = re.compile(r'(\r\n|\r|\n)')
  50
# Intern the token type names and keep references to them.  Token types
# are compared by identity (``is``) elsewhere in this module, which only
# works because every type string goes through ``intern``.

# operator and punctuation token types
TOKEN_ADD = intern('add')
TOKEN_ASSIGN = intern('assign')
TOKEN_COLON = intern('colon')
TOKEN_COMMA = intern('comma')
TOKEN_DIV = intern('div')
TOKEN_DOT = intern('dot')
TOKEN_EQ = intern('eq')
TOKEN_FLOORDIV = intern('floordiv')
TOKEN_GT = intern('gt')
TOKEN_GTEQ = intern('gteq')
TOKEN_LBRACE = intern('lbrace')
TOKEN_LBRACKET = intern('lbracket')
TOKEN_LPAREN = intern('lparen')
TOKEN_LT = intern('lt')
TOKEN_LTEQ = intern('lteq')
TOKEN_MOD = intern('mod')
TOKEN_MUL = intern('mul')
TOKEN_NE = intern('ne')
TOKEN_PIPE = intern('pipe')
TOKEN_POW = intern('pow')
TOKEN_RBRACE = intern('rbrace')
TOKEN_RBRACKET = intern('rbracket')
TOKEN_RPAREN = intern('rparen')
TOKEN_SEMICOLON = intern('semicolon')
TOKEN_SUB = intern('sub')
TOKEN_TILDE = intern('tilde')
# token types produced by the tag rules (inside blocks / variables)
TOKEN_WHITESPACE = intern('whitespace')
TOKEN_FLOAT = intern('float')
TOKEN_INTEGER = intern('integer')
TOKEN_NAME = intern('name')
TOKEN_STRING = intern('string')
TOKEN_OPERATOR = intern('operator')
# delimiters and template-level token types
TOKEN_BLOCK_BEGIN = intern('block_begin')
TOKEN_BLOCK_END = intern('block_end')
TOKEN_VARIABLE_BEGIN = intern('variable_begin')
TOKEN_VARIABLE_END = intern('variable_end')
TOKEN_RAW_BEGIN = intern('raw_begin')
TOKEN_RAW_END = intern('raw_end')
TOKEN_COMMENT_BEGIN = intern('comment_begin')
TOKEN_COMMENT_END = intern('comment_end')
TOKEN_COMMENT = intern('comment')
TOKEN_LINESTATEMENT_BEGIN = intern('linestatement_begin')
TOKEN_LINESTATEMENT_END = intern('linestatement_end')
TOKEN_LINECOMMENT_BEGIN = intern('linecomment_begin')
TOKEN_LINECOMMENT_END = intern('linecomment_end')
TOKEN_LINECOMMENT = intern('linecomment')
TOKEN_DATA = intern('data')
# stream bookkeeping: placeholder before the first token / end marker
TOKEN_INITIAL = intern('initial')
TOKEN_EOF = intern('eof')
 101
 102 # bind operators to token types
 103 operators = {
 104     '+':            TOKEN_ADD,
 105     '-':            TOKEN_SUB,
 106     '/':            TOKEN_DIV,
 107     '//':           TOKEN_FLOORDIV,
 108     '*':            TOKEN_MUL,
 109     '%':            TOKEN_MOD,
 110     '**':           TOKEN_POW,
 111     '~':            TOKEN_TILDE,
 112     '[':            TOKEN_LBRACKET,
 113     ']':            TOKEN_RBRACKET,
 114     '(':            TOKEN_LPAREN,
 115     ')':            TOKEN_RPAREN,
 116     '{':            TOKEN_LBRACE,
 117     '}':            TOKEN_RBRACE,
 118     '==':           TOKEN_EQ,
 119     '!=':           TOKEN_NE,
 120     '>':            TOKEN_GT,
 121     '>=':           TOKEN_GTEQ,
 122     '<':            TOKEN_LT,
 123     '<=':           TOKEN_LTEQ,
 124     '=':            TOKEN_ASSIGN,
 125     '.':            TOKEN_DOT,
 126     ':':            TOKEN_COLON,
 127     '|':            TOKEN_PIPE,
 128     ',':            TOKEN_COMMA,
 129     ';':            TOKEN_SEMICOLON
 130 }
 131
 132 reverse_operators = dict([(v, k) for k, v in iteritems(operators)])
 133 assert len(operators) == len(reverse_operators), 'operators dropped'
 134 operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
 135                          sorted(operators, key=lambda x: -len(x))))
 136
 137 ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT,
 138                             TOKEN_COMMENT_END, TOKEN_WHITESPACE,
 139                             TOKEN_WHITESPACE, TOKEN_LINECOMMENT_BEGIN,
 140                             TOKEN_LINECOMMENT_END, TOKEN_LINECOMMENT])
 141 ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA,
 142                              TOKEN_COMMENT, TOKEN_LINECOMMENT])
 143
 144
 145 def _describe_token_type(token_type):
 146     if token_type in reverse_operators:
 147         return reverse_operators[token_type]
 148     return {
 149         TOKEN_COMMENT_BEGIN:        'begin of comment',
 150         TOKEN_COMMENT_END:          'end of comment',
 151         TOKEN_COMMENT:              'comment',
 152         TOKEN_LINECOMMENT:          'comment',
 153         TOKEN_BLOCK_BEGIN:          'begin of statement block',
 154         TOKEN_BLOCK_END:            'end of statement block',
 155         TOKEN_VARIABLE_BEGIN:       'begin of print statement',
 156         TOKEN_VARIABLE_END:         'end of print statement',
 157         TOKEN_LINESTATEMENT_BEGIN:  'begin of line statement',
 158         TOKEN_LINESTATEMENT_END:    'end of line statement',
 159         TOKEN_DATA:                 'template data / text',
 160         TOKEN_EOF:                  'end of template'
 161     }.get(token_type, token_type)
 162
 163
 164 def describe_token(token):
 165     """Returns a description of the token."""
 166     if token.type == 'name':
 167         return token.value
 168     return _describe_token_type(token.type)
 169
 170
 171 def describe_token_expr(expr):
 172     """Like `describe_token` but for token expressions."""
 173     if ':' in expr:
 174         type, value = expr.split(':', 1)
 175         if type == 'name':
 176             return value
 177     else:
 178         type = expr
 179     return _describe_token_type(type)
 180
 181
 182 def count_newlines(value):
 183     """Count the number of newline characters in the string.  This is
 184     useful for extensions that filter a stream.
 185     """
 186     return len(newline_re.findall(value))
 187
 188
 189 def compile_rules(environment):
 190     """Compiles all the rules from the environment into a list of rules."""
 191     e = re.escape
 192     rules = [
 193         (len(environment.comment_start_string), 'comment',
 194          e(environment.comment_start_string)),
 195         (len(environment.block_start_string), 'block',
 196          e(environment.block_start_string)),
 197         (len(environment.variable_start_string), 'variable',
 198          e(environment.variable_start_string))
 199     ]
 200
 201     if environment.line_statement_prefix is not None:
 202         rules.append((len(environment.line_statement_prefix), 'linestatement',
 203                       r'^[ \t\v]*' + e(environment.line_statement_prefix)))
 204     if environment.line_comment_prefix is not None:
 205         rules.append((len(environment.line_comment_prefix), 'linecomment',
 206                       r'(?:^|(?<=\S))[^\S\r\n]*' +
 207                       e(environment.line_comment_prefix)))
 208
 209     return [x[1:] for x in sorted(rules, reverse=True)]
 210
 211
 212 class Failure(object):
 213     """Class that raises a `TemplateSyntaxError` if called.
 214     Used by the `Lexer` to specify known errors.
 215     """
 216
 217     def __init__(self, message, cls=TemplateSyntaxError):
 218         self.message = message
 219         self.error_class = cls
 220
 221     def __call__(self, lineno, filename):
 222         raise self.error_class(self.message, lineno, filename)
 223
 224
 225 class Token(tuple):
 226     """Token class."""
 227     __slots__ = ()
 228     lineno, type, value = (property(itemgetter(x)) for x in range(3))
 229
 230     def __new__(cls, lineno, type, value):
 231         return tuple.__new__(cls, (lineno, intern(str(type)), value))
 232
 233     def __str__(self):
 234         if self.type in reverse_operators:
 235             return reverse_operators[self.type]
 236         elif self.type == 'name':
 237             return self.value
 238         return self.type
 239
 240     def test(self, expr):
 241         """Test a token against a token expression.  This can either be a
 242         token type or ``'token_type:token_value'``.  This can only test
 243         against string values and types.
 244         """
 245         # here we do a regular string equality check as test_any is usually
 246         # passed an iterable of not interned strings.
 247         if self.type == expr:
 248             return True
 249         elif ':' in expr:
 250             return expr.split(':', 1) == [self.type, self.value]
 251         return False
 252
 253     def test_any(self, *iterable):
 254         """Test against multiple token expressions."""
 255         for expr in iterable:
 256             if self.test(expr):
 257                 return True
 258         return False
 259
 260     def __repr__(self):
 261         return 'Token(%r, %r, %r)' % (
 262             self.lineno,
 263             self.type,
 264             self.value
 265         )
 266
 267
 268 @implements_iterator
 269 class TokenStreamIterator(object):
 270     """The iterator for tokenstreams.  Iterate over the stream
 271     until the eof token is reached.
 272     """
 273
 274     def __init__(self, stream):
 275         self.stream = stream
 276
 277     def __iter__(self):
 278         return self
 279
 280     def __next__(self):
 281         token = self.stream.current
 282         if token.type is TOKEN_EOF:
 283             self.stream.close()
 284             raise StopIteration()
 285         next(self.stream)
 286         return token
 287
 288
 289 @implements_iterator
 290 class TokenStream(object):
 291     """A token stream is an iterable that yields :class:`Token`\s.  The
 292     parser however does not iterate over it but calls :meth:`next` to go
 293     one token ahead.  The current active token is stored as :attr:`current`.
 294     """
 295
 296     def __init__(self, generator, name, filename):
 297         self._iter = iter(generator)
 298         self._pushed = deque()
 299         self.name = name
 300         self.filename = filename
 301         self.closed = False
 302         self.current = Token(1, TOKEN_INITIAL, '')
 303         next(self)
 304
 305     def __iter__(self):
 306         return TokenStreamIterator(self)
 307
 308     def __bool__(self):
 309         return bool(self._pushed) or self.current.type is not TOKEN_EOF
 310     __nonzero__ = __bool__  # py2
 311
 312     eos = property(lambda x: not x, doc="Are we at the end of the stream?")
 313
 314     def push(self, token):
 315         """Push a token back to the stream."""
 316         self._pushed.append(token)
 317
 318     def look(self):
 319         """Look at the next token."""
 320         old_token = next(self)
 321         result = self.current
 322         self.push(result)
 323         self.current = old_token
 324         return result
 325
 326     def skip(self, n=1):
 327         """Got n tokens ahead."""
 328         for x in range(n):
 329             next(self)
 330
 331     def next_if(self, expr):
 332         """Perform the token test and return the token if it matched.
 333         Otherwise the return value is `None`.
 334         """
 335         if self.current.test(expr):
 336             return next(self)
 337
 338     def skip_if(self, expr):
 339         """Like :meth:`next_if` but only returns `True` or `False`."""
 340         return self.next_if(expr) is not None
 341
 342     def __next__(self):
 343         """Go one token ahead and return the old one"""
 344         rv = self.current
 345         if self._pushed:
 346             self.current = self._pushed.popleft()
 347         elif self.current.type is not TOKEN_EOF:
 348             try:
 349                 self.current = next(self._iter)
 350             except StopIteration:
 351                 self.close()
 352         return rv
 353
 354     def close(self):
 355         """Close the stream."""
 356         self.current = Token(self.current.lineno, TOKEN_EOF, '')
 357         self._iter = None
 358         self.closed = True
 359
 360     def expect(self, expr):
 361         """Expect a given token type and return it.  This accepts the same
 362         argument as :meth:`jinja2.lexer.Token.test`.
 363         """
 364         if not self.current.test(expr):
 365             expr = describe_token_expr(expr)
 366             if self.current.type is TOKEN_EOF:
 367                 raise TemplateSyntaxError('unexpected end of template, '
 368                                           'expected %r.' % expr,
 369                                           self.current.lineno,
 370                                           self.name, self.filename)
 371             raise TemplateSyntaxError("expected token %r, got %r" %
 372                                       (expr, describe_token(self.current)),
 373                                       self.current.lineno,
 374                                       self.name, self.filename)
 375         try:
 376             return self.current
 377         finally:
 378             next(self)
 379
 380
 381 def get_lexer(environment):
 382     """Return a lexer which is probably cached."""
 383     key = (environment.block_start_string,
 384            environment.block_end_string,
 385            environment.variable_start_string,
 386            environment.variable_end_string,
 387            environment.comment_start_string,
 388            environment.comment_end_string,
 389            environment.line_statement_prefix,
 390            environment.line_comment_prefix,
 391            environment.trim_blocks,
 392            environment.lstrip_blocks,
 393            environment.newline_sequence,
 394            environment.keep_trailing_newline)
 395     lexer = _lexer_cache.get(key)
 396     if lexer is None:
 397         lexer = Lexer(environment)
 398         _lexer_cache[key] = lexer
 399     return lexer
 400
 401
 402 class Lexer(object):
 403     """Class that implements a lexer for a given environment. Automatically
 404     created by the environment class, usually you don't have to do that.
 405
 406     Note that the lexer is not automatically bound to an environment.
 407     Multiple environments can share the same lexer.
 408     """
 409
 410     def __init__(self, environment):
 411         # shortcuts
 412         c = lambda x: re.compile(x, re.M | re.S)
 413         e = re.escape
 414
 415         # lexing rules for tags
 416         tag_rules = [
 417             (whitespace_re, TOKEN_WHITESPACE, None),
 418             (float_re, TOKEN_FLOAT, None),
 419             (integer_re, TOKEN_INTEGER, None),
 420             (name_re, TOKEN_NAME, None),
 421             (string_re, TOKEN_STRING, None),
 422             (operator_re, TOKEN_OPERATOR, None)
 423         ]
 424
 425         # assemble the root lexing rule. because "|" is ungreedy
 426         # we have to sort by length so that the lexer continues working
 427         # as expected when we have parsing rules like <% for block and
 428         # <%= for variables. (if someone wants asp like syntax)
 429         # variables are just part of the rules if variable processing
 430         # is required.
 431         root_tag_rules = compile_rules(environment)
 432
 433         # block suffix if trimming is enabled
 434         block_suffix_re = environment.trim_blocks and '\\n?' or ''
 435
 436         # strip leading spaces if lstrip_blocks is enabled
 437         prefix_re = {}
 438         if environment.lstrip_blocks:
 439             # use '{%+' to manually disable lstrip_blocks behavior
 440             no_lstrip_re = e('+')
 441             # detect overlap between block and variable or comment strings
 442             block_diff = c(r'^%s(.*)' % e(environment.block_start_string))
 443             # make sure we don't mistake a block for a variable or a comment
 444             m = block_diff.match(environment.comment_start_string)
 445             no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
 446             m = block_diff.match(environment.variable_start_string)
 447             no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
 448
 449             # detect overlap between comment and variable strings
 450             comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string))
 451             m = comment_diff.match(environment.variable_start_string)
 452             no_variable_re = m and r'(?!%s)' % e(m.group(1)) or ''
 453
 454             lstrip_re = r'^[ \t]*'
 455             block_prefix_re = r'%s%s(?!%s)|%s\+?' % (
 456                     lstrip_re,
 457                     e(environment.block_start_string),
 458                     no_lstrip_re,
 459                     e(environment.block_start_string),
 460                     )
 461             comment_prefix_re = r'%s%s%s|%s\+?' % (
 462                     lstrip_re,
 463                     e(environment.comment_start_string),
 464                     no_variable_re,
 465                     e(environment.comment_start_string),
 466                     )
 467             prefix_re['block'] = block_prefix_re
 468             prefix_re['comment'] = comment_prefix_re
 469         else:
 470             block_prefix_re = '%s' % e(environment.block_start_string)
 471
 472         self.newline_sequence = environment.newline_sequence
 473         self.keep_trailing_newline = environment.keep_trailing_newline
 474
 475         # global lexing rules
 476         self.rules = {
 477             'root': [
 478                 # directives
 479                 (c('(.*?)(?:%s)' % '|'.join(
 480                     [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % (
 481                         e(environment.block_start_string),
 482                         block_prefix_re,
 483                         e(environment.block_end_string),
 484                         e(environment.block_end_string)
 485                     )] + [
 486                         r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n,r))
 487                         for n, r in root_tag_rules
 488                     ])), (TOKEN_DATA, '#bygroup'), '#bygroup'),
 489                 # data
 490                 (c('.+'), TOKEN_DATA, None)
 491             ],
 492             # comments
 493             TOKEN_COMMENT_BEGIN: [
 494                 (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
 495                     e(environment.comment_end_string),
 496                     e(environment.comment_end_string),
 497                     block_suffix_re
 498                 )), (TOKEN_COMMENT, TOKEN_COMMENT_END), '#pop'),
 499                 (c('(.)'), (Failure('Missing end of comment tag'),), None)
 500             ],
 501             # blocks
 502             TOKEN_BLOCK_BEGIN: [
 503                 (c('(?:\-%s\s*|%s)%s' % (
 504                     e(environment.block_end_string),
 505                     e(environment.block_end_string),
 506                     block_suffix_re
 507                 )), TOKEN_BLOCK_END, '#pop'),
 508             ] + tag_rules,
 509             # variables
 510             TOKEN_VARIABLE_BEGIN: [
 511                 (c('\-%s\s*|%s' % (
 512                     e(environment.variable_end_string),
 513                     e(environment.variable_end_string)
 514                 )), TOKEN_VARIABLE_END, '#pop')
 515             ] + tag_rules,
 516             # raw block
 517             TOKEN_RAW_BEGIN: [
 518                 (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
 519                     e(environment.block_start_string),
 520                     block_prefix_re,
 521                     e(environment.block_end_string),
 522                     e(environment.block_end_string),
 523                     block_suffix_re
 524                 )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'),
 525                 (c('(.)'), (Failure('Missing end of raw directive'),), None)
 526             ],
 527             # line statements
 528             TOKEN_LINESTATEMENT_BEGIN: [
 529                 (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop')
 530             ] + tag_rules,
 531             # line comments
 532             TOKEN_LINECOMMENT_BEGIN: [
 533                 (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT,
 534                  TOKEN_LINECOMMENT_END), '#pop')
 535             ]
 536         }
 537
 538     def _normalize_newlines(self, value):
 539         """Called for strings and template data to normalize it to unicode."""
 540         return newline_re.sub(self.newline_sequence, value)
 541
 542     def tokenize(self, source, name=None, filename=None, state=None):
 543         """Calls tokeniter + tokenize and wraps it in a token stream.
 544         """
 545         stream = self.tokeniter(source, name, filename, state)
 546         return TokenStream(self.wrap(stream, name, filename), name, filename)
 547
 548     def wrap(self, stream, name=None, filename=None):
 549         """This is called with the stream as returned by `tokenize` and wraps
 550         every token in a :class:`Token` and converts the value.
 551         """
 552         for lineno, token, value in stream:
 553             if token in ignored_tokens:
 554                 continue
 555             elif token == 'linestatement_begin':
 556                 token = 'block_begin'
 557             elif token == 'linestatement_end':
 558                 token = 'block_end'
 559             # we are not interested in those tokens in the parser
 560             elif token in ('raw_begin', 'raw_end'):
 561                 continue
 562             elif token == 'data':
 563                 value = self._normalize_newlines(value)
 564             elif token == 'keyword':
 565                 token = value
 566             elif token == 'name':
 567                 value = str(value)
 568             elif token == 'string':
 569                 # try to unescape string
 570                 try:
 571                     value = self._normalize_newlines(value[1:-1]) \
 572                         .encode('ascii', 'backslashreplace') \
 573                         .decode('unicode-escape')
 574                 except Exception as e:
 575                     msg = str(e).split(':')[-1].strip()
 576                     raise TemplateSyntaxError(msg, lineno, name, filename)
 577                 # if we can express it as bytestring (ascii only)
 578                 # we do that for support of semi broken APIs
 579                 # as datetime.datetime.strftime.  On python 3 this
 580                 # call becomes a noop thanks to 2to3
 581                 try:
 582                     value = str(value)
 583                 except UnicodeError:
 584                     pass
 585             elif token == 'integer':
 586                 value = int(value)
 587             elif token == 'float':
 588                 value = float(value)
 589             elif token == 'operator':
 590                 token = operators[value]
 591             yield Token(lineno, token, value)
 592
 593     def tokeniter(self, source, name, filename=None, state=None):
 594         """This method tokenizes the text and returns the tokens in a
 595         generator.  Use this method if you just want to tokenize a template.
 596         """
 597         source = text_type(source)
 598         lines = source.splitlines()
 599         if self.keep_trailing_newline and source:
 600             for newline in ('\r\n', '\r', '\n'):
 601                 if source.endswith(newline):
 602                     lines.append('')
 603                     break
 604         source = '\n'.join(lines)
 605         pos = 0
 606         lineno = 1
 607         stack = ['root']
 608         if state is not None and state != 'root':
 609             assert state in ('variable', 'block'), 'invalid state'
 610             stack.append(state + '_begin')
 611         else:
 612             state = 'root'
 613         statetokens = self.rules[stack[-1]]
 614         source_length = len(source)
 615
 616         balancing_stack = []
 617
 618         while 1:
 619             # tokenizer loop
 620             for regex, tokens, new_state in statetokens:
 621                 m = regex.match(source, pos)
 622                 # if no match we try again with the next rule
 623                 if m is None:
 624                     continue
 625
 626                 # we only match blocks and variables if braces / parentheses
 627                 # are balanced. continue parsing with the lower rule which
 628                 # is the operator rule. do this only if the end tags look
 629                 # like operators
 630                 if balancing_stack and \
 631                    tokens in ('variable_end', 'block_end',
 632                               'linestatement_end'):
 633                     continue
 634
 635                 # tuples support more options
 636                 if isinstance(tokens, tuple):
 637                     for idx, token in enumerate(tokens):
 638                         # failure group
 639                         if token.__class__ is Failure:
 640                             raise token(lineno, filename)
 641                         # bygroup is a bit more complex, in that case we
 642                         # yield for the current token the first named
 643                         # group that matched
 644                         elif token == '#bygroup':
 645                             for key, value in iteritems(m.groupdict()):
 646                                 if value is not None:
 647                                     yield lineno, key, value
 648                                     lineno += value.count('\n')
 649                                     break
 650                             else:
 651                                 raise RuntimeError('%r wanted to resolve '
 652                                                    'the token dynamically'
 653                                                    ' but no group matched'
 654                                                    % regex)
 655                         # normal group
 656                         else:
 657                             data = m.group(idx + 1)
 658                             if data or token not in ignore_if_empty:
 659                                 yield lineno, token, data
 660                             lineno += data.count('\n')
 661
 662                 # strings as token just are yielded as it.
 663                 else:
 664                     data = m.group()
 665                     # update brace/parentheses balance
 666                     if tokens == 'operator':
 667                         if data == '{':
 668                             balancing_stack.append('}')
 669                         elif data == '(':
 670                             balancing_stack.append(')')
 671                         elif data == '[':
 672                             balancing_stack.append(']')
 673                         elif data in ('}', ')', ']'):
 674                             if not balancing_stack:
 675                                 raise TemplateSyntaxError('unexpected \'%s\'' %
 676                                                           data, lineno, name,
 677                                                           filename)
 678                             expected_op = balancing_stack.pop()
 679                             if expected_op != data:
 680                                 raise TemplateSyntaxError('unexpected \'%s\', '
 681                                                           'expected \'%s\'' %
 682                                                           (data, expected_op),
 683                                                           lineno, name,
 684                                                           filename)
 685                     # yield items
 686                     if data or tokens not in ignore_if_empty:
 687                         yield lineno, tokens, data
 688                     lineno += data.count('\n')
 689
 690                 # fetch new position into new variable so that we can check
 691                 # if there is a internal parsing error which would result
 692                 # in an infinite loop
 693                 pos2 = m.end()
 694
 695                 # handle state changes
 696                 if new_state is not None:
 697                     # remove the uppermost state
 698                     if new_state == '#pop':
 699                         stack.pop()
 700                     # resolve the new state by group checking
 701                     elif new_state == '#bygroup':
 702                         for key, value in iteritems(m.groupdict()):
 703                             if value is not None:
 704                                 stack.append(key)
 705                                 break
 706                         else:
 707                             raise RuntimeError('%r wanted to resolve the '
 708                                                'new state dynamically but'
 709                                                ' no group matched' %
 710                                                regex)
 711                     # direct state name given
 712                     else:
 713                         stack.append(new_state)
 714                     statetokens = self.rules[stack[-1]]
 715                 # we are still at the same position and no stack change.
 716                 # this means a loop without break condition, avoid that and
 717                 # raise error
 718                 elif pos2 == pos:
 719                     raise RuntimeError('%r yielded empty string without '
 720                                        'stack change' % regex)
 721                 # publish new function and start again
 722                 pos = pos2
 723                 break
 724             # if loop terminated without break we haven't found a single match
 725             # either we are at the end of the file or we have a problem
 726             else:
 727                 # end of text
 728                 if pos >= source_length:
 729                     return
 730                 # something went wrong
 731                 raise TemplateSyntaxError('unexpected char %r at %d' %
 732                                           (source[pos], pos), lineno,
 733                                           name, filename)