From 59c3f07c1fc4baf009bd31e5bc90f7d0936ec42c Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Wed, 23 Oct 2013 18:58:46 +0200 Subject: [PATCH] support 'rb' string prefix in addition to 'br' (Py3 allows it) --- Cython/Compiler/Lexicon.py | 10 ++++++++-- Cython/Compiler/Parsing.py | 12 +++++++++--- Cython/Compiler/Scanning.py | 8 +++----- tests/run/strliterals.pyx | 11 +++++++++++ 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/Cython/Compiler/Lexicon.py b/Cython/Compiler/Lexicon.py index 7195f13..fde089b 100644 --- a/Cython/Compiler/Lexicon.py +++ b/Cython/Compiler/Lexicon.py @@ -4,7 +4,10 @@ # raw_prefixes = "rR" -string_prefixes = "cCuUbB" +bytes_prefixes = "bB" +string_prefixes = "uU" + bytes_prefixes +char_prefixes = "cC" +any_string_prefix = raw_prefixes + string_prefixes + char_prefixes IDENT = 'IDENT' def make_lexicon(): @@ -60,7 +63,10 @@ def make_lexicon(): + Str('"""') ) - beginstring = Opt(Any(string_prefixes)) + Opt(Any(raw_prefixes)) + (Str("'") | Str('"') | Str("'''") | Str('"""')) + beginstring = Opt(Any(string_prefixes) + Opt(Any(raw_prefixes)) | + Any(raw_prefixes) + Opt(Any(bytes_prefixes)) | + Any(char_prefixes) + ) + (Str("'") | Str('"') | Str("'''") | Str('"""')) two_oct = octdigit + octdigit three_oct = octdigit + octdigit + octdigit two_hex = hexdigit + hexdigit diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py index aa3f4a5..a77a227 100644 --- a/Cython/Compiler/Parsing.py +++ b/Cython/Compiler/Parsing.py @@ -785,13 +785,17 @@ def p_string_literal(s, kind_override=None): # s.sy == 'BEGIN_STRING' pos = s.position() - is_raw = 0 + is_raw = False is_python3_source = s.context.language_level >= 3 has_non_ASCII_literal_characters = False kind = s.systring[:1].lower() if kind == 'r': - kind = '' - is_raw = 1 + # Py3 allows both 'br' and 'rb' as prefix + if s.systring[1:2].lower() == 'b': + kind = 'b' + else: + kind = '' + is_raw = True elif kind in 'ub': is_raw = s.systring[1:2].lower() == 'r' elif kind != 'c': @@ -808,6 +812,7 @@ def p_string_literal(s, kind_override=None): chars = StringEncoding.StrLiteralBuilder(s.source_encoding) else: chars = StringEncoding.BytesLiteralBuilder(s.source_encoding) + while 1: s.next() sy = s.sy @@ -872,6 +877,7 @@ def p_string_literal(s, kind_override=None): else: s.error("Unexpected token %r:%r in string literal" % (sy, s.systring)) + if kind == 'c': unicode_value = None bytes_value = chars.getchar() diff --git a/Cython/Compiler/Scanning.py b/Cython/Compiler/Scanning.py index 8ba17c8..93905e0 100644 --- a/Cython/Compiler/Scanning.py +++ b/Cython/Compiler/Scanning.py @@ -7,14 +7,14 @@ import os import platform import cython -cython.declare(EncodedString=object, string_prefixes=object, raw_prefixes=object, IDENT=unicode, +cython.declare(EncodedString=object, any_string_prefix=unicode, IDENT=unicode, print_function=object) from Cython import Utils from Cython.Plex.Scanners import Scanner from Cython.Plex.Errors import UnrecognizedInput from Errors import error -from Lexicon import string_prefixes, raw_prefixes, make_lexicon, IDENT +from Lexicon import any_string_prefix, make_lexicon, IDENT from Future import print_function from StringEncoding import EncodedString @@ -340,9 +340,7 @@ class PyrexScanner(Scanner): } def begin_string_action(self, text): - if text[:1] in string_prefixes: - text = text[1:] - if text[:1] in raw_prefixes: + while text[:1] in any_string_prefix: text = text[1:] self.begin(self.string_states[text]) self.produce('BEGIN_STRING') diff --git a/tests/run/strliterals.pyx b/tests/run/strliterals.pyx index 517a45a..aafa943 100644 --- a/tests/run/strliterals.pyx +++ b/tests/run/strliterals.pyx @@ -180,6 +180,17 @@ s7 = Br"abc\x11" s8 = bR"abc\x11" s9 = BR"abc\x11" +# and in reversed order: r+b +s6_2 = rb"abc\x11" +s7_2 = rB"abc\x11" +s8_2 = Rb"abc\x11" +s9_2 = RB"abc\x11" + +assert s6 == s6_2 +assert s7 == s7_2 +assert s8 == s8_2 +assert s9 == s9_2 + u1 = u"abc\x11" u2 = U"abc\x11" u3 = ur"abc\x11" -- 2.7.4