support 'rb' string prefix in addition to 'br' (Py3 allows it)

author Stefan Behnel <stefan_ml@behnel.de>

Wed, 23 Oct 2013 16:58:46 +0000 (18:58 +0200)

committer Stefan Behnel <stefan_ml@behnel.de>

Wed, 23 Oct 2013 16:58:46 +0000 (18:58 +0200)
author Stefan Behnel <stefan_ml@behnel.de>
Wed, 23 Oct 2013 16:58:46 +0000 (18:58 +0200)
committer Stefan Behnel <stefan_ml@behnel.de>
Wed, 23 Oct 2013 16:58:46 +0000 (18:58 +0200)
diff --git a/Cython/Compiler/Lexicon.py b/Cython/Compiler/Lexicon.py

index 7195f13..fde089b 100644 (file)
--- a/Cython/Compiler/Lexicon.py
+++ b/Cython/Compiler/Lexicon.py
@@ -4,7 +4,10 @@
  #
  
  raw_prefixes = "rR"
-string_prefixes = "cCuUbB"
+bytes_prefixes = "bB"
+string_prefixes = "uU" + bytes_prefixes
+char_prefixes = "cC"
+any_string_prefix = raw_prefixes + string_prefixes + char_prefixes
  IDENT = 'IDENT'
  
  def make_lexicon():
@@ -60,7 +63,10 @@ def make_lexicon():
          + Str('"""')
      )
  
-    beginstring = Opt(Any(string_prefixes)) + Opt(Any(raw_prefixes)) + (Str("'") | Str('"') | Str("'''") | Str('"""'))
+    beginstring = Opt(Any(string_prefixes) + Opt(Any(raw_prefixes)) |
+                      Any(raw_prefixes) + Opt(Any(bytes_prefixes)) |
+                      Any(char_prefixes)
+                      ) + (Str("'") | Str('"') | Str("'''") | Str('"""'))
      two_oct = octdigit + octdigit
      three_oct = octdigit + octdigit + octdigit
      two_hex = hexdigit + hexdigit
diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py

index aa3f4a5..a77a227 100644 (file)
--- a/Cython/Compiler/Parsing.py
+++ b/Cython/Compiler/Parsing.py
@@ -785,13 +785,17 @@ def p_string_literal(s, kind_override=None):
  
      # s.sy == 'BEGIN_STRING'
      pos = s.position()
-    is_raw = 0
+    is_raw = False
      is_python3_source = s.context.language_level >= 3
      has_non_ASCII_literal_characters = False
      kind = s.systring[:1].lower()
      if kind == 'r':
-        kind = ''
-        is_raw = 1
+        # Py3 allows both 'br' and 'rb' as prefix
+        if s.systring[1:2].lower() == 'b':
+            kind = 'b'
+        else:
+            kind = ''
+        is_raw = True
      elif kind in 'ub':
          is_raw = s.systring[1:2].lower() == 'r'
      elif kind != 'c':
@@ -808,6 +812,7 @@ def p_string_literal(s, kind_override=None):
              chars = StringEncoding.StrLiteralBuilder(s.source_encoding)
          else:
              chars = StringEncoding.BytesLiteralBuilder(s.source_encoding)
+
      while 1:
          s.next()
          sy = s.sy
@@ -872,6 +877,7 @@ def p_string_literal(s, kind_override=None):
          else:
              s.error("Unexpected token %r:%r in string literal" %
                      (sy, s.systring))
+
      if kind == 'c':
          unicode_value = None
          bytes_value = chars.getchar()
diff --git a/Cython/Compiler/Scanning.py b/Cython/Compiler/Scanning.py

index 8ba17c8..93905e0 100644 (file)
--- a/Cython/Compiler/Scanning.py
+++ b/Cython/Compiler/Scanning.py
@@ -7,14 +7,14 @@ import os
  import platform
  
  import cython
-cython.declare(EncodedString=object, string_prefixes=object, raw_prefixes=object, IDENT=unicode,
+cython.declare(EncodedString=object, any_string_prefix=unicode, IDENT=unicode,
                 print_function=object)
  
  from Cython import Utils
  from Cython.Plex.Scanners import Scanner
  from Cython.Plex.Errors import UnrecognizedInput
  from Errors import error
-from Lexicon import string_prefixes, raw_prefixes, make_lexicon, IDENT
+from Lexicon import any_string_prefix, make_lexicon, IDENT
  from Future import print_function
  
  from StringEncoding import EncodedString
@@ -340,9 +340,7 @@ class PyrexScanner(Scanner):
      }
  
      def begin_string_action(self, text):
-        if text[:1] in string_prefixes:
-            text = text[1:]
-        if text[:1] in raw_prefixes:
+        while text[:1] in any_string_prefix:
              text = text[1:]
          self.begin(self.string_states[text])
          self.produce('BEGIN_STRING')
diff --git a/tests/run/strliterals.pyx b/tests/run/strliterals.pyx

index 517a45a..aafa943 100644 (file)
--- a/tests/run/strliterals.pyx
+++ b/tests/run/strliterals.pyx
@@ -180,6 +180,17 @@ s7 = Br"abc\x11"
  s8 = bR"abc\x11"
  s9 = BR"abc\x11"
  
+# and in reversed order: r+b
+s6_2 = rb"abc\x11"
+s7_2 = rB"abc\x11"
+s8_2 = Rb"abc\x11"
+s9_2 = RB"abc\x11"
+
+assert s6 == s6_2
+assert s7 == s7_2
+assert s8 == s8_2
+assert s9 == s9_2
+
  u1 = u"abc\x11"
  u2 = U"abc\x11"
  u3 = ur"abc\x11"
author	Stefan Behnel <stefan_ml@behnel.de>
	Wed, 23 Oct 2013 16:58:46 +0000 (18:58 +0200)
committer	Stefan Behnel <stefan_ml@behnel.de>
	Wed, 23 Oct 2013 16:58:46 +0000 (18:58 +0200)
Cython/Compiler/Lexicon.py		patch | blob | history
Cython/Compiler/Parsing.py		patch | blob | history
Cython/Compiler/Scanning.py		patch | blob | history
tests/run/strliterals.pyx		patch | blob | history