Features added
--------------
+* Named Unicode escapes ("\N{...}") are supported.
+
* Python functions/classes provide the special attribute "__qualname__"
as defined by PEP 3155.
two_hex = hexdigit + hexdigit
four_hex = two_hex + two_hex
escapeseq = Str("\\") + (two_oct | three_oct |
+ Str('N{') + Rep(AnyBut('}')) + Str('}') |
Str('u') + four_hex | Str('x') + two_hex |
Str('U') + four_hex + four_hex | AnyChar)
cython.declare(Nodes=object, ExprNodes=object, EncodedString=object)
import re
+import unicodedata
from Cython.Compiler.Scanning import PyrexScanner, FileSourceDescriptor
import Nodes
StringEncoding.char_from_escape_sequence(systr))
elif c == u'\n':
pass
- elif c == u'x':
+ elif c == u'x': # \xXX
if len(systr) == 4:
chars.append_charval( int(systr[2:], 16) )
else:
s.error("Invalid hex escape '%s'" % systr)
- elif c in u'Uu':
- if kind in ('u', ''):
- if len(systr) in (6,10):
- chrval = int(systr[2:], 16)
- if chrval > 1114111: # sys.maxunicode:
- s.error("Invalid unicode escape '%s'" % systr)
- else:
+ elif c in u'NUu' and kind in ('u', ''): # \uxxxx, \Uxxxxxxxx, \N{...}
+ chrval = -1
+ if c == u'N':
+ try:
+ chrval = ord(unicodedata.lookup(systr[3:-1]))
+ except KeyError:
+ s.error("Unknown Unicode character name %r" % systr[3:-1])
+ elif len(systr) in (6,10):
+ chrval = int(systr[2:], 16)
+ if chrval > 1114111: # sys.maxunicode:
s.error("Invalid unicode escape '%s'" % systr)
+ chrval = -1
else:
- # unicode escapes in byte strings are not unescaped
- chrval = None
- chars.append_uescape(chrval, systr)
+ s.error("Invalid unicode escape '%s'" % systr)
+ if chrval >= 0:
+ chars.append_uescape(chrval, systr)
else:
chars.append(u'\\' + systr[1:])
if is_python3_source and not has_non_ASCII_literal_characters \
>>> len(bytes_uescape)
28
- >>> (sys.version_info[0] >= 3 and sys.maxunicode == 1114111 and len(str_uescape) == 3 or
- ... sys.version_info[0] >= 3 and sys.maxunicode == 65535 and len(str_uescape) == 4 or
- ... sys.version_info[0] < 3 and len(str_uescape) == 17 or
+ >>> (sys.version_info[0] >= 3 and sys.maxunicode == 1114111 and len(str_uescape) == 4 or
+ ... sys.version_info[0] >= 3 and sys.maxunicode == 65535 and len(str_uescape) == 5 or
+ ... sys.version_info[0] < 3 and len(str_uescape) == 28 or
... len(str_uescape))
True
>>> (sys.version_info[0] >= 3 and str_uescape[0] == 'c' or
True
>>> print(str_uescape[-1])
B
+ >>> (sys.version_info[0] >= 3 and ord(str_uescape[-2]) == 0x2603 or
+ ... sys.version_info[0] < 3 and str_uescape[-12:-1] == b'\\N{SNOWMAN}' or
+ ... sys.version_info[0] >= 3 and ord(str_uescape[-2]) or str_uescape[-12:-1])
+ True
>>> newlines == "Aaa\n"
True
uresc = ur'\12\'\"\\'
bytes_uescape = b'\u1234\U12345678\u\u1\u12\uX'
-str_uescape = '\u0063\U00012345\x42'
+str_uescape = '\u0063\U00012345\N{SNOWMAN}\x42'
newlines = "Aaa\n"
True
>>> h == u'\\ud800' # unescaped by Python (required by doctest)
True
+ >>> k == u'\\N{SNOWMAN}' == u'\\u2603'
+ True
>>> add == u'Søk ik' + u'üÖä' + 'abc'
True
>>> null == u'\\x00' # unescaped by Python (required by doctest)
f = u'\xf8'
g = u'\udc00' # lone trail surrogate
h = u'\ud800' # lone lead surrogate
+k = u'\N{SNOWMAN}'
add = u'Søk ik' + u'üÖä' + u'abc'
null = u'\x00'
True
>>> f == u'\\xf8' # unescaped by Python
True
+ >>> k == u'ä' == u'\\N{LATIN SMALL LETTER A WITH DIAERESIS}'
+ True
>>> add == u'Søk ik' + u'üÖä' + 'abc'
True
>>> null == u'\\x00' # unescaped by Python (required by doctest)
d = u'üÖä'
e = u'\x03\x67\xf8\uf8d2Søk ik'
f = u'\xf8'
+k = u'\N{LATIN SMALL LETTER A WITH DIAERESIS}'
add = u'Søk ik' + u'üÖä' + u'abc'
null = u'\x00'