From 8524b21fc885c2bbef582bc5b893be4bf3a677b0 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Wed, 6 Nov 2013 07:36:03 +0100 Subject: [PATCH] support bytearray as auto encoding string type --HG-- rename : tests/run/str_ascii_auto_encoding.pyx => tests/run/bytearray_ascii_auto_encoding.pyx rename : tests/run/str_default_auto_encoding.pyx => tests/run/bytearray_default_auto_encoding.pyx --- CHANGES.rst | 3 ++- Cython/Compiler/ExprNodes.py | 1 + Cython/Compiler/ModuleNode.py | 12 ++++++++---- Cython/Compiler/Options.py | 6 ++++-- runtests.py | 2 ++ tests/run/bytearray_ascii_auto_encoding.pyx | 14 ++++++++++++++ tests/run/bytearray_default_auto_encoding.pyx | 17 +++++++++++++++++ tests/run/unicode_ascii_auto_encoding.pyx | 21 +++++++++++++++++---- 8 files changed, 65 insertions(+), 11 deletions(-) create mode 100644 tests/run/bytearray_ascii_auto_encoding.pyx create mode 100644 tests/run/bytearray_default_auto_encoding.pyx diff --git a/CHANGES.rst b/CHANGES.rst index 92347ed..23bc7ba 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -9,7 +9,8 @@ Features added -------------- * ``bytearray`` has become a known type and supports coercion from and - to C strings. Indexing, slicing and decoding is optimised. + to C strings. Indexing, slicing and decoding is optimised. Note that + this may have an impact on existing code due to type inference. * Using ``cdef basestring stringvar`` and function arguments typed as ``basestring`` is now meaningful and allows assigning exactly diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py index c01b8b0..7534a37 100644 --- a/Cython/Compiler/ExprNodes.py +++ b/Cython/Compiler/ExprNodes.py @@ -99,6 +99,7 @@ def find_coercion_error(type_tuple, default, env): def default_str_type(env): return { 'bytes': bytes_type, + 'bytearray': bytearray_type, 'str': str_type, 'unicode': unicode_type }.get(env.directives['c_string_type']) diff --git a/Cython/Compiler/ModuleNode.py b/Cython/Compiler/ModuleNode.py index c45c444..2370bba 100644 --- a/Cython/Compiler/ModuleNode.py +++ b/Cython/Compiler/ModuleNode.py @@ -580,16 +580,20 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): c_string_type = env.directives['c_string_type'] c_string_encoding = env.directives['c_string_encoding'] - if c_string_type != 'bytes' and not c_string_encoding: - error(self.pos, "a default encoding must be provided if c_string_type != bytes") + if c_string_type not in ('bytes', 'bytearray') and not c_string_encoding: + error(self.pos, "a default encoding must be provided if c_string_type is not a byte type") code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII %s' % int(c_string_encoding == 'ascii')) if c_string_encoding == 'default': code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 1') else: code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0') code.putln('#define __PYX_DEFAULT_STRING_ENCODING "%s"' % c_string_encoding) - code.putln('#define __Pyx_PyObject_FromString __Pyx_Py%s_FromString' % c_string_type.title()) - code.putln('#define __Pyx_PyObject_FromStringAndSize __Pyx_Py%s_FromStringAndSize' % c_string_type.title()) + if c_string_type == 'bytearray': + c_string_func_name = 'ByteArray' + else: + c_string_func_name = c_string_type.title() + code.putln('#define __Pyx_PyObject_FromString __Pyx_Py%s_FromString' % c_string_func_name) + code.putln('#define __Pyx_PyObject_FromStringAndSize __Pyx_Py%s_FromStringAndSize' % c_string_func_name) code.put(UtilityCode.load_as_string("TypeConversions", "TypeConversion.c")[0]) # These utility functions are assumed to exist and used elsewhere. diff --git a/Cython/Compiler/Options.py b/Cython/Compiler/Options.py index 05f123e..fe11da5 100644 --- a/Cython/Compiler/Options.py +++ b/Cython/Compiler/Options.py @@ -208,7 +208,7 @@ directive_types = { 'returns' : type, 'set_initial_path': str, 'freelist': int, - 'c_string_type': one_of('bytes', 'str', 'unicode'), + 'c_string_type': one_of('bytes', 'bytearray', 'str', 'unicode'), 'c_string_encoding': normalise_encoding_name, } @@ -254,11 +254,13 @@ def parse_directive_value(name, value, relaxed_bool=False): 'str' >>> parse_directive_value('c_string_type', 'bytes') 'bytes' + >>> parse_directive_value('c_string_type', 'bytearray') + 'bytearray' >>> parse_directive_value('c_string_type', 'unicode') 'unicode' >>> parse_directive_value('c_string_type', 'unnicode') Traceback (most recent call last): - ValueError: c_string_type directive must be one of ('bytes', 'str', 'unicode'), got 'unnicode' + ValueError: c_string_type directive must be one of ('bytes', 'bytearray', 'str', 'unicode'), got 'unnicode' """ type = directive_types.get(name) if not type: return None diff --git a/runtests.py b/runtests.py index 763a977..f131ee4 100755 --- a/runtests.py +++ b/runtests.py @@ -237,6 +237,8 @@ VER_DEP_MODULES = { 'run.struct_conversion', 'run.bytearray_coercion', 'run.bytearraymethods', + 'run.bytearray_ascii_auto_encoding', + 'run.bytearray_default_auto_encoding', # memory views require buffer protocol 'memoryview.relaxed_strides', 'memoryview.cythonarray', diff --git a/tests/run/bytearray_ascii_auto_encoding.pyx b/tests/run/bytearray_ascii_auto_encoding.pyx new file mode 100644 index 0000000..2f919db --- /dev/null +++ b/tests/run/bytearray_ascii_auto_encoding.pyx @@ -0,0 +1,14 @@ +#cython: c_string_type = bytearray +#cython: c_string_encoding = ascii + +"End of first directives" + +include "unicode_ascii_auto_encoding.pyx" + +auto_string_type = bytearray + +def check_auto_string_type(): + """ + >>> check_auto_string_type() + """ + assert auto_string_type is bytearray diff --git a/tests/run/bytearray_default_auto_encoding.pyx b/tests/run/bytearray_default_auto_encoding.pyx new file mode 100644 index 0000000..395a99c --- /dev/null +++ b/tests/run/bytearray_default_auto_encoding.pyx @@ -0,0 +1,17 @@ +# cython: c_string_type = bytearray +# cython: c_string_encoding = default + +import sys +if sys.version_info[0] >= 3: + __doc__ = r""" + >>> isinstance(as_objects("ab\xff"), bytearray) + True + >>> as_objects("ab\xff") == bytearray("ab\xff".encode()) + True + >>> isinstance(slice_as_objects("ab\xff", 1, 4), bytearray) + True + >>> slice_as_objects("ab\xffd", 1, 4) == bytearray("b\xff".encode()) + True + """ + +include "bytearray_ascii_auto_encoding.pyx" diff --git a/tests/run/unicode_ascii_auto_encoding.pyx b/tests/run/unicode_ascii_auto_encoding.pyx index aaa89d9..2ebfc37 100644 --- a/tests/run/unicode_ascii_auto_encoding.pyx +++ b/tests/run/unicode_ascii_auto_encoding.pyx @@ -6,10 +6,20 @@ auto_string_type = unicode from libc.string cimport strcmp +def _as_string(x): + try: + return x.decode('latin1') + except AttributeError: + return x + + def as_objects(char* ascii_data): """ - >>> print(as_objects('abc')) - abc + >>> x = as_objects('abc') + >>> isinstance(x, auto_string_type) or type(x) + True + >>> _as_string(x) == 'abc' or repr(x) + True """ assert isinstance(ascii_data, auto_string_type) assert isinstance(ascii_data, bytes) @@ -30,8 +40,11 @@ def from_object(): def slice_as_objects(char* ascii_data, int start, int end): """ - >>> print(slice_as_objects('grok', 1, 3)) - ro + >>> x = slice_as_objects('grok', 1, 3) + >>> isinstance(x, auto_string_type) or type(x) + True + >>> _as_string(x) == 'ro' or repr(x) + True """ assert isinstance(ascii_data[start:end], auto_string_type) assert isinstance(ascii_data[start:end], bytes) -- 2.7.4