--------------
* ``bytearray`` has become a known type and supports coercion from and
- to C strings. Indexing, slicing and decoding is optimised.
+ to C strings. Indexing, slicing and decoding are optimised. Note that
+ this may have an impact on existing code due to type inference.
* Using ``cdef basestring stringvar`` and function arguments typed as
``basestring`` is now meaningful and allows assigning exactly
def default_str_type(env):
return {
'bytes': bytes_type,
+ 'bytearray': bytearray_type,
'str': str_type,
'unicode': unicode_type
}.get(env.directives['c_string_type'])
c_string_type = env.directives['c_string_type']
c_string_encoding = env.directives['c_string_encoding']
- if c_string_type != 'bytes' and not c_string_encoding:
- error(self.pos, "a default encoding must be provided if c_string_type != bytes")
+ if c_string_type not in ('bytes', 'bytearray') and not c_string_encoding:
+ error(self.pos, "a default encoding must be provided if c_string_type is not a byte type")
code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII %s' % int(c_string_encoding == 'ascii'))
if c_string_encoding == 'default':
code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 1')
else:
code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0')
code.putln('#define __PYX_DEFAULT_STRING_ENCODING "%s"' % c_string_encoding)
- code.putln('#define __Pyx_PyObject_FromString __Pyx_Py%s_FromString' % c_string_type.title())
- code.putln('#define __Pyx_PyObject_FromStringAndSize __Pyx_Py%s_FromStringAndSize' % c_string_type.title())
+ if c_string_type == 'bytearray':
+ c_string_func_name = 'ByteArray'
+ else:
+ c_string_func_name = c_string_type.title()
+ code.putln('#define __Pyx_PyObject_FromString __Pyx_Py%s_FromString' % c_string_func_name)
+ code.putln('#define __Pyx_PyObject_FromStringAndSize __Pyx_Py%s_FromStringAndSize' % c_string_func_name)
code.put(UtilityCode.load_as_string("TypeConversions", "TypeConversion.c")[0])
# These utility functions are assumed to exist and used elsewhere.
'returns' : type,
'set_initial_path': str,
'freelist': int,
- 'c_string_type': one_of('bytes', 'str', 'unicode'),
+ 'c_string_type': one_of('bytes', 'bytearray', 'str', 'unicode'),
'c_string_encoding': normalise_encoding_name,
}
'str'
>>> parse_directive_value('c_string_type', 'bytes')
'bytes'
+ >>> parse_directive_value('c_string_type', 'bytearray')
+ 'bytearray'
>>> parse_directive_value('c_string_type', 'unicode')
'unicode'
>>> parse_directive_value('c_string_type', 'unnicode')
Traceback (most recent call last):
- ValueError: c_string_type directive must be one of ('bytes', 'str', 'unicode'), got 'unnicode'
+ ValueError: c_string_type directive must be one of ('bytes', 'bytearray', 'str', 'unicode'), got 'unnicode'
"""
type = directive_types.get(name)
if not type: return None
'run.struct_conversion',
'run.bytearray_coercion',
'run.bytearraymethods',
+ 'run.bytearray_ascii_auto_encoding',
+ 'run.bytearray_default_auto_encoding',
# memory views require buffer protocol
'memoryview.relaxed_strides',
'memoryview.cythonarray',
--- /dev/null
+#cython: c_string_type = bytearray
+#cython: c_string_encoding = ascii
+
+"End of first directives"
+
+include "unicode_ascii_auto_encoding.pyx"
+
+auto_string_type = bytearray
+
+def check_auto_string_type():
+ """
+ >>> check_auto_string_type()
+ """
+ assert auto_string_type is bytearray
--- /dev/null
+# cython: c_string_type = bytearray
+# cython: c_string_encoding = default
+
+import sys
+if sys.version_info[0] >= 3:
+ __doc__ = r"""
+ >>> isinstance(as_objects("ab\xff"), bytearray)
+ True
+ >>> as_objects("ab\xff") == bytearray("ab\xff".encode())
+ True
+ >>> isinstance(slice_as_objects("ab\xff", 1, 4), bytearray)
+ True
+ >>> slice_as_objects("ab\xffd", 1, 4) == bytearray("b\xff".encode())
+ True
+ """
+
+include "bytearray_ascii_auto_encoding.pyx"
from libc.string cimport strcmp
+def _as_string(x):
+ try:
+ return x.decode('latin1')
+ except AttributeError:
+ return x
+
+
def as_objects(char* ascii_data):
"""
- >>> print(as_objects('abc'))
- abc
+ >>> x = as_objects('abc')
+ >>> isinstance(x, auto_string_type) or type(x)
+ True
+ >>> _as_string(x) == 'abc' or repr(x)
+ True
"""
assert isinstance(<object>ascii_data, auto_string_type)
assert isinstance(<bytes>ascii_data, bytes)
def slice_as_objects(char* ascii_data, int start, int end):
"""
- >>> print(slice_as_objects('grok', 1, 3))
- ro
+ >>> x = slice_as_objects('grok', 1, 3)
+ >>> isinstance(x, auto_string_type) or type(x)
+ True
+ >>> _as_string(x) == 'ro' or repr(x)
+ True
"""
assert isinstance(<object>ascii_data[start:end], auto_string_type)
assert isinstance(<bytes>ascii_data[start:end], bytes)