support bytearray as auto encoding string type

author Stefan Behnel <stefan_ml@behnel.de>

Wed, 6 Nov 2013 06:36:03 +0000 (07:36 +0100)

committer Stefan Behnel <stefan_ml@behnel.de>

Wed, 6 Nov 2013 06:36:03 +0000 (07:36 +0100)
author Stefan Behnel <stefan_ml@behnel.de>
Wed, 6 Nov 2013 06:36:03 +0000 (07:36 +0100)
committer Stefan Behnel <stefan_ml@behnel.de>
Wed, 6 Nov 2013 06:36:03 +0000 (07:36 +0100)
diff --git a/CHANGES.rst b/CHANGES.rst

index 92347ed..23bc7ba 100644 (file)
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -9,7 +9,8 @@ Features added
  --------------
  
  * ``bytearray`` has become a known type and supports coercion from and
-  to C strings.  Indexing, slicing and decoding is optimised.
+  to C strings.  Indexing, slicing and decoding is optimised. Note that
+  this may have an impact on existing code due to type inference.
  
  * Using ``cdef basestring stringvar`` and function arguments typed as
    ``basestring`` is now meaningful and allows assigning exactly
diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py

index c01b8b0..7534a37 100644 (file)
--- a/Cython/Compiler/ExprNodes.py
+++ b/Cython/Compiler/ExprNodes.py
@@ -99,6 +99,7 @@ def find_coercion_error(type_tuple, default, env):
  def default_str_type(env):
      return {
          'bytes': bytes_type,
+        'bytearray': bytearray_type,
          'str': str_type,
          'unicode': unicode_type
      }.get(env.directives['c_string_type'])
diff --git a/Cython/Compiler/ModuleNode.py b/Cython/Compiler/ModuleNode.py

index c45c444..2370bba 100644 (file)
--- a/Cython/Compiler/ModuleNode.py
+++ b/Cython/Compiler/ModuleNode.py
@@ -580,16 +580,20 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
  
          c_string_type = env.directives['c_string_type']
          c_string_encoding = env.directives['c_string_encoding']
-        if c_string_type != 'bytes' and not c_string_encoding:
-            error(self.pos, "a default encoding must be provided if c_string_type != bytes")
+        if c_string_type not in ('bytes', 'bytearray') and not c_string_encoding:
+            error(self.pos, "a default encoding must be provided if c_string_type is not a byte type")
          code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII %s' % int(c_string_encoding == 'ascii'))
          if c_string_encoding == 'default':
              code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 1')
          else:
              code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0')
              code.putln('#define __PYX_DEFAULT_STRING_ENCODING "%s"' % c_string_encoding)
-        code.putln('#define __Pyx_PyObject_FromString __Pyx_Py%s_FromString' % c_string_type.title())
-        code.putln('#define __Pyx_PyObject_FromStringAndSize __Pyx_Py%s_FromStringAndSize' % c_string_type.title())
+        if c_string_type == 'bytearray':
+            c_string_func_name = 'ByteArray'
+        else:
+            c_string_func_name = c_string_type.title()
+        code.putln('#define __Pyx_PyObject_FromString __Pyx_Py%s_FromString' % c_string_func_name)
+        code.putln('#define __Pyx_PyObject_FromStringAndSize __Pyx_Py%s_FromStringAndSize' % c_string_func_name)
          code.put(UtilityCode.load_as_string("TypeConversions", "TypeConversion.c")[0])
          
          # These utility functions are assumed to exist and used elsewhere.
diff --git a/Cython/Compiler/Options.py b/Cython/Compiler/Options.py

index 05f123e..fe11da5 100644 (file)
--- a/Cython/Compiler/Options.py
+++ b/Cython/Compiler/Options.py
@@ -208,7 +208,7 @@ directive_types = {
      'returns' : type,
      'set_initial_path': str,
      'freelist': int,
-    'c_string_type': one_of('bytes', 'str', 'unicode'),
+    'c_string_type': one_of('bytes', 'bytearray', 'str', 'unicode'),
      'c_string_encoding': normalise_encoding_name,
  }
  
@@ -254,11 +254,13 @@ def parse_directive_value(name, value, relaxed_bool=False):
      'str'
      >>> parse_directive_value('c_string_type', 'bytes')
      'bytes'
+    >>> parse_directive_value('c_string_type', 'bytearray')
+    'bytearray'
      >>> parse_directive_value('c_string_type', 'unicode')
      'unicode'
      >>> parse_directive_value('c_string_type', 'unnicode')
      Traceback (most recent call last):
-    ValueError: c_string_type directive must be one of ('bytes', 'str', 'unicode'), got 'unnicode'
+    ValueError: c_string_type directive must be one of ('bytes', 'bytearray', 'str', 'unicode'), got 'unnicode'
      """
      type = directive_types.get(name)
      if not type: return None
diff --git a/runtests.py b/runtests.py

index 763a977..f131ee4 100755 (executable)
--- a/runtests.py
+++ b/runtests.py
@@ -237,6 +237,8 @@ VER_DEP_MODULES = {
                                            'run.struct_conversion',
                                            'run.bytearray_coercion',
                                            'run.bytearraymethods',
+                                          'run.bytearray_ascii_auto_encoding',
+                                          'run.bytearray_default_auto_encoding',
                                            # memory views require buffer protocol
                                            'memoryview.relaxed_strides',
                                            'memoryview.cythonarray',
diff --git a/tests/run/bytearray_ascii_auto_encoding.pyx b/tests/run/bytearray_ascii_auto_encoding.pyx

new file mode 100644 (file)

index 0000000..2f919db
--- /dev/null
+++ b/tests/run/bytearray_ascii_auto_encoding.pyx
@@ -0,0 +1,14 @@
+#cython: c_string_type = bytearray
+#cython: c_string_encoding = ascii
+
+"End of first directives"
+
+include "unicode_ascii_auto_encoding.pyx"
+
+auto_string_type = bytearray
+
+def check_auto_string_type():
+    """
+    >>> check_auto_string_type()
+    """
+    assert auto_string_type is bytearray
diff --git a/tests/run/bytearray_default_auto_encoding.pyx b/tests/run/bytearray_default_auto_encoding.pyx

new file mode 100644 (file)

index 0000000..395a99c
--- /dev/null
+++ b/tests/run/bytearray_default_auto_encoding.pyx
@@ -0,0 +1,17 @@
+# cython: c_string_type = bytearray
+# cython: c_string_encoding = default
+
+import sys
+if sys.version_info[0] >= 3:
+    __doc__ = r"""
+        >>> isinstance(as_objects("ab\xff"), bytearray)
+        True
+        >>> as_objects("ab\xff") == bytearray("ab\xff".encode())
+        True
+        >>> isinstance(slice_as_objects("ab\xff", 1, 4), bytearray)
+        True
+        >>> slice_as_objects("ab\xffd", 1, 4) == bytearray("b\xff".encode())
+        True
+        """
+
+include "bytearray_ascii_auto_encoding.pyx"
diff --git a/tests/run/unicode_ascii_auto_encoding.pyx b/tests/run/unicode_ascii_auto_encoding.pyx

index aaa89d9..2ebfc37 100644 (file)
--- a/tests/run/unicode_ascii_auto_encoding.pyx
+++ b/tests/run/unicode_ascii_auto_encoding.pyx
@@ -6,10 +6,20 @@ auto_string_type = unicode
  from libc.string cimport strcmp
  
  
+def _as_string(x):
+    try:
+        return x.decode('latin1')
+    except AttributeError:
+        return x
+
+
  def as_objects(char* ascii_data):
      """
-    >>> print(as_objects('abc'))
-    abc
+    >>> x = as_objects('abc')
+    >>> isinstance(x, auto_string_type) or type(x)
+    True
+    >>> _as_string(x) == 'abc' or repr(x)
+    True
      """
      assert isinstance(<object>ascii_data, auto_string_type)
      assert isinstance(<bytes>ascii_data, bytes)
@@ -30,8 +40,11 @@ def from_object():
  
  def slice_as_objects(char* ascii_data, int start, int end):
      """
-    >>> print(slice_as_objects('grok', 1, 3))
-    ro
+    >>> x = slice_as_objects('grok', 1, 3)
+    >>> isinstance(x, auto_string_type) or type(x)
+    True
+    >>> _as_string(x) == 'ro' or repr(x)
+    True
      """
      assert isinstance(<object>ascii_data[start:end], auto_string_type)
      assert isinstance(<bytes>ascii_data[start:end], bytes)
author	Stefan Behnel <stefan_ml@behnel.de>
	Wed, 6 Nov 2013 06:36:03 +0000 (07:36 +0100)
committer	Stefan Behnel <stefan_ml@behnel.de>
	Wed, 6 Nov 2013 06:36:03 +0000 (07:36 +0100)
CHANGES.rst		patch | blob | history
Cython/Compiler/ExprNodes.py		patch | blob | history
Cython/Compiler/ModuleNode.py		patch | blob | history
Cython/Compiler/Options.py		patch | blob | history
runtests.py		patch | blob | history
tests/run/bytearray_ascii_auto_encoding.pyx	[new file with mode: 0644]	patch | blob
tests/run/bytearray_default_auto_encoding.pyx	[new file with mode: 0644]	patch | blob
tests/run/unicode_ascii_auto_encoding.pyx		patch | blob | history