support 'bytearray' in the same way as 'bytes', starting with Py2.6

author Stefan Behnel <stefan_ml@behnel.de>
Sat, 2 Nov 2013 18:19:55 +0000 (19:19 +0100)

committer Stefan Behnel <stefan_ml@behnel.de>
Sat, 2 Nov 2013 18:19:55 +0000 (19:19 +0100)

diff --git a/CHANGES.rst b/CHANGES.rst

index 22e5257..3643869 100644 (file)
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -8,6 +8,9 @@ Cython Changelog
  Features added
  --------------
  
+* ``bytearray`` has become a known type and supports coercion from and
+  to C strings.
+
  * Using ``cdef basestring stringvar`` and function arguments typed as
    ``basestring`` is now meaningful and allows assigning exactly
    ``str`` and ``unicode`` objects, but no subtypes of these types.
diff --git a/Cython/Compiler/Builtin.py b/Cython/Compiler/Builtin.py

index c251830..2da6f0c 100644 (file)
--- a/Cython/Compiler/Builtin.py
+++ b/Cython/Compiler/Builtin.py
@@ -271,6 +271,7 @@ builtin_types_table = [
                                      ]),
  
      ("basestring",   "PyBaseString_Type",      []),
+    ("bytearray", "PyByteArray_Type", []),
      ("bytes",   "PyBytes_Type",    [BuiltinMethod("__contains__",  "TO",   "b", "PySequence_Contains"),
                                      ]),
      ("str",     "PyString_Type",   [BuiltinMethod("__contains__",  "TO",   "b", "PySequence_Contains"),
@@ -409,7 +410,7 @@ def init_builtins():
          pos=None, cname='(!Py_OptimizeFlag)', is_cdef=True)
      global list_type, tuple_type, dict_type, set_type, frozenset_type
      global bytes_type, str_type, unicode_type, basestring_type
-    global float_type, bool_type, type_type, complex_type
+    global float_type, bool_type, type_type, complex_type, bytearray_type
      type_type  = builtin_scope.lookup('type').type
      list_type  = builtin_scope.lookup('list').type
      tuple_type = builtin_scope.lookup('tuple').type
@@ -420,6 +421,7 @@ def init_builtins():
      str_type   = builtin_scope.lookup('str').type
      unicode_type = builtin_scope.lookup('unicode').type
      basestring_type = builtin_scope.lookup('basestring').type
+    bytearray_type = builtin_scope.lookup('bytearray').type
      float_type = builtin_scope.lookup('float').type
      bool_type  = builtin_scope.lookup('bool').type
      complex_type  = builtin_scope.lookup('complex').type
diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py

index d95ca8b..2376cbe 100644 (file)
--- a/Cython/Compiler/ExprNodes.py
+++ b/Cython/Compiler/ExprNodes.py
@@ -10,7 +10,8 @@ cython.declare(error=object, warning=object, warn_once=object, InternalError=obj
                 list_type=object, tuple_type=object, set_type=object, dict_type=object,
                 unicode_type=object, str_type=object, bytes_type=object, type_type=object,
                 Builtin=object, Symtab=object, Utils=object, find_coercion_error=object,
-               debug_disposal_code=object, debug_temp_alloc=object, debug_coercion=object)
+               debug_disposal_code=object, debug_temp_alloc=object, debug_coercion=object,
+               bytearray_type=object)
  
  import sys
  import copy
@@ -28,7 +29,7 @@ from PyrexTypes import py_object_type, c_long_type, typecast, error_type, \
      unspecified_type
  import TypeSlots
  from Builtin import list_type, tuple_type, set_type, dict_type, \
-     unicode_type, str_type, bytes_type, type_type
+     unicode_type, str_type, bytes_type, bytearray_type, type_type
  import Builtin
  import Symtab
  from Cython import Utils
@@ -3674,8 +3675,9 @@ class SliceIndexNode(ExprNode):
  
      def coerce_to(self, dst_type, env):
          if ((self.base.type.is_string or self.base.type.is_cpp_string)
-                and dst_type in (bytes_type, str_type, unicode_type)):
-            if dst_type is not bytes_type and not env.directives['c_string_encoding']:
+                and dst_type in (bytes_type, bytearray_type, str_type, unicode_type)):
+            if (dst_type not in (bytes_type, bytearray_type)
+                    and not env.directives['c_string_encoding']):
                  error(self.pos,
                      "default encoding required for conversion from '%s' to '%s'" % 
                      (self.base.type, dst_type))
@@ -3696,11 +3698,15 @@ class SliceIndexNode(ExprNode):
              base_result = self.base.result()
              if self.base.type != PyrexTypes.c_char_ptr_type:
                  base_result = '((const char*)%s)' % base_result
+            if self.type is bytearray_type:
+                type_name = 'ByteArray'
+            else:
+                type_name = self.type.name.title()
              if self.stop is None:
                  code.putln(
                      "%s = __Pyx_Py%s_FromString(%s + %s); %s" % (
                          result,
-                        self.type.name.title(),
+                        type_name,
                          base_result,
                          start_code,
                          code.error_goto_if_null(result, self.pos)))
@@ -3708,7 +3714,7 @@ class SliceIndexNode(ExprNode):
                  code.putln(
                      "%s = __Pyx_Py%s_FromStringAndSize(%s + %s, %s - %s); %s" % (
                          result,
-                        self.type.name.title(),
+                        type_name,
                          base_result,
                          start_code,
                          stop_code,
@@ -10289,7 +10295,8 @@ class CoerceToPyTypeNode(CoercionNode):
              elif arg.type.is_complex:
                  self.type = Builtin.complex_type
          elif arg.type.is_string or arg.type.is_cpp_string:
-            if type is not bytes_type and not env.directives['c_string_encoding']:
+            if (type not in (bytes_type, bytearray_type)
+                    and not env.directives['c_string_encoding']):
                  error(arg.pos,
                      "default encoding required for conversion from '%s' to '%s'" % 
                      (arg.type, type))
@@ -10335,9 +10342,11 @@ class CoerceToPyTypeNode(CoercionNode):
              funccall = arg_type.get_to_py_function(self.env, self.arg)
          else:
              func = arg_type.to_py_function
-            if ((arg_type.is_string or arg_type.is_cpp_string)
-                    and self.type in (bytes_type, str_type, unicode_type)):
-                func = func.replace("Object", self.type.name.title())
+            if arg_type.is_string or arg_type.is_cpp_string:
+                if self.type in (bytes_type, str_type, unicode_type):
+                    func = func.replace("Object", self.type.name.title())
+                elif self.type is bytearray_type:
+                    func = func.replace("Object", "ByteArray")
              funccall = "%s(%s)" % (func, self.arg.result())
  
          code.putln('%s = %s; %s' % (
diff --git a/Cython/Compiler/PyrexTypes.py b/Cython/Compiler/PyrexTypes.py

index d6429f0..0051d3d 100644 (file)
--- a/Cython/Compiler/PyrexTypes.py
+++ b/Cython/Compiler/PyrexTypes.py
@@ -952,7 +952,7 @@ class BuiltinObjectType(PyObjectType):
          return "<%s>"% self.cname
  
      def default_coerced_ctype(self):
-        if self.name == 'bytes':
+        if self.name in ('bytes', 'bytearray'):
              return c_char_ptr_type
          elif self.name == 'bool':
              return c_bint_type
@@ -992,6 +992,8 @@ class BuiltinObjectType(PyObjectType):
              type_check = 'PyString_Check'
          elif type_name == 'basestring':
              type_check = '__Pyx_PyBaseString_Check'
+        elif type_name == 'bytearray':
+            type_check = 'PyByteArray_Check'
          elif type_name == 'frozenset':
              type_check = 'PyFrozenSet_Check'
          else:
diff --git a/Cython/Compiler/Symtab.py b/Cython/Compiler/Symtab.py

index 3896296..b38eb97 100644 (file)
--- a/Cython/Compiler/Symtab.py
+++ b/Cython/Compiler/Symtab.py
@@ -941,6 +941,7 @@ class BuiltinScope(Scope):
          "complex":["((PyObject*)&PyComplex_Type)", py_object_type],
  
          "bytes":  ["((PyObject*)&PyBytes_Type)", py_object_type],
+        "bytearray":   ["((PyObject*)&PyByteArray_Type)", py_object_type],
          "str":    ["((PyObject*)&PyString_Type)", py_object_type],
          "unicode":["((PyObject*)&PyUnicode_Type)", py_object_type],
  
diff --git a/Cython/Utility/TypeConversion.c b/Cython/Utility/TypeConversion.c

index f2c3a6e..0a5adaf 100644 (file)
--- a/Cython/Utility/TypeConversion.c
+++ b/Cython/Utility/TypeConversion.c
@@ -5,6 +5,8 @@
  static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*);
  static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length);
  
+#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize(s, strlen(s))
+#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize(s, l)
  #define __Pyx_PyBytes_FromString        PyBytes_FromString
  #define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize
  static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char*);
@@ -20,6 +22,7 @@ static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char*);
  #define __Pyx_PyObject_AsUString(s)    ((unsigned char*) __Pyx_PyObject_AsString(s))
  #define __Pyx_PyObject_FromUString(s)  __Pyx_PyObject_FromString((char*)s)
  #define __Pyx_PyBytes_FromUString(s)   __Pyx_PyBytes_FromString((char*)s)
+#define __Pyx_PyByteArray_FromUString(s)   __Pyx_PyByteArray_FromString((char*)s)
  #define __Pyx_PyStr_FromUString(s)     __Pyx_PyStr_FromString((char*)s)
  #define __Pyx_PyUnicode_FromUString(s) __Pyx_PyUnicode_FromString((char*)s)
  
@@ -190,10 +193,17 @@ static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_
  #endif /* PY_VERSION_HEX < 0x03030000 */
      } else
  #endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII  || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT */
+
+#if PY_VERSION_HEX >= 0x02060000
+    if (PyByteArray_Check(o)) {
+        *length = PyByteArray_GET_SIZE(o);
+        return PyByteArray_AS_STRING(o);
+    } else
+#endif
      {
          char* result;
          int r = PyBytes_AsStringAndSize(o, &result, length);
-        if (r < 0) {
+        if (unlikely(r < 0)) {
              return NULL;
          } else {
              return result;
diff --git a/docs/src/tutorial/strings.rst b/docs/src/tutorial/strings.rst

index cce4d52..cf38e48 100644 (file)
--- a/docs/src/tutorial/strings.rst
+++ b/docs/src/tutorial/strings.rst
@@ -19,7 +19,9 @@ Python string types in Cython code
  Cython supports four Python string types: ``bytes``, ``str``,
  ``unicode`` and ``basestring``.  The ``bytes`` and ``unicode`` types
  are the specific types known from normal Python 2.x (named ``bytes``
-and ``str`` in Python 3).
+and ``str`` in Python 3).  Additionally, Cython also supports the
+``bytearray`` type starting with Python 2.6.  It behaves like the
+``bytes`` type, except that it is mutable.
  
  The ``str`` type is special in that it is the byte string in Python 2
  and the Unicode string in Python 3 (for Cython code compiled with
@@ -161,6 +163,13 @@ however, when the C function stores the pointer for later use.  Apart
  from keeping a Python reference to the string object, no manual memory
  management is required.
  
+Starting with Cython 0.20, the ``bytearray`` type is supported and
+coerces in the same way as the ``bytes`` type.  However, when using it
+in a C context, special care must be taken not to grow or shrink the
+object buffer after converting it to a C string pointer.  These
+modifications can change the internal buffer address, which will make
+the pointer invalid.
+
  Dealing with "const"
  --------------------
  
diff --git a/runtests.py b/runtests.py

index dd30784..c189447 100755 (executable)
--- a/runtests.py
+++ b/runtests.py
@@ -235,6 +235,7 @@ VER_DEP_MODULES = {
                                            'run.pure_py', # decorators, with statement
                                            'run.purecdef',
                                            'run.struct_conversion',
+                                          'run.bytearray_coercion',
                                            # memory views require buffer protocol
                                            'memoryview.relaxed_strides',
                                            'memoryview.cythonarray',
diff --git a/tests/run/bytearray_coercion.pyx b/tests/run/bytearray_coercion.pyx

new file mode 100644 (file)

index 0000000..47b1ec2
--- /dev/null
+++ b/tests/run/bytearray_coercion.pyx
@@ -0,0 +1,37 @@
+# mode: run
+
+# NOTE: Py2.6+ only
+
+
+cpdef bytearray coerce_to_charptr(char* b):
+    """
+    >>> b = bytearray(b'abc')
+    >>> coerced = coerce_to_charptr(b)
+    >>> coerced == b or coerced
+    True
+    >>> isinstance(coerced, bytearray) or type(coerced)
+    True
+    """
+    return b
+
+def coerce_to_charptrs(bytearray b):
+    """
+    >>> b = bytearray(b'abc')
+    >>> coerce_to_charptrs(b)
+    True
+    """
+    cdef char* cs = b
+    cdef unsigned char* ucs = b
+    cdef signed char* scs = b
+    return b == <bytearray>cs == <bytearray> ucs == <bytearray>scs
+
+cpdef bytearray coerce_charptr_slice(char* b):
+    """
+    >>> b = bytearray(b'abc')
+    >>> coerced = coerce_charptr_slice(b)
+    >>> coerced == b[:2] or coerced
+    True
+    >>> isinstance(coerced, bytearray) or type(coerced)
+    True
+    """
+    return b[:2]
author	Stefan Behnel <stefan_ml@behnel.de>
	Sat, 2 Nov 2013 18:19:55 +0000 (19:19 +0100)
committer	Stefan Behnel <stefan_ml@behnel.de>
	Sat, 2 Nov 2013 18:19:55 +0000 (19:19 +0100)
CHANGES.rst		patch | blob | history
Cython/Compiler/Builtin.py		patch | blob | history
Cython/Compiler/ExprNodes.py		patch | blob | history
Cython/Compiler/PyrexTypes.py		patch | blob | history
Cython/Compiler/Symtab.py		patch | blob | history
Cython/Utility/TypeConversion.c		patch | blob | history
docs/src/tutorial/strings.rst		patch | blob | history
runtests.py		patch | blob | history
tests/run/bytearray_coercion.pyx	[new file with mode: 0644]	patch | blob