Add unicode slicing support
author zaur <aintellimath@gmail.com>
Mon, 18 Feb 2013 19:04:26 +0000 (22:04 +0300)
committer zaur <aintellimath@gmail.com>
Tue, 19 Feb 2013 10:13:58 +0000 (13:13 +0300)
Cython/Compiler/ExprNodes.py
Cython/Utility/StringTools.c

index 1af7eda..47968ce 100755 (executable)
@@ -3457,6 +3457,8 @@ class SliceIndexNode(ExprNode):
         base_type = self.base.type
         if base_type.is_string or base_type.is_cpp_string:
             self.type = bytes_type
+        elif base_type is unicode_type:
+            self.type = unicode_type
         elif base_type.is_ptr:
             self.type = base_type
         elif base_type.is_array:
@@ -3506,6 +3508,39 @@ class SliceIndexNode(ExprNode):
                         self.stop_code(),
                         self.start_code(),
                         code.error_goto_if_null(self.result(), self.pos)))
+        elif self.base.type is unicode_type:
+            base_result = self.base.result()
+            code.globalstate.use_utility_code( 
+                          UtilityCode.load_cached("PyUnicode_Substring", "StringTools.c")) 
+            if self.start is None:
+                if self.stop is None:
+                    code.putln(
+                        "%s = __Pyx_PyUnicode_Substring(%s, 0, PY_SSIZE_T_MAX); %s" % (
+                            self.result(),
+                            base_result,
+                            code.error_goto_if_null(self.result(), self.pos)))
+                else:
+                    code.putln(
+                        "%s = __Pyx_PyUnicode_Substring(%s, 0, %s); %s" % (
+                            self.result(),
+                            base_result,
+                            self.stop_code(),
+                            code.error_goto_if_null(self.result(), self.pos)))
+            elif self.stop is None:
+                code.putln(
+                    "%s = __Pyx_PyUnicode_Substring(%s, %s, PY_SSIZE_T_MAX); %s" % (
+                        self.result(),
+                        base_result,
+                        self.start_code(),
+                        code.error_goto_if_null(self.result(), self.pos)))
+            else:
+                code.putln(
+                    "%s = __Pyx_PyUnicode_Substring(%s, %s, %s); %s" % (
+                        self.result(),
+                        base_result,
+                        self.start_code(),
+                        self.stop_code(),
+                        code.error_goto_if_null(self.result(), self.pos)))
         else:
             code.putln(
                 "%s = __Pyx_PySequence_GetSlice(%s, %s, %s); %s" % (
@@ -10403,3 +10438,6 @@ proto="""
 #define UNARY_NEG_WOULD_OVERFLOW(x)    \
         (((x) < 0) & ((unsigned long)(x) == 0-(unsigned long)(x)))
 """)
+
+# Cached utility code for the "PyUnicode_Substring" section of StringTools.c (__Pyx_PyUnicode_Substring).
+pyunicode_substring = UtilityCode.load_cached("PyUnicode_Substring", "StringTools.c")
index 53dfc77..55741cb 100644 (file)
@@ -374,3 +374,42 @@ static CYTHON_INLINE PyObject* __Pyx_decode_bytes(
         return PyUnicode_Decode(cstring, length, encoding, errors);
     }
 }
+
+/////////////// PyUnicode_Substring.proto ///////////////
+/* Slice helper for unicode objects: text[start:stop]; negative indices are offset by the string length. */
+static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
+            PyObject* text, Py_ssize_t start, Py_ssize_t stop);
+
+
+/////////////// PyUnicode_Substring ///////////////
+
+#if CYTHON_PEP393_ENABLED  /* copy (stop)-(start) characters, keeping text's storage kind */
+#define __Pyx_PyUnicode_SUBSTRING(text, start, stop) \
+PyUnicode_FromKindAndData(PyUnicode_KIND(text), PyUnicode_1BYTE_DATA(text) + (start)*PyUnicode_KIND(text), (stop)-(start))
+#else  /* no PEP 393: copy from the Py_UNICODE buffer instead */
+#define __Pyx_PyUnicode_SUBSTRING(text, start, stop) \
+PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+(start), (stop)-(start))
+#endif
+
+/* Return text[start:stop] as a new reference, clamping out-of-range bounds the
+ * way Python slicing does.  Returns NULL (exception set) if a C-API call fails. */
+static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
+            PyObject* text, Py_ssize_t start, Py_ssize_t stop) {
+    Py_ssize_t length;
+#if CYTHON_PEP393_ENABLED
+    /* PEP 393 access macros are only valid on a "ready" string. */
+    if (PyUnicode_READY(text) == -1) return NULL;
+    length = PyUnicode_GET_LENGTH(text);
+#else
+    length = PyUnicode_GET_SIZE(text);
+#endif
+    if (start < 0) {
+        start += length;
+        if (start < 0) start = 0;
+    }
+    if (stop < 0) stop += length;
+    else if (stop > length) stop = length;
+    if (stop <= start)  /* compare directly: stop - start could overflow */
+        return PyUnicode_FromUnicode(NULL, 0);
+    return (PyObject*)__Pyx_PyUnicode_SUBSTRING(text, start, stop);
+}
\ No newline at end of file