optimise isinstance(obj, basestring) and map basestring to unicode in Py3
author Stefan Behnel <stefan_ml@behnel.de>
Sat, 2 Mar 2013 18:45:35 +0000 (19:45 +0100)
committer Stefan Behnel <stefan_ml@behnel.de>
Sat, 2 Mar 2013 18:45:35 +0000 (19:45 +0100)
CHANGES.rst
Cython/Compiler/Builtin.py
Cython/Compiler/Code.py
Cython/Compiler/PyrexTypes.py
Cython/Utility/ModuleSetupCode.c
tests/run/builtin_basestring.pyx [new file with mode: 0644]

index c2b15ed..0d0b331 100644 (file)
@@ -8,6 +8,12 @@ Cython Changelog
 Features added
 --------------
 
+* ``isinstance(obj, basestring)`` is optimised.  In Python 3 it only tests
+  for instances of ``str`` (i.e. Py2 ``unicode``).
+
+* The ``basestring`` builtin is mapped to ``str`` (i.e. Py2 ``unicode``) when
+  compiling the generated C code under Python 3.
+
 * A new class decorator ``@cython.freelist(N)`` creates a static freelist of N
   instances for an extension type, thus avoiding the costly allocation step if
   possible. This can speed up object instantiation by 20-30% in suitable
index ea6dc27..ccafe60 100644 (file)
@@ -268,6 +268,7 @@ builtin_types_table = [
                                     BuiltinAttribute('imag', 'cval.imag', field_type = PyrexTypes.c_double_type),
                                     ]),
 
+    ("basestring",   "PyBaseString_Type",      []),
     ("bytes",   "PyBytes_Type",    [BuiltinMethod("__contains__",  "TO",   "b", "PySequence_Contains"),
                                     ]),
     ("str",     "PyString_Type",   [BuiltinMethod("__contains__",  "TO",   "b", "PySequence_Contains"),
index af9ab37..5b752ba 100644 (file)
@@ -38,6 +38,7 @@ non_portable_builtins_map = {
     'unicode'       : ('PY_MAJOR_VERSION >= 3', 'str'),
     'xrange'        : ('PY_MAJOR_VERSION >= 3', 'range'),
     'BaseException' : ('PY_VERSION_HEX < 0x02050000', 'Exception'),
+    'basestring'    : ('PY_MAJOR_VERSION >= 3', 'str'),  # Py3 builtins has no 'unicode'; the text type is 'str'
     }
 
 basicsize_builtins_map = {
index 38b6982..91aaecb 100755 (executable)
@@ -973,6 +973,8 @@ class BuiltinObjectType(PyObjectType):
         type_name = self.name
         if type_name == 'str':
             type_check = 'PyString_Check'
+        elif type_name == 'basestring':
+            type_check = '__Pyx_PyBaseString_Check'
         elif type_name == 'frozenset':
             type_check = 'PyFrozenSet_Check'
         else:
index 615499a..6382691 100644 (file)
   #define PyBytes_ConcatAndDel         PyString_ConcatAndDel
 #endif
 
+#if PY_MAJOR_VERSION >= 3
+  #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
+  #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
+#else
+  #define __Pyx_PyBaseString_Check(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj) || \
+                                         PyString_Check(obj) || PyUnicode_Check(obj))
+  #define __Pyx_PyBaseString_CheckExact(obj) (Py_TYPE(obj) == &PyBaseString_Type)
+#endif
+
 #if PY_VERSION_HEX < 0x02060000
   #define PySet_Check(obj)             PyObject_TypeCheck(obj, &PySet_Type)
   #define PyFrozenSet_Check(obj)       PyObject_TypeCheck(obj, &PyFrozenSet_Type)
diff --git a/tests/run/builtin_basestring.pyx b/tests/run/builtin_basestring.pyx
new file mode 100644 (file)
index 0000000..22498e3
--- /dev/null
@@ -0,0 +1,39 @@
+
+import sys
+IS_PY3 = sys.version_info[0] >= 3
+
+ustring = u'abcdef'
+sstring =  'abcdef'
+bstring = b'abcdef'
+
+
+def isinstance_basestring(obj):
+    """
+    >>> isinstance_basestring(ustring)
+    True
+    >>> isinstance_basestring(sstring)
+    True
+    >>> if IS_PY3: print(not isinstance_basestring(bstring))
+    ... else: print(isinstance_basestring(bstring))
+    True
+    """
+    return isinstance(obj, basestring)
+
+
+def basestring_is_unicode_in_py3():
+    """
+    >>> basestring_is_unicode_in_py3()
+    True
+    """
+    if IS_PY3:
+        return basestring is unicode
+    else:
+        return basestring is not unicode
+
+
+def unicode_subtypes_basestring():
+    """
+    >>> unicode_subtypes_basestring()
+    True
+    """
+    return issubclass(unicode, basestring)