From 24735826105c0875b6994def48906e414d66adbe Mon Sep 17 00:00:00 2001
From: Stefan Behnel
Date: Sat, 2 Mar 2013 19:45:35 +0100
Subject: [PATCH] optimise isinstance(obj, basestring) and map basestring to unicode in Py3

---
Note: the line breaks and leading indentation of this patch were restored
from a whitespace-collapsed copy. Hunk line counts match the hunk headers,
but the exact whitespace of context lines (column alignment in particular)
may differ from upstream; if the patch does not apply cleanly, retry with
--ignore-whitespace.

 CHANGES.rst                      |  6 ++++++
 Cython/Compiler/Builtin.py       |  1 +
 Cython/Compiler/Code.py          |  1 +
 Cython/Compiler/PyrexTypes.py    |  2 ++
 Cython/Utility/ModuleSetupCode.c |  9 +++++++++
 tests/run/builtin_basestring.pyx | 39 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 58 insertions(+)
 create mode 100644 tests/run/builtin_basestring.pyx

diff --git a/CHANGES.rst b/CHANGES.rst
index c2b15ed..0d0b331 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -8,6 +8,12 @@ Cython Changelog
 Features added
 --------------
 
+* ``isinstance(obj, basestring)`` is optimised. In Python 3 it only tests
+  for instances of ``str`` (i.e. Py2 ``unicode``).
+
+* The ``basestring`` builtin is mapped to ``str`` (i.e. Py2 ``unicode``) when
+  compiling the generated C code under Python 3.
+
 * A new class decorator ``@cython.freelist(N)`` creates a static freelist of N
   instances for an extension type, thus avoiding the costly allocation step if
   possible. This can speed up object instantiation by 20-30% in suitable
diff --git a/Cython/Compiler/Builtin.py b/Cython/Compiler/Builtin.py
index ea6dc27..ccafe60 100644
--- a/Cython/Compiler/Builtin.py
+++ b/Cython/Compiler/Builtin.py
@@ -268,6 +268,7 @@ builtin_types_table = [
                                    BuiltinAttribute('imag', 'cval.imag', field_type = PyrexTypes.c_double_type),
                                    ]),
 
+    ("basestring", "PyBaseString_Type", []),
     ("bytes", "PyBytes_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"),
                                ]),
     ("str", "PyString_Type", [BuiltinMethod("__contains__", "TO", "b", "PySequence_Contains"),
diff --git a/Cython/Compiler/Code.py b/Cython/Compiler/Code.py
index af9ab37..5b752ba 100644
--- a/Cython/Compiler/Code.py
+++ b/Cython/Compiler/Code.py
@@ -38,6 +38,7 @@ non_portable_builtins_map = {
     'unicode' : ('PY_MAJOR_VERSION >= 3', 'str'),
     'xrange' : ('PY_MAJOR_VERSION >= 3', 'range'),
     'BaseException' : ('PY_VERSION_HEX < 0x02050000', 'Exception'),
+    'basestring' : ('PY_MAJOR_VERSION >= 3', 'unicode'),
     }
 
 basicsize_builtins_map = {
diff --git a/Cython/Compiler/PyrexTypes.py b/Cython/Compiler/PyrexTypes.py
index 38b6982..91aaecb 100755
--- a/Cython/Compiler/PyrexTypes.py
+++ b/Cython/Compiler/PyrexTypes.py
@@ -973,6 +973,8 @@ class BuiltinObjectType(PyObjectType):
         type_name = self.name
         if type_name == 'str':
             type_check = 'PyString_Check'
+        elif type_name == 'basestring':
+            type_check = '__Pyx_PyBaseString_Check'
         elif type_name == 'frozenset':
             type_check = 'PyFrozenSet_Check'
         else:
diff --git a/Cython/Utility/ModuleSetupCode.c b/Cython/Utility/ModuleSetupCode.c
index 615499a..6382691 100644
--- a/Cython/Utility/ModuleSetupCode.c
+++ b/Cython/Utility/ModuleSetupCode.c
@@ -168,6 +168,15 @@
   #define PyBytes_ConcatAndDel PyString_ConcatAndDel
 #endif
 
+#if PY_MAJOR_VERSION >= 3
+  #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
+  #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
+#else
+  #define __Pyx_PyBaseString_Check(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj) || \
+                                         PyString_Check(obj) || PyUnicode_Check(obj))
+  #define __Pyx_PyBaseString_CheckExact(obj) (Py_TYPE(obj) == &PyBaseString_Type)
+#endif
+
 #if PY_VERSION_HEX < 0x02060000
   #define PySet_Check(obj) PyObject_TypeCheck(obj, &PySet_Type)
   #define PyFrozenSet_Check(obj) PyObject_TypeCheck(obj, &PyFrozenSet_Type)
diff --git a/tests/run/builtin_basestring.pyx b/tests/run/builtin_basestring.pyx
new file mode 100644
index 0000000..22498e3
--- /dev/null
+++ b/tests/run/builtin_basestring.pyx
@@ -0,0 +1,39 @@
+
+import sys
+IS_PY3 = sys.version_info[0] >= 3
+
+ustring = u'abcdef'
+sstring = 'abcdef'
+bstring = b'abcdef'
+
+
+def isinstance_basestring(obj):
+    """
+    >>> isinstance_basestring(ustring)
+    True
+    >>> isinstance_basestring(sstring)
+    True
+    >>> if IS_PY3: print(not isinstance_basestring(bstring))
+    ... else: print(isinstance_basestring(bstring))
+    True
+    """
+    return isinstance(obj, basestring)
+
+
+def basestring_is_unicode_in_py3():
+    """
+    >>> basestring_is_unicode_in_py3()
+    True
+    """
+    if IS_PY3:
+        return basestring is unicode
+    else:
+        return basestring is not unicode
+
+
+def unicode_subtypes_basestring():
+    """
+    >>> unicode_subtypes_basestring()
+    True
+    """
+    return issubclass(unicode, basestring)
-- 
2.7.4