From: Stefan Behnel
Date: Sun, 10 Nov 2013 14:52:48 +0000 (+0100)
Subject: optimise string equality comparison by always looking at the first character before...
X-Git-Tag: 0.20b1~242
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3e6bb233935defe0953aaed2e690b1b40e34b9ab;p=platform%2Fupstream%2Fpython-cython.git

optimise string equality comparison by always looking at the first character before calling into the C-API

--HG--
extra : amend_source : 65f946184bc3b291289c54f6787aba7d2ad43b20
---

diff --git a/Cython/Utility/StringTools.c b/Cython/Utility/StringTools.c
index c0baf35..9b39d3a 100644
--- a/Cython/Utility/StringTools.c
+++ b/Cython/Utility/StringTools.c
@@ -142,26 +142,19 @@ static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int
         /* as done by PyObject_RichCompareBool(); also catches the (interned) empty string */
         return (equals == Py_EQ);
     } else if (PyUnicode_CheckExact(s1) & PyUnicode_CheckExact(s2)) {
+        Py_ssize_t length;
         #if CYTHON_PEP393_ENABLED
-        if ((PyUnicode_READY(s1) < 0) || (PyUnicode_READY(s2) < 0))
+        if (unlikely(PyUnicode_READY(s1) < 0) || unlikely(PyUnicode_READY(s2) < 0))
             return -1;
-        if (PyUnicode_GET_LENGTH(s1) != PyUnicode_GET_LENGTH(s2)) {
+        #endif
+        length = __Pyx_PyUnicode_GET_LENGTH(s1);
+        if (length != __Pyx_PyUnicode_GET_LENGTH(s2))
             return (equals == Py_NE);
-        } else if (PyUnicode_GET_LENGTH(s1) == 1) {
-            Py_UCS4 ch1 = PyUnicode_READ_CHAR(s1, 0);
-            Py_UCS4 ch2 = PyUnicode_READ_CHAR(s2, 0);
-            return (equals == Py_EQ) ? (ch1 == ch2) : (ch1 != ch2);
-//// currently disabled: may not be safe depending on who created the string
-//        } else if (PyUnicode_MAX_CHAR_VALUE(s1) != PyUnicode_MAX_CHAR_VALUE(s2)) {
-//            return (equals == Py_NE);
-        #else
-        if (PyUnicode_GET_SIZE(s1) != PyUnicode_GET_SIZE(s2)) {
+        // len(s1) == len(s2) >= 1 (empty string is interned, and "s1 is not s2")
+        if (__Pyx_PyUnicode_READ_CHAR(s1, 0) != __Pyx_PyUnicode_READ_CHAR(s2, 0)) {
             return (equals == Py_NE);
-        } else if (PyUnicode_GET_SIZE(s1) == 1) {
-            Py_UNICODE ch1 = PyUnicode_AS_UNICODE(s1)[0];
-            Py_UNICODE ch2 = PyUnicode_AS_UNICODE(s2)[0];
-            return (equals == Py_EQ) ? (ch1 == ch2) : (ch1 != ch2);
-        #endif
+        } else if (length == 1) {
+            return (equals == Py_EQ);
         } else {
             int result = PyUnicode_Compare(s1, s2);
             if ((result == -1) && unlikely(PyErr_Occurred()))
@@ -200,15 +193,19 @@ static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int eq
         /* as done by PyObject_RichCompareBool(); also catches the (interned) empty string */
         return (equals == Py_EQ);
     } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) {
-        if (PyBytes_GET_SIZE(s1) != PyBytes_GET_SIZE(s2)) {
+        const char *ps1, *ps2;
+        Py_ssize_t length = PyBytes_GET_SIZE(s1);
+        if (length != PyBytes_GET_SIZE(s2))
+            return (equals == Py_NE);
+        // len(s1) == len(s2) >= 1 (empty string is interned, and "s1 is not s2")
+        ps1 = PyBytes_AS_STRING(s1);
+        ps2 = PyBytes_AS_STRING(s2);
+        if (ps1[0] != ps2[0]) {
             return (equals == Py_NE);
-        } else if (PyBytes_GET_SIZE(s1) == 1) {
-            if (equals == Py_EQ)
-                return (PyBytes_AS_STRING(s1)[0] == PyBytes_AS_STRING(s2)[0]);
-            else
-                return (PyBytes_AS_STRING(s1)[0] != PyBytes_AS_STRING(s2)[0]);
+        } else if (length == 1) {
+            return (equals == Py_EQ);
         } else {
-            int result = memcmp(PyBytes_AS_STRING(s1), PyBytes_AS_STRING(s2), (size_t)PyBytes_GET_SIZE(s1));
+            int result = memcmp(ps1, ps2, (size_t)length);
             return (equals == Py_EQ) ? (result == 0) : (result != 0);
         }
     } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) {