From 4893a5422e2978f4b9a0e7785af1696e3438ac22 Mon Sep 17 00:00:00 2001 From: Giuseppe D'Angelo Date: Tue, 27 Mar 2012 18:40:06 +0100 Subject: [PATCH] New qHash algorithm for uchar/ushort arrays (QString, QByteArray, etc.) MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Port of Robin's work from I0a53aa4581e25b351b9cb5033415b5163d05fe71 on top of the new qHash patches (the original commit just introduced lots of conflicts, so I redid it from scratch). This is based on the work done in the QHash benchmark over the past few months experimenting with the performance of the string hashing algorithm used by Java. The Java algorithm, in turn, appears to have been based off a variant of djb's work at http://cr.yp.to/cdb/cdb.txt. This commit provides a performance boost of ~12-33% on the QHash benchmark. Unfortunately, the rcc test depends on QHash ordering. Randomizing QHash or changing qHash will cause the test to fail (see QTBUG-25078), so for now the testdata is changed as well. Done-with: Robin Burchell Change-Id: Ie05d8e21588d1b2d4bd555ef254e1eb101864b75 Reviewed-by: João Abecasis Reviewed-by: Robin Burchell --- src/corelib/tools/qhash.cpp | 42 +++++++++++---------- tests/auto/tools/rcc/data/images/images.expected | 48 ++++++++++++------------ 2 files changed, 47 insertions(+), 43 deletions(-) diff --git a/src/corelib/tools/qhash.cpp b/src/corelib/tools/qhash.cpp index ce7d4ad..20202a4 100644 --- a/src/corelib/tools/qhash.cpp +++ b/src/corelib/tools/qhash.cpp @@ -73,38 +73,42 @@ QT_BEGIN_NAMESPACE +/* + Java's hashing algorithm for strings is a variation of D. J. Bernstein's + hashing algorithm, which appeared at http://cr.yp.to/cdb/cdb.txt + and is informally known as DJB33XX - DJB's 33 Times Xor. + Java uses DJB31XA, that is, 31 Times Add. -// ### Qt 5: see tests/benchmarks/corelib/tools/qhash/qhash_string.cpp -// Hashing of the whole string is a waste of cycles.
+ The original algorithm was a loop around + (h << 5) + h ^ c + (which is indeed h*33 ^ c); it was then changed to + (h << 5) - h ^ c + (so h*31^c: DJB31XX), and the XOR changed to a sum: + (h << 5) - h + c + (DJB31XA), which can save some assembly instructions. -/* - These functions are based on Peter J. Weinberger's hash function - (from the Dragon Book). The constant 24 in the original function - was replaced with 23 to produce fewer collisions on input such as - "a", "aa", "aaa", "aaaa", ... + Still, we can avoid writing the multiplication as "(h << 5) - h" + -- the compiler will turn it into a shift and a subtraction anyway + (for instance, gcc 4.4 does that even at -O0). */ -static uint hash(const uchar *p, int n, uint seed) +static inline uint hash(const uchar *p, int len, uint seed) { uint h = seed; - while (n--) { - h = (h << 4) + *p++; - h ^= (h & 0xf0000000) >> 23; - h &= 0x0fffffff; - } + for (int i = 0; i < len; ++i) + h = 31 * h + p[i]; + return h; } -static uint hash(const QChar *p, int n, uint seed) +static inline uint hash(const QChar *p, int len, uint seed) { uint h = seed; - while (n--) { - h = (h << 4) + (*p++).unicode(); - h ^= (h & 0xf0000000) >> 23; - h &= 0x0fffffff; - } + for (int i = 0; i < len; ++i) + h = 31 * h + p[i].unicode(); + return h; } diff --git a/tests/auto/tools/rcc/data/images/images.expected b/tests/auto/tools/rcc/data/images/images.expected index 71be819..4ebf066 100644 --- a/tests/auto/tools/rcc/data/images/images.expected +++ b/tests/auto/tools/rcc/data/images/images.expected @@ -1,8 +1,8 @@ /**************************************************************************** ** Resource object code ** -IGNORE: ** Created: Tue Jul 15 11:17:15 2008 -IGNORE: ** by: The Resource Compiler for Qt version 4.4.2 +IGNORE: ** Created: Sun Apr 1 21:20:28 2012 +IGNORE: ** by: The Resource Compiler for Qt version 5.0.0 ** ** WARNING! All changes made in this file will be lost!
*****************************************************************************/ @@ -10,16 +10,7 @@ IGNORE: ** by: The Resource Compiler for Qt version 4.4.2 #include static const unsigned char qt_resource_data[] = { -IGNORE: // /data5/dev/qt/tests/auto/rcc/data/images/square.png - 0x0,0x0,0x0,0x5e, - 0x89, - 0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0, - 0x0,0x0,0x20,0x0,0x0,0x0,0x20,0x1,0x3,0x0,0x0,0x0,0x49,0xb4,0xe8,0xb7, - 0x0,0x0,0x0,0x6,0x50,0x4c,0x54,0x45,0x0,0x0,0x0,0x58,0xa8,0xff,0x8c,0x14, - 0x1f,0xab,0x0,0x0,0x0,0x13,0x49,0x44,0x41,0x54,0x8,0xd7,0x63,0x60,0x0,0x81, - 0xfa,0xff,0xff,0xff,0xd,0x3e,0x2,0x4,0x0,0x8d,0x4d,0x68,0x6b,0xcf,0xb8,0x8e, - 0x86,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82, -IGNORE: // /data5/dev/qt/tests/auto/rcc/data/images/circle.png +IGNORE: // /dev/qt5/qtbase/tests/auto/tools/rcc/data/images/images/circle.png 0x0,0x0,0x0,0xa5, 0x89, 0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0, @@ -33,7 +24,16 @@ IGNORE: // /data5/dev/qt/tests/auto/rcc/data/images/circle.png 0x4c,0x48,0x31,0x15,0x53,0xec,0x5,0x14,0x9b,0x11,0xc5,0x6e,0x8,0xdd,0x8e,0x1b, 0x14,0x54,0x19,0xf3,0xa1,0x23,0xdb,0xd5,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44, 0xae,0x42,0x60,0x82, -IGNORE: // /data5/dev/qt/tests/auto/rcc/data/images/subdir/triangle.png +IGNORE: // /dev/qt5/qtbase/tests/auto/tools/rcc/data/images/images/square.png + 0x0,0x0,0x0,0x5e, + 0x89, + 0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0, + 0x0,0x0,0x20,0x0,0x0,0x0,0x20,0x1,0x3,0x0,0x0,0x0,0x49,0xb4,0xe8,0xb7, + 0x0,0x0,0x0,0x6,0x50,0x4c,0x54,0x45,0x0,0x0,0x0,0x58,0xa8,0xff,0x8c,0x14, + 0x1f,0xab,0x0,0x0,0x0,0x13,0x49,0x44,0x41,0x54,0x8,0xd7,0x63,0x60,0x0,0x81, + 0xfa,0xff,0xff,0xff,0xd,0x3e,0x2,0x4,0x0,0x8d,0x4d,0x68,0x6b,0xcf,0xb8,0x8e, + 0x86,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82, +IGNORE: // /dev/qt5/qtbase/tests/auto/tools/rcc/data/images/images/subdir/triangle.png 0x0,0x0,0x0,0xaa, 0x89, 
0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0, @@ -56,21 +56,21 @@ static const unsigned char qt_resource_name[] = { 0x7,0x3,0x7d,0xc3, 0x0,0x69, 0x0,0x6d,0x0,0x61,0x0,0x67,0x0,0x65,0x0,0x73, - // square.png - 0x0,0xa, - 0x8,0x8b,0x6,0x27, + // subdir + 0x0,0x6, + 0x7,0xab,0x8b,0x2, 0x0,0x73, - 0x0,0x71,0x0,0x75,0x0,0x61,0x0,0x72,0x0,0x65,0x0,0x2e,0x0,0x70,0x0,0x6e,0x0,0x67, + 0x0,0x75,0x0,0x62,0x0,0x64,0x0,0x69,0x0,0x72, // circle.png 0x0,0xa, 0xa,0x2d,0x16,0x47, 0x0,0x63, 0x0,0x69,0x0,0x72,0x0,0x63,0x0,0x6c,0x0,0x65,0x0,0x2e,0x0,0x70,0x0,0x6e,0x0,0x67, - // subdir - 0x0,0x6, - 0x7,0xab,0x8b,0x2, + // square.png + 0x0,0xa, + 0x8,0x8b,0x6,0x27, 0x0,0x73, - 0x0,0x75,0x0,0x62,0x0,0x64,0x0,0x69,0x0,0x72, + 0x0,0x71,0x0,0x75,0x0,0x61,0x0,0x72,0x0,0x65,0x0,0x2e,0x0,0x70,0x0,0x6e,0x0,0x67, // triangle.png 0x0,0xc, 0x5,0x59,0xa7,0xc7, @@ -85,11 +85,11 @@ static const unsigned char qt_resource_struct[] = { // :/images 0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x0,0x0,0x3,0x0,0x0,0x0,0x2, // :/images/subdir - 0x0,0x0,0x0,0x46,0x0,0x2,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x5, + 0x0,0x0,0x0,0x12,0x0,0x2,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x5, // :/images/square.png - 0x0,0x0,0x0,0x12,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0, + 0x0,0x0,0x0,0x3e,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0xa9, // :/images/circle.png - 0x0,0x0,0x0,0x2c,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x62, + 0x0,0x0,0x0,0x24,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0, // :/images/subdir/triangle.png 0x0,0x0,0x0,0x58,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x1,0xb, -- 2.7.4