New qHash algorithm for uchar/ushort arrays (QString, QByteArray, etc.)
author Giuseppe D'Angelo <dangelog@gmail.com>
Tue, 27 Mar 2012 17:40:06 +0000 (18:40 +0100)
committer Qt by Nokia <qt-info@nokia.com>
Mon, 9 Apr 2012 08:16:09 +0000 (10:16 +0200)
Port of Robin's work from I0a53aa4581e25b351b9cb5033415b5163d05fe71
on top of the new qHash patches (the original commit just introduced
lots of conflicts, so I redid it from scratch).

This is based on the work done in the QHash benchmark over the past
few months experimenting with the performance of the string hashing
algorithm used by Java.

The Java algorithm, in turn, appears to have been based on a
variant of djb's work at http://cr.yp.to/cdb/cdb.txt.

This commit provides a performance boost of ~12-33% on the
QHash benchmark.

Unfortunately, the rcc test depends on QHash ordering.
Randomizing QHash or changing qHash will cause the test to fail
(see QTBUG-25078), so for now the testdata is changed as well.

Done-with: Robin Burchell
Change-Id: Ie05d8e21588d1b2d4bd555ef254e1eb101864b75
Reviewed-by: João Abecasis <joao.abecasis@nokia.com>
Reviewed-by: Robin Burchell <robin+qt@viroteck.net>
src/corelib/tools/qhash.cpp
tests/auto/tools/rcc/data/images/images.expected

index ce7d4ad..20202a4 100644 (file)
 
 QT_BEGIN_NAMESPACE
 
+/*
+    Java's hashing algorithm for strings is a variation of D. J. Bernstein's
+    hashing algorithm, which appeared at http://cr.yp.to/cdb/cdb.txt
+    and is informally known as DJB33XX - DJB's 33 Times Xor.
+    Java uses DJB31XA, that is, 31 Times Add.
 
-// ### Qt 5: see tests/benchmarks/corelib/tools/qhash/qhash_string.cpp
-// Hashing of the whole string is a waste of cycles.
+    The original algorithm was a loop around
+        (h << 5) + h ^ c
+    (which is indeed h*33 ^ c); it was then changed to
+        (h << 5) - h ^ c
+    (so h*31^c: DJB31XX), and the XOR changed to a sum:
+        (h << 5) - h + c
+    (DJB31XA), which can save some assembly instructions.
 
-/*
-    These functions are based on Peter J. Weinberger's hash function
-    (from the Dragon Book). The constant 24 in the original function
-    was replaced with 23 to produce fewer collisions on input such as
-    "a", "aa", "aaa", "aaaa", ...
+    Still, we can avoid writing the multiplication as "(h << 5) - h"
+    -- the compiler will turn it into a shift and a subtraction anyway
+    (for instance, gcc 4.4 does that even at -O0).
 */
 
-static uint hash(const uchar *p, int n, uint seed)
+static inline uint hash(const uchar *p, int len, uint seed)
 {
     uint h = seed;
 
-    while (n--) {
-        h = (h << 4) + *p++;
-        h ^= (h & 0xf0000000) >> 23;
-        h &= 0x0fffffff;
-    }
+    for (int i = 0; i < len; ++i)
+        h = 31 * h + p[i];
+
     return h;
 }
 
-static uint hash(const QChar *p, int n, uint seed)
+static inline uint hash(const QChar *p, int len, uint seed)
 {
     uint h = seed;
 
-    while (n--) {
-        h = (h << 4) + (*p++).unicode();
-        h ^= (h & 0xf0000000) >> 23;
-        h &= 0x0fffffff;
-    }
+    for (int i = 0; i < len; ++i)
+        h = 31 * h + p[i].unicode();
+
     return h;
 }
 
index 71be819..4ebf066 100644 (file)
@@ -1,8 +1,8 @@
 /****************************************************************************
 ** Resource object code
 **
-IGNORE: ** Created: Tue Jul 15 11:17:15 2008
-IGNORE: **      by: The Resource Compiler for Qt version 4.4.2
+IGNORE: ** Created: Sun Apr 1 21:20:28 2012
+IGNORE: **      by: The Resource Compiler for Qt version 5.0.0
 **
 ** WARNING! All changes made in this file will be lost!
 *****************************************************************************/
@@ -10,16 +10,7 @@ IGNORE: **      by: The Resource Compiler for Qt version 4.4.2
 #include <QtCore/qglobal.h>
 
 static const unsigned char qt_resource_data[] = {
-IGNORE:   // /data5/dev/qt/tests/auto/rcc/data/images/square.png
-  0x0,0x0,0x0,0x5e,
-  0x89,
-  0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,
-  0x0,0x0,0x20,0x0,0x0,0x0,0x20,0x1,0x3,0x0,0x0,0x0,0x49,0xb4,0xe8,0xb7,
-  0x0,0x0,0x0,0x6,0x50,0x4c,0x54,0x45,0x0,0x0,0x0,0x58,0xa8,0xff,0x8c,0x14,
-  0x1f,0xab,0x0,0x0,0x0,0x13,0x49,0x44,0x41,0x54,0x8,0xd7,0x63,0x60,0x0,0x81,
-  0xfa,0xff,0xff,0xff,0xd,0x3e,0x2,0x4,0x0,0x8d,0x4d,0x68,0x6b,0xcf,0xb8,0x8e,
-  0x86,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82,
-IGNORE:    // /data5/dev/qt/tests/auto/rcc/data/images/circle.png
+IGNORE:   // /dev/qt5/qtbase/tests/auto/tools/rcc/data/images/images/circle.png
   0x0,0x0,0x0,0xa5,
   0x89,
   0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,
@@ -33,7 +24,16 @@ IGNORE:    // /data5/dev/qt/tests/auto/rcc/data/images/circle.png
   0x4c,0x48,0x31,0x15,0x53,0xec,0x5,0x14,0x9b,0x11,0xc5,0x6e,0x8,0xdd,0x8e,0x1b,
   0x14,0x54,0x19,0xf3,0xa1,0x23,0xdb,0xd5,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,
   0xae,0x42,0x60,0x82,
-IGNORE:     // /data5/dev/qt/tests/auto/rcc/data/images/subdir/triangle.png
+IGNORE:     // /dev/qt5/qtbase/tests/auto/tools/rcc/data/images/images/square.png
+  0x0,0x0,0x0,0x5e,
+  0x89,
+  0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,
+  0x0,0x0,0x20,0x0,0x0,0x0,0x20,0x1,0x3,0x0,0x0,0x0,0x49,0xb4,0xe8,0xb7,
+  0x0,0x0,0x0,0x6,0x50,0x4c,0x54,0x45,0x0,0x0,0x0,0x58,0xa8,0xff,0x8c,0x14,
+  0x1f,0xab,0x0,0x0,0x0,0x13,0x49,0x44,0x41,0x54,0x8,0xd7,0x63,0x60,0x0,0x81,
+  0xfa,0xff,0xff,0xff,0xd,0x3e,0x2,0x4,0x0,0x8d,0x4d,0x68,0x6b,0xcf,0xb8,0x8e,
+  0x86,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82,
+IGNORE:     // /dev/qt5/qtbase/tests/auto/tools/rcc/data/images/images/subdir/triangle.png
   0x0,0x0,0x0,0xaa,
   0x89,
   0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,
@@ -56,21 +56,21 @@ static const unsigned char qt_resource_name[] = {
   0x7,0x3,0x7d,0xc3,
   0x0,0x69,
   0x0,0x6d,0x0,0x61,0x0,0x67,0x0,0x65,0x0,0x73,
-    // square.png
-  0x0,0xa,
-  0x8,0x8b,0x6,0x27,
+    // subdir
+  0x0,0x6,
+  0x7,0xab,0x8b,0x2,
   0x0,0x73,
-  0x0,0x71,0x0,0x75,0x0,0x61,0x0,0x72,0x0,0x65,0x0,0x2e,0x0,0x70,0x0,0x6e,0x0,0x67,
+  0x0,0x75,0x0,0x62,0x0,0x64,0x0,0x69,0x0,0x72,
     // circle.png
   0x0,0xa,
   0xa,0x2d,0x16,0x47,
   0x0,0x63,
   0x0,0x69,0x0,0x72,0x0,0x63,0x0,0x6c,0x0,0x65,0x0,0x2e,0x0,0x70,0x0,0x6e,0x0,0x67,
-    // subdir
-  0x0,0x6,
-  0x7,0xab,0x8b,0x2,
+    // square.png
+  0x0,0xa,
+  0x8,0x8b,0x6,0x27,
   0x0,0x73,
-  0x0,0x75,0x0,0x62,0x0,0x64,0x0,0x69,0x0,0x72,
+  0x0,0x71,0x0,0x75,0x0,0x61,0x0,0x72,0x0,0x65,0x0,0x2e,0x0,0x70,0x0,0x6e,0x0,0x67,
     // triangle.png
   0x0,0xc,
   0x5,0x59,0xa7,0xc7,
@@ -85,11 +85,11 @@ static const unsigned char qt_resource_struct[] = {
   // :/images
   0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x0,0x0,0x3,0x0,0x0,0x0,0x2,
   // :/images/subdir
-  0x0,0x0,0x0,0x46,0x0,0x2,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x5,
+  0x0,0x0,0x0,0x12,0x0,0x2,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x5,
   // :/images/square.png
-  0x0,0x0,0x0,0x12,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,
+  0x0,0x0,0x0,0x3e,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0xa9,
   // :/images/circle.png
-  0x0,0x0,0x0,0x2c,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x62,
+  0x0,0x0,0x0,0x24,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,
   // :/images/subdir/triangle.png
   0x0,0x0,0x0,0x58,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x1,0xb,