QHash security fix (1.5/2): qHash two arguments overload support

author Giuseppe D'Angelo <dangelog@gmail.com>

Sat, 24 Mar 2012 08:50:02 +0000 (08:50 +0000)

committer Qt by Nokia <qt-info@nokia.com>

Wed, 4 Apr 2012 11:02:58 +0000 (13:02 +0200)
author Giuseppe D'Angelo <dangelog@gmail.com>
Sat, 24 Mar 2012 08:50:02 +0000 (08:50 +0000)
committer Qt by Nokia <qt-info@nokia.com>
Wed, 4 Apr 2012 11:02:58 +0000 (13:02 +0200)
diff --git a/dist/changes-5.0.0 b/dist/changes-5.0.0

index 1258792..cfb83a4 100644 (file)
--- a/dist/changes-5.0.0
+++ b/dist/changes-5.0.0
@@ -346,6 +346,10 @@ QtCore
  * QEvent::AccessibilityPrepare, AccessibilityHelp and AccessibilityDescription removed:
    * The enum values simply didn't make sense in the first place and should simply be dropped.
  
+* [QTBUG-23529] QHash is now more resilient to a family of denial of service
+  attacks exploiting algorithmic complexity, by supporting two-arguments overloads
+  of the qHash() hashing function.
+
  QtGui
  -----
  * Accessibility has been refactored. The hierachy of accessible objects is implemented via
diff --git a/doc/src/snippets/code/src_corelib_tools_qhash.cpp b/doc/src/snippets/code/src_corelib_tools_qhash.cpp

index 6595119..2fa73ba 100644 (file)
--- a/doc/src/snippets/code/src_corelib_tools_qhash.cpp
+++ b/doc/src/snippets/code/src_corelib_tools_qhash.cpp
@@ -155,9 +155,9 @@ inline bool operator==(const Employee &e1, const Employee &e2)
             && e1.dateOfBirth() == e2.dateOfBirth();
  }
  
-inline uint qHash(const Employee &key)
+inline uint qHash(const Employee &key, uint seed)
  {
-    return qHash(key.name()) ^ key.dateOfBirth().day();
+    return qHash(key.name(), seed) ^ key.dateOfBirth().day();
  }
  
  #endif // EMPLOYEE_H
diff --git a/src/corelib/tools/qbitarray.h b/src/corelib/tools/qbitarray.h

index ac54c2a..5486c60 100644 (file)
--- a/src/corelib/tools/qbitarray.h
+++ b/src/corelib/tools/qbitarray.h
@@ -54,7 +54,7 @@ class Q_CORE_EXPORT QBitArray
  {
      friend Q_CORE_EXPORT QDataStream &operator<<(QDataStream &, const QBitArray &);
      friend Q_CORE_EXPORT QDataStream &operator>>(QDataStream &, QBitArray &);
-    friend Q_CORE_EXPORT uint qHash(const QBitArray &key);
+    friend Q_CORE_EXPORT uint qHash(const QBitArray &key, uint seed);
      QByteArray d;
  
  public:
diff --git a/src/corelib/tools/qhash.cpp b/src/corelib/tools/qhash.cpp

index 5ccc1b3..52a1eed 100644 (file)
--- a/src/corelib/tools/qhash.cpp
+++ b/src/corelib/tools/qhash.cpp
@@ -84,9 +84,9 @@ QT_BEGIN_NAMESPACE
      "a", "aa", "aaa", "aaaa", ...
  */
  
-static uint hash(const uchar *p, int n)
+static uint hash(const uchar *p, int n, uint seed)
  {
-    uint h = 0;
+    uint h = seed;
  
      while (n--) {
          h = (h << 4) + *p++;
@@ -96,9 +96,9 @@ static uint hash(const uchar *p, int n)
      return h;
  }
  
-static uint hash(const QChar *p, int n)
+static uint hash(const QChar *p, int n, uint seed)
  {
-    uint h = 0;
+    uint h = seed;
  
      while (n--) {
          h = (h << 4) + (*p++).unicode();
@@ -108,25 +108,25 @@ static uint hash(const QChar *p, int n)
      return h;
  }
  
-uint qHash(const QByteArray &key)
+uint qHash(const QByteArray &key, uint seed)
  {
-    return hash(reinterpret_cast<const uchar *>(key.constData()), key.size());
+    return hash(reinterpret_cast<const uchar *>(key.constData()), key.size(), seed);
  }
  
-uint qHash(const QString &key)
+uint qHash(const QString &key, uint seed)
  {
-    return hash(key.unicode(), key.size());
+    return hash(key.unicode(), key.size(), seed);
  }
  
-uint qHash(const QStringRef &key)
+uint qHash(const QStringRef &key, uint seed)
  {
-    return hash(key.unicode(), key.size());
+    return hash(key.unicode(), key.size(), seed);
  }
  
-uint qHash(const QBitArray &bitArray)
+uint qHash(const QBitArray &bitArray, uint seed)
  {
      int m = bitArray.d.size() - 1;
-    uint result = hash(reinterpret_cast<const uchar *>(bitArray.d.constData()), qMax(0, m));
+    uint result = hash(reinterpret_cast<const uchar *>(bitArray.d.constData()), qMax(0, m), seed);
  
      // deal with the last 0 to 7 bits manually, because we can't trust that
      // the padding is initialized to 0 in bitArray.d
@@ -614,17 +614,16 @@ void QHashData::checkSanity()
      Returns the hash value for the \a key.
  */
  
-/*! \fn uint qHash(const QByteArray &key)
-    \fn uint qHash(const QBitArray &key)
-    \relates QHash
+/*! \fn uint qHash(const QByteArray &key, uint seed = 0)
+    \fn uint qHash(const QBitArray &key, uint seed = 0)
+    \fn uint qHash(const QString &key, uint seed = 0)
+    \fn uint qHash(const QStringRef &key, uint seed = 0)
  
-    Returns the hash value for the \a key.
-*/
-
-/*! \fn uint qHash(const QString &key)
      \relates QHash
+    \since 5.0
  
-    Returns the hash value for the \a key.
+    Returns the hash value for the \a key, using \a seed to
+    seed the calculation.
  */
  
  /*! \fn uint qHash(const T *key)
@@ -656,8 +655,7 @@ void QHashData::checkSanity()
         key. With QHash, the items are arbitrarily ordered.
      \li The key type of a QMap must provide operator<(). The key
         type of a QHash must provide operator==() and a global
-       hash function called qHash() (see the related non-member
-       functions).
+       hash function called qHash() (see \l{qHash}).
      \endlist
  
      Here's an example QHash with QString keys and \c int values:
@@ -702,6 +700,15 @@ void QHashData::checkSanity()
      To avoid this problem, replace \c hash[i] with \c hash.value(i)
      in the code above.
  
+    Internally, QHash uses a hash table to perform lookups. Unlike Qt
+    3's \c QDict class, which needed to be initialized with a prime
+    number, QHash's hash table automatically grows and shrinks to
+    provide fast lookups without wasting too much memory. You can
+    still control the size of the hash table by calling reserve() if
+    you already know approximately how many items the QHash will
+    contain, but this isn't necessary to obtain good performance. You
+    can also call capacity() to retrieve the hash table's size.
+
      If you want to navigate through all the (key, value) pairs stored
      in a QHash, you can use an iterator. QHash provides both
      \l{Java-style iterators} (QHashIterator and QMutableHashIterator)
@@ -751,21 +758,15 @@ void QHashData::checkSanity()
  
      QHash's key and value data types must be \l{assignable data
      types}. You cannot, for example, store a QWidget as a value;
-    instead, store a QWidget *. In addition, QHash's key type must
-    provide operator==(), and there must also be a global qHash()
-    function that returns a hash value for an argument of the key's
-    type.
-
-    Here's a list of the C++ and Qt types that can serve as keys in a
-    QHash: any integer type (char, unsigned long, etc.), any pointer
-    type, QChar, QString, and QByteArray. For all of these, the \c
-    <QHash> header defines a qHash() function that computes an
-    adequate hash value. If you want to use other types as the key,
-    make sure that you provide operator==() and a qHash()
-    implementation.
+    instead, store a QWidget *.
  
-    Example:
-    \snippet doc/src/snippets/code/src_corelib_tools_qhash.cpp 13
+    \target qHash
+    \section2 The qHash() hashing function
+
+    A QHash's key type has additional requirements other than being an
+    assignable data type: it must provide operator==(), and there must also be
+    a global qHash() function that returns a hash value for an argument of the
+    key's type.
  
      The qHash() function computes a numeric value based on a key. It
      can use any algorithm imaginable, as long as it always returns
@@ -775,19 +776,56 @@ void QHashData::checkSanity()
      attempt to return different hash values for different keys to the
      largest extent possible.
  
+    For a key type \c{K}, the qHash function must have one of these signatures:
+
+    \code
+    uint qHash(K key);
+    uint qHash(const K &key);
+
+    uint qHash(K key, uint seed);
+    uint qHash(const K &key, uint seed);
+    \endcode
+
+    The two-arguments overloads take an unsigned integer that should be used to
+    seed the calculation of the hash function. This seed is provided by QHash
+    in order to prevent a family of \l{algorithmic complexity attacks}. If both
+    a one-argument and a two-arguments overload are defined for a key type,
+    the latter is used by QHash (note that you can simply define a
+    two-arguments version, and use a default value for the seed parameter).
+
+    Here's a partial list of the C++ and Qt types that can serve as keys in a
+    QHash: any integer type (char, unsigned long, etc.), any pointer type,
+    QChar, QString, and QByteArray. For all of these, the \c <QHash> header
+    defines a qHash() function that computes an adequate hash value. Many other
+    Qt classes also declare a qHash overload for their type; please refer to
+    the documentation of each class.
+
+    If you want to use other types as the key, make sure that you provide
+    operator==() and a qHash() implementation.
+
+    Example:
+    \snippet doc/src/snippets/code/src_corelib_tools_qhash.cpp 13
+
      In the example above, we've relied on Qt's global qHash(const
-    QString &) to give us a hash value for the employee's name, and
+    QString &, uint) to give us a hash value for the employee's name, and
      XOR'ed this with the day they were born to help produce unique
      hashes for people with the same name.
  
-    Internally, QHash uses a hash table to perform lookups. Unlike Qt
-    3's \c QDict class, which needed to be initialized with a prime
-    number, QHash's hash table automatically grows and shrinks to
-    provide fast lookups without wasting too much memory. You can
-    still control the size of the hash table by calling reserve() if
-    you already know approximately how many items the QHash will
-    contain, but this isn't necessary to obtain good performance. You
-    can also call capacity() to retrieve the hash table's size.
+    \section2 Algorithmic complexity attacks
+
+    All hash tables are vulnerable to a particular class of denial of service
+    attacks, in which the attacker carefully pre-computes a set of different
+    keys that are going to be hashed in the same bucket of a hash table (or
+    even have the very same hash value). The attack aims at getting the
+    worst-case algorithmic behavior (O(n) instead of amortized O(1), see
+    \l{Algorithmic Complexity} for the details) when the data is fed into the
+    table.
+
+    In order to avoid this worst-case behavior, the calculation of the hash
+    value done by qHash() can be salted by a random seed, that nullifies the
+    attack's extent. This seed is automatically generated by QHash once per
+    process, and then passed by QHash as the second argument of the
+    two-arguments overload of the qHash() function.
  
      \sa QHashIterator, QMutableHashIterator, QMap, QSet
  */
diff --git a/src/corelib/tools/qhash.h b/src/corelib/tools/qhash.h

index 1e0c053..e5606c6 100644 (file)
--- a/src/corelib/tools/qhash.h
+++ b/src/corelib/tools/qhash.h
@@ -84,10 +84,10 @@ inline uint qHash(quint64 key)
  }
  inline uint qHash(qint64 key) { return qHash(quint64(key)); }
  inline uint qHash(QChar key) { return qHash(key.unicode()); }
-Q_CORE_EXPORT uint qHash(const QByteArray &key);
-Q_CORE_EXPORT uint qHash(const QString &key);
-Q_CORE_EXPORT uint qHash(const QStringRef &key);
-Q_CORE_EXPORT uint qHash(const QBitArray &key);
+Q_CORE_EXPORT uint qHash(const QByteArray &key, uint seed = 0);
+Q_CORE_EXPORT uint qHash(const QString &key, uint seed = 0);
+Q_CORE_EXPORT uint qHash(const QStringRef &key, uint seed = 0);
+Q_CORE_EXPORT uint qHash(const QBitArray &key, uint seed = 0);
  
  #if defined(Q_CC_MSVC)
  #pragma warning( push )
@@ -108,6 +108,8 @@ template <typename T1, typename T2> inline uint qHash(const QPair<T1, T2> &key)
      return ((h1 << 16) | (h1 >> 16)) ^ h2;
  }
  
+template<typename T> inline uint qHash(const T &t, uint) { return qHash(t); }
+
  struct Q_CORE_EXPORT QHashData
  {
      struct Node {
@@ -857,7 +859,7 @@ Q_OUTOFLINE_TEMPLATE typename QHash<Key, T>::Node **QHash<Key, T>::findNode(cons
      uint h = 0;
  
      if (d->numBuckets || ahp) {
-        h = qHash(akey);
+        h = qHash(akey, 0);
          if (ahp)
              *ahp = h;
      }
diff --git a/src/dbus/qdbusextratypes.h b/src/dbus/qdbusextratypes.h

index a905cff..d8bdf74 100644 (file)
--- a/src/dbus/qdbusextratypes.h
+++ b/src/dbus/qdbusextratypes.h
@@ -47,6 +47,7 @@
  #include <QtCore/qvariant.h>
  #include <QtCore/qstring.h>
  #include <QtDBus/qdbusmacros.h>
+#include <QtCore/qhash.h>
  
  #ifndef QT_NO_DBUS
  
@@ -55,9 +56,6 @@ QT_BEGIN_HEADER
  QT_BEGIN_NAMESPACE
  
  
-// defined in qhash.cpp
-Q_CORE_EXPORT uint qHash(const QString &key);
-
  class Q_DBUS_EXPORT QDBusObjectPath
  {
      QString m_path;
diff --git a/tests/auto/corelib/tools/qhash/tst_qhash.cpp b/tests/auto/corelib/tools/qhash/tst_qhash.cpp

index 3b5d15d..9d18c7a 100644 (file)
--- a/tests/auto/corelib/tools/qhash/tst_qhash.cpp
+++ b/tests/auto/corelib/tools/qhash/tst_qhash.cpp
@@ -73,6 +73,7 @@ private slots:
      void noNeedlessRehashes();
  
      void const_shared_null();
+    void twoArguments_qHash();
  };
  
  struct Foo {
@@ -1203,5 +1204,101 @@ void tst_QHash::const_shared_null()
      QVERIFY(!hash2.isDetached());
  }
  
+// This gets set to != 0 in wrong qHash overloads
+static int wrongqHashOverload = 0;
+
+struct OneArgumentQHashStruct1 {};
+bool operator==(const OneArgumentQHashStruct1 &, const OneArgumentQHashStruct1 &) { return false; }
+uint qHash(OneArgumentQHashStruct1) { return 0; }
+
+struct OneArgumentQHashStruct2 {};
+bool operator==(const OneArgumentQHashStruct2 &, const OneArgumentQHashStruct2 &) { return false; }
+uint qHash(const OneArgumentQHashStruct2 &) { return 0; }
+
+struct OneArgumentQHashStruct3 {};
+bool operator==(const OneArgumentQHashStruct3 &, const OneArgumentQHashStruct3 &) { return false; }
+uint qHash(OneArgumentQHashStruct3) { return 0; }
+uint qHash(OneArgumentQHashStruct3 &, uint) { wrongqHashOverload = 1; return 0; }
+
+struct OneArgumentQHashStruct4 {};
+bool operator==(const OneArgumentQHashStruct4 &, const OneArgumentQHashStruct4 &) { return false; }
+uint qHash(const OneArgumentQHashStruct4 &) { return 0; }
+uint qHash(OneArgumentQHashStruct4 &, uint) { wrongqHashOverload = 1; return 0; }
+
+
+struct TwoArgumentsQHashStruct1 {};
+bool operator==(const TwoArgumentsQHashStruct1 &, const TwoArgumentsQHashStruct1 &) { return false; }
+uint qHash(const TwoArgumentsQHashStruct1 &) { wrongqHashOverload = 1; return 0; }
+uint qHash(const TwoArgumentsQHashStruct1 &, uint) { return 0; }
+
+struct TwoArgumentsQHashStruct2 {};
+bool operator==(const TwoArgumentsQHashStruct2 &, const TwoArgumentsQHashStruct2 &) { return false; }
+uint qHash(TwoArgumentsQHashStruct2) { wrongqHashOverload = 1; return 0; }
+uint qHash(const TwoArgumentsQHashStruct2 &, uint) { return 0; }
+
+struct TwoArgumentsQHashStruct3 {};
+bool operator==(const TwoArgumentsQHashStruct3 &, const TwoArgumentsQHashStruct3 &) { return false; }
+uint qHash(const TwoArgumentsQHashStruct3 &) { wrongqHashOverload = 1; return 0; }
+uint qHash(TwoArgumentsQHashStruct3, uint) { return 0; }
+
+struct TwoArgumentsQHashStruct4 {};
+bool operator==(const TwoArgumentsQHashStruct4 &, const TwoArgumentsQHashStruct4 &) { return false; }
+uint qHash(TwoArgumentsQHashStruct4) { wrongqHashOverload = 1; return 0; }
+uint qHash(TwoArgumentsQHashStruct4, uint) { return 0; }
+
+/*!
+    \internal
+
+    Check that QHash picks up the right overload.
+    The best one, for a type T, is the two-args version of qHash:
+    either uint qHash(T, uint) or uint qHash(const T &, uint).
+
+    If neither of these exists, then one between
+    uint qHash(T) or uint qHash(const T &) must exist
+    (and it gets selected instead).
+*/
+void tst_QHash::twoArguments_qHash()
+{
+    QHash<OneArgumentQHashStruct1, int> oneArgHash1;
+    OneArgumentQHashStruct1 oneArgObject1;
+    oneArgHash1[oneArgObject1] = 1;
+    QCOMPARE(wrongqHashOverload, 0);
+
+    QHash<OneArgumentQHashStruct2, int> oneArgHash2;
+    OneArgumentQHashStruct2 oneArgObject2;
+    oneArgHash2[oneArgObject2] = 1;
+    QCOMPARE(wrongqHashOverload, 0);
+
+    QHash<OneArgumentQHashStruct3, int> oneArgHash3;
+    OneArgumentQHashStruct3 oneArgObject3;
+    oneArgHash3[oneArgObject3] = 1;
+    QCOMPARE(wrongqHashOverload, 0);
+
+    QHash<OneArgumentQHashStruct4, int> oneArgHash4;
+    OneArgumentQHashStruct4 oneArgObject4;
+    oneArgHash4[oneArgObject4] = 1;
+    QCOMPARE(wrongqHashOverload, 0);
+
+    QHash<TwoArgumentsQHashStruct1, int> twoArgsHash1;
+    TwoArgumentsQHashStruct1 twoArgsObject1;
+    twoArgsHash1[twoArgsObject1] = 1;
+    QCOMPARE(wrongqHashOverload, 0);
+
+    QHash<TwoArgumentsQHashStruct2, int> twoArgsHash2;
+    TwoArgumentsQHashStruct2 twoArgsObject2;
+    twoArgsHash2[twoArgsObject2] = 1;
+    QCOMPARE(wrongqHashOverload, 0);
+
+    QHash<TwoArgumentsQHashStruct3, int> twoArgsHash3;
+    TwoArgumentsQHashStruct3 twoArgsObject3;
+    twoArgsHash3[twoArgsObject3] = 1;
+    QCOMPARE(wrongqHashOverload, 0);
+
+    QHash<TwoArgumentsQHashStruct4, int> twoArgsHash4;
+    TwoArgumentsQHashStruct4 twoArgsObject4;
+    twoArgsHash4[twoArgsObject4] = 1;
+    QCOMPARE(wrongqHashOverload, 0);
+}
+
  QTEST_APPLESS_MAIN(tst_QHash)
  #include "tst_qhash.moc"
author	Giuseppe D'Angelo <dangelog@gmail.com>
	Sat, 24 Mar 2012 08:50:02 +0000 (08:50 +0000)
committer	Qt by Nokia <qt-info@nokia.com>
	Wed, 4 Apr 2012 11:02:58 +0000 (13:02 +0200)
dist/changes-5.0.0		patch \| blob \| history
doc/src/snippets/code/src_corelib_tools_qhash.cpp		patch \| blob \| history
src/corelib/tools/qbitarray.h		patch \| blob \| history
src/corelib/tools/qhash.cpp		patch \| blob \| history
src/corelib/tools/qhash.h		patch \| blob \| history
src/dbus/qdbusextratypes.h		patch \| blob \| history
tests/auto/corelib/tools/qhash/tst_qhash.cpp		patch \| blob \| history