1 /****************************************************************************
3 ** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
4 ** All rights reserved.
5 ** Contact: Nokia Corporation (qt-info@nokia.com)
7 ** This file is part of the QtDeclarative module of the Qt Toolkit.
9 ** $QT_BEGIN_LICENSE:LGPL$
10 ** GNU Lesser General Public License Usage
11 ** This file may be used under the terms of the GNU Lesser General Public
12 ** License version 2.1 as published by the Free Software Foundation and
13 ** appearing in the file LICENSE.LGPL included in the packaging of this
14 ** file. Please review the following information to ensure the GNU Lesser
15 ** General Public License version 2.1 requirements will be met:
16 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
18 ** In addition, as a special exception, Nokia gives you certain additional
19 ** rights. These rights are described in the Nokia Qt LGPL Exception
20 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
22 ** GNU General Public License Usage
23 ** Alternatively, this file may be used under the terms of the GNU General
24 ** Public License version 3.0 as published by the Free Software Foundation
25 ** and appearing in the file LICENSE.GPL included in the packaging of this
26 ** file. Please review the following information to ensure the GNU General
27 ** Public License version 3.0 requirements will be met:
28 ** http://www.gnu.org/copyleft/gpl.html.
31 ** Alternatively, this file may be used in accordance with the terms and
32 ** conditions contained in a signed written agreement between you and Nokia.
40 ****************************************************************************/
42 #include "qhashedstring_p.h"
44 // This is a reimplementation of V8's string hash algorithm. It is significantly
45 // faster to do it here than call into V8, but it adds the maintainence burden of
46 // ensuring that the two hashes are identical. We Q_ASSERT() that the two return
47 // the same value. If these asserts start to fail, the hash code needs to be
50 static const int kMaxArrayIndexSize = 10;
51 static const int kMaxHashCalcLength = 16383;
52 static const int kNofHashBitFields = 2;
53 static const int kHashShift = kNofHashBitFields;
54 static const int kIsNotArrayIndexMask = 1 << 1;
55 static const int kArrayIndexValueBits = 24;
56 static const int kArrayIndexHashLengthShift = kArrayIndexValueBits + kNofHashBitFields;
57 static const int kMaxCachedArrayIndexLength = 7;
60 template <typename schar>
61 uint32_t calculateHash(const schar* chars, int length) {
62 if (length > String::kMaxHashCalcLength) {
64 return (length << String::kHashShift) | String::kIsNotArrayIndexMask;
67 uint32_t raw_running_hash = 0;
68 uint32_t array_index = 0;
69 bool is_array_index = (0 < length && length <= String::kMaxArrayIndexSize);
70 bool is_first_char = true;
73 for (;is_array_index && ii < length; ++ii) {
76 raw_running_hash += c;
77 raw_running_hash += (raw_running_hash << 10);
78 raw_running_hash ^= (raw_running_hash >> 6);
80 if (c < '0' || c > '9') {
81 is_array_index = false;
85 is_first_char = false;
86 if (c == '0' && length > 1) {
87 is_array_index = false;
91 if (array_index > 429496729U - ((d + 2) >> 3)) {
92 is_array_index = false;
94 array_index = array_index * 10 + d;
99 for (;ii < length; ++ii) {
100 raw_running_hash += *chars++;
101 raw_running_hash += (raw_running_hash << 10);
102 raw_running_hash ^= (raw_running_hash >> 6);
105 if (is_array_index) {
106 array_index <<= String::kHashShift;
107 array_index |= length << String::kArrayIndexHashLengthShift;
110 raw_running_hash += (raw_running_hash << 3);
111 raw_running_hash ^= (raw_running_hash >> 11);
112 raw_running_hash += (raw_running_hash << 15);
113 if (raw_running_hash == 0) {
114 raw_running_hash = 27;
117 return (raw_running_hash << String::kHashShift) | String::kIsNotArrayIndexMask;
121 inline quint32 stringHash(const QChar* data, int length)
123 quint32 rv = calculateHash<quint16>((quint16*)data, length) >> String::kHashShift;
124 Q_ASSERT(rv == v8::String::ComputeHash((uint16_t*)data, length));
128 inline quint32 stringHash(const char *data, int length)
130 quint32 rv = calculateHash<quint8>((quint8*)data, length) >> String::kHashShift;
131 Q_ASSERT(rv == v8::String::ComputeHash((char *)data, length));
135 void QHashedString::computeHash() const
137 m_hash = stringHash(constData(), length());
140 void QHashedStringRef::computeHash() const
142 m_hash = stringHash(m_data, m_length);
145 void QHashedCStringRef::computeHash() const
147 m_hash = stringHash(m_data, m_length);
151 A QHash has initially around pow(2, MinNumBits) buckets. For
152 example, if MinNumBits is 4, it has 17 buckets.
154 const int MinNumBits = 4;
157 The prime_deltas array is a table of selected prime values, even
158 though it doesn't look like one. The primes we are using are 1,
159 2, 5, 11, 17, 37, 67, 131, 257, ..., i.e. primes in the immediate
160 surrounding of a power of two.
162 The primeForNumBits() function returns the prime associated to a
163 power of two. For example, primeForNumBits(8) returns 257.
166 static const uchar prime_deltas[] = {
167 0, 0, 1, 3, 1, 5, 3, 3, 1, 9, 7, 5, 3, 9, 25, 3,
168 1, 21, 3, 21, 7, 15, 9, 5, 3, 29, 15, 0, 0, 0, 0, 0
171 static inline int primeForNumBits(int numBits)
173 return (1 << numBits) + prime_deltas[numBits];
176 void QStringHashData::rehashToSize(int size)
178 short bits = qMax(MinNumBits, (int)numBits);
179 while (primeForNumBits(bits) < size) bits++;
185 void QStringHashData::rehashToBits(short bits)
187 numBits = qMax(MinNumBits, (int)bits);
189 int nb = primeForNumBits(numBits);
190 if (nb == numBuckets && buckets)
196 buckets = new QStringHashNode *[numBuckets];
197 ::memset(buckets, 0, sizeof(QStringHashNode *) * numBuckets);
199 QStringHashNode *nodeList = nodes;
201 int bucket = nodeList->hash % numBuckets;
202 nodeList->next = buckets[bucket];
203 buckets[bucket] = nodeList;
205 nodeList = nodeList->nlist;
209 // Copy of QString's qMemCompare
210 bool QHashedString::compare(const QChar *lhs, const QChar *rhs, int length)
212 Q_ASSERT(lhs && rhs);
213 const quint16 *a = (const quint16 *)lhs;
214 const quint16 *b = (const quint16 *)rhs;
216 if (a == b || !length)
228 if ((sa.value & 2) == (sb.value & 2)) {
229 // both addresses have the same alignment
231 // both addresses are not aligned to 4-bytes boundaries
232 // compare the first character
239 // now both addresses are 4-bytes aligned
242 // both addresses are 4-bytes aligned
243 // do a fast 32-bit comparison
244 register const quint32 *e = sa.d + (length >> 1);
245 for ( ; sa.d != e; ++sa.d, ++sb.d) {
250 // do we have a tail?
251 return (length & 1) ? *sa.w == *sb.w : true;
253 // one of the addresses isn't 4-byte aligned but the other is
254 register const quint16 *e = sa.w + length;
255 for ( ; sa.w != e; ++sa.w, ++sb.w) {
264 static inline bool isUnicodeNonCharacter(uint ucs4)
266 // Unicode has a couple of "non-characters" that one can use internally,
267 // but are not allowed to be used for text interchange.
269 // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
270 // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
271 // U+FDEF (inclusive)
273 return (ucs4 & 0xfffe) == 0xfffe
274 || (ucs4 - 0xfdd0U) < 16;
277 static int utf8LengthFromUtf16(const QChar *uc, int len)
281 int surrogate_high = -1;
283 const QChar *ch = uc;
286 const QChar *end = ch + len;
288 uint u = ch->unicode();
289 if (surrogate_high >= 0) {
290 if (u >= 0xdc00 && u < 0xe000) {
291 u = (surrogate_high - 0xd800)*0x400 + (u - 0xdc00) + 0x10000;
294 // high surrogate without low
300 } else if (u >= 0xdc00 && u < 0xe000) {
301 // low surrogate without high
305 } else if (u >= 0xd800 && u < 0xdc00) {
317 // is it one of the Unicode non-characters?
318 if (isUnicodeNonCharacter(u)) {
341 // Writes the utf8 version of uc to output. uc is of length len.
342 // There must be at least utf8LengthFromUtf16(uc, len) bytes in output.
343 // A null terminator is not written.
344 static void utf8FromUtf16(char *output, const QChar *uc, int len)
346 uchar replacement = '?';
347 int surrogate_high = -1;
349 uchar* cursor = (uchar*)output;
350 const QChar *ch = uc;
353 const QChar *end = ch + len;
355 uint u = ch->unicode();
356 if (surrogate_high >= 0) {
357 if (u >= 0xdc00 && u < 0xe000) {
358 u = (surrogate_high - 0xd800)*0x400 + (u - 0xdc00) + 0x10000;
361 // high surrogate without low
362 *cursor = replacement;
368 } else if (u >= 0xdc00 && u < 0xe000) {
369 // low surrogate without high
370 *cursor = replacement;
374 } else if (u >= 0xd800 && u < 0xdc00) {
381 *cursor++ = (uchar)u;
384 *cursor++ = 0xc0 | ((uchar) (u >> 6));
386 // is it one of the Unicode non-characters?
387 if (isUnicodeNonCharacter(u)) {
388 *cursor++ = replacement;
395 *cursor++ = 0xf0 | ((uchar) (u >> 18));
396 *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f);
398 *cursor++ = 0xe0 | (((uchar) (u >> 12)) & 0x3f);
400 *cursor++ = 0x80 | (((uchar) (u >> 6)) & 0x3f);
402 *cursor++ = 0x80 | ((uchar) (u&0x3f));
408 void QHashedStringRef::computeUtf8Length() const
411 m_utf8length = utf8LengthFromUtf16(m_data, m_length);
416 QHashedStringRef QHashedStringRef::mid(int offset, int length) const
418 Q_ASSERT(offset < m_length);
419 return QHashedStringRef(m_data + offset,
420 (length == -1 || (offset + length) > m_length)?(m_length - offset):length);
423 bool QHashedStringRef::endsWith(const QString &s) const
425 return s.length() < m_length &&
426 QHashedString::compare(s.constData(), m_data + m_length - s.length(), s.length());
429 bool QHashedStringRef::startsWith(const QString &s) const
431 return s.length() < m_length &&
432 QHashedString::compare(s.constData(), m_data, s.length());
435 QString QHashedStringRef::toString() const
439 return QString(m_data, m_length);
442 QByteArray QHashedStringRef::toUtf8() const
448 result.resize(utf8length());
449 writeUtf8(result.data());
453 void QHashedStringRef::writeUtf8(char *output) const
456 int ulen = utf8length();
457 if (ulen == m_length) {
458 // Must be a latin1 string
459 uchar *o = (uchar *)output;
460 const QChar *c = m_data;
462 *o++ = (uchar)((*c++).unicode());
464 utf8FromUtf16(output, m_data, m_length);
469 QString QHashedCStringRef::toUtf16() const
476 writeUtf16((uint16_t*)rv.data());