2 Copyright (c) 2003-2010, Troy D. Hanson http://uthash.sourceforge.net
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
11 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
12 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
13 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
15 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
16 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
17 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
18 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
19 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
20 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include <string.h> /* memcmp,strlen */
28 #include <stddef.h> /* ptrdiff_t */
32 /* These macros use decltype or the earlier __typeof GNU extension.
33 As decltype is only available in newer compilers (VS2010 or gcc 4.3+
34 when compiling c++ source) this code uses whatever method is needed
35 or, for VS2008 where neither is available, uses casting workarounds. */
36 #ifdef _MSC_VER /* MS compiler */
37 #if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */
38 #define DECLTYPE(x) (decltype(x))
39 #else /* VS2008 or older (or VS2010 in C mode) */
43 #else /* GNU, Sun and other compilers */
44 #define DECLTYPE(x) (__typeof(x))
48 #define DECLTYPE_ASSIGN(dst,src) \
50 char **_da_dst = (char**)(&(dst)); \
51 *_da_dst = (char*)(src); \
54 #define DECLTYPE_ASSIGN(dst,src) \
56 (dst) = DECLTYPE(dst)(src); \
60 /* a number of the hash function use uint32_t which isn't defined on win32 */
62 typedef unsigned int uint32_t;
64 #include <inttypes.h> /* uint32_t */
67 #define UTHASH_VERSION 1.9.3
70 #define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */
73 #define uthash_malloc(sz) OCMalloc(sz)
74 #define uthash_free(ptr,sz) OCFree(ptr)
76 #define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */
77 #define uthash_expand_fyi(tbl) /* can be defined to log expands */
79 /* initial number of buckets */
80 #define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */
81 #define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */
82 #define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */
84 /* calculate the element whose hash handle address is hhe */
85 #define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))
87 #define HASH_FIND(hh,head,keyptr,keylen,out) \
89 unsigned _hf_bkt,_hf_hashv; \
92 HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \
93 if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \
94 HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \
101 #define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM)
102 #define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0)
103 #define HASH_BLOOM_MAKE(tbl) \
105 (tbl)->bloom_nbits = HASH_BLOOM; \
106 (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \
107 if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \
108 memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \
109 (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \
112 #define HASH_BLOOM_FREE(tbl) \
114 uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \
117 #define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8)))
118 #define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8)))
120 #define HASH_BLOOM_ADD(tbl,hashv) \
121 HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
123 #define HASH_BLOOM_TEST(tbl,hashv) \
124 HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
127 #define HASH_BLOOM_MAKE(tbl)
128 #define HASH_BLOOM_FREE(tbl)
129 #define HASH_BLOOM_ADD(tbl,hashv)
130 #define HASH_BLOOM_TEST(tbl,hashv) (1)
133 #define HASH_MAKE_TABLE(hh,head) \
135 (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \
136 sizeof(UT_hash_table)); \
137 if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \
138 memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \
139 (head)->hh.tbl->tail = &((head)->hh); \
140 (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \
141 (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \
142 (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \
143 (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \
144 HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
145 if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \
146 memset((head)->hh.tbl->buckets, 0, \
147 HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
148 HASH_BLOOM_MAKE((head)->hh.tbl); \
149 (head)->hh.tbl->signature = HASH_SIGNATURE; \
152 #define HASH_ADD(hh,head,fieldname,keylen_in,add) \
153 HASH_ADD_KEYPTR(hh,head,&add->fieldname,keylen_in,add)
155 #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
158 (add)->hh.next = NULL; \
159 (add)->hh.key = (char*)keyptr; \
160 (add)->hh.keylen = keylen_in; \
163 (head)->hh.prev = NULL; \
164 HASH_MAKE_TABLE(hh,head); \
166 (head)->hh.tbl->tail->next = (add); \
167 (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \
168 (head)->hh.tbl->tail = &((add)->hh); \
170 (head)->hh.tbl->num_items++; \
171 (add)->hh.tbl = (head)->hh.tbl; \
172 HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \
173 (add)->hh.hashv, _ha_bkt); \
174 HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \
175 HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \
176 HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \
177 HASH_FSCK(hh,head); \
180 #define HASH_TO_BKT( hashv, num_bkts, bkt ) \
182 bkt = ((hashv) & ((num_bkts) - 1)); \
185 /* delete "delptr" from the hash table.
186 * "the usual" patch-up process for the app-order doubly-linked-list.
187 * The use of _hd_hh_del below deserves special explanation.
188 * These used to be expressed using (delptr) but that led to a bug
189 * if someone used the same symbol for the head and deletee, like
190 * HASH_DELETE(hh,users,users);
191 * We want that to work, but by changing the head (users) below
192 * we were forfeiting our ability to further refer to the deletee (users)
193 * in the patch-up process. Solution: use scratch space to
194 * copy the deletee pointer, then the latter references are via that
195 * scratch pointer rather than through the repointed (users) symbol.
197 #define HASH_DELETE(hh,head,delptr) \
200 struct UT_hash_handle *_hd_hh_del; \
201 if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \
202 uthash_free((head)->hh.tbl->buckets, \
203 (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
204 HASH_BLOOM_FREE((head)->hh.tbl); \
205 uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
208 _hd_hh_del = &((delptr)->hh); \
209 if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \
210 (head)->hh.tbl->tail = \
211 (UT_hash_handle*)((char*)((delptr)->hh.prev) + \
212 (head)->hh.tbl->hho); \
214 if ((delptr)->hh.prev) { \
215 ((UT_hash_handle*)((char*)((delptr)->hh.prev) + \
216 (head)->hh.tbl->hho))->next = (delptr)->hh.next; \
218 DECLTYPE_ASSIGN(head,(delptr)->hh.next); \
220 if (_hd_hh_del->next) { \
221 ((UT_hash_handle*)((char*)_hd_hh_del->next + \
222 (head)->hh.tbl->hho))->prev = \
225 HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \
226 HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \
227 (head)->hh.tbl->num_items--; \
229 HASH_FSCK(hh,head); \
233 /* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
234 #define HASH_FIND_STR(head,findstr,out) \
235 HASH_FIND(hh,head,findstr,strlen(findstr),out)
236 #define HASH_ADD_STR(head,strfield,add) \
237 HASH_ADD(hh,head,strfield,strlen(add->strfield),add)
238 #define HASH_FIND_INT(head,findint,out) \
239 HASH_FIND(hh,head,findint,sizeof(int),out)
240 #define HASH_ADD_INT(head,intfield,add) \
241 HASH_ADD(hh,head,intfield,sizeof(int),add)
242 #define HASH_FIND_PTR(head,findptr,out) \
243 HASH_FIND(hh,head,findptr,sizeof(void *),out)
244 #define HASH_ADD_PTR(head,ptrfield,add) \
245 HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
246 #define HASH_DEL(head,delptr) \
247 HASH_DELETE(hh,head,delptr)
249 /* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
250 * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
253 #define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
254 #define HASH_FSCK(hh,head) \
257 unsigned _count, _bkt_count; \
259 struct UT_hash_handle *_thh; \
262 for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \
264 _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \
267 if (_prev != (char*)(_thh->hh_prev)) { \
268 HASH_OOPS("invalid hh_prev %p, actual %p\n", \
269 _thh->hh_prev, _prev ); \
272 _prev = (char*)(_thh); \
273 _thh = _thh->hh_next; \
275 _count += _bkt_count; \
276 if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \
277 HASH_OOPS("invalid bucket count %d, actual %d\n", \
278 (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \
281 if (_count != (head)->hh.tbl->num_items) { \
282 HASH_OOPS("invalid hh item count %d, actual %d\n", \
283 (head)->hh.tbl->num_items, _count ); \
285 /* traverse hh in app order; check next/prev integrity, count */ \
288 _thh = &(head)->hh; \
291 if (_prev !=(char*)(_thh->prev)) { \
292 HASH_OOPS("invalid prev %p, actual %p\n", \
293 _thh->prev, _prev ); \
295 _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \
296 _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \
297 (head)->hh.tbl->hho) : NULL ); \
299 if (_count != (head)->hh.tbl->num_items) { \
300 HASH_OOPS("invalid app item count %d, actual %d\n", \
301 (head)->hh.tbl->num_items, _count ); \
306 #define HASH_FSCK(hh,head)
309 /* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
310 * the descriptor to which this macro is defined for tuning the hash function.
311 * The app can #include <unistd.h> to get the prototype for write(2). */
312 #ifdef HASH_EMIT_KEYS
313 #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \
315 unsigned _klen = fieldlen; \
316 write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
317 write(HASH_EMIT_KEYS, keyptr, fieldlen); \
320 #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
323 /* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */
325 #define HASH_FCN HASH_FUNCTION
327 #define HASH_FCN HASH_JEN
330 /* The Bernstein hash function, used in Perl prior to v5.6 */
331 #define HASH_BER(key,keylen,num_bkts,hashv,bkt) \
333 unsigned _hb_keylen=keylen; \
334 char *_hb_key=(char*)(key); \
336 while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \
337 bkt = (hashv) & (num_bkts-1); \
341 /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
342 * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
343 #define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \
346 char *_hs_key=(char*)(key); \
348 for(_sx_i=0; _sx_i < keylen; _sx_i++) \
349 hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \
350 bkt = hashv & (num_bkts-1); \
353 #define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \
356 char *_hf_key=(char*)(key); \
357 hashv = 2166136261UL; \
358 for(_fn_i=0; _fn_i < keylen; _fn_i++) \
359 hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \
360 bkt = hashv & (num_bkts-1); \
363 #define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \
366 char *_ho_key=(char*)(key); \
368 for(_ho_i=0; _ho_i < keylen; _ho_i++) { \
369 hashv += _ho_key[_ho_i]; \
370 hashv += (hashv << 10); \
371 hashv ^= (hashv >> 6); \
373 hashv += (hashv << 3); \
374 hashv ^= (hashv >> 11); \
375 hashv += (hashv << 15); \
376 bkt = hashv & (num_bkts-1); \
379 #define HASH_JEN_MIX(a,b,c) \
381 a -= b; a -= c; a ^= ( c >> 13 ); \
382 b -= c; b -= a; b ^= ( a << 8 ); \
383 c -= a; c -= b; c ^= ( b >> 13 ); \
384 a -= b; a -= c; a ^= ( c >> 12 ); \
385 b -= c; b -= a; b ^= ( a << 16 ); \
386 c -= a; c -= b; c ^= ( b >> 5 ); \
387 a -= b; a -= c; a ^= ( c >> 3 ); \
388 b -= c; b -= a; b ^= ( a << 10 ); \
389 c -= a; c -= b; c ^= ( b >> 15 ); \
392 #define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \
394 unsigned _hj_i,_hj_j,_hj_k; \
395 char *_hj_key=(char*)(key); \
396 hashv = 0xfeedbeef; \
397 _hj_i = _hj_j = 0x9e3779b9; \
399 while (_hj_k >= 12) { \
400 _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \
401 + ( (unsigned)_hj_key[2] << 16 ) \
402 + ( (unsigned)_hj_key[3] << 24 ) ); \
403 _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \
404 + ( (unsigned)_hj_key[6] << 16 ) \
405 + ( (unsigned)_hj_key[7] << 24 ) ); \
406 hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \
407 + ( (unsigned)_hj_key[10] << 16 ) \
408 + ( (unsigned)_hj_key[11] << 24 ) ); \
410 HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
417 case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \
418 case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \
419 case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \
420 case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \
421 case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \
422 case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \
423 case 5: _hj_j += _hj_key[4]; \
424 case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \
425 case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \
426 case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \
427 case 1: _hj_i += _hj_key[0]; \
429 HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
430 bkt = hashv & (num_bkts-1); \
433 /* The Paul Hsieh hash function */
435 #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
436 || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
437 #define get16bits(d) (*((const uint16_t *) (d)))
440 #if !defined (get16bits)
441 #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
442 +(uint32_t)(((const uint8_t *)(d))[0]) )
444 #define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \
446 char *_sfh_key=(char*)(key); \
447 uint32_t _sfh_tmp, _sfh_len = keylen; \
449 int _sfh_rem = _sfh_len & 3; \
451 hashv = 0xcafebabe; \
454 for (;_sfh_len > 0; _sfh_len--) { \
455 hashv += get16bits (_sfh_key); \
456 _sfh_tmp = (get16bits (_sfh_key+2) << 11) ^ hashv; \
457 hashv = (hashv << 16) ^ _sfh_tmp; \
458 _sfh_key += 2*sizeof (uint16_t); \
459 hashv += hashv >> 11; \
462 /* Handle end cases */ \
463 switch (_sfh_rem) { \
464 case 3: hashv += get16bits (_sfh_key); \
465 hashv ^= hashv << 16; \
466 hashv ^= _sfh_key[sizeof (uint16_t)] << 18; \
467 hashv += hashv >> 11; \
469 case 2: hashv += get16bits (_sfh_key); \
470 hashv ^= hashv << 11; \
471 hashv += hashv >> 17; \
473 case 1: hashv += *_sfh_key; \
474 hashv ^= hashv << 10; \
475 hashv += hashv >> 1; \
478 /* Force "avalanching" of final 127 bits */ \
479 hashv ^= hashv << 3; \
480 hashv += hashv >> 5; \
481 hashv ^= hashv << 4; \
482 hashv += hashv >> 17; \
483 hashv ^= hashv << 25; \
484 hashv += hashv >> 6; \
485 bkt = hashv & (num_bkts-1); \
488 #ifdef HASH_USING_NO_STRICT_ALIASING
489 /* The MurmurHash exploits some CPU's (e.g. x86) tolerance for unaligned reads.
490 * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
491 * So MurmurHash comes in two versions, the faster unaligned one and the slower
492 * aligned one. We only use the faster one on CPU's where we know it's safe.
494 * Note the preprocessor built-in defines can be emitted using:
496 * gcc -m64 -dM -E - < /dev/null (on gcc)
497 * cc -## a.c (where a.c is a simple test file) (Sun Studio)
499 #if (defined(__i386__) || defined(__x86_64__))
500 #define HASH_MUR HASH_MUR_UNALIGNED
502 #define HASH_MUR HASH_MUR_ALIGNED
505 /* Appleby's MurmurHash fast version for unaligned-tolerant archs like i386 */
506 #define HASH_MUR_UNALIGNED(key,keylen,num_bkts,hashv,bkt) \
508 const unsigned int _mur_m = 0x5bd1e995; \
509 const int _mur_r = 24; \
510 hashv = 0xcafebabe ^ keylen; \
511 char *_mur_key = (char *)(key); \
512 uint32_t _mur_tmp, _mur_len = keylen; \
514 for (;_mur_len >= 4; _mur_len-=4) { \
515 _mur_tmp = *(uint32_t *)_mur_key; \
516 _mur_tmp *= _mur_m; \
517 _mur_tmp ^= _mur_tmp >> _mur_r; \
518 _mur_tmp *= _mur_m; \
526 case 3: hashv ^= _mur_key[2] << 16; \
527 case 2: hashv ^= _mur_key[1] << 8; \
528 case 1: hashv ^= _mur_key[0]; \
532 hashv ^= hashv >> 13; \
534 hashv ^= hashv >> 15; \
536 bkt = hashv & (num_bkts-1); \
539 /* Appleby's MurmurHash version for alignment-sensitive archs like Sparc */
540 #define HASH_MUR_ALIGNED(key,keylen,num_bkts,hashv,bkt) \
542 const unsigned int _mur_m = 0x5bd1e995; \
543 const int _mur_r = 24; \
544 hashv = 0xcafebabe ^ (keylen); \
545 char *_mur_key = (char *)(key); \
546 uint32_t _mur_len = keylen; \
547 int _mur_align = (int)_mur_key & 3; \
549 if (_mur_align && (_mur_len >= 4)) { \
550 unsigned _mur_t = 0, _mur_d = 0; \
551 switch(_mur_align) { \
552 case 1: _mur_t |= _mur_key[2] << 16; \
553 case 2: _mur_t |= _mur_key[1] << 8; \
554 case 3: _mur_t |= _mur_key[0]; \
556 _mur_t <<= (8 * _mur_align); \
557 _mur_key += 4-_mur_align; \
558 _mur_len -= 4-_mur_align; \
559 int _mur_sl = 8 * (4-_mur_align); \
560 int _mur_sr = 8 * _mur_align; \
562 for (;_mur_len >= 4; _mur_len-=4) { \
563 _mur_d = *(unsigned *)_mur_key; \
564 _mur_t = (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \
565 unsigned _mur_k = _mur_t; \
567 _mur_k ^= _mur_k >> _mur_r; \
575 if(_mur_len >= _mur_align) { \
576 switch(_mur_align) { \
577 case 3: _mur_d |= _mur_key[2] << 16; \
578 case 2: _mur_d |= _mur_key[1] << 8; \
579 case 1: _mur_d |= _mur_key[0]; \
581 unsigned _mur_k = (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \
583 _mur_k ^= _mur_k >> _mur_r; \
587 _mur_k += _mur_align; \
588 _mur_len -= _mur_align; \
592 case 3: hashv ^= _mur_key[2] << 16; \
593 case 2: hashv ^= _mur_key[1] << 8; \
594 case 1: hashv ^= _mur_key[0]; \
600 case 3: _mur_d ^= _mur_key[2] << 16; \
601 case 2: _mur_d ^= _mur_key[1] << 8; \
602 case 1: _mur_d ^= _mur_key[0]; \
603 case 0: hashv ^= (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \
608 hashv ^= hashv >> 13; \
610 hashv ^= hashv >> 15; \
612 for (;_mur_len >= 4; _mur_len-=4) { \
613 unsigned _mur_k = *(unsigned*)_mur_key; \
615 _mur_k ^= _mur_k >> _mur_r; \
623 case 3: hashv ^= _mur_key[2] << 16; \
624 case 2: hashv ^= _mur_key[1] << 8; \
625 case 1: hashv ^= _mur_key[0]; \
629 hashv ^= hashv >> 13; \
631 hashv ^= hashv >> 15; \
633 bkt = hashv & (num_bkts-1); \
635 #endif /* HASH_USING_NO_STRICT_ALIASING */
637 /* key comparison function; return 0 if keys equal */
638 #define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
640 /* iterate over items in a known bucket to find desired item */
641 #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
643 if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \
646 if (out->hh.keylen == keylen_in) { \
647 if ((HASH_KEYCMP(out->hh.key,keyptr,keylen_in)) == 0) break; \
649 if (out->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,out->hh.hh_next)); \
654 /* add an item to a bucket */
655 #define HASH_ADD_TO_BKT(head,addhh) \
658 (addhh)->hh_next = head.hh_head; \
659 (addhh)->hh_prev = NULL; \
660 if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \
661 (head).hh_head=addhh; \
662 if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \
663 && (addhh)->tbl->noexpand != 1) { \
664 HASH_EXPAND_BUCKETS((addhh)->tbl); \
668 /* remove an item from a given bucket */
669 #define HASH_DEL_IN_BKT(hh,head,hh_del) \
671 if ((head).hh_head == hh_del) { \
672 (head).hh_head = hh_del->hh_next; \
674 if (hh_del->hh_prev) { \
675 hh_del->hh_prev->hh_next = hh_del->hh_next; \
677 if (hh_del->hh_next) { \
678 hh_del->hh_next->hh_prev = hh_del->hh_prev; \
681 /* Bucket expansion has the effect of doubling the number of buckets
682 * and redistributing the items into the new buckets. Ideally the
683 * items will distribute more or less evenly into the new buckets
684 * (the extent to which this is true is a measure of the quality of
685 * the hash function as it applies to the key domain).
687 * With the items distributed into more buckets, the chain length
688 * (item count) in each bucket is reduced. Thus by expanding buckets
689 * the hash keeps a bound on the chain length. This bounded chain
690 * length is the essence of how a hash provides constant time lookup.
692 * The calculation of tbl->ideal_chain_maxlen below deserves some
693 * explanation. First, keep in mind that we're calculating the ideal
694 * maximum chain length based on the *new* (doubled) bucket count.
695 * In fractions this is just n/b (n=number of items,b=new num buckets).
696 * Since the ideal chain length is an integer, we want to calculate
697 * ceil(n/b). We don't depend on floating point arithmetic in this
698 * hash, so to calculate ceil(n/b) with integers we could write
700 * ceil(n/b) = (n/b) + ((n%b)?1:0)
702 * and in fact a previous version of this hash did just that.
703 * But now we have improved things a bit by recognizing that b is
704 * always a power of two. We keep its base 2 log handy (call it lb),
705 * so now we can write this with a bit shift and logical AND:
707 * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
710 #define HASH_EXPAND_BUCKETS(tbl) \
713 unsigned _he_bkt_i; \
714 struct UT_hash_handle *_he_thh, *_he_hh_nxt; \
715 UT_hash_bucket *_he_new_buckets, *_he_newbkt; \
716 _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \
717 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
718 if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \
719 memset(_he_new_buckets, 0, \
720 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
721 tbl->ideal_chain_maxlen = \
722 (tbl->num_items >> (tbl->log2_num_buckets+1)) + \
723 ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \
724 tbl->nonideal_items = 0; \
725 for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \
727 _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \
729 _he_hh_nxt = _he_thh->hh_next; \
730 HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \
731 _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \
732 if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \
733 tbl->nonideal_items++; \
734 _he_newbkt->expand_mult = _he_newbkt->count / \
735 tbl->ideal_chain_maxlen; \
737 _he_thh->hh_prev = NULL; \
738 _he_thh->hh_next = _he_newbkt->hh_head; \
739 if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \
741 _he_newbkt->hh_head = _he_thh; \
742 _he_thh = _he_hh_nxt; \
745 uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
746 tbl->num_buckets *= 2; \
747 tbl->log2_num_buckets++; \
748 tbl->buckets = _he_new_buckets; \
749 tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \
750 (tbl->ineff_expands+1) : 0; \
751 if (tbl->ineff_expands > 1) { \
753 uthash_noexpand_fyi(tbl); \
755 uthash_expand_fyi(tbl); \
759 /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
760 /* Note that HASH_SORT assumes the hash handle name to be hh.
761 * HASH_SRT was added to allow the hash handle name to be passed in. */
762 #define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
763 #define HASH_SRT(hh,head,cmpfcn) \
766 unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \
767 struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \
771 _hs_list = &((head)->hh); \
772 while (_hs_looping) { \
781 for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \
783 _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
784 ((void*)((char*)(_hs_q->next) + \
785 (head)->hh.tbl->hho)) : NULL); \
786 if (! (_hs_q) ) break; \
788 _hs_qsize = _hs_insize; \
789 while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \
790 if (_hs_psize == 0) { \
792 _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
793 ((void*)((char*)(_hs_q->next) + \
794 (head)->hh.tbl->hho)) : NULL); \
796 } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \
798 _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
799 ((void*)((char*)(_hs_p->next) + \
800 (head)->hh.tbl->hho)) : NULL); \
803 cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
804 DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
807 _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
808 ((void*)((char*)(_hs_p->next) + \
809 (head)->hh.tbl->hho)) : NULL); \
813 _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
814 ((void*)((char*)(_hs_q->next) + \
815 (head)->hh.tbl->hho)) : NULL); \
819 _hs_tail->next = ((_hs_e) ? \
820 ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \
824 _hs_e->prev = ((_hs_tail) ? \
825 ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \
830 _hs_tail->next = NULL; \
831 if ( _hs_nmerges <= 1 ) { \
833 (head)->hh.tbl->tail = _hs_tail; \
834 DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \
838 HASH_FSCK(hh,head); \
842 /* This function selects items from one hash into another hash.
843 * The end result is that the selected items have dual presence
844 * in both hashes. There is no copy of the items made; rather
845 * they are added into the new hash through a secondary hash
846 * hash handle that must be present in the structure. */
847 #define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
849 unsigned _src_bkt, _dst_bkt; \
850 void *_last_elt=NULL, *_elt; \
851 UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \
852 ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \
854 for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \
855 for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \
857 _src_hh = _src_hh->hh_next) { \
858 _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \
860 _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \
861 _dst_hh->key = _src_hh->key; \
862 _dst_hh->keylen = _src_hh->keylen; \
863 _dst_hh->hashv = _src_hh->hashv; \
864 _dst_hh->prev = _last_elt; \
865 _dst_hh->next = NULL; \
866 if (_last_elt_hh) { _last_elt_hh->next = _elt; } \
868 DECLTYPE_ASSIGN(dst,_elt); \
869 HASH_MAKE_TABLE(hh_dst,dst); \
871 _dst_hh->tbl = (dst)->hh_dst.tbl; \
873 HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \
874 HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \
875 (dst)->hh_dst.tbl->num_items++; \
877 _last_elt_hh = _dst_hh; \
882 HASH_FSCK(hh_dst,dst); \
885 #define HASH_CLEAR(hh,head) \
888 uthash_free((head)->hh.tbl->buckets, \
889 (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \
890 uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
896 #define HASH_ITER(hh,head,el,tmp) \
897 for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \
898 el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
900 #define HASH_ITER(hh,head,el,tmp) \
901 for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \
902 el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL))
905 /* obtain a count of items in the hash */
906 #define HASH_COUNT(head) HASH_CNT(hh,head)
907 #define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)
909 typedef struct UT_hash_bucket {
910 struct UT_hash_handle *hh_head;
913 /* expand_mult is normally set to 0. In this situation, the max chain length
914 * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
915 * the bucket's chain exceeds this length, bucket expansion is triggered).
916 * However, setting expand_mult to a non-zero value delays bucket expansion
917 * (that would be triggered by additions to this particular bucket)
918 * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
919 * (The multiplier is simply expand_mult+1). The whole idea of this
920 * multiplier is to reduce bucket expansions, since they are expensive, in
921 * situations where we know that a particular bucket tends to be overused.
922 * It is better to let its chain length grow to a longer yet-still-bounded
923 * value, than to do an O(n) bucket expansion too often.
925 unsigned expand_mult;
929 /* random signature used only to find hash tables in external analysis */
930 #define HASH_SIGNATURE 0xa0111fe1
931 #define HASH_BLOOM_SIGNATURE 0xb12220f2
933 typedef struct UT_hash_table {
934 UT_hash_bucket *buckets;
935 unsigned num_buckets, log2_num_buckets;
937 struct UT_hash_handle *tail; /* tail hh in app order, for fast append */
938 ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */
940 /* in an ideal situation (all buckets used equally), no bucket would have
941 * more than ceil(#items/#buckets) items. that's the ideal chain length. */
942 unsigned ideal_chain_maxlen;
944 /* nonideal_items is the number of items in the hash whose chain position
945 * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
946 * hash distribution; reaching them in a chain traversal takes >ideal steps */
947 unsigned nonideal_items;
949 /* ineffective expands occur when a bucket doubling was performed, but
950 * afterward, more than half the items in the hash had nonideal chain
951 * positions. If this happens on two consecutive expansions we inhibit any
952 * further expansion, as it's not helping; this happens when the hash
953 * function isn't a good fit for the key domain. When expansion is inhibited
954 * the hash will still work, albeit no longer in constant time. */
955 unsigned ineff_expands, noexpand;
957 uint32_t signature; /* used only to find hash tables in external analysis */
959 uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
966 typedef struct UT_hash_handle {
967 struct UT_hash_table *tbl;
968 void *prev; /* prev element in app order */
969 void *next; /* next element in app order */
970 struct UT_hash_handle *hh_prev; /* previous hh in bucket order */
971 struct UT_hash_handle *hh_next; /* next hh in bucket order */
972 void *key; /* ptr to enclosing struct's key */
973 unsigned keylen; /* enclosing struct's key len */
974 unsigned hashv; /* result of hash-fcn(key) */
977 #endif /* UTHASH_H */