2 * $Id: linkhash.c,v 1.4 2006/01/26 02:16:28 mclark Exp $
4 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
5 * Michael Clark <michael@metaparadigm.com>
6 * Copyright (c) 2009 Hewlett-Packard Development Company, L.P.
8 * This library is free software; you can redistribute it and/or modify
9 * it under the terms of the MIT license. See COPYING for details.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <limits.h>

#ifdef HAVE_ENDIAN_H
# include <endian.h> /* attempt to define endianness */
#endif

#if defined(_MSC_VER) || defined(__MINGW32__)
# define WIN32_LEAN_AND_MEAN
# include <windows.h> /* Get InterlockedCompareExchange */
#endif

#include "random_seed.h"
#include "linkhash.h"
35 static unsigned long lh_char_hash(const void *k);
36 static unsigned long lh_perllike_str_hash(const void *k);
37 static lh_hash_fn *char_hash_fn = lh_char_hash;
40 json_global_set_string_hash(const int h)
43 case JSON_C_STR_HASH_DFLT:
44 char_hash_fn = lh_char_hash;
46 case JSON_C_STR_HASH_PERLLIKE:
47 char_hash_fn = lh_perllike_str_hash;
55 void lh_abort(const char *msg, ...)
64 static unsigned long lh_ptr_hash(const void *k)
66 /* CAW: refactored to be 64bit nice */
67 return (unsigned long)((((ptrdiff_t)k * LH_PRIME) >> 4) & ULONG_MAX);
70 int lh_ptr_equal(const void *k1, const void *k2)
76 * hashlittle from lookup3.c, by Bob Jenkins, May 2006, Public Domain.
77 * http://burtleburtle.net/bob/c/lookup3.c
78 * minor modifications to make functions static so no symbols are exported
 * minor modifications to compile with -Werror
83 -------------------------------------------------------------------------------
84 lookup3.c, by Bob Jenkins, May 2006, Public Domain.
86 These are functions for producing 32-bit hashes for hash table lookup.
87 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
88 are externally useful functions. Routines to test the hash are included
89 if SELF_TEST is defined. You can use this free for any purpose. It's in
90 the public domain. It has no warranty.
92 You probably want to use hashlittle(). hashlittle() and hashbig()
hash byte arrays.  hashlittle() is faster than hashbig() on
94 little-endian machines. Intel and AMD are little-endian machines.
95 On second thought, you probably want hashlittle2(), which is identical to
96 hashlittle() except it returns two 32-bit hashes for the price of one.
97 You could implement hashbig2() if you wanted but I haven't bothered here.
99 If you want to find a hash of, say, exactly 7 integers, do
100 a = i1; b = i2; c = i3;
102 a += i4; b += i5; c += i6;
106 then use c as the hash value. If you have a variable length array of
107 4-byte integers to hash, use hashword(). If you have a byte array (like
108 a character string), use hashlittle(). If you have several byte arrays, or
109 a mix of things, see the comments above hashlittle().
111 Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
112 then mix those integers. This is fast (you can do a lot more thorough
113 mixing with 12*3 instructions on 3 integers than you can with 3 instructions
114 on 1 byte), but shoehorning those bytes into integers efficiently is messy.
115 -------------------------------------------------------------------------------
/*
 * My best guess at if you are big-endian or little-endian.  This may
 * need adjustment.
 */
#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
     __BYTE_ORDER == __LITTLE_ENDIAN) || \
    (defined(i386) || defined(__i386__) || defined(__i486__) || \
     defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
# define HASH_LITTLE_ENDIAN 1
# define HASH_BIG_ENDIAN 0
#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
       __BYTE_ORDER == __BIG_ENDIAN) || \
      (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 1
#else
/* Unknown byte order: hashlittle() falls back to byte-at-a-time reads. */
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 0
#endif
/* hashsize(n): size of a table addressed by n hash bits (2^n).
 * hashmask(n): mask reducing a 32-bit hash to that range.
 * rot(x,k):   rotate the 32-bit value x left by k bits (0 < k < 32). */
#define hashsize(n) ((uint32_t)1<<(n))
#define hashmask(n) (hashsize(n)-1)
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
143 -------------------------------------------------------------------------------
144 mix -- mix 3 32-bit values reversibly.
146 This is reversible, so any information in (a,b,c) before mix() is
147 still in (a,b,c) after mix().
149 If four pairs of (a,b,c) inputs are run through mix(), or through
150 mix() in reverse, there are at least 32 bits of the output that
151 are sometimes the same for one pair and different for another pair.
153 * pairs that differed by one bit, by two bits, in any combination
154 of top bits of (a,b,c), or in any combination of bottom bits of
156 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
157 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
158 is commonly produced by subtraction) look like a single 1-bit
160 * the base values were pseudorandom, all zero but one bit set, or
161 all zero plus a counter that starts at zero.
163 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
168 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
169 for "differ" defined as + with a one-bit base and a two-bit delta. I
170 used http://burtleburtle.net/bob/hash/avalanche.html to choose
171 the operations, constants, and arrangements of the variables.
173 This does not achieve avalanche. There are input bits of (a,b,c)
174 that fail to affect some output bits of (a,b,c), especially of a. The
175 most thoroughly mixed value is c, but it doesn't really even achieve
178 This allows some parallelism. Read-after-writes are good at doubling
179 the number of bits affected, so the goal of mixing pulls in the opposite
180 direction as the goal of parallelism. I did what I could. Rotates
181 seem to cost as much as shifts on every machine I could lay my hands
182 on, and rotates are much kinder to the top and bottom bits, so I used
184 -------------------------------------------------------------------------------
/* mix -- mix 3 32-bit values reversibly (Bob Jenkins, lookup3.c).
 * Any information in (a,b,c) before mix() is still in (a,b,c) after it. */
#define mix(a,b,c) \
{ \
	a -= c;  a ^= rot(c, 4);  c += b; \
	b -= a;  b ^= rot(a, 6);  a += c; \
	c -= b;  c ^= rot(b, 8);  b += a; \
	a -= c;  a ^= rot(c,16);  c += b; \
	b -= a;  b ^= rot(a,19);  a += c; \
	c -= b;  c ^= rot(b, 4);  b += a; \
}
197 -------------------------------------------------------------------------------
198 final -- final mixing of 3 32-bit values (a,b,c) into c
200 Pairs of (a,b,c) values differing in only a few bits will usually
201 produce values of c that look totally different. This was tested for
202 * pairs that differed by one bit, by two bits, in any combination
203 of top bits of (a,b,c), or in any combination of bottom bits of
205 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
206 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
207 is commonly produced by subtraction) look like a single 1-bit
209 * the base values were pseudorandom, all zero but one bit set, or
210 all zero plus a counter that starts at zero.
212 These constants passed:
215 and these came close:
219 -------------------------------------------------------------------------------
/* final -- final mixing of 3 32-bit values (a,b,c) into c (Bob Jenkins,
 * lookup3.c).  Pairs of inputs differing in only a few bits usually yield
 * values of c that look totally different. */
#define final(a,b,c) \
{ \
	c ^= b; c -= rot(b,14); \
	a ^= c; a -= rot(c,11); \
	b ^= a; b -= rot(a,25); \
	c ^= b; c -= rot(b,16); \
	a ^= c; a -= rot(c,4);  \
	b ^= a; b -= rot(a,14); \
	c ^= b; c -= rot(b,24); \
}
234 -------------------------------------------------------------------------------
235 hashlittle() -- hash a variable-length key into a 32-bit value
236 k : the key (the unaligned variable-length array of bytes)
237 length : the length of the key, counting by bytes
238 initval : can be any 4-byte value
239 Returns a 32-bit value. Every bit of the key affects every bit of
240 the return value. Two keys differing by one or two bits will have
241 totally different hash values.
243 The best hash table sizes are powers of 2. There is no need to do
244 mod a prime (mod is sooo slow!). If you need less than 32 bits,
245 use a bitmask. For example, if you need only 10 bits, do
246 h = (h & hashmask(10));
247 In which case, the hash table should have hashsize(10) elements.
249 If you are hashing n strings (uint8_t **)k, do it like this:
250 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
252 By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
253 code any way you wish, private, educational, or commercial. It's free.
255 Use for hash table lookup, or anything where one collision in 2^^32 is
256 acceptable. Do NOT use for cryptographic purposes.
257 -------------------------------------------------------------------------------
261 /* Prevent ASan bug report */
263 __attribute__((no_sanitize_address))
265 static uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
267 uint32_t a,b,c; /* internal state */
268 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
270 /* Set up the internal state */
271 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
274 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
275 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
277 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
288 /*----------------------------- handle the last (probably partial) block */
290 * "k[2]&0xffffff" actually reads beyond the end of the string, but
291 * then masks off the part it's not allowed to read. Because the
292 * string is aligned, the masked-off tail is in the same word as the
293 * rest of the string. Every machine with memory protection I've seen
294 * does it on word boundaries, so is OK with this. But VALGRIND will
295 * still catch it and complain. The masking trick does make the hash
296 * noticably faster for short strings (like English words).
297 * AddressSanitizer is similarly picky about overrunning
298 * the buffer. (http://clang.llvm.org/docs/AddressSanitizer.html
301 # define PRECISE_MEMORY_ACCESS 1
302 #elif defined(__SANITIZE_ADDRESS__) /* GCC's ASAN */
303 # define PRECISE_MEMORY_ACCESS 1
304 #elif defined(__has_feature)
305 # if __has_feature(address_sanitizer) /* Clang's ASAN */
306 # define PRECISE_MEMORY_ACCESS 1
309 #ifndef PRECISE_MEMORY_ACCESS
313 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
314 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
315 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
316 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
317 case 8 : b+=k[1]; a+=k[0]; break;
318 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
319 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
320 case 5 : b+=k[1]&0xff; a+=k[0]; break;
321 case 4 : a+=k[0]; break;
322 case 3 : a+=k[0]&0xffffff; break;
323 case 2 : a+=k[0]&0xffff; break;
324 case 1 : a+=k[0]&0xff; break;
325 case 0 : return c; /* zero length strings require no mixing */
328 #else /* make valgrind happy */
330 const uint8_t *k8 = (const uint8_t *)k;
333 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
334 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
335 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
336 case 9 : c+=k8[8]; /* fall through */
337 case 8 : b+=k[1]; a+=k[0]; break;
338 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
339 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
340 case 5 : b+=k8[4]; /* fall through */
341 case 4 : a+=k[0]; break;
342 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
343 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
344 case 1 : a+=k8[0]; break;
348 #endif /* !valgrind */
350 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
351 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
354 /*--------------- all but last block: aligned reads and different mixing */
357 a += k[0] + (((uint32_t)k[1])<<16);
358 b += k[2] + (((uint32_t)k[3])<<16);
359 c += k[4] + (((uint32_t)k[5])<<16);
365 /*----------------------------- handle the last (probably partial) block */
366 k8 = (const uint8_t *)k;
369 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
370 b+=k[2]+(((uint32_t)k[3])<<16);
371 a+=k[0]+(((uint32_t)k[1])<<16);
373 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
375 b+=k[2]+(((uint32_t)k[3])<<16);
376 a+=k[0]+(((uint32_t)k[1])<<16);
378 case 9 : c+=k8[8]; /* fall through */
379 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
380 a+=k[0]+(((uint32_t)k[1])<<16);
382 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
384 a+=k[0]+(((uint32_t)k[1])<<16);
386 case 5 : b+=k8[4]; /* fall through */
387 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
389 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
394 case 0 : return c; /* zero length requires no mixing */
397 } else { /* need to read the key one byte at a time */
398 const uint8_t *k = (const uint8_t *)key;
400 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
404 a += ((uint32_t)k[1])<<8;
405 a += ((uint32_t)k[2])<<16;
406 a += ((uint32_t)k[3])<<24;
408 b += ((uint32_t)k[5])<<8;
409 b += ((uint32_t)k[6])<<16;
410 b += ((uint32_t)k[7])<<24;
412 c += ((uint32_t)k[9])<<8;
413 c += ((uint32_t)k[10])<<16;
414 c += ((uint32_t)k[11])<<24;
420 /*-------------------------------- last block: affect all 32 bits of (c) */
421 switch(length) /* all the case statements fall through */
423 case 12: c+=((uint32_t)k[11])<<24; /* FALLTHRU */
424 case 11: c+=((uint32_t)k[10])<<16; /* FALLTHRU */
425 case 10: c+=((uint32_t)k[9])<<8; /* FALLTHRU */
426 case 9 : c+=k[8]; /* FALLTHRU */
427 case 8 : b+=((uint32_t)k[7])<<24; /* FALLTHRU */
428 case 7 : b+=((uint32_t)k[6])<<16; /* FALLTHRU */
429 case 6 : b+=((uint32_t)k[5])<<8; /* FALLTHRU */
430 case 5 : b+=k[4]; /* FALLTHRU */
431 case 4 : a+=((uint32_t)k[3])<<24; /* FALLTHRU */
432 case 3 : a+=((uint32_t)k[2])<<16; /* FALLTHRU */
433 case 2 : a+=((uint32_t)k[1])<<8; /* FALLTHRU */
/* a simple hash function similar to what perl does for strings.
 * for good results, the string should not be excessively large.
 */
/* Perl-style string hash: h = h*33 + c over each byte of the
 * NUL-terminated key.  Starts at 1 so "" does not hash to 0. */
static unsigned long lh_perllike_str_hash(const void *k)
{
	const char *rkey = (const char *)k;
	unsigned hashval = 1;

	while (*rkey)
		hashval = hashval * 33 + *rkey++;

	return hashval;
}
/* Default string hash: hashlittle() over the NUL-terminated key, seeded
 * with a process-wide random value (defends against hash-flooding). */
static unsigned long lh_char_hash(const void *k)
{
#if defined _MSC_VER || defined __MINGW32__
#define RANDOM_SEED_TYPE LONG
#else
#define RANDOM_SEED_TYPE int
#endif
	static volatile RANDOM_SEED_TYPE random_seed = -1;

	if (random_seed == -1) {
		RANDOM_SEED_TYPE seed;
		/* we can't use -1 as it is the uninitialized sentinel */
		while ((seed = json_c_get_random_seed()) == -1);
#if SIZEOF_INT == 8 && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
#define USE_SYNC_COMPARE_AND_SWAP 1
#endif
#if SIZEOF_INT == 4 && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
#define USE_SYNC_COMPARE_AND_SWAP 1
#endif
#if SIZEOF_INT == 2 && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
#define USE_SYNC_COMPARE_AND_SWAP 1
#endif
#if defined USE_SYNC_COMPARE_AND_SWAP
		/* Atomic CAS: only the first thread's seed wins. */
		(void)__sync_val_compare_and_swap(&random_seed, -1, seed);
#elif defined _MSC_VER || defined __MINGW32__
		InterlockedCompareExchange(&random_seed, seed, -1);
#else
		//#warning "racy random seed initialization if used by multiple threads"
		random_seed = seed; /* potentially racy */
#endif
	}

	return hashlittle((const char*)k, strlen((const char*)k), random_seed);
}
/* String keys are equal iff their contents compare equal. */
int lh_char_equal(const void *k1, const void *k2)
{
	return (strcmp((const char*)k1, (const char*)k2) == 0);
}
498 struct lh_table* lh_table_new(int size,
499 lh_entry_free_fn *free_fn,
501 lh_equal_fn *equal_fn)
506 t = (struct lh_table*)calloc(1, sizeof(struct lh_table));
512 t->table = (struct lh_entry*)calloc(size, sizeof(struct lh_entry));
518 t->free_fn = free_fn;
519 t->hash_fn = hash_fn;
520 t->equal_fn = equal_fn;
521 for(i = 0; i < size; i++) t->table[i].k = LH_EMPTY;
525 struct lh_table* lh_kchar_table_new(int size,
526 lh_entry_free_fn *free_fn)
528 return lh_table_new(size, free_fn, char_hash_fn, lh_char_equal);
531 struct lh_table* lh_kptr_table_new(int size,
532 lh_entry_free_fn *free_fn)
534 return lh_table_new(size, free_fn, lh_ptr_hash, lh_ptr_equal);
537 int lh_table_resize(struct lh_table *t, int new_size)
539 struct lh_table *new_t;
540 struct lh_entry *ent;
542 new_t = lh_table_new(new_size, NULL, t->hash_fn, t->equal_fn);
546 for (ent = t->head; ent != NULL; ent = ent->next)
548 unsigned long h = lh_get_hash(new_t, ent->k);
549 unsigned int opts = 0;
550 if (ent->k_is_constant)
551 opts = JSON_C_OBJECT_KEY_IS_CONSTANT;
552 if (lh_table_insert_w_hash(new_t, ent->k, ent->v, h, opts) != 0)
554 lh_table_free(new_t);
559 t->table = new_t->table;
561 t->head = new_t->head;
562 t->tail = new_t->tail;
568 void lh_table_free(struct lh_table *t)
572 for(c = t->head; c != NULL; c = c->next)
580 int lh_table_insert_w_hash(struct lh_table *t, const void *k, const void *v, const unsigned long h, const unsigned opts)
584 if (t->count >= t->size * LH_LOAD_FACTOR)
585 if (lh_table_resize(t, t->size * 2) != 0)
591 if(t->table[n].k == LH_EMPTY || t->table[n].k == LH_FREED) break;
592 if ((int)++n == t->size) n = 0;
596 t->table[n].k_is_constant = (opts & JSON_C_OBJECT_KEY_IS_CONSTANT);
600 if(t->head == NULL) {
601 t->head = t->tail = &t->table[n];
602 t->table[n].next = t->table[n].prev = NULL;
604 t->tail->next = &t->table[n];
605 t->table[n].prev = t->tail;
606 t->table[n].next = NULL;
607 t->tail = &t->table[n];
/* Insert (k, v), computing the hash with the table's hash_fn. */
int lh_table_insert(struct lh_table *t, const void *k, const void *v)
{
	return lh_table_insert_w_hash(t, k, v, lh_get_hash(t, k), 0);
}
618 struct lh_entry* lh_table_lookup_entry_w_hash(struct lh_table *t, const void *k, const unsigned long h)
620 unsigned long n = h % t->size;
623 while( count < t->size ) {
624 if(t->table[n].k == LH_EMPTY) return NULL;
625 if(t->table[n].k != LH_FREED &&
626 t->equal_fn(t->table[n].k, k)) return &t->table[n];
627 if ((int)++n == t->size) n = 0;
/* Find the entry for key k, computing the hash with the table's hash_fn. */
struct lh_entry* lh_table_lookup_entry(struct lh_table *t, const void *k)
{
	return lh_table_lookup_entry_w_hash(t, k, lh_get_hash(t, k));
}
/* Return the value stored for key k, or NULL if absent.
 * NOTE: a stored NULL value is indistinguishable from a missing key;
 * prefer lh_table_lookup_ex() to tell them apart. */
const void* lh_table_lookup(struct lh_table *t, const void *k)
{
	void *result;
	lh_table_lookup_ex(t, k, &result);
	return result;
}
645 json_bool lh_table_lookup_ex(struct lh_table* t, const void* k, void **v)
647 struct lh_entry *e = lh_table_lookup_entry(t, k);
649 if (v != NULL) *v = lh_entry_v(e);
650 return TRUE; /* key found */
652 if (v != NULL) *v = NULL;
653 return FALSE; /* key not found */
656 int lh_table_delete_entry(struct lh_table *t, struct lh_entry *e)
658 ptrdiff_t n = (ptrdiff_t)(e - t->table); /* CAW: fixed to be 64bit nice, still need the crazy negative case... */
660 /* CAW: this is bad, really bad, maybe stack goes other direction on this machine... */
661 if(n < 0) { return -2; }
663 if(t->table[n].k == LH_EMPTY || t->table[n].k == LH_FREED) return -1;
665 if(t->free_fn) t->free_fn(e);
666 t->table[n].v = NULL;
667 t->table[n].k = LH_FREED;
668 if(t->tail == &t->table[n] && t->head == &t->table[n]) {
669 t->head = t->tail = NULL;
670 } else if (t->head == &t->table[n]) {
671 t->head->next->prev = NULL;
672 t->head = t->head->next;
673 } else if (t->tail == &t->table[n]) {
674 t->tail->prev->next = NULL;
675 t->tail = t->tail->prev;
677 t->table[n].prev->next = t->table[n].next;
678 t->table[n].next->prev = t->table[n].prev;
680 t->table[n].next = t->table[n].prev = NULL;
685 int lh_table_delete(struct lh_table *t, const void *k)
687 struct lh_entry *e = lh_table_lookup_entry(t, k);
689 return lh_table_delete_entry(t, e);
692 int lh_table_length(struct lh_table *t)