2 * $Id: linkhash.c,v 1.4 2006/01/26 02:16:28 mclark Exp $
4 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
5 * Michael Clark <michael@metaparadigm.com>
6 * Copyright (c) 2009 Hewlett-Packard Development Company, L.P.
8 * This library is free software; you can redistribute it and/or modify
9 * it under the terms of the MIT license. See COPYING for details.
24 #include <endian.h> /* attempt to define endianness */
27 #if defined(_MSC_VER) || defined(__MINGW32__)
28 #define WIN32_LEAN_AND_MEAN
29 #include <windows.h> /* Get InterlockedCompareExchange */
33 #include "random_seed.h"
36 static unsigned long lh_char_hash(const void *k);
37 static unsigned long lh_perllike_str_hash(const void *k);
38 static lh_hash_fn *char_hash_fn = lh_char_hash;
40 /* comparison functions */
41 int lh_char_equal(const void *k1, const void *k2);
42 int lh_ptr_equal(const void *k1, const void *k2);
44 int json_global_set_string_hash(const int h)
48 case JSON_C_STR_HASH_DFLT: char_hash_fn = lh_char_hash; break;
49 case JSON_C_STR_HASH_PERLLIKE: char_hash_fn = lh_perllike_str_hash; break;
55 static unsigned long lh_ptr_hash(const void *k)
57 /* CAW: refactored to be 64bit nice */
58 return (unsigned long)((((ptrdiff_t)k * LH_PRIME) >> 4) & ULONG_MAX);
61 int lh_ptr_equal(const void *k1, const void *k2)
67 * hashlittle from lookup3.c, by Bob Jenkins, May 2006, Public Domain.
68 * http://burtleburtle.net/bob/c/lookup3.c
69 * minor modifications to make functions static so no symbols are exported
70 * minor mofifications to compile with -Werror
74 -------------------------------------------------------------------------------
75 lookup3.c, by Bob Jenkins, May 2006, Public Domain.
77 These are functions for producing 32-bit hashes for hash table lookup.
78 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
79 are externally useful functions. Routines to test the hash are included
80 if SELF_TEST is defined. You can use this free for any purpose. It's in
81 the public domain. It has no warranty.
83 You probably want to use hashlittle(). hashlittle() and hashbig()
84 hash byte arrays. hashlittle() is is faster than hashbig() on
85 little-endian machines. Intel and AMD are little-endian machines.
86 On second thought, you probably want hashlittle2(), which is identical to
87 hashlittle() except it returns two 32-bit hashes for the price of one.
88 You could implement hashbig2() if you wanted but I haven't bothered here.
90 If you want to find a hash of, say, exactly 7 integers, do
91 a = i1; b = i2; c = i3;
93 a += i4; b += i5; c += i6;
97 then use c as the hash value. If you have a variable length array of
98 4-byte integers to hash, use hashword(). If you have a byte array (like
99 a character string), use hashlittle(). If you have several byte arrays, or
100 a mix of things, see the comments above hashlittle().
102 Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
103 then mix those integers. This is fast (you can do a lot more thorough
104 mixing with 12*3 instructions on 3 integers than you can with 3 instructions
105 on 1 byte), but shoehorning those bytes into integers efficiently is messy.
106 -------------------------------------------------------------------------------
110 * My best guess at if you are big-endian or little-endian. This may
113 #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN) || \
114 (defined(i386) || defined(__i386__) || defined(__i486__) || defined(__i586__) || \
115 defined(__i686__) || defined(vax) || defined(MIPSEL))
116 #define HASH_LITTLE_ENDIAN 1
117 #define HASH_BIG_ENDIAN 0
118 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && __BYTE_ORDER == __BIG_ENDIAN) || \
119 (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
120 #define HASH_LITTLE_ENDIAN 0
121 #define HASH_BIG_ENDIAN 1
123 #define HASH_LITTLE_ENDIAN 0
124 #define HASH_BIG_ENDIAN 0
127 #define hashsize(n) ((uint32_t)1 << (n))
128 #define hashmask(n) (hashsize(n) - 1)
129 #define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
132 -------------------------------------------------------------------------------
133 mix -- mix 3 32-bit values reversibly.
135 This is reversible, so any information in (a,b,c) before mix() is
136 still in (a,b,c) after mix().
138 If four pairs of (a,b,c) inputs are run through mix(), or through
139 mix() in reverse, there are at least 32 bits of the output that
140 are sometimes the same for one pair and different for another pair.
142 * pairs that differed by one bit, by two bits, in any combination
143 of top bits of (a,b,c), or in any combination of bottom bits of
145 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
146 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
147 is commonly produced by subtraction) look like a single 1-bit
149 * the base values were pseudorandom, all zero but one bit set, or
150 all zero plus a counter that starts at zero.
152 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
157 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
158 for "differ" defined as + with a one-bit base and a two-bit delta. I
159 used http://burtleburtle.net/bob/hash/avalanche.html to choose
160 the operations, constants, and arrangements of the variables.
162 This does not achieve avalanche. There are input bits of (a,b,c)
163 that fail to affect some output bits of (a,b,c), especially of a. The
164 most thoroughly mixed value is c, but it doesn't really even achieve
167 This allows some parallelism. Read-after-writes are good at doubling
168 the number of bits affected, so the goal of mixing pulls in the opposite
169 direction as the goal of parallelism. I did what I could. Rotates
170 seem to cost as much as shifts on every machine I could lay my hands
171 on, and rotates are much kinder to the top and bottom bits, so I used
173 -------------------------------------------------------------------------------
175 /* clang-format off */
178 a -= c; a ^= rot(c, 4); c += b; \
179 b -= a; b ^= rot(a, 6); a += c; \
180 c -= b; c ^= rot(b, 8); b += a; \
181 a -= c; a ^= rot(c,16); c += b; \
182 b -= a; b ^= rot(a,19); a += c; \
183 c -= b; c ^= rot(b, 4); b += a; \
185 /* clang-format on */
188 -------------------------------------------------------------------------------
189 final -- final mixing of 3 32-bit values (a,b,c) into c
191 Pairs of (a,b,c) values differing in only a few bits will usually
192 produce values of c that look totally different. This was tested for
193 * pairs that differed by one bit, by two bits, in any combination
194 of top bits of (a,b,c), or in any combination of bottom bits of
196 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
197 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
198 is commonly produced by subtraction) look like a single 1-bit
200 * the base values were pseudorandom, all zero but one bit set, or
201 all zero plus a counter that starts at zero.
203 These constants passed:
206 and these came close:
210 -------------------------------------------------------------------------------
212 /* clang-format off */
213 #define final(a,b,c) \
215 c ^= b; c -= rot(b,14); \
216 a ^= c; a -= rot(c,11); \
217 b ^= a; b -= rot(a,25); \
218 c ^= b; c -= rot(b,16); \
219 a ^= c; a -= rot(c,4); \
220 b ^= a; b -= rot(a,14); \
221 c ^= b; c -= rot(b,24); \
223 /* clang-format on */
226 -------------------------------------------------------------------------------
227 hashlittle() -- hash a variable-length key into a 32-bit value
228 k : the key (the unaligned variable-length array of bytes)
229 length : the length of the key, counting by bytes
230 initval : can be any 4-byte value
231 Returns a 32-bit value. Every bit of the key affects every bit of
232 the return value. Two keys differing by one or two bits will have
233 totally different hash values.
235 The best hash table sizes are powers of 2. There is no need to do
236 mod a prime (mod is sooo slow!). If you need less than 32 bits,
237 use a bitmask. For example, if you need only 10 bits, do
238 h = (h & hashmask(10));
239 In which case, the hash table should have hashsize(10) elements.
241 If you are hashing n strings (uint8_t **)k, do it like this:
242 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
244 By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
245 code any way you wish, private, educational, or commercial. It's free.
247 Use for hash table lookup, or anything where one collision in 2^^32 is
248 acceptable. Do NOT use for cryptographic purposes.
249 -------------------------------------------------------------------------------
252 /* clang-format off */
253 static uint32_t hashlittle(const void *key, size_t length, uint32_t initval)
255 uint32_t a,b,c; /* internal state */
260 } u; /* needed for Mac Powerbook G4 */
262 /* Set up the internal state */
263 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
266 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
267 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
269 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
280 /*----------------------------- handle the last (probably partial) block */
282 * "k[2]&0xffffff" actually reads beyond the end of the string, but
283 * then masks off the part it's not allowed to read. Because the
284 * string is aligned, the masked-off tail is in the same word as the
285 * rest of the string. Every machine with memory protection I've seen
286 * does it on word boundaries, so is OK with this. But VALGRIND will
287 * still catch it and complain. The masking trick does make the hash
288 * noticably faster for short strings (like English words).
289 * AddressSanitizer is similarly picky about overrunning
290 * the buffer. (http://clang.llvm.org/docs/AddressSanitizer.html
293 #define PRECISE_MEMORY_ACCESS 1
294 #elif defined(__SANITIZE_ADDRESS__) /* GCC's ASAN */
295 #define PRECISE_MEMORY_ACCESS 1
296 #elif defined(__has_feature)
297 #if __has_feature(address_sanitizer) /* Clang's ASAN */
298 #define PRECISE_MEMORY_ACCESS 1
301 #ifndef PRECISE_MEMORY_ACCESS
305 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
306 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
307 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
308 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
309 case 8 : b+=k[1]; a+=k[0]; break;
310 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
311 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
312 case 5 : b+=k[1]&0xff; a+=k[0]; break;
313 case 4 : a+=k[0]; break;
314 case 3 : a+=k[0]&0xffffff; break;
315 case 2 : a+=k[0]&0xffff; break;
316 case 1 : a+=k[0]&0xff; break;
317 case 0 : return c; /* zero length strings require no mixing */
320 #else /* make valgrind happy */
322 const uint8_t *k8 = (const uint8_t *)k;
325 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
326 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
327 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
328 case 9 : c+=k8[8]; /* fall through */
329 case 8 : b+=k[1]; a+=k[0]; break;
330 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
331 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
332 case 5 : b+=k8[4]; /* fall through */
333 case 4 : a+=k[0]; break;
334 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
335 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
336 case 1 : a+=k8[0]; break;
340 #endif /* !valgrind */
343 else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0))
345 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
348 /*--------------- all but last block: aligned reads and different mixing */
351 a += k[0] + (((uint32_t)k[1])<<16);
352 b += k[2] + (((uint32_t)k[3])<<16);
353 c += k[4] + (((uint32_t)k[5])<<16);
359 /*----------------------------- handle the last (probably partial) block */
360 k8 = (const uint8_t *)k;
363 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
364 b+=k[2]+(((uint32_t)k[3])<<16);
365 a+=k[0]+(((uint32_t)k[1])<<16);
367 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
369 b+=k[2]+(((uint32_t)k[3])<<16);
370 a+=k[0]+(((uint32_t)k[1])<<16);
372 case 9 : c+=k8[8]; /* fall through */
373 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
374 a+=k[0]+(((uint32_t)k[1])<<16);
376 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
378 a+=k[0]+(((uint32_t)k[1])<<16);
380 case 5 : b+=k8[4]; /* fall through */
381 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
383 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
388 case 0 : return c; /* zero length requires no mixing */
394 /* need to read the key one byte at a time */
395 const uint8_t *k = (const uint8_t *)key;
397 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
401 a += ((uint32_t)k[1])<<8;
402 a += ((uint32_t)k[2])<<16;
403 a += ((uint32_t)k[3])<<24;
405 b += ((uint32_t)k[5])<<8;
406 b += ((uint32_t)k[6])<<16;
407 b += ((uint32_t)k[7])<<24;
409 c += ((uint32_t)k[9])<<8;
410 c += ((uint32_t)k[10])<<16;
411 c += ((uint32_t)k[11])<<24;
417 /*-------------------------------- last block: affect all 32 bits of (c) */
418 switch(length) /* all the case statements fall through */
420 case 12: c+=((uint32_t)k[11])<<24; /* FALLTHRU */
421 case 11: c+=((uint32_t)k[10])<<16; /* FALLTHRU */
422 case 10: c+=((uint32_t)k[9])<<8; /* FALLTHRU */
423 case 9 : c+=k[8]; /* FALLTHRU */
424 case 8 : b+=((uint32_t)k[7])<<24; /* FALLTHRU */
425 case 7 : b+=((uint32_t)k[6])<<16; /* FALLTHRU */
426 case 6 : b+=((uint32_t)k[5])<<8; /* FALLTHRU */
427 case 5 : b+=k[4]; /* FALLTHRU */
428 case 4 : a+=((uint32_t)k[3])<<24; /* FALLTHRU */
429 case 3 : a+=((uint32_t)k[2])<<16; /* FALLTHRU */
430 case 2 : a+=((uint32_t)k[1])<<8; /* FALLTHRU */
440 /* clang-format on */
442 /* a simple hash function similiar to what perl does for strings.
443 * for good results, the string should not be excessivly large.
445 static unsigned long lh_perllike_str_hash(const void *k)
447 const char *rkey = (const char *)k;
448 unsigned hashval = 1;
451 hashval = hashval * 33 + *rkey++;
456 static unsigned long lh_char_hash(const void *k)
458 #if defined _MSC_VER || defined __MINGW32__
459 #define RANDOM_SEED_TYPE LONG
461 #define RANDOM_SEED_TYPE int
463 static volatile RANDOM_SEED_TYPE random_seed = -1;
465 if (random_seed == -1)
467 RANDOM_SEED_TYPE seed;
468 /* we can't use -1 as it is the unitialized sentinel */
469 while ((seed = json_c_get_random_seed()) == -1) {}
470 #if SIZEOF_INT == 8 && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
471 #define USE_SYNC_COMPARE_AND_SWAP 1
473 #if SIZEOF_INT == 4 && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
474 #define USE_SYNC_COMPARE_AND_SWAP 1
476 #if SIZEOF_INT == 2 && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
477 #define USE_SYNC_COMPARE_AND_SWAP 1
479 #if defined USE_SYNC_COMPARE_AND_SWAP
480 (void)__sync_val_compare_and_swap(&random_seed, -1, seed);
481 #elif defined _MSC_VER || defined __MINGW32__
482 InterlockedCompareExchange(&random_seed, seed, -1);
484 //#warning "racy random seed initializtion if used by multiple threads"
485 random_seed = seed; /* potentially racy */
489 return hashlittle((const char *)k, strlen((const char *)k), random_seed);
492 int lh_char_equal(const void *k1, const void *k2)
494 return (strcmp((const char *)k1, (const char *)k2) == 0);
497 struct lh_table *lh_table_new(int size, lh_entry_free_fn *free_fn, lh_hash_fn *hash_fn,
498 lh_equal_fn *equal_fn)
503 /* Allocate space for elements to avoid divisions by zero. */
505 t = (struct lh_table *)calloc(1, sizeof(struct lh_table));
511 t->table = (struct lh_entry *)calloc(size, sizeof(struct lh_entry));
517 t->free_fn = free_fn;
518 t->hash_fn = hash_fn;
519 t->equal_fn = equal_fn;
520 for (i = 0; i < size; i++)
521 t->table[i].k = LH_EMPTY;
525 struct lh_table *lh_kchar_table_new(int size, lh_entry_free_fn *free_fn)
527 return lh_table_new(size, free_fn, char_hash_fn, lh_char_equal);
530 struct lh_table *lh_kptr_table_new(int size, lh_entry_free_fn *free_fn)
532 return lh_table_new(size, free_fn, lh_ptr_hash, lh_ptr_equal);
535 int lh_table_resize(struct lh_table *t, int new_size)
537 struct lh_table *new_t;
538 struct lh_entry *ent;
540 new_t = lh_table_new(new_size, NULL, t->hash_fn, t->equal_fn);
544 for (ent = t->head; ent != NULL; ent = ent->next)
546 unsigned long h = lh_get_hash(new_t, ent->k);
547 unsigned int opts = 0;
548 if (ent->k_is_constant)
549 opts = JSON_C_OBJECT_KEY_IS_CONSTANT;
550 if (lh_table_insert_w_hash(new_t, ent->k, ent->v, h, opts) != 0)
552 lh_table_free(new_t);
557 t->table = new_t->table;
559 t->head = new_t->head;
560 t->tail = new_t->tail;
566 void lh_table_free(struct lh_table *t)
571 for (c = t->head; c != NULL; c = c->next)
578 int lh_table_insert_w_hash(struct lh_table *t, const void *k, const void *v, const unsigned long h,
583 if (t->count >= t->size * LH_LOAD_FACTOR)
585 /* Avoid signed integer overflow with large tables. */
586 int new_size = (t->size > INT_MAX / 2) ? INT_MAX : (t->size * 2);
587 if (t->size == INT_MAX || lh_table_resize(t, new_size) != 0)
595 if (t->table[n].k == LH_EMPTY || t->table[n].k == LH_FREED)
597 if ((int)++n == t->size)
602 t->table[n].k_is_constant = (opts & JSON_C_OBJECT_KEY_IS_CONSTANT);
608 t->head = t->tail = &t->table[n];
609 t->table[n].next = t->table[n].prev = NULL;
613 t->tail->next = &t->table[n];
614 t->table[n].prev = t->tail;
615 t->table[n].next = NULL;
616 t->tail = &t->table[n];
621 int lh_table_insert(struct lh_table *t, const void *k, const void *v)
623 return lh_table_insert_w_hash(t, k, v, lh_get_hash(t, k), 0);
626 struct lh_entry *lh_table_lookup_entry_w_hash(struct lh_table *t, const void *k,
627 const unsigned long h)
629 unsigned long n = h % t->size;
632 while (count < t->size)
634 if (t->table[n].k == LH_EMPTY)
636 if (t->table[n].k != LH_FREED && t->equal_fn(t->table[n].k, k))
638 if ((int)++n == t->size)
645 struct lh_entry *lh_table_lookup_entry(struct lh_table *t, const void *k)
647 return lh_table_lookup_entry_w_hash(t, k, lh_get_hash(t, k));
650 json_bool lh_table_lookup_ex(struct lh_table *t, const void *k, void **v)
652 struct lh_entry *e = lh_table_lookup_entry(t, k);
657 return 1; /* key found */
661 return 0; /* key not found */
664 int lh_table_delete_entry(struct lh_table *t, struct lh_entry *e)
666 /* CAW: fixed to be 64bit nice, still need the crazy negative case... */
667 ptrdiff_t n = (ptrdiff_t)(e - t->table);
669 /* CAW: this is bad, really bad, maybe stack goes other direction on this machine... */
675 if (t->table[n].k == LH_EMPTY || t->table[n].k == LH_FREED)
680 t->table[n].v = NULL;
681 t->table[n].k = LH_FREED;
682 if (t->tail == &t->table[n] && t->head == &t->table[n])
684 t->head = t->tail = NULL;
686 else if (t->head == &t->table[n])
688 t->head->next->prev = NULL;
689 t->head = t->head->next;
691 else if (t->tail == &t->table[n])
693 t->tail->prev->next = NULL;
694 t->tail = t->tail->prev;
698 t->table[n].prev->next = t->table[n].next;
699 t->table[n].next->prev = t->table[n].prev;
701 t->table[n].next = t->table[n].prev = NULL;
705 int lh_table_delete(struct lh_table *t, const void *k)
707 struct lh_entry *e = lh_table_lookup_entry(t, k);
710 return lh_table_delete_entry(t, e);
713 int lh_table_length(struct lh_table *t)