1 /* An expandable hash tables datatype.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3 Contributed by Vladimir Makarov (vmakarov@cygnus.com).
5 This file is part of the libiberty library.
6 Libiberty is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version.
11 Libiberty is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with libiberty; see the file COPYING.LIB. If
18 not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 /* This package implements basic hash table functionality. It is possible
22 to search for an entry, create an entry and destroy an entry.
24 Elements in the table are generic pointers.
26 The size of the table is not fixed; if the occupancy of the table
27 grows too high the hash table will be expanded.
29 The abstract data implementation is based on generalized Algorithm D
30 from Knuth's book "The art of computer programming". Hash table is
31 expanded by creation of new hash table and transferring elements from
32 the old table to the new table. */
38 #include <sys/types.h>
54 #include "libiberty.h"
57 /* This macro defines reserved value for empty table entry. */
59 #define EMPTY_ENTRY ((PTR) 0)
61 /* This macro defines reserved value for table entry which contained
64 #define DELETED_ENTRY ((PTR) 1)
66 static unsigned long higher_prime_number PARAMS ((unsigned long));
67 static hashval_t hash_pointer PARAMS ((const void *));
68 static int eq_pointer PARAMS ((const void *, const void *));
69 static int htab_expand PARAMS ((htab_t));
70 static PTR *find_empty_slot_for_expand PARAMS ((htab_t, hashval_t));
72 /* At some point, we could make these be NULL, and modify the
73 hash-table routines to handle NULL specially; that would avoid
74 function-call overhead for the common case of hashing pointers. */
75 htab_hash htab_hash_pointer = hash_pointer;
76 htab_eq htab_eq_pointer = eq_pointer;
78 /* The following function returns a nearest prime number which is
79 greater than N, and near a power of two. */
82 higher_prime_number (n)
85 /* These are primes that are near, but slightly smaller than, a
87 static const unsigned long primes[] = {
99 (unsigned long) 16381,
100 (unsigned long) 32749,
101 (unsigned long) 65521,
102 (unsigned long) 131071,
103 (unsigned long) 262139,
104 (unsigned long) 524287,
105 (unsigned long) 1048573,
106 (unsigned long) 2097143,
107 (unsigned long) 4194301,
108 (unsigned long) 8388593,
109 (unsigned long) 16777213,
110 (unsigned long) 33554393,
111 (unsigned long) 67108859,
112 (unsigned long) 134217689,
113 (unsigned long) 268435399,
114 (unsigned long) 536870909,
115 (unsigned long) 1073741789,
116 (unsigned long) 2147483647,
118 ((unsigned long) 2147483647) + ((unsigned long) 2147483644),
121 const unsigned long *low = &primes[0];
122 const unsigned long *high = &primes[sizeof(primes) / sizeof(primes[0])];
126 const unsigned long *mid = low + (high - low) / 2;
133 /* If we've run out of primes, abort. */
136 fprintf (stderr, "Cannot find prime bigger than %lu\n", n);
143 /* Returns a hash code for P. */
149 return (hashval_t) ((long)p >> 3);
152 /* Returns non-zero if P1 and P2 are equal. */
162 /* Return the current size of given hash table. */
171 /* Return the current number of elements in given hash table. */
177 return htab->n_elements - htab->n_deleted;
180 /* Compute the primary hash for HASH given HTAB's current size. */
182 static inline hashval_t
183 htab_mod (hash, htab)
187 return hash % htab_size (htab);
190 /* Compute the secondary hash for HASH given HTAB's current size. */
192 static inline hashval_t
193 htab_mod_m2 (hash, htab)
197 return 1 + hash % (htab_size (htab) - 2);
200 /* This function creates table with length slightly longer than given
201 source length. Created hash table is initiated as empty (all the
202 hash table entries are EMPTY_ENTRY). The function returns the
203 created hash table, or NULL if memory allocation fails. */
206 htab_create_alloc (size, hash_f, eq_f, del_f, alloc_f, free_f)
216 size = higher_prime_number (size);
217 result = (htab_t) (*alloc_f) (1, sizeof (struct htab));
220 result->entries = (PTR *) (*alloc_f) (size, sizeof (PTR));
221 if (result->entries == NULL)
228 result->hash_f = hash_f;
230 result->del_f = del_f;
231 result->alloc_f = alloc_f;
232 result->free_f = free_f;
236 /* As above, but use the variants of alloc_f and free_f which accept
237 an extra argument. */
240 htab_create_alloc_ex (size, hash_f, eq_f, del_f, alloc_arg, alloc_f,
247 htab_alloc_with_arg alloc_f;
248 htab_free_with_arg free_f;
252 size = higher_prime_number (size);
253 result = (htab_t) (*alloc_f) (alloc_arg, 1, sizeof (struct htab));
256 result->entries = (PTR *) (*alloc_f) (alloc_arg, size, sizeof (PTR));
257 if (result->entries == NULL)
260 (*free_f) (alloc_arg, result);
264 result->hash_f = hash_f;
266 result->del_f = del_f;
267 result->alloc_arg = alloc_arg;
268 result->alloc_with_arg_f = alloc_f;
269 result->free_with_arg_f = free_f;
273 /* Update the function pointers and allocation parameter in the htab_t. */
276 htab_set_functions_ex (htab, hash_f, eq_f, del_f, alloc_arg, alloc_f, free_f)
282 htab_alloc_with_arg alloc_f;
283 htab_free_with_arg free_f;
285 htab->hash_f = hash_f;
288 htab->alloc_arg = alloc_arg;
289 htab->alloc_with_arg_f = alloc_f;
290 htab->free_with_arg_f = free_f;
293 /* These functions exist solely for backward compatibility. */
297 htab_create (size, hash_f, eq_f, del_f)
303 return htab_create_alloc (size, hash_f, eq_f, del_f, xcalloc, free);
307 htab_try_create (size, hash_f, eq_f, del_f)
313 return htab_create_alloc (size, hash_f, eq_f, del_f, calloc, free);
316 /* This function frees all memory allocated for given hash table.
317 Naturally the hash table must already exist. */
323 size_t size = htab_size (htab);
324 PTR *entries = htab->entries;
328 for (i = size - 1; i >= 0; i--)
329 if (entries[i] != EMPTY_ENTRY && entries[i] != DELETED_ENTRY)
330 (*htab->del_f) (entries[i]);
332 if (htab->free_f != NULL)
334 (*htab->free_f) (entries);
335 (*htab->free_f) (htab);
337 else if (htab->free_with_arg_f != NULL)
339 (*htab->free_with_arg_f) (htab->alloc_arg, entries);
340 (*htab->free_with_arg_f) (htab->alloc_arg, htab);
344 /* This function clears all entries in the given hash table. */
350 size_t size = htab_size (htab);
351 PTR *entries = htab->entries;
355 for (i = size - 1; i >= 0; i--)
356 if (entries[i] != EMPTY_ENTRY && entries[i] != DELETED_ENTRY)
357 (*htab->del_f) (entries[i]);
359 memset (entries, 0, size * sizeof (PTR));
362 /* Similar to htab_find_slot, but without several unwanted side effects:
363 - Does not call htab->eq_f when it finds an existing entry.
364 - Does not change the count of elements/searches/collisions in the
366 This function also assumes there are no deleted entries in the table.
367 HASH is the hash value for the element to be inserted. */
370 find_empty_slot_for_expand (htab, hash)
374 hashval_t index = htab_mod (hash, htab);
375 size_t size = htab_size (htab);
376 PTR *slot = htab->entries + index;
379 if (*slot == EMPTY_ENTRY)
381 else if (*slot == DELETED_ENTRY)
384 hash2 = htab_mod_m2 (hash, htab);
391 slot = htab->entries + index;
392 if (*slot == EMPTY_ENTRY)
394 else if (*slot == DELETED_ENTRY)
399 /* The following function changes size of memory allocated for the
400 entries and repeatedly inserts the table elements. The occupancy
401 of the table after the call will be about 50%. Naturally the hash
402 table must already exist. Remember also that the place of the
403 table entries is changed. If memory allocation failures are allowed,
404 this function will return zero, indicating that the table could not be
405 expanded. If all goes well, it will return a non-zero value. */
417 oentries = htab->entries;
418 olimit = oentries + htab->size;
420 /* Resize only when table after removal of unused elements is either
421 too full or too empty. */
422 if ((htab->n_elements - htab->n_deleted) * 2 > htab->size
423 || ((htab->n_elements - htab->n_deleted) * 8 < htab->size
425 nsize = higher_prime_number ((htab->n_elements - htab->n_deleted) * 2);
429 if (htab->alloc_with_arg_f != NULL)
430 nentries = (PTR *) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize,
433 nentries = (PTR *) (*htab->alloc_f) (nsize, sizeof (PTR *));
434 if (nentries == NULL)
436 htab->entries = nentries;
439 htab->n_elements -= htab->n_deleted;
447 if (x != EMPTY_ENTRY && x != DELETED_ENTRY)
449 PTR *q = find_empty_slot_for_expand (htab, (*htab->hash_f) (x));
458 if (htab->free_f != NULL)
459 (*htab->free_f) (oentries);
460 else if (htab->free_with_arg_f != NULL)
461 (*htab->free_with_arg_f) (htab->alloc_arg, oentries);
465 /* This function searches for a hash table entry equal to the given
466 element. It cannot be used to insert or delete an element. */
469 htab_find_with_hash (htab, element, hash)
474 hashval_t index, hash2;
479 size = htab_size (htab);
480 index = htab_mod (hash, htab);
482 entry = htab->entries[index];
483 if (entry == EMPTY_ENTRY
484 || (entry != DELETED_ENTRY && (*htab->eq_f) (entry, element)))
487 hash2 = htab_mod_m2 (hash, htab);
495 entry = htab->entries[index];
496 if (entry == EMPTY_ENTRY
497 || (entry != DELETED_ENTRY && (*htab->eq_f) (entry, element)))
502 /* Like htab_find_slot_with_hash, but compute the hash value from the
506 htab_find (htab, element)
510 return htab_find_with_hash (htab, element, (*htab->hash_f) (element));
513 /* This function searches for a hash table slot containing an entry
514 equal to the given element. To delete an entry, call this with
515 INSERT = 0, then call htab_clear_slot on the slot returned (possibly
516 after doing some checks). To insert an entry, call this with
517 INSERT = 1, then write the value you want into the returned slot.
518 When inserting an entry, NULL may be returned if memory allocation
522 htab_find_slot_with_hash (htab, element, hash, insert)
526 enum insert_option insert;
528 PTR *first_deleted_slot;
529 hashval_t index, hash2;
533 size = htab_size (htab);
534 if (insert == INSERT && size * 3 <= htab->n_elements * 4)
536 if (htab_expand (htab) == 0)
538 size = htab_size (htab);
541 index = htab_mod (hash, htab);
544 first_deleted_slot = NULL;
546 entry = htab->entries[index];
547 if (entry == EMPTY_ENTRY)
549 else if (entry == DELETED_ENTRY)
550 first_deleted_slot = &htab->entries[index];
551 else if ((*htab->eq_f) (entry, element))
552 return &htab->entries[index];
554 hash2 = htab_mod_m2 (hash, htab);
562 entry = htab->entries[index];
563 if (entry == EMPTY_ENTRY)
565 else if (entry == DELETED_ENTRY)
567 if (!first_deleted_slot)
568 first_deleted_slot = &htab->entries[index];
570 else if ((*htab->eq_f) (entry, element))
571 return &htab->entries[index];
575 if (insert == NO_INSERT)
578 if (first_deleted_slot)
581 *first_deleted_slot = EMPTY_ENTRY;
582 return first_deleted_slot;
586 return &htab->entries[index];
589 /* Like htab_find_slot_with_hash, but compute the hash value from the
593 htab_find_slot (htab, element, insert)
596 enum insert_option insert;
598 return htab_find_slot_with_hash (htab, element, (*htab->hash_f) (element),
602 /* This function deletes an element with the given value from hash
603 table (the hash is computed from the element). If there is no matching
604 element in the hash table, this function does nothing. */
607 htab_remove_elt (htab, element)
611 htab_remove_elt_with_hash (htab, element, (*htab->hash_f) (element));
615 /* This function deletes an element with the given value from hash
616 table. If there is no matching element in the hash table, this
617 function does nothing. */
620 htab_remove_elt_with_hash (htab, element, hash)
627 slot = htab_find_slot_with_hash (htab, element, hash, NO_INSERT);
628 if (*slot == EMPTY_ENTRY)
632 (*htab->del_f) (*slot);
634 *slot = DELETED_ENTRY;
638 /* This function clears a specified slot in a hash table. It is
639 useful when you've already done the lookup and don't want to do it
643 htab_clear_slot (htab, slot)
647 if (slot < htab->entries || slot >= htab->entries + htab_size (htab)
648 || *slot == EMPTY_ENTRY || *slot == DELETED_ENTRY)
652 (*htab->del_f) (*slot);
654 *slot = DELETED_ENTRY;
658 /* This function scans over the entire hash table calling
659 CALLBACK for each live entry. If CALLBACK returns false,
660 the iteration stops. INFO is passed as CALLBACK's second
664 htab_traverse_noresize (htab, callback, info)
672 slot = htab->entries;
673 limit = slot + htab_size (htab);
679 if (x != EMPTY_ENTRY && x != DELETED_ENTRY)
680 if (!(*callback) (slot, info))
683 while (++slot < limit);
686 /* Like htab_traverse_noresize, but does resize the table when it is
687 too empty to improve effectivity of subsequent calls. */
690 htab_traverse (htab, callback, info)
695 if (htab_elements (htab) * 8 < htab_size (htab))
698 htab_traverse_noresize (htab, callback, info);
701 /* Return the fraction of fixed collisions during all work with given
705 htab_collisions (htab)
708 if (htab->searches == 0)
711 return (double) htab->collisions / (double) htab->searches;
714 /* Hash P as a null-terminated string.
716 Copied from gcc/hashtable.c. Zack had the following to say with respect
717 to applicability, though note that unlike hashtable.c, this hash table
718 implementation re-hashes rather than chain buckets.
720 http://gcc.gnu.org/ml/gcc-patches/2001-08/msg01021.html
721 From: Zack Weinberg <zackw@panix.com>
722 Date: Fri, 17 Aug 2001 02:15:56 -0400
724 I got it by extracting all the identifiers from all the source code
725 I had lying around in mid-1999, and testing many recurrences of
726 the form "H_n = H_{n-1} * K + c_n * L + M" where K, L, M were either
727 prime numbers or the appropriate identity. This was the best one.
728 I don't remember exactly what constituted "best", except I was
729 looking at bucket-length distributions mostly.
731 So it should be very good at hashing identifiers, but might not be
732 as good at arbitrary strings.
734 I'll add that it thoroughly trounces the hash functions recommended
735 for this use at http://burtleburtle.net/bob/hash/index.html, both
736 on speed and bucket distribution. I haven't tried it against the
737 function they just started using for Perl's hashes. */
743 const unsigned char *str = (const unsigned char *) p;
747 while ((c = *str++) != 0)
748 r = r * 67 + c - 113;
754 --------------------------------------------------------------------
755 lookup2.c, by Bob Jenkins, December 1996, Public Domain.
756 hash(), hash2(), hash3, and mix() are externally useful functions.
757 Routines to test the hash are included if SELF_TEST is defined.
758 You can use this free for any purpose. It has no warranty.
759 --------------------------------------------------------------------
763 --------------------------------------------------------------------
764 mix -- mix 3 32-bit values reversibly.
765 For every delta with one or two bit set, and the deltas of all three
766 high bits or all three low bits, whether the original value of a,b,c
767 is almost all zero or is uniformly distributed,
768 * If mix() is run forward or backward, at least 32 bits in a,b,c
769 have at least 1/4 probability of changing.
770 * If mix() is run forward, every bit of c will change between 1/3 and
771 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
772 mix() was built out of 36 single-cycle latency instructions in a
773 structure that could supported 2x parallelism, like so:
781 Unfortunately, superscalar Pentiums and Sparcs can't take advantage
782 of that parallelism. They've also turned some of those single-cycle
783 latency instructions into multi-cycle latency instructions. Still,
784 this is the fastest good hash I could find. There were about 2^^68
785 to choose from. I only looked at a billion or so.
786 --------------------------------------------------------------------
788 /* same, but slower, works on systems that might have 8 byte hashval_t's */
791 a -= b; a -= c; a ^= (c>>13); \
792 b -= c; b -= a; b ^= (a<< 8); \
793 c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
794 a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
795 b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
796 c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
797 a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
798 b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
799 c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
803 --------------------------------------------------------------------
804 hash() -- hash a variable-length key into a 32-bit value
805 k : the key (the unaligned variable-length array of bytes)
806 len : the length of the key, counting by bytes
807 level : can be any 4-byte value
808 Returns a 32-bit value. Every bit of the key affects every bit of
809 the return value. Every 1-bit and 2-bit delta achieves avalanche.
810 About 36+6len instructions.
812 The best hash table sizes are powers of 2. There is no need to do
813 mod a prime (mod is sooo slow!). If you need less than 32 bits,
814 use a bitmask. For example, if you need only 10 bits, do
815 h = (h & hashmask(10));
816 In which case, the hash table should have hashsize(10) elements.
818 If you are hashing n strings (ub1 **)k, do it like this:
819 for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
821 By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
822 code any way you wish, private, educational, or commercial. It's free.
824 See http://burtleburtle.net/bob/hash/evahash.html
825 Use for hash table lookup, or anything where one collision in 2^32 is
826 acceptable. Do NOT use for cryptographic purposes.
827 --------------------------------------------------------------------
830 hashval_t iterative_hash (k_in, length, initval)
831 const PTR k_in; /* the key */
832 register size_t length; /* the length of the key */
833 register hashval_t initval; /* the previous hash, or an arbitrary value */
835 register const unsigned char *k = (const unsigned char *)k_in;
836 register hashval_t a,b,c,len;
838 /* Set up the internal state */
840 a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
841 c = initval; /* the previous hash value */
843 /*---------------------------------------- handle most of the key */
844 #ifndef WORDS_BIGENDIAN
845 /* On a little-endian machine, if the data is 4-byte aligned we can hash
846 by word for better speed. This gives nondeterministic results on
847 big-endian machines. */
848 if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0)
849 while (len >= 12) /* aligned */
851 a += *(hashval_t *)(k+0);
852 b += *(hashval_t *)(k+4);
853 c += *(hashval_t *)(k+8);
861 a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24));
862 b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24));
863 c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24));
868 /*------------------------------------- handle the last 11 bytes */
870 switch(len) /* all the case statements fall through */
872 case 11: c+=((hashval_t)k[10]<<24);
873 case 10: c+=((hashval_t)k[9]<<16);
874 case 9 : c+=((hashval_t)k[8]<<8);
875 /* the first byte of c is reserved for the length */
876 case 8 : b+=((hashval_t)k[7]<<24);
877 case 7 : b+=((hashval_t)k[6]<<16);
878 case 6 : b+=((hashval_t)k[5]<<8);
880 case 4 : a+=((hashval_t)k[3]<<24);
881 case 3 : a+=((hashval_t)k[2]<<16);
882 case 2 : a+=((hashval_t)k[1]<<8);
884 /* case 0: nothing left to add */
887 /*-------------------------------------------- report the result */