1 /* An expandable hash tables datatype.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3 Contributed by Vladimir Makarov (vmakarov@cygnus.com).
5 This file is part of the libiberty library.
6 Libiberty is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version.
11 Libiberty is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with libiberty; see the file COPYING.LIB. If
18 not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 /* This package implements basic hash table functionality. It is possible
22 to search for an entry, create an entry and destroy an entry.
24 Elements in the table are generic pointers.
26 The size of the table is not fixed; if the occupancy of the table
27 grows too high the hash table will be expanded.
29 The abstract data implementation is based on generalized Algorithm D
30 from Knuth's book "The art of computer programming". Hash table is
31 expanded by creation of new hash table and transferring elements from
32 the old table to the new table. */
38 #include <sys/types.h>
54 #include "libiberty.h"
57 /* This macro defines reserved value for empty table entry. */
59 #define EMPTY_ENTRY ((PTR) 0)
61 /* This macro defines reserved value for table entry which contained
64 #define DELETED_ENTRY ((PTR) 1)
66 static unsigned long higher_prime_number PARAMS ((unsigned long));
67 static hashval_t hash_pointer PARAMS ((const void *));
68 static int eq_pointer PARAMS ((const void *, const void *));
69 static int htab_expand PARAMS ((htab_t));
70 static PTR *find_empty_slot_for_expand PARAMS ((htab_t, hashval_t));
72 /* At some point, we could make these be NULL, and modify the
73 hash-table routines to handle NULL specially; that would avoid
74 function-call overhead for the common case of hashing pointers. */
75 htab_hash htab_hash_pointer = hash_pointer;
76 htab_eq htab_eq_pointer = eq_pointer;
78 /* The following function returns a nearest prime number which is
79 greater than N, and near a power of two. */
82 higher_prime_number (n)
85 /* These are primes that are near, but slightly smaller than, a
87 static const unsigned long primes[] = {
99 (unsigned long) 16381,
100 (unsigned long) 32749,
101 (unsigned long) 65521,
102 (unsigned long) 131071,
103 (unsigned long) 262139,
104 (unsigned long) 524287,
105 (unsigned long) 1048573,
106 (unsigned long) 2097143,
107 (unsigned long) 4194301,
108 (unsigned long) 8388593,
109 (unsigned long) 16777213,
110 (unsigned long) 33554393,
111 (unsigned long) 67108859,
112 (unsigned long) 134217689,
113 (unsigned long) 268435399,
114 (unsigned long) 536870909,
115 (unsigned long) 1073741789,
116 (unsigned long) 2147483647,
118 ((unsigned long) 2147483647) + ((unsigned long) 2147483644),
121 const unsigned long *low = &primes[0];
122 const unsigned long *high = &primes[sizeof(primes) / sizeof(primes[0])];
126 const unsigned long *mid = low + (high - low) / 2;
133 /* If we've run out of primes, abort. */
136 fprintf (stderr, "Cannot find prime bigger than %lu\n", n);
143 /* Returns a hash code for P. */
149 return (hashval_t) ((long)p >> 3);
152 /* Returns non-zero if P1 and P2 are equal. */
162 /* This function creates table with length slightly longer than given
163 source length. Created hash table is initiated as empty (all the
164 hash table entries are EMPTY_ENTRY). The function returns the
165 created hash table, or NULL if memory allocation fails. */
168 htab_create_alloc (size, hash_f, eq_f, del_f, alloc_f, free_f)
178 size = higher_prime_number (size);
179 result = (htab_t) (*alloc_f) (1, sizeof (struct htab));
182 result->entries = (PTR *) (*alloc_f) (size, sizeof (PTR));
183 if (result->entries == NULL)
190 result->hash_f = hash_f;
192 result->del_f = del_f;
193 result->alloc_f = alloc_f;
194 result->free_f = free_f;
198 /* As above, but use the variants of alloc_f and free_f which accept
199 an extra argument. */
202 htab_create_alloc_ex (size, hash_f, eq_f, del_f, alloc_arg, alloc_f,
209 htab_alloc_with_arg alloc_f;
210 htab_free_with_arg free_f;
214 size = higher_prime_number (size);
215 result = (htab_t) (*alloc_f) (alloc_arg, 1, sizeof (struct htab));
218 result->entries = (PTR *) (*alloc_f) (alloc_arg, size, sizeof (PTR));
219 if (result->entries == NULL)
222 (*free_f) (alloc_arg, result);
226 result->hash_f = hash_f;
228 result->del_f = del_f;
229 result->alloc_arg = alloc_arg;
230 result->alloc_with_arg_f = alloc_f;
231 result->free_with_arg_f = free_f;
235 /* Update the function pointers and allocation parameter in the htab_t. */
238 htab_set_functions_ex (htab, hash_f, eq_f, del_f, alloc_arg, alloc_f, free_f)
244 htab_alloc_with_arg alloc_f;
245 htab_free_with_arg free_f;
247 htab->hash_f = hash_f;
250 htab->alloc_arg = alloc_arg;
251 htab->alloc_with_arg_f = alloc_f;
252 htab->free_with_arg_f = free_f;
255 /* These functions exist solely for backward compatibility. */
259 htab_create (size, hash_f, eq_f, del_f)
265 return htab_create_alloc (size, hash_f, eq_f, del_f, xcalloc, free);
269 htab_try_create (size, hash_f, eq_f, del_f)
275 return htab_create_alloc (size, hash_f, eq_f, del_f, calloc, free);
278 /* This function frees all memory allocated for given hash table.
279 Naturally the hash table must already exist. */
288 for (i = htab->size - 1; i >= 0; i--)
289 if (htab->entries[i] != EMPTY_ENTRY
290 && htab->entries[i] != DELETED_ENTRY)
291 (*htab->del_f) (htab->entries[i]);
293 if (htab->free_f != NULL)
295 (*htab->free_f) (htab->entries);
296 (*htab->free_f) (htab);
298 else if (htab->free_with_arg_f != NULL)
300 (*htab->free_with_arg_f) (htab->alloc_arg, htab->entries);
301 (*htab->free_with_arg_f) (htab->alloc_arg, htab);
305 /* This function clears all entries in the given hash table. */
314 for (i = htab->size - 1; i >= 0; i--)
315 if (htab->entries[i] != EMPTY_ENTRY
316 && htab->entries[i] != DELETED_ENTRY)
317 (*htab->del_f) (htab->entries[i]);
319 memset (htab->entries, 0, htab->size * sizeof (PTR));
322 /* Similar to htab_find_slot, but without several unwanted side effects:
323 - Does not call htab->eq_f when it finds an existing entry.
324 - Does not change the count of elements/searches/collisions in the
326 This function also assumes there are no deleted entries in the table.
327 HASH is the hash value for the element to be inserted. */
330 find_empty_slot_for_expand (htab, hash)
334 size_t size = htab->size;
335 unsigned int index = hash % size;
336 PTR *slot = htab->entries + index;
339 if (*slot == EMPTY_ENTRY)
341 else if (*slot == DELETED_ENTRY)
344 hash2 = 1 + hash % (size - 2);
351 slot = htab->entries + index;
352 if (*slot == EMPTY_ENTRY)
354 else if (*slot == DELETED_ENTRY)
359 /* The following function changes size of memory allocated for the
360 entries and repeatedly inserts the table elements. The occupancy
361 of the table after the call will be about 50%. Naturally the hash
362 table must already exist. Remember also that the place of the
363 table entries is changed. If memory allocation failures are allowed,
364 this function will return zero, indicating that the table could not be
365 expanded. If all goes well, it will return a non-zero value. */
377 oentries = htab->entries;
378 olimit = oentries + htab->size;
380 /* Resize only when table after removal of unused elements is either
381 too full or too empty. */
382 if ((htab->n_elements - htab->n_deleted) * 2 > htab->size
383 || ((htab->n_elements - htab->n_deleted) * 8 < htab->size
385 nsize = higher_prime_number ((htab->n_elements - htab->n_deleted) * 2);
389 if (htab->alloc_with_arg_f != NULL)
390 nentries = (PTR *) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize,
393 nentries = (PTR *) (*htab->alloc_f) (nsize, sizeof (PTR *));
394 if (nentries == NULL)
396 htab->entries = nentries;
399 htab->n_elements -= htab->n_deleted;
407 if (x != EMPTY_ENTRY && x != DELETED_ENTRY)
409 PTR *q = find_empty_slot_for_expand (htab, (*htab->hash_f) (x));
418 if (htab->free_f != NULL)
419 (*htab->free_f) (oentries);
420 else if (htab->free_with_arg_f != NULL)
421 (*htab->free_with_arg_f) (htab->alloc_arg, oentries);
425 /* This function searches for a hash table entry equal to the given
426 element. It cannot be used to insert or delete an element. */
429 htab_find_with_hash (htab, element, hash)
443 entry = htab->entries[index];
444 if (entry == EMPTY_ENTRY
445 || (entry != DELETED_ENTRY && (*htab->eq_f) (entry, element)))
448 hash2 = 1 + hash % (size - 2);
457 entry = htab->entries[index];
458 if (entry == EMPTY_ENTRY
459 || (entry != DELETED_ENTRY && (*htab->eq_f) (entry, element)))
464 /* Like htab_find_slot_with_hash, but compute the hash value from the
468 htab_find (htab, element)
472 return htab_find_with_hash (htab, element, (*htab->hash_f) (element));
475 /* This function searches for a hash table slot containing an entry
476 equal to the given element. To delete an entry, call this with
477 INSERT = 0, then call htab_clear_slot on the slot returned (possibly
478 after doing some checks). To insert an entry, call this with
479 INSERT = 1, then write the value you want into the returned slot.
480 When inserting an entry, NULL may be returned if memory allocation
484 htab_find_slot_with_hash (htab, element, hash, insert)
488 enum insert_option insert;
490 PTR *first_deleted_slot;
496 if (insert == INSERT && htab->size * 3 <= htab->n_elements * 4
497 && htab_expand (htab) == 0)
504 first_deleted_slot = NULL;
506 entry = htab->entries[index];
507 if (entry == EMPTY_ENTRY)
509 else if (entry == DELETED_ENTRY)
510 first_deleted_slot = &htab->entries[index];
511 else if ((*htab->eq_f) (entry, element))
512 return &htab->entries[index];
514 hash2 = 1 + hash % (size - 2);
522 entry = htab->entries[index];
523 if (entry == EMPTY_ENTRY)
525 else if (entry == DELETED_ENTRY)
527 if (!first_deleted_slot)
528 first_deleted_slot = &htab->entries[index];
530 else if ((*htab->eq_f) (entry, element))
531 return &htab->entries[index];
535 if (insert == NO_INSERT)
540 if (first_deleted_slot)
542 *first_deleted_slot = EMPTY_ENTRY;
543 return first_deleted_slot;
546 return &htab->entries[index];
549 /* Like htab_find_slot_with_hash, but compute the hash value from the
553 htab_find_slot (htab, element, insert)
556 enum insert_option insert;
558 return htab_find_slot_with_hash (htab, element, (*htab->hash_f) (element),
562 /* This function deletes an element with the given value from hash
563 table. If there is no matching element in the hash table, this
564 function does nothing. */
567 htab_remove_elt (htab, element)
573 slot = htab_find_slot (htab, element, NO_INSERT);
574 if (*slot == EMPTY_ENTRY)
578 (*htab->del_f) (*slot);
580 *slot = DELETED_ENTRY;
584 /* This function clears a specified slot in a hash table. It is
585 useful when you've already done the lookup and don't want to do it
589 htab_clear_slot (htab, slot)
593 if (slot < htab->entries || slot >= htab->entries + htab->size
594 || *slot == EMPTY_ENTRY || *slot == DELETED_ENTRY)
598 (*htab->del_f) (*slot);
600 *slot = DELETED_ENTRY;
604 /* This function scans over the entire hash table calling
605 CALLBACK for each live entry. If CALLBACK returns false,
606 the iteration stops. INFO is passed as CALLBACK's second
610 htab_traverse_noresize (htab, callback, info)
618 slot = htab->entries;
619 limit = slot + htab->size;
625 if (x != EMPTY_ENTRY && x != DELETED_ENTRY)
626 if (!(*callback) (slot, info))
629 while (++slot < limit);
632 /* Like htab_traverse_noresize, but does resize the table when it is
633 too empty to improve effectivity of subsequent calls. */
636 htab_traverse (htab, callback, info)
641 if ((htab->n_elements - htab->n_deleted) * 8 < htab->size)
644 htab_traverse_noresize (htab, callback, info);
647 /* Return the current size of given hash table. */
656 /* Return the current number of elements in given hash table. */
662 return htab->n_elements - htab->n_deleted;
665 /* Return the fraction of fixed collisions during all work with given
669 htab_collisions (htab)
672 if (htab->searches == 0)
675 return (double) htab->collisions / (double) htab->searches;
678 /* Hash P as a null-terminated string.
680 Copied from gcc/hashtable.c. Zack had the following to say with respect
681 to applicability, though note that unlike hashtable.c, this hash table
682 implementation re-hashes rather than chain buckets.
684 http://gcc.gnu.org/ml/gcc-patches/2001-08/msg01021.html
685 From: Zack Weinberg <zackw@panix.com>
686 Date: Fri, 17 Aug 2001 02:15:56 -0400
688 I got it by extracting all the identifiers from all the source code
689 I had lying around in mid-1999, and testing many recurrences of
690 the form "H_n = H_{n-1} * K + c_n * L + M" where K, L, M were either
691 prime numbers or the appropriate identity. This was the best one.
692 I don't remember exactly what constituted "best", except I was
693 looking at bucket-length distributions mostly.
695 So it should be very good at hashing identifiers, but might not be
696 as good at arbitrary strings.
698 I'll add that it thoroughly trounces the hash functions recommended
699 for this use at http://burtleburtle.net/bob/hash/index.html, both
700 on speed and bucket distribution. I haven't tried it against the
701 function they just started using for Perl's hashes. */
707 const unsigned char *str = (const unsigned char *) p;
711 while ((c = *str++) != 0)
712 r = r * 67 + c - 113;
718 --------------------------------------------------------------------
719 lookup2.c, by Bob Jenkins, December 1996, Public Domain.
720 hash(), hash2(), hash3, and mix() are externally useful functions.
721 Routines to test the hash are included if SELF_TEST is defined.
722 You can use this free for any purpose. It has no warranty.
723 --------------------------------------------------------------------
727 --------------------------------------------------------------------
728 mix -- mix 3 32-bit values reversibly.
729 For every delta with one or two bit set, and the deltas of all three
730 high bits or all three low bits, whether the original value of a,b,c
731 is almost all zero or is uniformly distributed,
732 * If mix() is run forward or backward, at least 32 bits in a,b,c
733 have at least 1/4 probability of changing.
734 * If mix() is run forward, every bit of c will change between 1/3 and
735 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
736 mix() was built out of 36 single-cycle latency instructions in a
737 structure that could supported 2x parallelism, like so:
745 Unfortunately, superscalar Pentiums and Sparcs can't take advantage
746 of that parallelism. They've also turned some of those single-cycle
747 latency instructions into multi-cycle latency instructions. Still,
748 this is the fastest good hash I could find. There were about 2^^68
749 to choose from. I only looked at a billion or so.
750 --------------------------------------------------------------------
752 /* same, but slower, works on systems that might have 8 byte hashval_t's */
755 a -= b; a -= c; a ^= (c>>13); \
756 b -= c; b -= a; b ^= (a<< 8); \
757 c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
758 a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
759 b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
760 c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
761 a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
762 b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
763 c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
767 --------------------------------------------------------------------
768 hash() -- hash a variable-length key into a 32-bit value
769 k : the key (the unaligned variable-length array of bytes)
770 len : the length of the key, counting by bytes
771 level : can be any 4-byte value
772 Returns a 32-bit value. Every bit of the key affects every bit of
773 the return value. Every 1-bit and 2-bit delta achieves avalanche.
774 About 36+6len instructions.
776 The best hash table sizes are powers of 2. There is no need to do
777 mod a prime (mod is sooo slow!). If you need less than 32 bits,
778 use a bitmask. For example, if you need only 10 bits, do
779 h = (h & hashmask(10));
780 In which case, the hash table should have hashsize(10) elements.
782 If you are hashing n strings (ub1 **)k, do it like this:
783 for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
785 By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
786 code any way you wish, private, educational, or commercial. It's free.
788 See http://burtleburtle.net/bob/hash/evahash.html
789 Use for hash table lookup, or anything where one collision in 2^32 is
790 acceptable. Do NOT use for cryptographic purposes.
791 --------------------------------------------------------------------
794 hashval_t iterative_hash (k_in, length, initval)
795 const PTR k_in; /* the key */
796 register size_t length; /* the length of the key */
797 register hashval_t initval; /* the previous hash, or an arbitrary value */
799 register const unsigned char *k = (const unsigned char *)k_in;
800 register hashval_t a,b,c,len;
802 /* Set up the internal state */
804 a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
805 c = initval; /* the previous hash value */
807 /*---------------------------------------- handle most of the key */
808 #ifndef WORDS_BIGENDIAN
809 /* On a little-endian machine, if the data is 4-byte aligned we can hash
810 by word for better speed. This gives nondeterministic results on
811 big-endian machines. */
812 if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0)
813 while (len >= 12) /* aligned */
815 a += *(hashval_t *)(k+0);
816 b += *(hashval_t *)(k+4);
817 c += *(hashval_t *)(k+8);
825 a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24));
826 b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24));
827 c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24));
832 /*------------------------------------- handle the last 11 bytes */
834 switch(len) /* all the case statements fall through */
836 case 11: c+=((hashval_t)k[10]<<24);
837 case 10: c+=((hashval_t)k[9]<<16);
838 case 9 : c+=((hashval_t)k[8]<<8);
839 /* the first byte of c is reserved for the length */
840 case 8 : b+=((hashval_t)k[7]<<24);
841 case 7 : b+=((hashval_t)k[6]<<16);
842 case 6 : b+=((hashval_t)k[5]<<8);
844 case 4 : a+=((hashval_t)k[3]<<24);
845 case 3 : a+=((hashval_t)k[2]<<16);
846 case 2 : a+=((hashval_t)k[1]<<8);
848 /* case 0: nothing left to add */
851 /*-------------------------------------------- report the result */