1 /* hash.c - hash table lookup strings -
2 Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 1998
3 Free Software Foundation, Inc.
5 This file is part of GAS, the GNU Assembler.
7 GAS is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GAS is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GAS; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22 * BUGS, GRIPES, APOLOGIA etc.
24 * A typical user doesn't need ALL this: I intend to make a library out
25 * of it one day - Dean Elsner.
26 * Also, I want to change the definition of a symbol to (address,length)
27 * so I can put arbitrary binary in the names stored. [see hsh.c for that]
29 * This slime is common coupled inside the module. Com-coupling (and other
30 * vandalism) was done to speed running time. The interfaces at the
31 * module's edges are adequately clean.
33 * There is no way to (a) run a test script through this heap and (b)
34 * compare results with previous scripts, to see if we have broken any
35 * code. Use GNU (f)utilities to do this. A few commands assist test.
36 * The testing is awkward: it tries to be both batch & interactive.
37 * For now, interactive rules!
41 * The idea is to implement a symbol table. A test jig is here.
42 * Symbols are arbitrary strings; they can't contain '\0'.
43 * [See hsh.c for a more general symbol flavour.]
44 * Each symbol is associated with a char*, which can point to anything
45 * you want, allowing an arbitrary property list for each symbol.
47 * The basic operations are:
49 * new creates symbol table, returns handle
50 * find (symbol) returns char*
51 * insert (symbol,char*) error if symbol already in table
52 * delete (symbol) returns char* if symbol was in table
53 * apply so you can delete all symbols before die()
54 * die destroy symbol table (free up memory)
56 * Supplementary functions include:
58 * say how big? what % full?
59 * replace (symbol,newval) report previous value
60 * jam (symbol,value) assert symbol:=value
62 * You, the caller, have control over errors: this just reports them.
64 * This package requires malloc(), free().
65 * Malloc(size) returns NULL or address of char[size].
66 * Free(address) frees same.
70 * The code and its structures are re-entrant.
72 * Before you do anything else, you must call hash_new() which will
73 * return the address of a hash-table-control-block. You then use
74 * this address as a handle of the symbol table by passing it to all
75 * the other hash_...() functions. The only approved way to recover
76 * the memory used by the symbol table is to call hash_die() with the
77 * handle of the symbol table.
79 * Before you call hash_die() you normally delete anything pointed to
80 * by individual symbols. After hash_die() you can't use that symbol
83 * The char* you associate with a symbol may not be NULL (0) because
84 * NULL is returned whenever a symbol is not in the table. Any other
85 * value is OK, except DELETED, #defined below.
87 * When you supply a symbol string for insertion, YOU MUST PRESERVE THE
88 * STRING until that symbol is deleted from the table. The reason is that
89 * only the address you supply, NOT the symbol string itself, is stored
90 * in the symbol table.
92 * You may delete and add symbols arbitrarily.
93 * Any or all symbols may have the same 'value' (char *). In fact, these
94 * routines don't do anything with your symbol values.
96 * You have no right to know where the symbol:char* mapping is stored,
97 * because it moves around in memory; also because we may change how it
98 * works and we don't want to break your code do we? However the handle
99 * (address of struct hash_control) is never changed in
100 * the life of the symbol table.
102 * What you CAN find out about a symbol table is:
103 * how many slots are in the hash table?
104 * how many slots are filled with symbols?
105 * (total hashes,collisions) for (reads,writes) (*)
106 * All of the above values vary in time.
107 * (*) some of these numbers will not be meaningful if we change the
113 * Hash table is an array of hash_entries; each entry is a pointer to
114 * a string and a user-supplied value 1 char* wide.
116 * The array always has 2 ** n elements, n>0, n integer.
117 * There is also a 'wall' entry after the array, which is always empty
118 * and acts as a sentinel to stop running off the end of the array.
119 * When the array gets too full, we create a new array twice as large
120 * and re-hash the symbols into the new array, then forget the old array.
121 * (Of course, we copy the values into the new array before we junk the
/* NOTE(review): the conditional that the #endif below closes is not
   visible in this listing. */
130 #define TRUE (!FALSE)
131 #endif /* no FALSE yet */
/* Smaller of two values.  This is a function-like macro: the selected
   argument is evaluated twice, so avoid arguments with side effects. */
134 #define min(a, b) ((a) < (b) ? (a) : (b))
/* Within this file, error (...) is an alias for as_fatal (...). */
138 #define error as_fatal
/* One-byte object whose address is used as the "slot was deleted" marker. */
140 static char _deleted_[1];
141 #define DELETED ((PTR)_deleted_) /* guaranteed unique address */
/* hash_new() creates a table of 1 << START_POWER slots. */
142 #define START_POWER (10) /* power of two: size of new hash table */
/* TRUE if a symbol is in entry @ ptr: the slot's hash_string is neither
   NULL (slot not used) nor DELETED (slot was deleted).
   The argument is parenthesized so that any pointer expression, e.g.
   `base + i' or `&table[i]', can be passed safely.  It is evaluated
   twice, so it must not have side effects. */
#define islive(ptr) ((ptr)->hash_string && (ptr)->hash_string != DELETED)
148 /* Number of slots in hash table. The wall does not count here.
149 We expect this is always a power of 2. */
151 /* Number of hash_ask calls. */
154 /* Number of collisions (total). This may exceed STAT_ACCESS if we
155 have lots of collisions/access. */
158 /* Slots used right now. */
160 /* How many string compares? */
163 /* Size of statistics block... this must be last. */
/* Access kinds.  hash_ask() adds one of these to a statistic's base index
   (STAT_ACCESS, STAT_COLLIDE, STAT_STRCMP) to select the read or the write
   counter in hash_stat[]. */
166 #define STAT__READ (0) /* reading */
167 #define STAT__WRITE (1) /* writing */
169 /* When we grow a hash table, by what power of two do we increase it? */
/* hash_grow() uses this as a left-shift count, so 1 doubles the table. */
170 #define GROW_FACTOR 1
171 /* When should we grow it? */
/* Fill limit for a table of N slots: half of N.  hash_new() stores it in
   hash_full; hash_insert() and hash_jam() call hash_grow() once
   hash_stat[STAT_USED] exceeds hash_full. */
172 #define FULL_VALUE(N) ((N) / 2)
174 /* #define SUSPECT to do runtime checks */
175 /* #define TEST to be a test jig for hash...() */
178 /* TEST: use smaller hash table */
/* NOTE(review): START_POWER is also defined as (10) earlier in the file;
   the conditional/#undef lines that presumably separate the two
   definitions are not visible in this listing. */
180 #define START_POWER (3)
/* 8 == 1 << 3, i.e. the slot count implied by START_POWER above. */
182 #define START_SIZE (8)
/* 4 == FULL_VALUE (8), i.e. the matching fill limit. */
184 #define START_FULL (4)
/* Fields of one hash-table slot (the enclosing struct header is not
   visible in this listing).  hash_insert() and hash_jam() store the
   caller's string pointer here, not a copy of the string. */
189 const char *hash_string; /* points to where the symbol string is */
190 /* NULL means slot is not used */
191 /* DELETED means slot was deleted */
192 PTR hash_value; /* user's datum, associated with symbol */
/* Control block for one hash table.  hash_new() allocates it, and its
   address is the handle that callers pass to the other hash_...()
   functions. */
196 struct hash_control {
197 struct hash_entry *hash_where;/* address of hash table */
198 int hash_sizelog; /* Log of ( hash_mask + 1 ) */
199 int hash_mask; /* masks a hash into index into table */
200 int hash_full; /* when hash_stat[STAT_USED] exceeds this, */
/* (the rest of the comment above is not visible in this listing; in the
   code, hash_insert() and hash_jam() call hash_grow() in that case) */
202 struct hash_entry *hash_wall; /* point just after last (usable) entry */
203 /* here we have some statistics */
204 int hash_stat[STATLENGTH]; /* lies & statistics */
207 /*------------------ plan ---------------------------------- i = internal
209 struct hash_control * c;
210 struct hash_entry * e; i
211 int b[z]; buffer for statistics
213 char * s; symbol string (address) [ key ]
214 char * v; value string (address) [datum]
215 boolean f; TRUE if we found s in hash table i
216 char * t; error string; 0 means OK
217 int a; access type [0...n) i
219 c=hash_new () create new hash_control
221 hash_die (c) destroy hash_control (and hash table)
222 table should be empty.
223 doesn't check if table is empty.
224 c has no meaning after this.
226 hash_say (c,b,z) report statistics of hash_control.
227 also report number of available statistics.
229 v=hash_delete (c,s) delete symbol, return old value if any.
230 ask() NULL means no old value.
233 v=hash_replace (c,s,v) replace old value of s with v.
234 ask() NULL means no old value: no table change.
237 t=hash_insert (c,s,v) insert (s,v) in c.
238 ask() return error string.
239 f it is an error to insert if s is already
241 if any error, c is unchanged.
243 t=hash_jam (c,s,v) assert that new value of s will be v. i
244 ask() it may decide to GROW the table. i
247 t=hash_grow (c) grow the hash table. i
248 jam() will invoke JAM. i
250 ?=hash_apply (c,y) apply y() to every symbol in c.
251 y entries visited in 'unspecified' order.
253 v=hash_find (c,s) return value of s, or NULL if s not in c.
257 f,e=hash_ask() (c,s,a) return slot where s SHOULD live. i
258 code() maintain collision stats in c. i
260 .=hash_code (c,s) compute hash-code for s, i
261 from parameters of c. i
265 /* Returned by hash_ask() to stop extra testing. hash_ask() wants to
266 return both a slot and a status. This is the status. TRUE: found
267 symbol; FALSE: absent (empty or deleted slot). Also returned by
268 hash_jam(). TRUE: we replaced a value; FALSE: we inserted a value. */
/* NOTE(review): this is a single file-scope flag shared by every table, so
   its value is only meaningful until the next call that sets it. */
269 static char hash_found;
/* Forward declarations of the file-local helpers.  The hash_ask prototype
   continues on a line that is not visible in this listing. */
271 static struct hash_entry *hash_ask PARAMS ((struct hash_control *,
273 static int hash_code PARAMS ((struct hash_control *, const char *));
274 static const char *hash_grow PARAMS ((struct hash_control *));
276 /* Create a new hash table. Return NULL if failed; otherwise return handle
277 (address of struct hash_control). */
/* NOTE(review): the function-name line, the braces and the return
   statement are not visible in this listing. */
278 struct hash_control *
281 struct hash_control *retval;
282 struct hash_entry *room; /* points to hash table */
283 struct hash_entry *wall;
284 struct hash_entry *entry;
285 int *ip; /* scan stats block of struct hash_control */
286 int *nd; /* limit of stats block */
/* Allocate the slot array: 1 << START_POWER usable slots plus the wall. */
288 room = (struct hash_entry *) xmalloc (sizeof (struct hash_entry)
289 /* +1 for the wall entry */
290 * ((1 << START_POWER) + 1));
/* Allocate the control block that becomes the caller's handle. */
291 retval = (struct hash_control *) xmalloc (sizeof (struct hash_control));
/* Walk the whole statistics block (the loop body is not visible in this
   listing). */
293 nd = retval->hash_stat + STATLENGTH;
294 for (ip = retval->hash_stat; ip < nd; ip++)
/* Record the initial geometry of the table. */
297 retval->hash_stat[STAT_SIZE] = 1 << START_POWER;
298 retval->hash_mask = (1 << START_POWER) - 1;
299 retval->hash_sizelog = START_POWER;
300 /* works for 1's compl ok */
301 retval->hash_where = room;
/* The wall is the extra entry just past the last usable slot. */
303 wall = room + (1 << START_POWER);
304 retval->hash_full = FULL_VALUE (1 << START_POWER);
/* Mark every slot unused; `<=' makes the loop cover the wall as well. */
305 for (entry = room; entry <= wall; entry++)
306 entry->hash_string = NULL;
311 * h a s h _ d i e ( )
313 * Table should be empty, but this is not checked.
314 * To empty the table, try hash_apply()ing a symbol deleter.
315 * Return to free memory both the hash table and its control
317 * 'handle' has no meaning after this function.
318 * No errors are recoverable.
/* hash_die body fragment (the function header and braces are not visible
   in this listing).  handle is the table's control block. */
322 struct hash_control *handle;
/* Free the slot array first: its address is stored in the control block,
   which is freed next. */
324 free ((char *) handle->hash_where);
325 free ((char *) handle);
330 * h a s h _ s a y ( )
332 * Return the size of the statistics table, and as many statistics as
333 * we can until either (a) we have run out of statistics or (b) caller
334 * has run out of buffer.
335 * NOTE: hash_say treats all statistics alike.
336 * These numbers may change with time, due to insertions, deletions
337 * and expansions of the table.
338 * The first "statistic" returned is the length of hash_stat[].
339 * Then contents of hash_stat[] are read out (in ascending order)
340 * until your buffer or hash_stat[] is exhausted.
/* Report statistics of the table `handle' into `buffer', which has room
   for `bufsiz' ints.  (Return type, the bufsiz declaration, braces and the
   copy-loop body are not visible in this listing.) */
343 hash_say (handle, buffer, bufsiz)
344 struct hash_control *handle;
345 int buffer[ /*bufsiz*/ ];
348 int *nd; /* limit of statistics block */
349 int *ip; /* scan statistics */
351 ip = handle->hash_stat;
/* buffer[0] is used for STATLENGTH itself, so at most bufsiz - 1
   statistics (and never more than STATLENGTH) can follow. */
352 nd = ip + min (bufsiz - 1, STATLENGTH);
353 if (bufsiz > 0) /* trust nothing! bufsiz<=0 is dangerous */
355 *buffer++ = STATLENGTH;
/* Advance through hash_stat[] and the caller's buffer in step. */
356 for (; ip < nd; ip++, buffer++)
365 * h a s h _ d e l e t e ( )
367 * Try to delete a symbol from the table.
368 * If it was there, return its value (and adjust STAT_USED).
369 * Otherwise, return NULL.
370 * Anyway, the symbol is not present after this function.
/* Delete `string' from the table `handle'.  (Some declarations, the braces,
   the found/not-found test and the return statement are not visible in
   this listing.) */
373 PTR /* NULL if string not in table, else */
374 /* returns value of deleted symbol */
375 hash_delete (handle, string)
376 struct hash_control *handle;
380 struct hash_entry *entry;
/* Locate the slot; the lookup is counted as a write access. */
382 entry = hash_ask (handle, string, STAT__WRITE);
/* Save the old value, mark the slot as deleted and update the use count.
   Presumably this runs only when hash_ask() found the symbol; the guard
   is not visible here. */
385 retval = entry->hash_value;
386 entry->hash_string = DELETED;
387 handle->hash_stat[STAT_USED] -= 1;
/* Sanity check closed by the SUSPECT #endif below: the number of used
   slots must never go negative. */
389 if (handle->hash_stat[STAT_USED] < 0)
391 error ("hash_delete");
393 #endif /* def SUSPECT */
403 * h a s h _ r e p l a c e ( )
405 * Try to replace the old value of a symbol with a new value.
406 * Normally return the old value.
407 * Return NULL and don't change the table if the symbol is not already
/* Replace the value stored for `string' with `value'.  (Return type, some
   declarations, braces, the found/not-found test and the return statement
   are not visible in this listing.) */
411 hash_replace (handle, string, value)
412 struct hash_control *handle;
416 struct hash_entry *entry;
/* Locate the slot; the lookup is counted as a write access. */
419 entry = hash_ask (handle, string, STAT__WRITE);
/* Remember the previous value before overwriting it.  Presumably guarded
   by a hash_found test that is not visible here. */
422 retval = entry->hash_value;
423 entry->hash_value = value;
434 * h a s h _ i n s e r t ( )
436 * Insert a (symbol-string, value) into the hash table.
437 * Return an error string, 0 means OK.
438 * It is an 'error' to insert an existing symbol.
/* Insert (string, value) into the table `handle'.  (Some declarations, the
   braces, the already-present test and the return statement are not
   visible in this listing.) */
441 const char * /* return error string */
442 hash_insert (handle, string, value)
443 struct hash_control *handle;
447 struct hash_entry *entry;
/* Past the fill limit: enlarge the table before looking for a slot. */
451 if (handle->hash_stat[STAT_USED] > handle->hash_full)
453 retval = hash_grow (handle);
/* Find the slot where the symbol should live (counted as a write). */
457 entry = hash_ask (handle, string, STAT__WRITE);
/* Fill the slot.  Only the caller's pointer is stored, so the string must
   outlive its presence in the table. */
464 entry->hash_value = value;
465 entry->hash_string = string;
466 handle->hash_stat[STAT_USED] += 1;
473 * h a s h _ j a m ( )
475 * Regardless of what was in the symbol table before, after hash_jam()
476 * the named symbol has the given value. The symbol is either inserted or
477 * (its value is) replaced.
478 * An error message string is returned, 0 means OK.
480 * WARNING: this may decide to grow the hashed symbol table.
481 * To do this, we call hash_grow(), WHICH WILL recursively CALL US.
483 * We report status internally: hash_found is TRUE if we replaced, but
484 * false if we inserted.
/* Make `string' map to `value' in the table `handle', inserting or
   replacing as needed.  (Return type, some declarations, braces, the
   hash_found test and the return statement are not visible in this
   listing.) */
487 hash_jam (handle, string, value)
488 struct hash_control *handle;
493 struct hash_entry *entry;
/* Past the fill limit: enlarge the table first.  hash_grow() itself calls
   hash_jam() to re-insert the old entries. */
496 if (handle->hash_stat[STAT_USED] > handle->hash_full)
498 retval = hash_grow (handle);
502 entry = hash_ask (handle, string, STAT__WRITE);
/* Claim the slot for a new symbol; presumably done only when the symbol
   was not already present (the guard is not visible here). */
505 entry->hash_string = string;
506 handle->hash_stat[STAT_USED] += 1;
/* In either case the slot ends up holding the new value. */
508 entry->hash_value = value;
514 * h a s h _ g r o w ( )
516 * Grow a new (bigger) hash table from the old one.
517 * We choose to double the hash table's size.
518 * Return a human-scrutable error string: 0 if OK.
519 * Warning! This uses hash_jam(), which had better not recurse
520 * back here! Hash_jam() conditionally calls us, but we ALWAYS
/* Replace the slot array of `handle' with a larger one and re-insert every
   live entry.  (Return type, some declarations, braces, error returns and
   several comment delimiters are not visible in this listing.) */
525 hash_grow (handle) /* make a hash table grow */
526 struct hash_control *handle;
528 struct hash_entry *newwall;
529 struct hash_entry *newwhere;
530 struct hash_entry *newtrack;
531 struct hash_entry *oldtrack;
532 struct hash_entry *oldwhere;
533 struct hash_entry *oldwall;
543 * capture info about old hash table
545 oldwhere = handle->hash_where;
546 oldwall = handle->hash_wall;
548 oldused = handle->hash_stat[STAT_USED];
551 * attempt to get enough room for a hash table twice as big
553 temp = handle->hash_stat[STAT_SIZE];
/* NOTE(review): with GROW_FACTOR 1 this requests temp << 2 entries, i.e.
   four times the old slot count, while the code below only uses
   (temp << GROW_FACTOR) + 1 entries including the wall.  The "+1 for wall
   slot" remark suggests (temp << GROW_FACTOR) + 1 was intended; the
   current form over-allocates -- confirm before changing. */
554 newwhere = ((struct hash_entry *)
555 xmalloc ((unsigned long) ((temp << (GROW_FACTOR + 1))
556 /* +1 for wall slot */
557 * sizeof (struct hash_entry))));
/* NOTE(review): hash_new() uses xmalloc() results without a NULL test;
   whether xmalloc() can return NULL at all is not visible here. */
558 if (newwhere == NULL)
562 * have enough room: now we do all the work.
563 * double the size of everything in handle.
/* Scale mask, size, fill limit and size log by GROW_FACTOR and point the
   control block at the new array. */
565 handle->hash_mask = ((handle->hash_mask + 1) << GROW_FACTOR) - 1;
566 handle->hash_stat[STAT_SIZE] <<= GROW_FACTOR;
567 newsize = handle->hash_stat[STAT_SIZE];
568 handle->hash_where = newwhere;
569 handle->hash_full <<= GROW_FACTOR;
570 handle->hash_sizelog += GROW_FACTOR;
571 handle->hash_wall = newwall = newwhere + newsize;
572 /* Set all those pesky new slots to vacant. */
/* `<=' makes the loop clear the wall entry as well. */
573 for (newtrack = newwhere; newtrack <= newwall; newtrack++)
574 newtrack->hash_string = NULL;
575 /* We will do a scan of the old table, the hard way, using the
576 * new control block to re-insert the data into new hash table. */
/* The use count restarts at zero and hash_full has already been enlarged,
   so the hash_jam() calls below should not trigger another hash_grow(). */
577 handle->hash_stat[STAT_USED] = 0;
/* Re-insert every old slot that holds a live symbol (neither NULL nor
   DELETED); a non-zero hash_jam() result is an error string. */
578 for (oldtrack = oldwhere; oldtrack < oldwall; oldtrack++)
579 if (((string = oldtrack->hash_string) != NULL) && string != DELETED)
580 if ((retval = hash_jam (handle, string, oldtrack->hash_value)))
/* Consistency check: the new table must hold as many symbols as the old. */
584 if (handle->hash_stat[STAT_USED] != oldused)
588 /* We have a completely faked up control block.
589 Return the old hash table. */
590 free ((char *) oldwhere);
597 * h a s h _ a p p l y ( )
599 * Use this to scan each entry in symbol table.
600 * For each symbol, this calls (applies) a nominated function supplying the
601 * symbol's value (and the symbol's name).
602 * The idea is you use this to destroy whatever is associated with
603 * any values in the table BEFORE you destroy the table with hash_die.
604 * Of course, you can use it for other jobs; whenever you need to
605 * visit all extant symbols in the table.
607 * We choose to have a call-you-back idea for two reasons:
608 * aesthetic: it is a neater idea to use apply than an explicit loop
609 * sensible: if we ever had to grow the symbol table (due to insertions)
610 * then we would lose our place in the table when we re-hashed
611 * symbols into the new table in a different order.
613 * The order symbols are visited depends entirely on the hashing function.
614 * Whenever you insert a (symbol, value) you risk expanding the table. If
615 * you do expand the table, then the hashing function WILL change, so you
616 * MIGHT get a different order of symbols visited. In other words, if you
617 * want the same order of visiting symbols as the last time you used
618 * hash_apply() then you better not have done any hash_insert()s or
619 * hash_jam()s since the last time you used hash_apply().
621 * In future we may use the value returned by your nominated function.
622 * One idea is to abort the scan if, after applying the function to a
623 * certain node, the function returns a certain code.
625 * The function you supply should be of the form:
626 * void myfunct(string,value)
627 * char * string; |* the symbol's name *|
628 * char * value; |* the symbol's value *|
/* Call `function' once for every live symbol in the table `handle'.
   (Return type, the declaration of `function' and the braces are not
   visible in this listing.) */
635 hash_apply (handle, function)
636 struct hash_control *handle;
639 struct hash_entry *entry;
640 struct hash_entry *wall;
/* Scan the usable slots in storage order; the wall itself is excluded. */
642 wall = handle->hash_wall;
643 for (entry = handle->hash_where; entry < wall; entry++)
645 if (islive (entry)) /* silly code: tests entry->string twice! */
/* The callback receives the symbol's name first, then its value. */
647 (*function) (entry->hash_string, entry->hash_value);
654 * h a s h _ f i n d ( )
656 * Given symbol string, find value (if any).
657 * Return found value or NULL.
/* Look up `string' in the table `handle'.  (Return type, the string
   declaration, braces and the not-found path are not visible in this
   listing.) */
660 hash_find (handle, string)
661 struct hash_control *handle;
664 struct hash_entry *entry;
/* The lookup is counted as a read access in the statistics. */
666 entry = hash_ask (handle, string, STAT__READ);
/* Presumably reached only when hash_ask() found the symbol; the test on
   hash_found is not visible here. */
668 return entry->hash_value;
674 * h a s h _ a s k ( )
676 * Searches for given symbol string.
677 * Return the slot where it OUGHT to live. It may be there.
678 * Return hash_found: TRUE only if symbol is in that slot.
679 * Access argument is to help keep statistics in control block.
/* Find the slot where `string' lives or should live in the table `handle'.
   access_type (STAT__READ or STAT__WRITE) selects which statistics
   counters are updated.  (Several declarations, the braces, the loop
   bodies and the hash_found assignments are not visible in this listing.)
   NOTE(review): both probe loops stop at a DELETED slot as well as at an
   unused one.  A symbol stored beyond a deleted slot in its probe
   sequence would then appear to be absent -- confirm against the full
   source. */
682 static struct hash_entry * /* string slot, may be empty or deleted */
683 hash_ask (handle, string, access_type)
684 struct hash_control *handle;
689 struct hash_entry *slot;
690 int collision; /* count collisions */
694 /* start looking here */
/* The first probe position is the hash code masked down to a table index. */
695 hcode = hash_code (handle, string);
696 slot = handle->hash_where + (hcode & handle->hash_mask);
698 handle->hash_stat[STAT_ACCESS + access_type] += 1;
699 collision = strcmps = 0;
/* First pass: probe from the starting slot towards the wall.  The wall's
   hash_string is NULL, so it ends the loop. */
701 while (((s = slot->hash_string) != NULL) && s != DELETED)
708 if (slot->h == (unsigned long) hcode)
710 if (!strcmp (string, s))
722 * in use: we found string slot
724 * at wall: we fell off: wrap round ????
725 * in table: dig here slot
726 * at DELETED: dig here slot
/* The first pass ran into the wall: wrap round and probe again from the
   start of the table. */
728 if (slot == handle->hash_wall)
730 slot = handle->hash_where;/* now look again */
731 while (((s = slot->hash_string) != NULL) && s != DELETED)
738 if (slot->h == (unsigned long) hcode)
740 if (!strcmp (string, s))
752 * in use: we found it slot
753 * empty: wall: ERROR IMPOSSIBLE !!!!
754 * in table: dig here slot
755 * DELETED:dig here slot
/* Fold this lookup's collision and string-compare counts into the
   per-access-type statistics. */
758 handle->hash_stat[STAT_COLLIDE + access_type] += collision;
759 handle->hash_stat[STAT_STRCMP + access_type] += strcmps;
762 return slot; /* also return hash_found */
768 * Does hashing of symbol string to hash number.
/* Compute the hash code of `string' using parameters of the table
   `handle'.  (Return type, the string declaration, braces, the
   initialization of h, the return statement and the #else/#endif lines
   are not visible in this listing.) */
772 hash_code (handle, string)
773 struct hash_control *handle;
776 #if 1 /* There seems to be some interesting property of this function
777 that prevents the bfd version below from being an adequate
778 substitute. @@ Figure out what this property is! */
779 long h; /* hash code built here */
780 long c; /* each character lands here */
781 int n; /* Amount to shift h by */
/* The right-shift count depends on the table's current size log. */
783 n = (handle->hash_sizelog - 3);
/* Fold in one character per iteration until the terminating '\0'. */
785 while ((c = *string++) != 0)
788 h = (h << 3) + (h >> n) + c;
/* What follows presumably belongs to the alternative ("bfd version")
   branch mentioned in the comment above; it also tracks a length. */
794 unsigned int len = 0;
797 while ((c = *string++) != 0)
803 h += len + (len << 17);
/* Print usage and access statistics of the table `h' to `file', labelled
   with `name'.  (Return type, the file/name declarations and the braces
   are not visible in this listing.) */
810 hash_print_statistics (file, name, h)
813 struct hash_control *h;
815 unsigned long sz, used, pct;
820 sz = h->hash_stat[STAT_SIZE];
821 used = h->hash_stat[STAT_USED];
/* Percentage of slots in use; adding sz / 2 rounds to the nearest integer. */
822 pct = (used * 100 + sz / 2) / sz;
824 fprintf (file, "%s hash statistics:\n\t%lu/%lu slots used (%lu%%)\n",
825 name, used, sz, pct);
/* P prints one statistic: its read counter, its write counter and their
   sum.  `off' is the statistic's base index in hash_stat[]. */
827 #define P(name, off) \
828 fprintf (file, "\t%-16s %6dr + %6dw = %7d\n", name, \
829 h->hash_stat[off+STAT__READ], \
830 h->hash_stat[off+STAT__WRITE], \
831 h->hash_stat[off+STAT__READ] + h->hash_stat[off+STAT__WRITE])
833 P ("accesses:", STAT_ACCESS);
834 P ("collisions:", STAT_COLLIDE);
835 P ("string compares:", STAT_STRCMP);
841 * Here is a test program to exercise above.
/* Global state of the interactive test driver (this section is closed by
   "#endif ... #ifdef TEST" further down). */
845 #define TABLES (6) /* number of hash tables to maintain */
846 /* (at once) in any testing */
847 #define STATBUFSIZE (12) /* we can have 12 statistics */
/* statbuf is the buffer handed to hash_say(). */
849 int statbuf[STATBUFSIZE]; /* display statistics here */
850 char answer[100]; /* human farts here */
/* Handles are stored as char *: the driver casts hash_new()'s result. */
851 char *hashtable[TABLES]; /* we test many hash tables at once */
852 char *h; /* points to current hash_control */
860 int number; /* number 0:TABLES-1 of current hashed */
/* Command loop of the test driver.  The function header, the input call,
   the switch/case labels and the braces are not visible in this listing;
   the grouping comments below are inferred from the help text. */
872 printf ("type h <RETURN> for help\n");
875 printf ("hash_test command: ");
/* Commands are case-insensitive: fold upper case to lower case. */
878 if (isupper (command))
879 command = tolower (command); /* ecch! */
883 printf ("old hash table #=%d.\n", number);
/* List the control-block address of every table.
   NOTE(review): %d is given a pointer difference and %x a char *; these
   conversions do not match their argument types -- confirm and fix in
   the full source. */
887 for (pp = hashtable; pp < hashtable + TABLES; pp++)
889 printf ("address of hash table #%d control block is %xx\n"
890 ,pp - hashtable, *pp);
/* Display every symbol, using the applicatee callback. */
894 hash_apply (h, applicatee);
/* Run the destroy callback over every symbol. */
897 hash_apply (h, destroy);
/* Look up a symbol; a null result is shown as NOT-PRESENT. */
901 p = hash_find (h, name = what ("symbol"));
902 printf ("value of \"%s\" is \"%s\"\n", name, p ? p : "NOT-PRESENT");
/* Help text: one line per command letter. */
905 printf ("# show old, select new default hash table number\n");
906 printf ("? display all hashtable control block addresses\n");
907 printf ("a apply a simple display-er to each symbol in table\n");
908 printf ("d die: destroy hashtable\n");
909 printf ("f find value of nominated symbol\n");
910 printf ("h this help\n");
911 printf ("i insert value into symbol\n");
912 printf ("j jam value into symbol\n");
913 printf ("n new hashtable\n");
914 printf ("r replace a value with another\n");
915 printf ("s say what %% of table is used\n");
916 printf ("q exit this program\n");
917 printf ("x delete a symbol from table, report its value\n");
/* Insert: p receives hash_insert()'s error string (0 means OK). */
920 p = hash_insert (h, name = what ("symbol"), value = what ("value"));
923 printf ("symbol=\"%s\" value=\"%s\" error=%s\n", name, value,
/* Jam: p receives hash_jam()'s error string.
   NOTE(review): hash_jam() is documented to return 0 for OK, in which
   case p is passed to %s as a null pointer below -- confirm. */
928 p = hash_jam (h, name = what ("symbol"), value = what ("value"));
931 printf ("symbol=\"%s\" value=\"%s\" error=%s\n", name, value, p);
/* Create a new table in the currently selected slot. */
935 h = hashtable[number] = (char *) hash_new ();
/* Replace: report the previous value, or {} if there was none. */
940 p = hash_replace (h, name = what ("symbol"), value = what ("value"));
941 printf ("old value was \"%s\"\n", p ? p : "{}");
/* Fetch the statistics into statbuf and walk over them. */
944 hash_say (h, statbuf, STATBUFSIZE);
945 for (ip = statbuf; ip < statbuf + STATBUFSIZE; ip++)
/* Delete: report the value the symbol had, or {} if it was absent. */
952 p = hash_delete (h, name = what ("symbol"));
953 printf ("old value was \"%s\"\n", p ? p : "{}");
956 printf ("I can't understand command \"%c\"\n", command);
/* Prompt helper of the test driver, presumably the what() called from the
   command loop.  It prints `description' as a prompt and returns a
   malloc()ed copy of the global answer buffer.  (The function header, the
   input call, the allocation-failure handling and the return statement
   are not visible in this listing.) */
969 printf (" %s : ", description);
971 /* will one day clean up answer here */
/* +1 leaves room for the terminating '\0' copied by strcpy() below. */
972 retval = malloc (strlen (answer) + 1);
977 (void) strcpy (retval, answer);
/* Callback that the test driver passes to hash_apply(); it receives each
   symbol's name and value.  The body is not visible in this listing. */
982 destroy (string, value)
/* Callback that the test driver passes to hash_apply(): prints one symbol
   as name-value, each part truncated to 20 characters.  (Parameter
   declarations and braces are not visible in this listing.) */
992 applicatee (string, value)
996 printf ("%.20s-%.20s\n", string, value);
/* Ask the user which table to work on; updates the globals `number' and
   `h'.  (Braces, the input call and some tests are not visible in this
   listing.) */
999 whattable () /* determine number: what hash table to use */
1000 /* also determine h: points to hash_control */
1005 printf (" what hash table (%d:%d) ? ", 0, TABLES - 1);
/* NOTE(review): the sscanf() result is not checked on this line; if no
   number is parsed, `number' keeps its previous value -- confirm. */
1007 sscanf (answer, "%d", &number);
/* Accept only indices inside hashtable[]. */
1008 if (number >= 0 && number < TABLES)
1010 h = hashtable[number];
/* Presumably printed when the selected slot holds no table yet. */
1013 printf ("warning: current hash-table-#%d. has no hash-control\n", number);
1019 printf ("invalid hash table number: %d\n", number);
1026 #endif /* #ifdef TEST */