1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 * ====================================================================
38 * hash.c -- Hash table module.
40 * **********************************************
41 * CMU ARPA Speech Project
43 * Copyright (c) 1999 Carnegie Mellon University.
44 * ALL RIGHTS RESERVED.
45 * **********************************************
49 * Revision 1.5 2005/06/22 03:04:01 arthchan2003
50 * 1, Implemented hash_delete and hash_display, 2, Fixed doxygen documentation, 3, Added keyword.
52 * Revision 1.9 2005/05/25 06:17:53 archan
53 * Delete the test code in cmd_ln.c and fixed platform specific code of hash.c
55 * Revision 1.8 2005/05/24 01:10:54 archan
56 * Fix a bug when the value only appear in the hash but there is no chain. Also make sure that prev was initialized to NULL. All success cases were tested, but not tested with the deletion is tested.
58 * Revision 1.6 2005/05/24 00:00:45 archan
59 * Added basic functionalities to hash_t: 1, display and 2, delete a key from a hash. \n
61 * Revision 1.5 2005/05/11 07:01:38 archan
62 * Added comments on the usage of the current implementation of hash tables.
64 * Revision 1.4 2005/05/03 04:09:11 archan
65 * Implemented the heart of word copy search. For every ci-phone, every word end, a tree will be allocated to preserve its pathscore. This is different from 3.5 or below, only the best score for a particular ci-phone, regardless of the word-ends will be preserved at every frame. The graph propagation will not collect unused word tree at this point. srch_WST_propagate_wd_lv2 is also as the most stupid in the century. But well, after all, everything needs a start. I will then really get the results from the search and see how it looks.
67 * Revision 1.3 2005/03/30 01:22:48 archan
68 * Fixed mistakes in last updates. Add
71 * 05-May-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
72 * Removed hash_key2hash(). Added hash_enter_bkey() and hash_lookup_bkey(),
73 * and len attribute to hash_entry_t.
75 * 30-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
76 * Added hash_key2hash().
78 * 18-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
79 * Included case sensitive/insensitive option. Removed local, static
80 * maintenance of all hash tables.
82 * 31-Jul-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
93 #pragma warning (disable: 4018)
96 #include "sphinxbase/hash_table.h"
97 #include "sphinxbase/err.h"
98 #include "sphinxbase/ckd_alloc.h"
99 #include "sphinxbase/case.h"
104 prime_sieve(int32 max)
109 notprime = (char *) ckd_calloc(max + 1, 1);
113 for (pp = p + p; pp <= max; pp += p)
115 for (++p; (p <= max) && notprime[p]; p++);
124 * HACK!! Initial hash table size is restricted by this set of primes. (Of course,
125 * collision resolution by chaining will accommodate more entries indefinitely, but
126 * efficiency will drop.)
128 const int32 prime[] = {
129 101, 211, 307, 401, 503, 601, 701, 809, 907,
130 1009, 1201, 1601, 2003, 2411, 3001, 4001, 5003, 6007, 7001, 8009,
132 10007, 12007, 16001, 20011, 24001, 30011, 40009, 50021, 60013,
134 100003, 120011, 160001, 200003, 240007, 300007, 400009, 500009,
135 600011, 700001, 800011, 900001,
141 * This function returns a very large prime.
144 prime_size(int32 size)
148 for (i = 0; (prime[i] > 0) && (prime[i] < size); i++);
150 E_WARN("Very large hash table requested (%d entries)\n", size);
158 hash_table_new(int32 size, int32 casearg)
162 h = (hash_table_t *) ckd_calloc(1, sizeof(hash_table_t));
163 h->size = prime_size(size + (size >> 1));
164 h->nocase = (casearg == HASH_CASE_NO);
165 h->table = (hash_entry_t *) ckd_calloc(h->size, sizeof(hash_entry_t));
166 /* The above calloc clears h->table[*].key and .next to NULL, i.e. an empty table */
173 * Compute hash value for given key string.
174 * Somewhat tuned for English text word strings.
177 key2hash(hash_table_t * h, const char *key)
180 register const char *cp;
183 [1236322] libutil\str2words special character bgu
184 HACK Apply suggested hack of fixing the hash table such that
185 it can work with extended ascii code . This is a hack because
186 the best way to solve it is to make sure all character
187 representation is unsigned character in the first place. (or
191 /*register char c; */
192 register unsigned char c;
194 register uint32 hash;
200 for (cp = key; *cp; cp++) {
210 for (cp = key; *cp; cp++) {
218 return (hash % h->size);
223 makekey(uint8 * data, int32 len, char *key)
228 key = (char *) ckd_calloc(len * 2 + 1, sizeof(char));
230 for (i = 0, j = 0; i < len; i++, j += 2) {
231 key[j] = 'A' + (data[i] & 0x000f);
232 key[j + 1] = 'J' + ((data[i] >> 4) & 0x000f);
241 keycmp_nocase(hash_entry_t * entry, const char *key)
248 for (i = 0; i < entry->len; i++) {
262 keycmp_case(hash_entry_t * entry, const char *key)
269 for (i = 0; i < entry->len; i++) {
281 * Lookup entry with hash-value hash in table h for given key
282 * Return value: hash_entry_t for key
284 static hash_entry_t *
285 lookup(hash_table_t * h, uint32 hash, const char *key, size_t len)
289 entry = &(h->table[hash]);
290 if (entry->key == NULL)
294 while (entry && ((entry->len != len)
295 || (keycmp_nocase(entry, key) != 0)))
299 while (entry && ((entry->len != len)
300 || (keycmp_case(entry, key) != 0)))
309 hash_table_lookup(hash_table_t * h, const char *key, void ** val)
315 hash = key2hash(h, key);
318 entry = lookup(h, hash, key, len);
329 hash_table_lookup_int32(hash_table_t * h, const char *key, int32 *val)
334 rv = hash_table_lookup(h, key, &vval);
338 *val = (int32)(long)vval;
344 hash_table_lookup_bkey(hash_table_t * h, const char *key, size_t len, void ** val)
350 str = makekey((uint8 *) key, len, NULL);
351 hash = key2hash(h, str);
354 entry = lookup(h, hash, key, len);
365 hash_table_lookup_bkey_int32(hash_table_t * h, const char *key, size_t len, int32 *val)
370 rv = hash_table_lookup_bkey(h, key, len, &vval);
374 *val = (int32)(long)vval;
380 enter(hash_table_t * h, uint32 hash, const char *key, size_t len, void *val, int32 replace)
382 hash_entry_t *cur, *new;
384 if ((cur = lookup(h, hash, key, len)) != NULL) {
386 /* Key already exists. */
389 /* Replace the pointer if replacement is requested,
390 * because this might be a different instance of the same
391 * string (this verges on magic, sorry) */
398 cur = &(h->table[hash]);
399 if (cur->key == NULL) {
400 /* Empty slot at hashed location; add this entry */
405 /* Added by ARCHAN at 20050515. This allows deletion could work. */
410 /* Key collision; create new entry and link to hashed location */
411 new = (hash_entry_t *) ckd_calloc(1, sizeof(hash_entry_t));
415 new->next = cur->next;
423 /* 20050523 Added by ARCHAN to delete a key from a hash table */
425 delete(hash_table_t * h, uint32 hash, const char *key, size_t len)
427 hash_entry_t *entry, *prev;
431 entry = &(h->table[hash]);
432 if (entry->key == NULL)
436 while (entry && ((entry->len != len)
437 || (keycmp_nocase(entry, key) != 0))) {
443 while (entry && ((entry->len != len)
444 || (keycmp_case(entry, key) != 0))) {
453 /* At this point, entry will be the one required to be deleted, prev
454 will contain the previous entry
459 /* That is to say the entry in the hash table (not the chain) matched the key. */
460 /* We will then copy the things from the next entry to the hash table */
462 if (entry->next) { /* There is a next entry, great, copy it. */
464 prev->key = entry->key;
465 prev->len = entry->len;
466 prev->val = entry->val;
467 prev->next = entry->next;
470 else { /* There is not a next entry, just set the key to null */
477 else { /* This case is simple */
478 prev->next = entry->next;
482 /* Do wiring and free the entry */
490 hash_table_empty(hash_table_t *h)
492 hash_entry_t *e, *e2;
495 for (i = 0; i < h->size; i++) {
496 /* Free collision lists. */
497 for (e = h->table[i].next; e; e = e2) {
499 ckd_free((void *) e);
501 memset(&h->table[i], 0, sizeof(h->table[i]));
508 hash_table_enter(hash_table_t * h, const char *key, void *val)
513 hash = key2hash(h, key);
515 return (enter(h, hash, key, len, val, 0));
519 hash_table_replace(hash_table_t * h, const char *key, void *val)
524 hash = key2hash(h, key);
526 return (enter(h, hash, key, len, val, 1));
530 hash_table_delete(hash_table_t * h, const char *key)
535 hash = key2hash(h, key);
538 return (delete(h, hash, key, len));
542 hash_table_enter_bkey(hash_table_t * h, const char *key, size_t len, void *val)
547 str = makekey((uint8 *) key, len, NULL);
548 hash = key2hash(h, str);
551 return (enter(h, hash, key, len, val, 0));
555 hash_table_replace_bkey(hash_table_t * h, const char *key, size_t len, void *val)
560 str = makekey((uint8 *) key, len, NULL);
561 hash = key2hash(h, str);
564 return (enter(h, hash, key, len, val, 1));
568 hash_table_delete_bkey(hash_table_t * h, const char *key, size_t len)
573 str = makekey((uint8 *) key, len, NULL);
574 hash = key2hash(h, str);
576 return (delete(h, hash, key, len));
580 hash_table_display(hash_table_t * h, int32 showdisplay)
586 E_INFOCONT("Hash with chaining representation of the hash table\n");
588 for (i = 0; i < h->size; i++) {
590 if (e->key != NULL) {
593 E_INFOCONT("%s", e->key);
595 E_INFOCONT("%p", e->key);
597 E_INFOCONT("|len:%d|val=%ld|->", e->len, (long)e->val);
598 if (e->next == NULL) {
599 E_INFOCONT("NULL\n");
603 for (e = e->next; e; e = e->next) {
606 E_INFOCONT("%s", e->key);
608 E_INFOCONT("|len:%d|val=%ld|->", e->len, (long)e->val);
609 if (e->next == NULL) {
610 E_INFOCONT("NULL\n");
617 E_INFOCONT("The total number of keys =%d\n", j);
622 hash_table_tolist(hash_table_t * h, int32 * count)
631 for (i = 0; i < h->size; i++) {
634 if (e->key != NULL) {
635 g = glist_add_ptr(g, (void *) e);
638 for (e = e->next; e; e = e->next) {
639 g = glist_add_ptr(g, (void *) e);
652 hash_table_iter(hash_table_t *h)
656 itor = ckd_calloc(1, sizeof(*itor));
658 return hash_table_iter_next(itor);
662 hash_table_iter_next(hash_iter_t *itor)
664 /* If there is an entry, walk down its list. */
666 itor->ent = itor->ent->next;
667 /* If we got to the end of the chain, or we had no entry, scan
668 * forward in the table to find the next non-empty bucket. */
669 if (itor->ent == NULL) {
670 while (itor->idx < itor->ht->size
671 && itor->ht->table[itor->idx].key == NULL)
673 /* If we did not find one then delete the iterator and
675 if (itor->idx == itor->ht->size) {
676 hash_table_iter_free(itor);
679 /* Otherwise use this next entry. */
680 itor->ent = itor->ht->table + itor->idx;
681 /* Increase idx for the next time around. */
688 hash_table_iter_free(hash_iter_t *itor)
694 hash_table_free(hash_table_t * h)
696 hash_entry_t *e, *e2;
702 /* Free additional entries created for key collision cases */
703 for (i = 0; i < h->size; i++) {
704 for (e = h->table[i].next; e; e = e2) {
706 ckd_free((void *) e);
710 ckd_free((void *) h->table);
711 ckd_free((void *) h);