2 * This file is part of ltrace.
3 * Copyright (C) 2012, 2013, 2014 Petr Machata, Red Hat Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation; either version 2 of the
8 * License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 unsigned char taken : 1;
28 unsigned char erased : 1;
31 static struct status_bits *
32 bitp(struct dict *dict, size_t n)
34 return VECT_ELEMENT(&dict->status, struct status_bits, n);
38 dict_init(struct dict *dict,
39 size_t key_size, size_t value_size,
40 size_t (*hash1)(const void *),
41 int (*eq)(const void *, const void *),
42 size_t (*hash2)(size_t))
44 assert(hash1 != NULL);
47 vect_init(&dict->keys, key_size);
48 vect_init(&dict->values, value_size);
49 VECT_INIT(&dict->status, struct status_bits);
59 int (*clone_key)(void *tgt, const void *src, void *data);
60 int (*clone_value)(void *tgt, const void *src, void *data);
61 void (*dtor_key)(void *tgt, void *data);
62 void (*dtor_value)(void *tgt, void *data);
66 static enum callback_status
67 clone_cb(void *key, void *value, void *data)
69 struct clone_data *clone_data = data;
71 char nkey[clone_data->target->keys.elt_size];
72 if (clone_data->clone_key == NULL)
73 memmove(nkey, key, sizeof(nkey));
74 else if (clone_data->clone_key(&nkey, key, clone_data->data) < 0)
77 char nvalue[clone_data->target->values.elt_size];
78 if (clone_data->clone_value == NULL) {
79 memmove(nvalue, value, sizeof(nvalue));
80 } else if (clone_data->clone_value(&nvalue, value,
81 clone_data->data) < 0) {
83 if (clone_data->clone_key != NULL)
84 clone_data->dtor_key(&nkey, clone_data->data);
88 if (dict_insert(clone_data->target, nkey, nvalue) < 0) {
89 if (clone_data->clone_value != NULL)
90 clone_data->dtor_value(&nvalue, clone_data->data);
98 dict_clone(struct dict *target, const struct dict *source,
99 int (*clone_key)(void *tgt, const void *src, void *data),
100 void (*dtor_key)(void *tgt, void *data),
101 int (*clone_value)(void *tgt, const void *src, void *data),
102 void (*dtor_value)(void *tgt, void *data),
105 assert((clone_key != NULL) == (dtor_key != NULL));
106 assert((clone_value != NULL) == (dtor_value != NULL));
108 dict_init(target, source->keys.elt_size, source->values.elt_size,
109 source->hash1, source->eq, source->hash2);
110 struct clone_data clone_data = {
111 target, clone_key, clone_value, dtor_key, dtor_value, data
113 if (dict_each((struct dict *)source, NULL,
114 clone_cb, &clone_data) != NULL) {
115 dict_destroy(target, dtor_key, dtor_value, data);
122 dict_size(const struct dict *dict)
128 dict_empty(const struct dict *dict)
130 return dict->size == 0;
133 struct destroy_data {
134 void (*dtor_key)(void *tgt, void *data);
135 void (*dtor_value)(void *tgt, void *data);
139 static enum callback_status
140 destroy_cb(void *key, void *value, void *data)
142 struct destroy_data *destroy_data = data;
143 if (destroy_data->dtor_key)
144 destroy_data->dtor_key(key, destroy_data->data);
145 if (destroy_data->dtor_value)
146 destroy_data->dtor_value(value, destroy_data->data);
151 dict_destroy(struct dict *dict,
152 void (*dtor_key)(void *tgt, void *data),
153 void (*dtor_value)(void *tgt, void *data),
156 /* Some slots are unused (the corresponding keys and values
157 * are uninitialized), so we can't call dtors for them.
158 * Iterate DICT instead. */
159 if (dtor_key != NULL || dtor_value != NULL) {
160 struct destroy_data destroy_data = {
161 dtor_key, dtor_value, data
163 dict_each(dict, NULL, destroy_cb, &destroy_data);
166 vect_destroy(&dict->keys, NULL, NULL);
167 vect_destroy(&dict->values, NULL, NULL);
168 vect_destroy(&dict->status, NULL, NULL);
172 default_secondary_hash(size_t pos)
178 small_secondary_hash(size_t pos)
186 return vect_size(&dict->keys);
189 static inline size_t (*
190 hash2(struct dict *dict))(size_t)
192 if (dict->hash2 != NULL)
194 else if (n(dict) < 100)
195 return small_secondary_hash;
197 return default_secondary_hash;
201 getkey(struct dict *dict, size_t pos)
203 return ((unsigned char *)dict->keys.data)
204 + dict->keys.elt_size * pos;
208 getvalue(struct dict *dict, size_t pos)
210 return ((unsigned char *)dict->values.data)
211 + dict->values.elt_size * pos;
215 find_slot(struct dict *dict, const void *key,
216 int *foundp, int *should_rehash, size_t *pi)
219 size_t pos = dict->hash1(key) % n(dict);
221 size_t d = hash2(dict)(pos);
225 /* We skip over any taken or erased slots. But we remember
226 * the first erased that we find, and if we don't find the key
227 * later, we return that position. */
228 for (; bitp(dict, pos)->taken || bitp(dict, pos)->erased;
229 pos = (pos + d) % n(dict)) {
230 if (pos0 == (size_t)-1 && bitp(dict, pos)->erased)
233 /* If there is a loop, but we've seen an erased
234 * element, take that one. Otherwise give up. */
235 if (++i > dict->size) {
236 if (pos0 != (size_t)-1)
241 if (bitp(dict, pos)->taken
242 && dict->eq(getkey(dict, pos), key)) {
248 if (!*foundp && pos0 != (size_t)-1)
251 /* If the hash table degraded into a linked list, request a
253 if (should_rehash != NULL)
254 *should_rehash = i > 10 && i > n(dict) / 10;
261 static enum callback_status
262 rehash_move(void *key, void *value, void *data)
264 if (dict_insert(data, key, value) < 0)
271 rehash(struct dict *dict, size_t nn)
273 assert(nn != n(dict));
277 dict_init(&tmp, dict->keys.elt_size, dict->values.elt_size,
278 dict->hash1, dict->eq, dict->hash2);
280 /* To honor all invariants (so that we can safely call
281 * dict_destroy), we first make a request to _reserve_ enough
282 * room in all vectors. This has no observable effect on
283 * contents of vectors. */
284 if (vect_reserve(&tmp.keys, nn) < 0
285 || vect_reserve(&tmp.values, nn) < 0
286 || vect_reserve(&tmp.status, nn) < 0)
289 /* Now that we know that there is enough size in vectors, we
290 * simply bump the size. */
292 tmp.values.size = nn;
293 size_t old_size = tmp.status.size;
294 tmp.status.size = nn;
295 memset(VECT_ELEMENT(&tmp.status, struct status_bits, old_size),
296 0, (tmp.status.size - old_size) * tmp.status.elt_size);
298 /* At this point, TMP is once more an empty dictionary with NN
299 * slots. Now move stuff from DICT to TMP. */
300 if (dict_each(dict, NULL, rehash_move, &tmp) != NULL)
303 /* And now swap contents of DICT and TMP, and we are done. */
305 struct dict tmp2 = *dict;
313 /* We only want to release the containers, not the actual data
314 * that they hold, so it's fine if we don't pass any dtor. */
315 dict_destroy(&tmp, NULL, NULL, NULL);
320 static const size_t primes[] = {
321 13, 31, 61, 127, 251, 509, 1021, 2039, 4093,
322 8191, 16381, 32749, 65521, 130981, 0
326 larger_size(size_t current)
331 if (current < primes[sizeof(primes)/sizeof(*primes) - 2]) {
333 for (i = 0; primes[i] != 0; ++i)
334 if (primes[i] > current)
339 /* We ran out of primes, so invent a new one. The following
340 * gives primes until about 17M elements (and then some more
342 return 2 * current + 6585;
346 smaller_size(size_t current)
348 if (current <= primes[0])
351 if (current <= primes[sizeof(primes)/sizeof(*primes) - 2]) {
354 for (i = 0; primes[i] != 0; ++i) {
355 if (primes[i] >= current)
362 return (current - 6585) / 2;
366 dict_insert(struct dict *dict, void *key, void *value)
368 if (n(dict) == 0 || dict->size > 0.7 * n(dict))
370 if (rehash(dict, larger_size(n(dict))) < 0)
375 size_t slot_n = find_slot(dict, key, &found, &should_rehash, NULL);
376 if (slot_n == (size_t)-1)
380 assert(!bitp(dict, slot_n)->taken);
382 /* If rehash was requested, do that, and retry. But just live
383 * with it for apparently sparse tables. No resizing can fix
385 if (should_rehash && dict->size > 0.3 * n(dict))
388 memmove(getkey(dict, slot_n), key, dict->keys.elt_size);
389 memmove(getvalue(dict, slot_n), value, dict->values.elt_size);
391 bitp(dict, slot_n)->taken = 1;
392 bitp(dict, slot_n)->erased = 0;
399 dict_find(struct dict *dict, const void *key)
406 size_t slot_n = find_slot(dict, key, &found, NULL, NULL);
408 return getvalue(dict, slot_n);
414 dict_erase(struct dict *dict, const void *key,
415 void (*dtor_key)(void *tgt, void *data),
416 void (*dtor_value)(void *tgt, void *data),
421 size_t slot_n = find_slot(dict, key, &found, NULL, &i);
425 if (dtor_key != NULL)
426 dtor_key(getkey(dict, slot_n), data);
427 if (dtor_value != NULL)
428 dtor_value(getvalue(dict, slot_n), data);
430 bitp(dict, slot_n)->taken = 0;
431 bitp(dict, slot_n)->erased = 1;
434 if (dict->size < 0.3 * n(dict)) {
435 size_t smaller = smaller_size(n(dict));
436 if (smaller != n(dict))
437 /* Don't mind if it fails when shrinking. */
438 rehash(dict, smaller);
445 dict_each(struct dict *dict, void *start_after,
446 enum callback_status (*cb)(void *, void *, void *), void *data)
449 if (start_after != NULL)
450 i = ((start_after - dict->keys.data) / dict->keys.elt_size) + 1;
454 for (; i < dict->keys.size; ++i)
455 if (bitp(dict, i)->taken && !bitp(dict, i)->erased) {
456 void *key = getkey(dict, i);
457 if (cb(key, getvalue(dict, i), data) != CBS_CONT)
465 dict_hash_int(const int *key)
467 return (size_t)(*key * 2654435761U);
471 dict_eq_int(const int *key1, const int *key2)
473 return *key1 == *key2;
477 dict_hash_uint64(const uint64_t *key)
479 int const a = (int) *key;
480 int const b = (int) (*key >> 32);
481 return dict_hash_int (&a) ^ dict_hash_int (&b);
485 dict_eq_uint64(const uint64_t *key1, const uint64_t *key2)
487 return *key1 == *key2;
491 dict_hash_string(const char **key)
494 const char *str = *key;
501 dict_eq_string(const char **key1, const char **key2)
503 return strcmp(*key1, *key2) == 0;
507 dict_dtor_string(const char **key, void *data)
513 dict_clone_string(const char **tgt, const char **src, void *data)
516 return *tgt != NULL ? 0 : -1;
520 static enum callback_status
521 dump(int *key, int *value, void *data)
524 assert(seen[*key] == 0);
526 assert(*value == *key * 2 + 1);
531 dict_hash_int_silly(const int *key)
537 verify(struct dict *di, size_t len, char *seen)
541 for (it = NULL; (it = DICT_EACH(di, int, int, it, dump, seen)) != NULL;)
544 memset(seen, 0, len);
547 static enum callback_status
548 fill_keys(int *key, int *value, void *data)
551 array[++array[0]] = *key;
559 DICT_INIT(&di, int, int, dict_hash_int, dict_eq_int, NULL);
561 char seen[100000] = {};
563 for (i = 0; i < sizeof(seen); ++i) {
565 int value = 2 * i + 1;
566 DICT_INSERT(&di, &key, &value);
567 int *valp = DICT_FIND_REF(&di, &key, int);
568 assert(valp != NULL);
569 assert(*valp == value);
570 assert(dict_size(&di) == i + 1);
573 verify(&di, sizeof(seen), seen);
576 DICT_CLONE(&d2, &di, int, int, NULL, NULL, NULL, NULL, NULL);
577 DICT_DESTROY(&di, int, int, NULL, NULL, NULL);
578 verify(&d2, sizeof(seen), seen);
580 /* Now we try to gradually erase all elements. We can't erase
581 * inside a DICT_EACH call, so copy first keys to a separate
582 * memory area first. */
583 int keys[d2.size + 1];
586 DICT_EACH(&d2, int, int, NULL, fill_keys, keys);
587 for (i = 0; i < (size_t)keys[0]; ++i) {
588 assert(DICT_ERASE(&d2, &keys[i + 1], int,
589 NULL, NULL, NULL) == 0);
592 assert(ct == sizeof(seen));
593 DICT_DESTROY(&d2, int, int, NULL, NULL, NULL);
601 /* To test erase, we need a relatively bad hash function, so
602 * that there are some overlapping chains in the table. */
604 DICT_INIT(&d2, int, int, dict_hash_int_silly, dict_eq_int, NULL);
605 const int limit = 500;
606 for (i = 0; i < limit; ++i) {
608 int value = 2 * key + 1;
609 DICT_INSERT(&d2, &key, &value);
612 /* Now we try to delete each of the keys, and verify that none
613 * of the chains was broken. */
614 for (i = 0; i < limit; ++i) {
616 DICT_CLONE(©, &d2, int, int, NULL, NULL, NULL, NULL, NULL);
618 DICT_ERASE(©, &key, int, NULL, NULL, NULL);
619 assert(dict_size(©) == dict_size(&d2) - 1);
622 for (j = 0; j < limit; ++j) {
624 int *valp = DICT_FIND_REF(©, &key, int);
626 assert(valp != NULL);
627 assert(*valp == 2 * key + 1);
629 assert(valp == NULL);
633 DICT_DESTROY(©, int, int, NULL, NULL, NULL);
635 DICT_DESTROY(&d2, int, int, NULL, NULL, NULL);
638 int main(int argc, char *argv[])