1 /******************************************************************************
4 * Copyright (c) 2010-2014 Seungyoung Kim.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 * this list of conditions and the following disclaimer in the documentation
14 * and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
30 * @file qhasharr.c Static(array) hash-table implementation.
32 * qhasharr implements a hash-table which maps keys to values and stores into
33 * fixed size static memory like shared-memory and memory-mapped file.
34 * The creator qhasharr() initializes static memory to makes small slots in it.
35 * The default slot size factors are defined in _Q_HASHARR_KEYSIZE and
36 * _Q_HASHARR_VALUESIZE. And they are applied at compile time.
38 * The value part of an element will be stored across several slots if it's size
39 * exceeds the slot size. But the key part of an element will be truncated if
40 * the size exceeds and it's length and more complex MD5 hash value will be
41 * stored with the key. So to look up a particular key, first we find an element
42 * which has same hash value. If the key was not truncated, we just do key
43 * comparison. But if the key was truncated because it's length exceeds, we do
44 * both md5 and key comparison(only stored size) to verify that the key is same.
45 * So please be aware of that, theoretically there is a possibility we pick
46 * wrong element in case a key exceeds the limit, has same length and MD5 hash
47 * with lookup key. But this possibility is extreamly low and almost never
48 * happen in practice. If you happpen to want to make sure everything,
49 * you set _Q_HASHARR_KEYSIZE big enough at compile time to make sure all keys
52 * qhasharr hash-table does not support thread-safe. So users should handle
53 * race conditions on application side by raising user lock before calling
54 * functions which modify the table data.
57 * [Data Structure Diagram]
59 * +--[Static Flat Memory Area]-----------------------------------------------+
60 * | +-[Header]---------+ +-[Slot 0]---+ +-[Slot 1]---+ +-[Slot N]---+ |
61 * | |Private table data| |KEY A|DATA A| |KEY B|DATA B| .... |KEY N|DATA N| |
62 * | +------------------+ +------------+ +------------+ +------------+ |
63 * +--------------------------------------------------------------------------+
65 * Below diagram shows how a big value is stored.
66 * +--[Static Flat Memory Area------------------------------------------------+
67 * | +--------+ +-[Slot 0]---+ +-[Slot 1]---+ +-[Slot 2]---+ +-[Slot 3]-----+ |
68 * | |TBL INFO| |KEY A|DATA A| |DATA A cont.| |KEY B|DATA B| |DATA A cont. | |
69 * | +--------+ +------------+ +------------+ +------------+ +--------------+ |
70 * | ^~~link~~^ ^~~~~~~~~~link~~~~~~~~~^ |
71 * +--------------------------------------------------------------------------+
75 * // initialize hash-table.
76 * char memory[1000 * 10];
77 * qhasharr_t *tbl = qhasharr(memory, sizeof(memory));
78 * if(tbl == NULL) return;
80 * // insert elements (key duplication does not allowed)
81 * tbl->putstr(tbl, "e1", "a");
82 * tbl->putstr(tbl, "e2", "b");
83 * tbl->putstr(tbl, "e3", "c");
86 * tbl->//DEBUG(tbl, stdout);
88 * char *e2 = tbl->getstr(tbl, "e2");
90 * printf("getstr('e2') : %s\n", e2);
94 * // Release reference object.
98 * An example for using hash table over shared memory.
102 * int maxslots = 1000;
103 * int memsize = qhasharr_calculate_memsize(maxslots);
105 * // create shared memory
106 * int shmid = qshm_init("/tmp/some_id_file", 'q', memsize, true);
107 * if(shmid < 0) return -1; // creation failed
108 * void *memory = qshm_get(shmid);
110 * // initialize hash-table
111 * qhasharr_t *tbl = qhasharr(memory, memsize);
112 * if(hasharr == NULL) return -1;
114 * (...your codes with your own locking mechanism...)
116 * // Release reference object
119 * // destroy shared memory
123 * int shmid = qshm_getid("/tmp/some_id_file", 'q');
125 * // get shared memory
126 * void *memory = qshm_get(shmid);
128 * // map existing memory into table
129 * qhasharr_t *tbl = qhasharr(memory, 0);
131 * (...your codes with your own locking mechanism...)
133 * // Release reference object
139 * Modified parts of this file by XS Embedded GmbH, 2014
146 #include <inttypes.h>
151 #include "qhasharr.h"
153 #ifndef _DOXYGEN_SKIP
155 static bool put(qhasharr_t *tbl, const char *key, const void *value,
158 static void *get(qhasharr_t *tbl, const char *key, size_t *size);
160 static bool getnext(qhasharr_t *tbl, qnobj_t *obj, int *idx);
162 static bool remove_(qhasharr_t *tbl, const char *key);
164 static int size(qhasharr_t *tbl, int *maxslots, int *usedslots);
166 static void free_(qhasharr_t *tbl);
169 static int _find_empty(qhasharr_t *tbl, int startidx);
170 static int _get_idx(qhasharr_t *tbl, const char *key, unsigned int hash);
171 static void *_get_data(qhasharr_t *tbl, int idx, size_t *size);
172 static bool _put_data(qhasharr_t *tbl, int idx, unsigned int hash,
173 const char *key, const void *value, size_t size,
175 static bool _copy_slot(qhasharr_t *tbl, int idx1, int idx2);
176 static bool _remove_slot(qhasharr_t *tbl, int idx);
177 static bool _remove_data(qhasharr_t *tbl, int idx);
182 * Get how much memory is needed for N slots.
184 * @param max a number of maximum internal slots
186 * @return memory size needed
189 * This can be used for calculating minimum memory size for N slots.
191 size_t qhasharr_calculate_memsize(int max) {
192 size_t memsize = sizeof(qhasharr_data_t)
193 + (sizeof(qhasharr_slot_t) * (max));
198 * Initialize static hash table
200 * @param memory a pointer of data memory.
201 * @param memsize a size of data memory, 0 for using existing data.
203 * @return qhasharr_t container pointer, otherwise returns NULL.
204 * @retval errno will be set in error condition.
205 * - EINVAL : Assigned memory is too small. It must bigger enough to allocate
209 * // initialize hash-table with 100 slots.
210 * // A single element can take several slots.
211 * char memory[112 * 100];
213 * // Initialize new table.
214 * qhasharr_t *tbl = qhasharr(memory, sizeof(memory));
216 * // Use existing table.
217 * qhasharr_t *tbl2 = qhasharr(memory, 0);
220 qhasharr_t *qhasharr(void *memory, size_t memsize) {
222 qhasharr_data_t *data = (qhasharr_data_t *) memory;
224 // Initialize data if memsize is set or use existing data.
227 int maxslots = (memsize - sizeof(qhasharr_data_t))
228 / sizeof(qhasharr_slot_t);
229 if (maxslots < 1 || memsize <= sizeof(qhasharr_t)) {
235 //memset((void *) data, 0, memsize); //TODO check if initialisation is needed
236 data->maxslots = maxslots;
241 // Set data address. Shared memory returns virtul address.
242 data->slots = (qhasharr_slot_t *) (memory + sizeof(qhasharr_data_t));
244 // Create the table object.
245 qhasharr_t *tbl = (qhasharr_t *) malloc(sizeof(qhasharr_t));
250 memset((void *) tbl, 0, sizeof(qhasharr_t));
257 tbl->getnext = getnext;
259 tbl->remove = remove_;
271 * qhasharr->put(): Put an object into this table.
273 * @param tbl qhasharr_t container pointer.
274 * @param key key string
275 * @param value value object data
276 * @param size size of value
278 * @return true if successful, otherwise returns false
279 * @retval errno will be set in error condition.
280 * - ENOBUFS : Table doesn't have enough space to store the object.
281 * - EINVAL : Invalid argument.
282 * - EFAULT : Unexpected error. Data structure is not constant.
284 static bool put(qhasharr_t *tbl, const char *key, const void *value,
286 if (tbl == NULL || key == NULL || value == NULL) {
291 qhasharr_data_t *data = tbl->data;
294 if (data->usedslots >= data->maxslots || ((data->usedslots + (size / 32)) >= data->maxslots)) {
295 //DEBUG("hasharr: put %s - FULL", key);
301 unsigned int hash = qhashmurmur3_32(key, strlen(key)) % data->maxslots;
303 // check, is slot empty
304 if (data->slots[hash].count == 0) { // empty slot
306 if (_put_data(tbl, hash, hash, key, value, size, 1) == false) {
307 //DEBUG("hasharr: FAILED put(new) %s", key);
309 } //DEBUG("hasharr: put(new) %s (idx=%d,hash=%u,tot=%d)",
310 // key, hash, hash, data->usedslots);
311 } else if (data->slots[hash].count > 0) { // same key or hash collision
313 int idx = _get_idx(tbl, key, hash);
314 if (idx >= 0) { // same key
317 //printf("overwriting existent key 2%s\n", key);
318 return put(tbl, key, value, size);
319 } else { // no same key, just hash collision
321 int idx = _find_empty(tbl, hash);
327 // put data. -1 is used for collision resolution (idx != hash);
328 if (_put_data(tbl, idx, hash, key, value, size, -1) == false) {
329 //DEBUG("hasharr: FAILED put(col) %s", key);
333 // increase counter from leading slot
334 data->slots[hash].count++;
336 //DEBUG("hasharr: put(col) %s (idx=%d,hash=%u,tot=%d)",
337 // key, idx, hash, data->usedslots);
340 // in case of -1 or -2, move it. -1 used for collision resolution,
341 // -2 used for oversized value data.
344 int idx = _find_empty(tbl, hash + 1);
350 // move dup slot to empty
351 _copy_slot(tbl, idx, hash);
352 _remove_slot(tbl, hash);
354 // in case of -2, adjust link of mother
355 if (data->slots[idx].count == -2) {
356 data->slots[data->slots[idx].hash].link = idx;
357 if (data->slots[idx].link != -1) {
358 data->slots[data->slots[idx].link].hash = idx;
363 if (_put_data(tbl, hash, hash, key, value, size, 1) == false) {
364 //DEBUG("hasharr: FAILED put(swp) %s", key);
368 //DEBUG("hasharr: put(swp) %s (idx=%u,hash=%u,tot=%d)",
369 // key, hash, hash, data->usedslots);
378 * qhasharr->get(): Get an object from this table
380 * @param tbl qhasharr_t container pointer.
381 * @param key key string
382 * @param size if not NULL, oject size will be stored
384 * @return malloced object pointer if successful, otherwise(not found)
386 * @retval errno will be set in error condition.
387 * - ENOENT : No such key found.
388 * - EINVAL : Invalid argument.
389 * - ENOMEM : Memory allocation failed.
392 * returned object must be freed after done using.
394 static void *get(qhasharr_t *tbl, const char *key, size_t *size) {
395 if (tbl == NULL || key == NULL) {
400 qhasharr_data_t *data = tbl->data;
403 unsigned int hash = qhashmurmur3_32(key, strlen(key)) % data->maxslots;
405 int idx = _get_idx(tbl, key, hash);
411 return _get_data(tbl, idx, size);
415 * qhasharr->getnext(): Get next element.
417 * @param tbl qhasharr_t container pointer.
418 * @param idx index pointer
420 * @return key name string if successful, otherwise(end of table) returns NULL
421 * @retval errno will be set in error condition.
422 * - ENOENT : No next element.
423 * - EINVAL : Invald argument.
424 * - ENOMEM : Memory allocation failed.
429 * while(tbl->getnext(tbl, &obj, &idx) == true) {
430 * printf("NAME=%s, DATA=%s, SIZE=%zu\n",
431 * obj.name, (char*)obj.data, obj.size);
438 * Please be aware a key name will be returned with truncated length
439 * because key name is truncated when it put into the table if it's length is
440 * longer than _Q_HASHARR_KEYSIZE.
442 static bool getnext(qhasharr_t *tbl, qnobj_t *obj, int *idx) {
443 if (tbl == NULL || obj == NULL || idx == NULL) {
448 qhasharr_data_t *data = tbl->data;
450 for (; *idx < data->maxslots; (*idx)++) {
451 if (data->slots[*idx].count == 0 || data->slots[*idx].count == -2) {
455 size_t keylen = data->slots[*idx].data.pair.keylen;
456 if (keylen > _Q_HASHARR_KEYSIZE)
457 keylen = _Q_HASHARR_KEYSIZE;
459 obj->name = (char *) malloc(keylen + 1);
460 if (obj->name == NULL) {
464 memcpy(obj->name, data->slots[*idx].data.pair.key, keylen);
465 obj->name[keylen] = '\0';
467 obj->data = _get_data(tbl, *idx, &obj->size);
468 if (obj->data == NULL) {
483 * qhasharr->remove(): Remove an object from this table.
485 * @param tbl qhasharr_t container pointer.
486 * @param key key string
488 * @return true if successful, otherwise(not found) returns false
489 * @retval errno will be set in error condition.
490 * - ENOENT : No such key found.
491 * - EINVAL : Invald argument.
492 * - EFAULT : Unexpected error. Data structure is not constant.
494 static bool remove_(qhasharr_t *tbl, const char *key) {
495 if (tbl == NULL || key == NULL) {
500 qhasharr_data_t *data = tbl->data;
503 unsigned int hash = qhashmurmur3_32(key, strlen(key)) % data->maxslots;
505 int idx = _get_idx(tbl, key, hash);
507 //DEBUG("not found %s", key);
512 if (data->slots[idx].count == 1) {
514 _remove_data(tbl, idx);
515 //DEBUG("hasharr: rem %s (idx=%d,tot=%d)", key, idx, data->usedslots);
516 } else if (data->slots[idx].count > 1) { // leading slot and has dup
519 for (idx2 = idx + 1;; idx2++)
521 if (idx2 >= data->maxslots)
524 //DEBUG("hasharr: [BUG] failed to remove dup key %s.", key);
528 if (data->slots[idx2].count == -1 && data->slots[idx2].hash == hash)
534 // move to leading slot
535 int backupcount = data->slots[idx].count;
536 _remove_data(tbl, idx); // remove leading data
537 _copy_slot(tbl, idx, idx2); // copy slot
538 _remove_slot(tbl, idx2); // remove moved slot
540 data->slots[idx].count = backupcount - 1; // adjust collision counter
541 if (data->slots[idx].link != -1) {
542 data->slots[data->slots[idx].link].hash = idx;
545 //DEBUG("hasharr: rem(lead) %s (idx=%d,tot=%d)",
546 // key, idx, data->usedslots);
547 } else { // in case of -1. used for collision resolution
548 // decrease counter from leading slot
549 if (data->slots[data->slots[idx].hash].count <= 1) {
550 //DEBUG("hasharr: [BUG] failed to remove %s. "
551 // "counter of leading slot mismatch.", key);
555 data->slots[data->slots[idx].hash].count--;
558 _remove_data(tbl, idx);
559 //DEBUG("hasharr: rem(dup) %s (idx=%d,tot=%d)", key, idx, data->usedslots);
566 * qhasharr->size(): Returns the number of objects in this table.
568 * @param tbl qhasharr_t container pointer.
570 * @return a number of elements stored.
572 static int size(qhasharr_t *tbl, int *maxslots, int *usedslots) {
578 qhasharr_data_t *data = tbl->data;
580 if (maxslots != NULL)
581 *maxslots = data->maxslots;
582 if (usedslots != NULL)
583 *usedslots = data->usedslots;
590 * qhasharr->free(): De-allocate table reference object.
592 * @param tbl qhashtbl_t container pointer.
595 * This does not de-allocate memory but only function reference object.
596 * Data memory such as shared memory must be de-allocated separately.
598 void free_(qhasharr_t *tbl) {
602 #ifndef _DOXYGEN_SKIP
604 // find empty slot : return empty slow number, otherwise returns -1.
605 static int _find_empty(qhasharr_t *tbl, int startidx) {
606 qhasharr_data_t *data = tbl->data;
608 if (startidx >= data->maxslots)
613 if (data->slots[idx].count == 0)
617 if (idx >= data->maxslots)
626 static int _get_idx(qhasharr_t *tbl, const char *key, unsigned int hash) {
627 qhasharr_data_t *data = tbl->data;
629 if (data->slots[hash].count > 0) {
631 for (count = 0, idx = hash; count < data->slots[hash].count;) {
632 if (data->slots[idx].hash == hash
633 && (data->slots[idx].count > 0
634 || data->slots[idx].count == -1)) {
639 size_t keylen = strlen(key);
640 // first check key length
641 if (keylen == data->slots[idx].data.pair.keylen) {
642 if (keylen <= _Q_HASHARR_KEYSIZE) {
643 // original key is stored
644 if (!memcmp(key, data->slots[idx].data.pair.key, keylen))
649 // key is truncated, compare MD5 also.
650 unsigned char keymd5[16];
651 qhashmd5(key, keylen, keymd5);
652 if (!memcmp(key, data->slots[idx].data.pair.key, _Q_HASHARR_KEYSIZE) && !memcmp(keymd5, data->slots[idx].data.pair.keymd5, 16))
662 if (idx >= data->maxslots)
676 static void *_get_data(qhasharr_t *tbl, int idx, size_t *size) {
682 qhasharr_data_t *data = tbl->data;
686 for (newidx = idx, valsize = 0;; newidx = data->slots[newidx].link)
688 valsize += data->slots[newidx].size;
689 if (data->slots[newidx].link == -1)
694 value = malloc(valsize);
700 for (newidx = idx, vp = value;; newidx = data->slots[newidx].link) {
701 if (data->slots[newidx].count == -2) {
702 // extended data block
703 memcpy(vp, (void *) data->slots[newidx].data.ext.value,
704 data->slots[newidx].size);
706 // key/value pair data block
707 memcpy(vp, (void *) data->slots[newidx].data.pair.value,
708 data->slots[newidx].size);
711 vp += data->slots[newidx].size;
712 if (data->slots[newidx].link == -1)
723 static bool _put_data(qhasharr_t *tbl, int idx, unsigned int hash,
724 const char *key, const void *value, size_t size,
726 qhasharr_data_t *data = tbl->data;
729 if (data->slots[idx].count != 0) {
730 //DEBUG("hasharr: BUG found.");
735 size_t keylen = strlen(key);
736 unsigned char keymd5[16];
737 qhashmd5(key, keylen, keymd5);
740 data->slots[idx].count = count;
741 data->slots[idx].hash = hash;
742 strncpy(data->slots[idx].data.pair.key, key, _Q_HASHARR_KEYSIZE);
743 memcpy((char *) data->slots[idx].data.pair.keymd5, (char *) keymd5, 16);
744 data->slots[idx].data.pair.keylen = keylen;
745 data->slots[idx].link = -1;
750 for (newidx = idx, savesize = 0; savesize < size;) {
751 if (savesize > 0) { // find next empty slot
752 int tmpidx = _find_empty(tbl, newidx + 1);
754 //DEBUG("hasharr: Can't expand slot for key %s.", key);
755 _remove_data(tbl, idx);
761 memset((void *) (&data->slots[tmpidx]), '\0',
762 sizeof(qhasharr_slot_t));
764 data->slots[tmpidx].count = -2; // extended data block
765 data->slots[tmpidx].hash = newidx; // prev link
766 data->slots[tmpidx].link = -1; // end block mark
767 data->slots[tmpidx].size = 0;
769 data->slots[newidx].link = tmpidx; // link chain
771 //DEBUG("hasharr: slot %d is linked to slot %d for key %s.",
772 // tmpidx, newidx, key);
777 size_t copysize = size - savesize;
779 if (data->slots[newidx].count == -2) {
781 if (copysize > sizeof(struct _Q_HASHARR_SLOT_EXT)) {
782 copysize = sizeof(struct _Q_HASHARR_SLOT_EXT);
784 memcpy(data->slots[newidx].data.ext.value, value + savesize,
788 if (copysize > _Q_HASHARR_VALUESIZE) {
789 copysize = _Q_HASHARR_VALUESIZE;
791 memcpy(data->slots[newidx].data.pair.value, value + savesize,
794 // increase stored key counter
797 data->slots[newidx].size = copysize;
798 savesize += copysize;
800 // increase used slot counter
807 static bool _copy_slot(qhasharr_t *tbl, int idx1, int idx2) {
808 qhasharr_data_t *data = tbl->data;
810 if (data->slots[idx1].count != 0 || data->slots[idx2].count == 0) {
811 //DEBUG("hasharr: BUG found.");
816 memcpy((void *) (&data->slots[idx1]), (void *) (&data->slots[idx2]),
817 sizeof(qhasharr_slot_t));
819 // increase used slot counter
825 static bool _remove_slot(qhasharr_t *tbl, int idx)
827 qhasharr_data_t *data = tbl->data;
829 if (data->slots[idx].count == 0)
831 //DEBUG("hasharr: BUG found.");
836 data->slots[idx].count = 0;
838 // decrease used slot counter
844 static bool _remove_data(qhasharr_t *tbl, int idx) {
845 qhasharr_data_t *data = tbl->data;
847 if (data->slots[idx].count == 0) {
848 //DEBUG("hasharr: BUG found.");
854 int link = data->slots[idx].link;
855 _remove_slot(tbl, idx);
863 // decrease stored key counter
869 #endif /* _DOXYGEN_SKIP */