1 #ifndef GIM_HASH_TABLE_H_INCLUDED
2 #define GIM_HASH_TABLE_H_INCLUDED
3 /*! \file gim_trimesh_data.h
4 \author Francisco Leon Najera
7 -----------------------------------------------------------------------------
8 This source file is part of GIMPACT Library.
10 For the latest info, see http://gimpact.sourceforge.net/
12 Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
13 email: projectileman@yahoo.com
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of EITHER:
17 (1) The GNU Lesser General Public License as published by the Free
18 Software Foundation; either version 2.1 of the License, or (at
19 your option) any later version. The text of the GNU Lesser
20 General Public License is included with this library in the
21 file GIMPACT-LICENSE-LGPL.TXT.
22 (2) The BSD-style license that is included with this library in
23 the file GIMPACT-LICENSE-BSD.TXT.
24 (3) The zlib/libpng license that is included with this library in
25 the file GIMPACT-LICENSE-ZLIB.TXT.
27 This library is distributed in the hope that it will be useful,
28 but WITHOUT ANY WARRANTY; without even the implied warranty of
29 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
30 GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
32 -----------------------------------------------------------------------------
35 #include "gim_radixsort.h"
37 #define GIM_INVALID_HASH 0xffffffff //!< A very very high value
38 #define GIM_DEFAULT_HASH_TABLE_SIZE 380
39 #define GIM_DEFAULT_HASH_TABLE_NODE_SIZE 4
40 #define GIM_HASH_TABLE_GROW_FACTOR 2
42 #define GIM_MIN_RADIX_SORT_SIZE 860 //!< calibrated on a PIII
45 struct GIM_HASH_TABLE_NODE
53 GIM_HASH_TABLE_NODE(const GIM_HASH_TABLE_NODE& value)
56 m_data = value.m_data;
59 GIM_HASH_TABLE_NODE(GUINT key, const T& data)
65 bool operator<(const GIM_HASH_TABLE_NODE<T>& other) const
67 ///inverse order, further objects are first
68 if (m_key < other.m_key) return true;
72 bool operator>(const GIM_HASH_TABLE_NODE<T>& other) const
74 ///inverse order, further objects are first
75 if (m_key > other.m_key) return true;
79 bool operator==(const GIM_HASH_TABLE_NODE<T>& other) const
81 ///inverse order, further objects are first
82 if (m_key == other.m_key) return true;
87 ///Macro for getting the key
88 class GIM_HASH_NODE_GET_KEY
92 inline GUINT operator()(const T& a)
98 ///Macro for comparing the key and the element
99 class GIM_HASH_NODE_CMP_KEY_MACRO
103 inline int operator()(const T& a, GUINT key)
105 return ((int)(a.m_key - key));
109 ///Macro for comparing Hash nodes
110 class GIM_HASH_NODE_CMP_MACRO
114 inline int operator()(const T& a, const T& b)
116 return ((int)(a.m_key - b.m_key));
120 //! Sorting for hash table
122 switch automatically between quicksort and radixsort
124 template <typename T>
125 void gim_sort_hash_node_array(T* array, GUINT array_count)
127 if (array_count < GIM_MIN_RADIX_SORT_SIZE)
129 gim_heap_sort(array, array_count, GIM_HASH_NODE_CMP_MACRO());
133 memcopy_elements_func cmpfunc;
134 gim_radix_sort(array, array_count, GIM_HASH_NODE_GET_KEY(), cmpfunc);
138 // Note: assumes long is at least 32 bits.
139 #define GIM_NUM_PRIME 28
141 static const GUINT gim_prime_list[GIM_NUM_PRIME] =
143 53ul, 97ul, 193ul, 389ul, 769ul,
144 1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
145 49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
146 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
147 50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
148 1610612741ul, 3221225473ul, 4294967291ul};
150 inline GUINT gim_next_prime(GUINT number)
152 //Find nearest upper prime
153 GUINT result_ind = 0;
154 gim_binary_search(gim_prime_list, 0, (GIM_NUM_PRIME - 2), number, result_ind);
156 // inv: result_ind < 28
157 return gim_prime_list[result_ind];
160 //! A compact hash table implementation
162 A memory aligned compact hash table that coud be treated as an array.
163 It could be a simple sorted array without the overhead of the hash key bucked, or could
164 be a formely hash table with an array of keys.
165 You can use switch_to_hashtable() and switch_to_sorted_array for saving space or increase speed.
169 <li> if node_size = 0, then this container becomes a simple sorted array allocator. reserve_size is used for reserve memory in m_nodes.
170 When the array size reaches the size equivalent to 'min_hash_table_size', then it becomes a hash table by calling check_for_switching_to_hashtable.
171 <li> If node_size != 0, then this container becomes a hash table for ever
179 typedef GIM_HASH_TABLE_NODE<T> _node_type;
182 //array< _node_type, SuperAllocator<_node_type> > m_nodes;
183 gim_array<_node_type> m_nodes;
184 //SuperBufferedArray< _node_type > m_nodes;
187 ///Hash table data management. The hash table has the indices to the corresponding m_nodes array
188 GUINT* m_hash_table; //!<
189 GUINT m_table_size; //!<
190 GUINT m_node_size; //!<
191 GUINT m_min_hash_table_size;
193 //! Returns the cell index
194 inline GUINT _find_cell(GUINT hashkey)
196 _node_type* nodesptr = m_nodes.pointer();
197 GUINT start_index = (hashkey % m_table_size) * m_node_size;
198 GUINT end_index = start_index + m_node_size;
200 while (start_index < end_index)
202 GUINT value = m_hash_table[start_index];
203 if (value != GIM_INVALID_HASH)
205 if (nodesptr[value].m_key == hashkey) return start_index;
209 return GIM_INVALID_HASH;
212 //! Find the avaliable cell for the hashkey, and return an existing cell if it has the same hash key
213 inline GUINT _find_avaliable_cell(GUINT hashkey)
215 _node_type* nodesptr = m_nodes.pointer();
216 GUINT avaliable_index = GIM_INVALID_HASH;
217 GUINT start_index = (hashkey % m_table_size) * m_node_size;
218 GUINT end_index = start_index + m_node_size;
220 while (start_index < end_index)
222 GUINT value = m_hash_table[start_index];
223 if (value == GIM_INVALID_HASH)
225 if (avaliable_index == GIM_INVALID_HASH)
227 avaliable_index = start_index;
230 else if (nodesptr[value].m_key == hashkey)
236 return avaliable_index;
239 //! reserves the memory for the hash table.
241 \pre hash table must be empty
242 \post reserves the memory for the hash table, an initializes all elements to GIM_INVALID_HASH.
244 inline void _reserve_table_memory(GUINT newtablesize)
246 if (newtablesize == 0) return;
247 if (m_node_size == 0) return;
251 m_table_size = gim_next_prime(newtablesize);
253 GUINT datasize = m_table_size * m_node_size;
254 //Alloc the data buffer
255 m_hash_table = (GUINT*)gim_alloc(datasize * sizeof(GUINT));
258 inline void _invalidate_keys()
260 GUINT datasize = m_table_size * m_node_size;
261 for (GUINT i = 0; i < datasize; i++)
263 m_hash_table[i] = GIM_INVALID_HASH; // invalidate keys
267 //! Clear all memory for the hash table
268 inline void _clear_table_memory()
270 if (m_hash_table == NULL) return;
271 gim_free(m_hash_table);
276 //! Invalidates the keys (Assigning GIM_INVALID_HASH to all) Reorders the hash keys
277 inline void _rehash()
281 _node_type* nodesptr = m_nodes.pointer();
282 for (GUINT i = 0; i < (GUINT)m_nodes.size(); i++)
284 GUINT nodekey = nodesptr[i].m_key;
285 if (nodekey != GIM_INVALID_HASH)
287 //Search for the avaliable cell in buffer
288 GUINT index = _find_avaliable_cell(nodekey);
290 if (m_hash_table[index] != GIM_INVALID_HASH)
291 { //The new index is alreade used... discard this new incomming object, repeated key
292 btAssert(m_hash_table[index] == nodekey);
293 nodesptr[i].m_key = GIM_INVALID_HASH;
298 //Assign the value for alloc
299 m_hash_table[index] = i;
305 //! Resize hash table indices
306 inline void _resize_table(GUINT newsize)
309 _clear_table_memory();
311 _reserve_table_memory(newsize);
312 //Invalidate keys and rehash
316 //! Destroy hash table memory
317 inline void _destroy()
319 if (m_hash_table == NULL) return;
320 _clear_table_memory();
323 //! Finds an avaliable hash table cell, and resizes the table if there isn't space
324 inline GUINT _assign_hash_table_cell(GUINT hashkey)
326 GUINT cell_index = _find_avaliable_cell(hashkey);
328 if (cell_index == GIM_INVALID_HASH)
331 _resize_table(m_table_size + 1);
332 GUINT cell_index = _find_avaliable_cell(hashkey);
333 btAssert(cell_index != GIM_INVALID_HASH);
338 //! erase by index in hash table
339 inline bool _erase_by_index_hash_table(GUINT index)
341 if (index >= m_nodes.size()) return false;
342 if (m_nodes[index].m_key != GIM_INVALID_HASH)
344 //Search for the avaliable cell in buffer
345 GUINT cell_index = _find_cell(m_nodes[index].m_key);
347 btAssert(cell_index != GIM_INVALID_HASH);
348 btAssert(m_hash_table[cell_index] == index);
350 m_hash_table[cell_index] = GIM_INVALID_HASH;
353 return this->_erase_unsorted(index);
356 //! erase by key in hash table
357 inline bool _erase_hash_table(GUINT hashkey)
359 if (hashkey == GIM_INVALID_HASH) return false;
361 //Search for the avaliable cell in buffer
362 GUINT cell_index = _find_cell(hashkey);
363 if (cell_index == GIM_INVALID_HASH) return false;
365 GUINT index = m_hash_table[cell_index];
366 m_hash_table[cell_index] = GIM_INVALID_HASH;
368 return this->_erase_unsorted(index);
371 //! insert an element in hash table
373 If the element exists, this won't insert the element
374 \return the index in the array of the existing element,or GIM_INVALID_HASH if the element has been inserted
375 If so, the element has been inserted at the last position of the array.
377 inline GUINT _insert_hash_table(GUINT hashkey, const T& value)
379 if (hashkey == GIM_INVALID_HASH)
382 _insert_unsorted(hashkey, value);
383 return GIM_INVALID_HASH;
386 GUINT cell_index = _assign_hash_table_cell(hashkey);
388 GUINT value_key = m_hash_table[cell_index];
390 if (value_key != GIM_INVALID_HASH) return value_key; // Not overrited
392 m_hash_table[cell_index] = m_nodes.size();
394 _insert_unsorted(hashkey, value);
395 return GIM_INVALID_HASH;
398 //! insert an element in hash table.
400 If the element exists, this replaces the element.
401 \return the index in the array of the existing element,or GIM_INVALID_HASH if the element has been inserted
402 If so, the element has been inserted at the last position of the array.
404 inline GUINT _insert_hash_table_replace(GUINT hashkey, const T& value)
406 if (hashkey == GIM_INVALID_HASH)
409 _insert_unsorted(hashkey, value);
410 return GIM_INVALID_HASH;
413 GUINT cell_index = _assign_hash_table_cell(hashkey);
415 GUINT value_key = m_hash_table[cell_index];
417 if (value_key != GIM_INVALID_HASH)
418 { //replaces the existing
419 m_nodes[value_key] = _node_type(hashkey, value);
420 return value_key; // index of the replaced element
423 m_hash_table[cell_index] = m_nodes.size();
425 _insert_unsorted(hashkey, value);
426 return GIM_INVALID_HASH;
429 ///Sorted array data management. The hash table has the indices to the corresponding m_nodes array
430 inline bool _erase_sorted(GUINT index)
432 if (index >= (GUINT)m_nodes.size()) return false;
433 m_nodes.erase_sorted(index);
434 if (m_nodes.size() < 2) m_sorted = false;
438 //! faster, but unsorted
439 inline bool _erase_unsorted(GUINT index)
441 if (index >= m_nodes.size()) return false;
443 GUINT lastindex = m_nodes.size() - 1;
444 if (index < lastindex && m_hash_table != 0)
446 GUINT hashkey = m_nodes[lastindex].m_key;
447 if (hashkey != GIM_INVALID_HASH)
449 //update the new position of the last element
450 GUINT cell_index = _find_cell(hashkey);
451 btAssert(cell_index != GIM_INVALID_HASH);
452 //new position of the last element which will be swaped
453 m_hash_table[cell_index] = index;
456 m_nodes.erase(index);
461 //! Insert in position ordered
463 Also checks if it is needed to transform this container to a hash table, by calling check_for_switching_to_hashtable
465 inline void _insert_in_pos(GUINT hashkey, const T& value, GUINT pos)
467 m_nodes.insert(_node_type(hashkey, value), pos);
468 this->check_for_switching_to_hashtable();
471 //! Insert an element in an ordered array
472 inline GUINT _insert_sorted(GUINT hashkey, const T& value)
474 if (hashkey == GIM_INVALID_HASH || size() == 0)
476 m_nodes.push_back(_node_type(hashkey, value));
477 return GIM_INVALID_HASH;
479 //Insert at last position
482 GUINT result_ind = 0;
483 GUINT last_index = m_nodes.size() - 1;
484 _node_type* ptr = m_nodes.pointer();
486 bool found = gim_binary_search_ex(
487 ptr, 0, last_index, result_ind, hashkey, GIM_HASH_NODE_CMP_KEY_MACRO());
489 //Insert before found index
496 _insert_in_pos(hashkey, value, result_ind);
498 return GIM_INVALID_HASH;
501 inline GUINT _insert_sorted_replace(GUINT hashkey, const T& value)
503 if (hashkey == GIM_INVALID_HASH || size() == 0)
505 m_nodes.push_back(_node_type(hashkey, value));
506 return GIM_INVALID_HASH;
508 //Insert at last position
511 GUINT last_index = m_nodes.size() - 1;
512 _node_type* ptr = m_nodes.pointer();
514 bool found = gim_binary_search_ex(
515 ptr, 0, last_index, result_ind, hashkey, GIM_HASH_NODE_CMP_KEY_MACRO());
517 //Insert before found index
520 m_nodes[result_ind] = _node_type(hashkey, value);
524 _insert_in_pos(hashkey, value, result_ind);
529 //! Fast insertion in m_nodes array
530 inline GUINT _insert_unsorted(GUINT hashkey, const T& value)
532 m_nodes.push_back(_node_type(hashkey, value));
534 return GIM_INVALID_HASH;
539 <li> if node_size = 0, then this container becomes a simple sorted array allocator. reserve_size is used for reserve memory in m_nodes.
540 When the array size reaches the size equivalent to 'min_hash_table_size', then it becomes a hash table by calling check_for_switching_to_hashtable.
541 <li> If node_size != 0, then this container becomes a hash table for ever
544 gim_hash_table(GUINT reserve_size = GIM_DEFAULT_HASH_TABLE_SIZE,
545 GUINT node_size = GIM_DEFAULT_HASH_TABLE_NODE_SIZE,
546 GUINT min_hash_table_size = GIM_INVALID_HASH)
551 m_node_size = node_size;
552 m_min_hash_table_size = min_hash_table_size;
554 if (m_node_size != 0)
556 if (reserve_size != 0)
558 m_nodes.reserve(reserve_size);
559 _reserve_table_memory(reserve_size);
564 m_nodes.reserve(GIM_DEFAULT_HASH_TABLE_SIZE);
565 _reserve_table_memory(GIM_DEFAULT_HASH_TABLE_SIZE);
569 else if (reserve_size != 0)
571 m_nodes.reserve(reserve_size);
580 inline bool is_hash_table()
582 if (m_hash_table) return true;
586 inline bool is_sorted()
588 if (size() < 2) return true;
594 if (is_sorted()) return true;
595 if (m_nodes.size() < 2) return false;
597 _node_type* ptr = m_nodes.pointer();
598 GUINT siz = m_nodes.size();
599 gim_sort_hash_node_array(ptr, siz);
609 bool switch_to_hashtable()
611 if (m_hash_table) return false;
612 if (m_node_size == 0) m_node_size = GIM_DEFAULT_HASH_TABLE_NODE_SIZE;
613 if (m_nodes.size() < GIM_DEFAULT_HASH_TABLE_SIZE)
615 _resize_table(GIM_DEFAULT_HASH_TABLE_SIZE);
619 _resize_table(m_nodes.size() + 1);
625 bool switch_to_sorted_array()
627 if (m_hash_table == NULL) return true;
628 _clear_table_memory();
632 //!If the container reaches the
633 bool check_for_switching_to_hashtable()
635 if (this->m_hash_table) return true;
637 if (!(m_nodes.size() < m_min_hash_table_size))
639 if (m_node_size == 0)
641 m_node_size = GIM_DEFAULT_HASH_TABLE_NODE_SIZE;
644 _resize_table(m_nodes.size() + 1);
650 inline void set_sorted(bool value)
655 //! Retrieves the amount of keys.
656 inline GUINT size() const
658 return m_nodes.size();
661 //! Retrieves the hash key.
662 inline GUINT get_key(GUINT index) const
664 return m_nodes[index].m_key;
667 //! Retrieves the value by index
670 inline T* get_value_by_index(GUINT index)
672 return &m_nodes[index].m_data;
675 inline const T& operator[](GUINT index) const
677 return m_nodes[index].m_data;
680 inline T& operator[](GUINT index)
682 return m_nodes[index].m_data;
685 //! Finds the index of the element with the key
687 \return the index in the array of the existing element,or GIM_INVALID_HASH if the element has been inserted
688 If so, the element has been inserted at the last position of the array.
690 inline GUINT find(GUINT hashkey)
694 GUINT cell_index = _find_cell(hashkey);
695 if (cell_index == GIM_INVALID_HASH) return GIM_INVALID_HASH;
696 return m_hash_table[cell_index];
698 GUINT last_index = m_nodes.size();
701 if (last_index == 0) return GIM_INVALID_HASH;
702 if (m_nodes[0].m_key == hashkey) return 0;
703 return GIM_INVALID_HASH;
708 GUINT result_ind = 0;
710 _node_type* ptr = m_nodes.pointer();
712 bool found = gim_binary_search_ex(ptr, 0, last_index, result_ind, hashkey, GIM_HASH_NODE_CMP_KEY_MACRO());
714 if (found) return result_ind;
716 return GIM_INVALID_HASH;
719 //! Retrieves the value associated with the index
721 \return the found element, or null
723 inline T* get_value(GUINT hashkey)
725 GUINT index = find(hashkey);
726 if (index == GIM_INVALID_HASH) return NULL;
727 return &m_nodes[index].m_data;
732 inline bool erase_by_index(GUINT index)
734 if (index > m_nodes.size()) return false;
736 if (m_hash_table == NULL)
740 return this->_erase_sorted(index);
744 return this->_erase_unsorted(index);
749 return this->_erase_by_index_hash_table(index);
754 inline bool erase_by_index_unsorted(GUINT index)
756 if (index > m_nodes.size()) return false;
758 if (m_hash_table == NULL)
760 return this->_erase_unsorted(index);
764 return this->_erase_by_index_hash_table(index);
772 inline bool erase_by_key(GUINT hashkey)
774 if (size() == 0) return false;
778 return this->_erase_hash_table(hashkey);
782 if (is_sorted() == false) return false;
784 GUINT result_ind = find(hashkey);
785 if (result_ind != GIM_INVALID_HASH)
787 return this->_erase_sorted(result_ind);
796 if (m_hash_table == NULL) return;
797 GUINT datasize = m_table_size * m_node_size;
798 //Initialize the hashkeys.
800 for (i = 0; i < datasize; i++)
802 m_hash_table[i] = GIM_INVALID_HASH; // invalidate keys
807 //! Insert an element into the hash
809 \return If GIM_INVALID_HASH, the object has been inserted succesfully. Else it returns the position
810 of the existing element.
812 inline GUINT insert(GUINT hashkey, const T& element)
816 return this->_insert_hash_table(hashkey, element);
818 if (this->is_sorted())
820 return this->_insert_sorted(hashkey, element);
822 return this->_insert_unsorted(hashkey, element);
825 //! Insert an element into the hash, and could overrite an existing object with the same hash.
827 \return If GIM_INVALID_HASH, the object has been inserted succesfully. Else it returns the position
828 of the replaced element.
830 inline GUINT insert_override(GUINT hashkey, const T& element)
834 return this->_insert_hash_table_replace(hashkey, element);
836 if (this->is_sorted())
838 return this->_insert_sorted_replace(hashkey, element);
840 this->_insert_unsorted(hashkey, element);
841 return m_nodes.size();
844 //! Insert an element into the hash,But if this container is a sorted array, this inserts it unsorted
847 inline GUINT insert_unsorted(GUINT hashkey, const T& element)
851 return this->_insert_hash_table(hashkey, element);
853 return this->_insert_unsorted(hashkey, element);
857 #endif // GIM_CONTAINERS_H_INCLUDED