1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
7 // JitHashTable implements a mapping from a Key type to a Value type,
10 // JitHashTable takes four template parameters:
11 // Key, KeyFuncs, Value, Allocator and Behavior.
12 // We don't assume that Key has hash or equality functions specific names;
13 // rather, we assume that KeyFuncs has the following static methods
14 // int GetHashCode(Key)
15 // bool Equals(Key, Key)
16 // and use those. An instantiator can thus make a small "adaptor class"
17 // to invoke existing instance method hash and/or equality functions.
18 // If the implementor of a candidate Key class K understands this convention,
19 // these static methods can be implemented by K, so that K can be used
20 // as the actual arguments for the both Key and KeyFuncs template parameters.
22 // The "Behavior" parameter must provide the following static members:
24 // s_growth_factor_numerator
25 // s_growth_factor_denominator Factor to grow allocation (numerator/denominator).
26 // Typically inherited from default traits (3/2)
28 // s_density_factor_numerator
29 // s_density_factor_denominator Maxium occupied density of table before growth
30 // occurs (num/denom). Typically inherited (3/4).
32 // s_minimum_allocation Minimum table allocation count (size on first growth.) It is
33 // probably preferable to call Reallocate on initialization rather
34 // than override this from the default traits.
36 // NoMemory() Called when the hash table is unable to grow due to potential
37 // overflow or the lack of a sufficiently large prime.
39 class JitHashTableBehavior
42 static const unsigned s_growth_factor_numerator = 3;
43 static const unsigned s_growth_factor_denominator = 2;
45 static const unsigned s_density_factor_numerator = 3;
46 static const unsigned s_density_factor_denominator = 4;
48 static const unsigned s_minimum_allocation = 7;
50 inline static void DECLSPEC_NORETURN NoMemory()
56 // Stores info about primes, including the magic number and shift amount needed
57 // to implement a divide without using the divide instruction
61 JitPrimeInfo() : prime(0), magic(0), shift(0)
64 JitPrimeInfo(unsigned p, unsigned m, unsigned s) : prime(p), magic(m), shift(s)
71 // Compute `numerator` / `prime` using magic division
72 unsigned magicNumberDivide(unsigned numerator) const
74 unsigned __int64 num = numerator;
75 unsigned __int64 mag = magic;
76 unsigned __int64 product = (num * mag) >> (32 + shift);
77 return (unsigned)product;
80 // Compute `numerator` % `prime` using magic division
81 unsigned magicNumberRem(unsigned numerator) const
83 unsigned div = magicNumberDivide(numerator);
84 unsigned result = numerator - (div * prime);
85 assert(result == numerator % prime);
90 // Table of primes and their magic-number-divide constant.
91 // For more info see the book "Hacker's Delight" chapter 10.9 "Unsigned Division by Divisors >= 1"
92 // These were selected by looking for primes, each roughly twice as big as the next, having
93 // 32-bit magic numbers, (because the algorithm for using 33-bit magic numbers is slightly slower).
96 SELECTANY const JitPrimeInfo jitPrimeInfo[]
98 JitPrimeInfo(9, 0x38e38e39, 1),
99 JitPrimeInfo(23, 0xb21642c9, 4),
100 JitPrimeInfo(59, 0x22b63cbf, 3),
101 JitPrimeInfo(131, 0xfa232cf3, 7),
102 JitPrimeInfo(239, 0x891ac73b, 7),
103 JitPrimeInfo(433, 0x975a751, 4),
104 JitPrimeInfo(761, 0x561e46a5, 8),
105 JitPrimeInfo(1399, 0xbb612aa3, 10),
106 JitPrimeInfo(2473, 0x6a009f01, 10),
107 JitPrimeInfo(4327, 0xf2555049, 12),
108 JitPrimeInfo(7499, 0x45ea155f, 11),
109 JitPrimeInfo(12973, 0x1434f6d3, 10),
110 JitPrimeInfo(22433, 0x2ebe18db, 12),
111 JitPrimeInfo(46559, 0xb42bebd5, 15),
112 JitPrimeInfo(96581, 0xadb61b1b, 16),
113 JitPrimeInfo(200341, 0x29df2461, 15),
114 JitPrimeInfo(415517, 0xa181c46d, 18),
115 JitPrimeInfo(861719, 0x4de0bde5, 18),
116 JitPrimeInfo(1787021, 0x9636c46f, 20),
117 JitPrimeInfo(3705617, 0x4870adc1, 20),
118 JitPrimeInfo(7684087, 0x8bbc5b83, 22),
119 JitPrimeInfo(15933877, 0x86c65361, 23),
120 JitPrimeInfo(33040633, 0x40fec79b, 23),
121 JitPrimeInfo(68513161, 0x7d605cd1, 25),
122 JitPrimeInfo(142069021, 0xf1da390b, 27),
123 JitPrimeInfo(294594427, 0x74a2507d, 27),
124 JitPrimeInfo(733045421, 0x5dbec447, 28),
128 // Hash table class definition
130 template <typename Key,
133 typename Allocator = CompAllocator,
134 typename Behavior = JitHashTableBehavior>
140 //------------------------------------------------------------------------
141 // JitHashTable: Construct an empty JitHashTable object.
144 // alloc - the allocator to be used by the new JitHashTable object
147 // JitHashTable always starts out empty, with no allocation overhead.
148 // Call Reallocate to prime with an initial size if desired.
150 JitHashTable(Allocator* alloc) : m_alloc(alloc), m_table(nullptr), m_tableSizeInfo(), m_tableCount(0), m_tableMax(0)
152 assert(m_alloc != nullptr);
154 #ifndef __GNUC__ // these crash GCC
155 static_assert_no_msg(Behavior::s_growth_factor_numerator > Behavior::s_growth_factor_denominator);
156 static_assert_no_msg(Behavior::s_density_factor_numerator < Behavior::s_density_factor_denominator);
160 //------------------------------------------------------------------------
161 // ~JitHashTable: Destruct the JitHashTable object.
164 // Destructs all keys and values stored in the table and frees all
172 //------------------------------------------------------------------------
173 // Lookup: Get the value associated to the specified key, if any.
177 // pVal - pointer to a location used to store the associated value
180 // `true` if the key exists, `false` otherwise
183 // If the key does not exist *pVal is not updated. pVal may be nullptr
184 // so this function can be used to simply check if the key exists.
186 bool Lookup(Key k, Value* pVal = nullptr) const
188 Node* pN = FindNode(k);
204 //------------------------------------------------------------------------
205 // Lookup: Get a pointer to the value associated to the specified key.
212 // A pointer to the value associated with the specified key or nullptr
213 // if the key is not found
216 // This is similar to `Lookup` but avoids copying the value and allows
217 // updating the value without using `Set`.
219 Value* LookupPointer(Key k) const
221 Node* pN = FindNode(k);
233 //------------------------------------------------------------------------
234 // Set: Associate the specified value with the specified key.
241 // `true` if the key already exists, `false` otherwise.
244 // If the key already exists then its associated value is updated to
247 bool Set(Key k, Value v)
251 assert(m_tableSizeInfo.prime != 0);
253 unsigned index = GetIndexForKey(k);
255 Node* pN = m_table[index];
256 while ((pN != nullptr) && !KeyFuncs::Equals(k, pN->m_key))
267 Node* pNewNode = new (m_alloc) Node(m_table[index], k, v);
268 m_table[index] = pNewNode;
274 //------------------------------------------------------------------------
275 // Emplace: Associates the specified key with a value constructed in-place
276 // using the supplied args if the key is not already present.
280 // args - the args used to construct the value
283 // A pointer to the existing or newly constructed value.
285 template <class... Args>
286 Value* Emplace(Key k, Args&&... args)
290 assert(m_tableSizeInfo.prime != 0);
292 unsigned index = GetIndexForKey(k);
294 Node* n = m_table[index];
295 while ((n != nullptr) && !KeyFuncs::Equals(k, n->m_key))
302 n = new (m_alloc) Node(m_table[index], k, jitstd::forward<Args>(args)...);
311 //------------------------------------------------------------------------
312 // Remove: Remove the specified key and its associated value.
318 // `true` if the key exists, `false` otherwise.
321 // Removing a inexistent key is not an error.
325 unsigned index = GetIndexForKey(k);
327 Node* pN = m_table[index];
328 Node** ppN = &m_table[index];
329 while ((pN != nullptr) && !KeyFuncs::Equals(k, pN->m_key))
338 Node::operator delete(pN, m_alloc);
347 //------------------------------------------------------------------------
348 // RemoveAll: Remove all keys and their associated values.
351 // This also frees all the memory owned by the table.
355 for (unsigned i = 0; i < m_tableSizeInfo.prime; i++)
357 for (Node* pN = m_table[i]; pN != nullptr;)
359 Node* pNext = pN->m_next;
360 Node::operator delete(pN, m_alloc);
364 m_alloc->Free(m_table);
367 m_tableSizeInfo = JitPrimeInfo();
374 // Get an iterator to the first key in the table.
375 KeyIterator Begin() const
377 KeyIterator i(this, TRUE);
381 // Get an iterator following the last key in the table.
382 KeyIterator End() const
384 return KeyIterator(this, FALSE);
387 // Get the number of keys currently stored in the table.
388 unsigned GetCount() const
393 // Get the allocator used by this hash table.
394 Allocator* GetAllocator()
402 //------------------------------------------------------------------------
403 // GetIndexForKey: Get the bucket index for the specified key.
411 unsigned GetIndexForKey(Key k) const
413 unsigned hash = KeyFuncs::GetHashCode(k);
415 unsigned index = m_tableSizeInfo.magicNumberRem(hash);
420 //------------------------------------------------------------------------
421 // FindNode: Return a pointer to the node having the specified key, if any.
427 // A pointer to the node or `nullptr` if the key is not found.
429 Node* FindNode(Key k) const
431 if (m_tableSizeInfo.prime == 0)
436 unsigned index = GetIndexForKey(k);
438 Node* pN = m_table[index];
445 while ((pN != nullptr) && !KeyFuncs::Equals(k, pN->m_key))
450 assert((pN == nullptr) || KeyFuncs::Equals(k, pN->m_key));
452 // If pN != nullptr, it's the node for the key, else the key isn't mapped.
456 //------------------------------------------------------------------------
457 // Grow: Increase the size of the bucket table.
460 // The new size is computed based on the current population, growth factor,
461 // and maximum density factor.
466 (unsigned)(m_tableCount * Behavior::s_growth_factor_numerator / Behavior::s_growth_factor_denominator *
467 Behavior::s_density_factor_denominator / Behavior::s_density_factor_numerator);
469 if (newSize < Behavior::s_minimum_allocation)
471 newSize = Behavior::s_minimum_allocation;
474 // handle potential overflow
475 if (newSize < m_tableCount)
477 Behavior::NoMemory();
483 //------------------------------------------------------------------------
484 // CheckGrowth: Check if the maximum hashtable density has been reached
485 // and increase the size of the bucket table if necessary.
489 if (m_tableCount == m_tableMax)
496 //------------------------------------------------------------------------
497 // CheckGrowth: Replace the bucket table with a larger one and copy all nodes
498 // from the existing bucket table.
501 // The new size must be large enough to hold all existing keys in
502 // the table without exceeding the density. Note that the actual
503 // table size must always be a prime number; the specified size
504 // will be increased to the next prime if necessary.
506 void Reallocate(unsigned newTableSize)
508 assert(newTableSize >=
509 (GetCount() * Behavior::s_density_factor_denominator / Behavior::s_density_factor_numerator));
511 // Allocation size must be a prime number. This is necessary so that hashes uniformly
512 // distribute to all indices, and so that chaining will visit all indices in the hash table.
513 JitPrimeInfo newPrime = NextPrime(newTableSize);
514 newTableSize = newPrime.prime;
516 Node** newTable = (Node**)m_alloc->ArrayAlloc(newTableSize, sizeof(Node*));
518 for (unsigned i = 0; i < newTableSize; i++)
520 newTable[i] = nullptr;
523 // Move all entries over to new table (re-using the Node structures.)
525 for (unsigned i = 0; i < m_tableSizeInfo.prime; i++)
527 Node* pN = m_table[i];
528 while (pN != nullptr)
530 Node* pNext = pN->m_next;
532 unsigned newIndex = newPrime.magicNumberRem(KeyFuncs::GetHashCode(pN->m_key));
533 pN->m_next = newTable[newIndex];
534 newTable[newIndex] = pN;
540 if (m_table != nullptr)
542 m_alloc->Free(m_table);
546 m_tableSizeInfo = newPrime;
548 (unsigned)(newTableSize * Behavior::s_density_factor_numerator / Behavior::s_density_factor_denominator);
551 // For iteration, we use a pattern similar to the STL "forward
552 // iterator" pattern. It basically consists of wrapping an
553 // "iteration variable" in an object, and providing pointer-like
554 // operators on the iterator. Example usage:
556 // for (JitHashTable::KeyIterator iter = foo->Begin(), end = foo->End(); !iter.Equal(end); iter++)
560 // iter.Get() will yield (a reference to) the
561 // current key. It will assert the equivalent of "iter != end."
565 friend class JitHashTable;
569 unsigned m_tableSize;
573 //------------------------------------------------------------------------
574 // KeyIterator: Construct an iterator for the specified JitHashTable.
577 // hash - the hashtable
578 // begin - `true` to construct an "begin" iterator,
579 // `false` to construct an "end" iterator
581 KeyIterator(const JitHashTable* hash, BOOL begin)
582 : m_table(hash->m_table)
584 , m_tableSize(hash->m_tableSizeInfo.prime)
585 , m_index(begin ? 0 : m_tableSize)
587 if (begin && (hash->m_tableCount > 0))
589 assert(m_table != nullptr);
590 while ((m_index < m_tableSize) && (m_table[m_index] == nullptr))
595 if (m_index >= m_tableSize)
601 m_node = m_table[m_index];
603 assert(m_node != nullptr);
607 //------------------------------------------------------------------------
608 // Get: Get a reference to this iterator's key.
611 // A reference to this iterator's key.
614 // This must not be the "end" iterator.
616 const Key& Get() const
618 assert(m_node != nullptr);
620 return m_node->m_key;
623 //------------------------------------------------------------------------
624 // GetValue: Get a reference to this iterator's value.
627 // A reference to this iterator's value.
630 // This must not be the "end" iterator.
632 Value& GetValue() const
634 assert(m_node != nullptr);
636 return m_node->m_val;
639 //------------------------------------------------------------------------
640 // SetValue: Assign a new value to this iterator's key
643 // value - the value to assign
646 // This must not be the "end" iterator.
648 void SetValue(const Value& value) const
650 assert(m_node != nullptr);
652 m_node->m_val = value;
655 //------------------------------------------------------------------------
656 // Next: Advance the iterator to the next node.
659 // Advancing the end iterator has no effect.
663 if (m_node != nullptr)
665 m_node = m_node->m_next;
666 if (m_node != nullptr)
674 while ((m_index < m_tableSize) && (m_table[m_index] == nullptr))
679 if (m_index >= m_tableSize)
686 m_node = m_table[m_index];
688 assert(m_node != nullptr);
691 // Return `true` if the specified iterator has the same position as this iterator
692 bool Equal(const KeyIterator& i) const
694 return i.m_node == m_node;
697 // Advance the iterator to the next node
703 // Advance the iterator to the next node
710 //------------------------------------------------------------------------
711 // operator[]: Get a reference to the value associated with the specified key.
717 // A reference to the value associated with the specified key.
720 // The specified key must exist.
722 Value& operator[](Key k) const
724 Value* p = LookupPointer(k);
730 //------------------------------------------------------------------------
731 // NextPrime: Get a prime number greater than or equal to the specified number.
734 // number - the minimum value
739 static JitPrimeInfo NextPrime(unsigned number)
741 for (int i = 0; i < (int)(_countof(jitPrimeInfo)); i++)
743 if (jitPrimeInfo[i].prime >= number)
745 return jitPrimeInfo[i];
750 Behavior::NoMemory();
756 Node* m_next; // Assume that the alignment requirement of Key and Value are no greater than Node*,
757 // so put m_next first to avoid unnecessary padding.
761 template <class... Args>
762 Node(Node* next, Key k, Args&&... args) : m_next(next), m_key(k), m_val(jitstd::forward<Args>(args)...)
766 void* operator new(size_t sz, Allocator* alloc)
768 return alloc->Alloc(sz);
771 void operator delete(void* p, Allocator* alloc)
778 Allocator* m_alloc; // Allocator to use in this table.
779 Node** m_table; // pointer to table
780 JitPrimeInfo m_tableSizeInfo; // size of table (a prime) and information about it
781 unsigned m_tableCount; // number of elements in table
782 unsigned m_tableMax; // maximum occupied count
785 // Commonly used KeyFuncs types:
787 // Base class for types whose equality function is the same as their "==".
788 template <typename T>
789 struct JitKeyFuncsDefEquals
791 static bool Equals(const T& x, const T& y)
797 template <typename T>
798 struct JitPtrKeyFuncs : public JitKeyFuncsDefEquals<const T*>
801 static unsigned GetHashCode(const T* ptr)
803 // Using the lower 32 bits of a pointer as a hashcode should be good enough.
804 // In fact, this should result in an unique hash code unless we allocate
805 // more than 4 gigabytes or if the virtual address space is fragmented.
806 return static_cast<unsigned>(reinterpret_cast<uintptr_t>(ptr));
810 template <typename T> // Must be coercible to "unsigned" with no loss of information.
811 struct JitSmallPrimitiveKeyFuncs : public JitKeyFuncsDefEquals<T>
813 static unsigned GetHashCode(const T& val)
815 return static_cast<unsigned>(val);
819 template <typename T> // Assumed to be of size sizeof(UINT64).
820 struct JitLargePrimitiveKeyFuncs : public JitKeyFuncsDefEquals<T>
822 static unsigned GetHashCode(const T val)
824 // A static cast when T is a float or a double converts the value (i.e. 0.25 converts to 0)
826 // Instead we want to use all of the bits of a float to create the hash value
827 // So we cast the address of val to a pointer to an equivalent sized unsigned int
828 // This allows us to read the actual bit representation of a float type
830 // We can't read beyond the end of val, so we use sizeof(T) to determine
831 // exactly how many bytes to read
835 // cast &val to (UINT64 *) then deref to get the bits
836 UINT64 asUINT64 = *(reinterpret_cast<const UINT64*>(&val));
838 // Get the upper and lower 32-bit values from the 64-bit value
839 UINT32 upper32 = static_cast<UINT32>(asUINT64 >> 32);
840 UINT32 lower32 = static_cast<UINT32>(asUINT64 & 0xFFFFFFFF);
842 // Exclusive-Or the upper32 and the lower32 values
843 return static_cast<unsigned>(upper32 ^ lower32);
845 else if (sizeof(T) == 4)
847 // cast &val to (UINT32 *) then deref to get the bits
848 UINT32 asUINT32 = *(reinterpret_cast<const UINT32*>(&val));
850 // Just return the 32-bit value
851 return static_cast<unsigned>(asUINT32);
853 else if ((sizeof(T) == 2) || (sizeof(T) == 1))
855 // For small sizes we must have an integer type
856 // so we can just use the static_cast.
858 return static_cast<unsigned>(val);
862 // Only support Hashing for types that are 8,4,2 or 1 bytes in size
863 assert(!"Unsupported size");
864 return static_cast<unsigned>(val); // compile-time error here when we have a illegal size