1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 #ifndef _SMALLHASHTABLE_H_
6 #define _SMALLHASHTABLE_H_
8 // Since compiler depends on valuenum which depends on smallhash, forward declare
9 // a wrapper for comp->compGetMem here (implemented in compiler.hpp) that can be used below.
11 void* compGetMem(Compiler* comp, size_t sz);
13 // genLog2 is defined in compiler.hpp
14 unsigned genLog2(unsigned value);
16 //------------------------------------------------------------------------
17 // HashTableInfo: a concept that provides equality and hashing methods for
18 // a particular key type. Used by HashTableBase and its
20 template <typename TKey>
23 // static bool Equals(const TKey& x, const TKey& y);
24 // static unsigned GetHashCode(const TKey& key);
27 //------------------------------------------------------------------------
28 // HashTableInfo<TKey*>: specialized version of HashTableInfo for pointer-
30 template <typename TKey>
31 struct HashTableInfo<TKey*>
33 static bool Equals(const TKey* x, const TKey* y)
38 static unsigned GetHashCode(const TKey* key)
40 // Shift off bits that are not likely to be significant
41 size_t keyval = reinterpret_cast<size_t>(key) >> ConstLog2<__alignof(TKey)>::value;
43 // Truncate and return the result
44 return static_cast<unsigned>(keyval);
48 //------------------------------------------------------------------------
49 // HashTableInfo<int>: specialized version of HashTableInfo for int-
52 struct HashTableInfo<int>
54 static bool Equals(int x, int y)
59 static unsigned GetHashCode(int key)
61 // Cast and return the key
62 return static_cast<unsigned>(key);
66 //------------------------------------------------------------------------
67 // HashTableInfo<unsigned>: specialized version of HashTableInfo for unsigned-
70 struct HashTableInfo<unsigned>
72 static bool Equals(unsigned x, unsigned y)
77 static unsigned GetHashCode(unsigned key)
79 // Return the key itself
84 //------------------------------------------------------------------------
85 // HashTableBase: base type for HashTable and SmallHashTable. This class
86 // provides the vast majority of the implementation. The
87 // subclasses differ in the storage they use at the time of
88 // construction: HashTable allocates the initial bucket
89 // array on the heap; SmallHashTable contains a small inline
92 // This implementation is based on the ideas presented in Herlihy, Shavit,
93 // and Tzafrir '08 (http://mcg.cs.tau.ac.il/papers/disc2008-hopscotch.pdf),
94 // though it does not currently implement the hopscotch algorithm.
96 // The approach taken is intended to perform well in both space and speed.
97 // This approach is a hybrid of separate chaining and open addressing with
98 // linear probing: collisions are resolved using a bucket chain, but that
99 // chain is stored in the bucket array itself.
101 // Resolving collisions using a bucket chain avoids the primary clustering
102 // issue common in linearly-probed open addressed hash tables, while using
103 // buckets as chain nodes avoids the allocaiton traffic typical of chained
104 // tables. Applying the hopscotch algorithm in the aforementioned paper
105 // could further improve performance by optimizing access patterns for
106 // better cache usage.
108 // Template parameters:
109 // TKey - The type of the table's keys.
110 // TValue - The type of the table's values.
111 // TKeyInfo - A type that conforms to the HashTableInfo<TKey> concept.
112 template <typename TKey, typename TValue, typename TKeyInfo = HashTableInfo<TKey>>
115 friend class KeyValuePair;
116 friend class Iterator;
120 InitialNumBuckets = 8
124 //------------------------------------------------------------------------
125 // HashTableBase::Bucket: provides storage for the key-value pairs that
126 // make up the contents of the table.
128 // The "home" bucket for a particular key is the bucket indexed by the
129 // key's hash code modulo the size of the bucket array (the "home index").
131 // The home bucket is always considered to be part of the chain that it
132 // roots, even if it is also part of the chain rooted at a different
133 // bucket. `m_firstOffset` indicates the offset of the first non-home
134 // bucket in the home bucket's chain. If the `m_firstOffset` of a bucket
135 // is 0, the chain rooted at that bucket is empty.
137 // The index of the next bucket in a chain is calculated by adding the
138 // value in `m_nextOffset` to the index of the current bucket. If
139 // `m_nextOffset` is 0, the current bucket is the end of its chain. Each
140 // bucket in a chain must be occupied (i.e. `m_isFull` will be true).
143 bool m_isFull; // True if the bucket is occupied; false otherwise.
145 unsigned m_firstOffset; // The offset to the first node in the chain for this bucket index.
146 unsigned m_nextOffset; // The offset to the next node in the chain for this bucket index.
148 unsigned m_hash; // The hash code for the element stored in this bucket.
149 TKey m_key; // The key for the element stored in this bucket.
150 TValue m_value; // The value for the element stored in this bucket.
154 Compiler* m_compiler; // The compiler context to use for allocations.
155 Bucket* m_buckets; // The bucket array.
156 unsigned m_numBuckets; // The number of buckets in the bucket array.
157 unsigned m_numFullBuckets; // The number of occupied buckets.
159 //------------------------------------------------------------------------
160 // HashTableBase::Insert: inserts a key-value pair into a bucket array.
163 // buckets - The bucket array in which to insert the key-value pair.
164 // numBuckets - The number of buckets in the bucket array.
165 // hash - The hash code of the key to insert.
166 // key - The key to insert.
167 // value - The value to insert.
170 // True if the key-value pair was successfully inserted; false
172 static bool Insert(Bucket* buckets, unsigned numBuckets, unsigned hash, const TKey& key, const TValue& value)
174 const unsigned mask = numBuckets - 1;
175 unsigned homeIndex = hash & mask;
177 Bucket* home = &buckets[homeIndex];
180 // The home bucket is empty; use it.
182 // Note that `m_firstOffset` does not need to be updated: whether or not it is non-zero,
183 // it is already correct, since we're inserting at the head of the list. `m_nextOffset`
184 // must be 0, however, since this node should not be part of a list.
185 assert(home->m_nextOffset == 0);
187 home->m_isFull = true;
190 home->m_value = value;
194 // If the home bucket is full, probe to find the next empty bucket.
195 unsigned precedingIndexInChain = homeIndex;
196 unsigned nextIndexInChain = (homeIndex + home->m_firstOffset) & mask;
197 for (unsigned j = 1; j < numBuckets; j++)
199 unsigned bucketIndex = (homeIndex + j) & mask;
200 Bucket* bucket = &buckets[bucketIndex];
201 if (bucketIndex == nextIndexInChain)
203 assert(bucket->m_isFull);
204 precedingIndexInChain = bucketIndex;
205 nextIndexInChain = (bucketIndex + bucket->m_nextOffset) & mask;
207 else if (!bucket->m_isFull)
209 bucket->m_isFull = true;
210 if (precedingIndexInChain == nextIndexInChain)
212 bucket->m_nextOffset = 0;
216 assert(((nextIndexInChain - bucketIndex) & mask) > 0);
217 bucket->m_nextOffset = (nextIndexInChain - bucketIndex) & mask;
220 unsigned offset = (bucketIndex - precedingIndexInChain) & mask;
223 if (precedingIndexInChain == homeIndex)
225 buckets[precedingIndexInChain].m_firstOffset = offset;
229 buckets[precedingIndexInChain].m_nextOffset = offset;
232 bucket->m_hash = hash;
234 bucket->m_value = value;
239 // No more free buckets.
243 //------------------------------------------------------------------------
244 // HashTableBase::TryGetBucket: attempts to get the bucket that holds a
248 // hash - The hash code of the key to find.
249 // key - The key to find.
250 // precedingIndex - An output parameter that will hold the index of the
251 // preceding bucket in the chain for the key. May be
252 // equal to `bucketIndex` if the key is stored in its
254 // bucketIndex - An output parameter that will hold the index of the
255 // bucket that stores the key.
258 // True if the key was successfully found; false otherwise.
259 bool TryGetBucket(unsigned hash, const TKey& key, unsigned* precedingIndex, unsigned* bucketIndex) const
261 if (m_numBuckets == 0)
266 const unsigned mask = m_numBuckets - 1;
267 unsigned index = hash & mask;
269 Bucket* bucket = &m_buckets[index];
270 if (bucket->m_isFull && bucket->m_hash == hash && TKeyInfo::Equals(bucket->m_key, key))
272 *precedingIndex = index;
273 *bucketIndex = index;
277 for (unsigned offset = bucket->m_firstOffset; offset != 0; offset = bucket->m_nextOffset)
279 unsigned precedingIndexInChain = index;
281 index = (index + offset) & mask;
282 bucket = &m_buckets[index];
284 assert(bucket->m_isFull);
285 if (bucket->m_hash == hash && TKeyInfo::Equals(bucket->m_key, key))
287 *precedingIndex = precedingIndexInChain;
288 *bucketIndex = index;
296 //------------------------------------------------------------------------
297 // HashTableBase::Resize: allocates a new bucket array twice the size of
298 // the current array and copies the key-value pairs
299 // from the current bucket array into the new array.
302 Bucket* currentBuckets = m_buckets;
304 unsigned newNumBuckets = m_numBuckets == 0 ? InitialNumBuckets : m_numBuckets * 2;
305 size_t allocSize = sizeof(Bucket) * newNumBuckets;
306 assert((sizeof(Bucket) * m_numBuckets) < allocSize);
308 auto* newBuckets = reinterpret_cast<Bucket*>(compGetMem(m_compiler, allocSize));
309 memset(newBuckets, 0, allocSize);
311 for (unsigned currentIndex = 0; currentIndex < m_numBuckets; currentIndex++)
313 Bucket* currentBucket = ¤tBuckets[currentIndex];
314 if (!currentBucket->m_isFull)
320 Insert(newBuckets, newNumBuckets, currentBucket->m_hash, currentBucket->m_key, currentBucket->m_value);
321 (assert(inserted), (void)inserted);
324 m_numBuckets = newNumBuckets;
325 m_buckets = newBuckets;
329 HashTableBase(Compiler* compiler, Bucket* buckets, unsigned numBuckets)
330 : m_compiler(compiler), m_buckets(buckets), m_numBuckets(numBuckets), m_numFullBuckets(0)
332 assert(compiler != nullptr);
336 assert((numBuckets & (numBuckets - 1)) == 0); // Size must be a power of 2
337 assert(m_buckets != nullptr);
339 memset(m_buckets, 0, sizeof(Bucket) * numBuckets);
347 class KeyValuePair final
349 friend class HashTableBase<TKey, TValue, TKeyInfo>::Iterator;
353 KeyValuePair(Bucket* bucket) : m_bucket(bucket)
355 assert(m_bucket != nullptr);
359 KeyValuePair() : m_bucket(nullptr)
365 return m_bucket->m_key;
368 inline TValue& Value()
370 return m_bucket->m_value;
374 // NOTE: HashTableBase only provides iterators in debug builds because the order in which
375 // the iterator type produces values is undefined (e.g. it is not related to the order in
376 // which key-value pairs were inserted).
379 friend class HashTableBase<TKey, TValue, TKeyInfo>;
382 unsigned m_numBuckets;
385 Iterator(Bucket* buckets, unsigned numBuckets, unsigned index)
386 : m_buckets(buckets), m_numBuckets(numBuckets), m_index(index)
388 assert((buckets != nullptr) || (numBuckets == 0));
389 assert(index <= numBuckets);
391 // Advance to the first occupied bucket
392 while (m_index != m_numBuckets && !m_buckets[m_index].m_isFull)
399 Iterator() : m_buckets(nullptr), m_numBuckets(0), m_index(0)
403 KeyValuePair operator*() const
405 if (m_index >= m_numBuckets)
407 return KeyValuePair();
410 Bucket* bucket = &m_buckets[m_index];
411 assert(bucket->m_isFull);
412 return KeyValuePair(bucket);
415 KeyValuePair operator->() const
417 return this->operator*();
420 bool operator==(const Iterator& other) const
422 return (m_buckets == other.m_buckets) && (m_index == other.m_index);
425 bool operator!=(const Iterator& other) const
427 return (m_buckets != other.m_buckets) || (m_index != other.m_index);
430 Iterator& operator++()
435 } while (m_index != m_numBuckets && !m_buckets[m_index].m_isFull);
441 Iterator begin() const
443 return Iterator(m_buckets, m_numBuckets, 0);
448 return Iterator(m_buckets, m_numBuckets, m_numBuckets);
452 unsigned Count() const
454 return m_numFullBuckets;
459 if (m_numBuckets > 0)
461 memset(m_buckets, 0, sizeof(Bucket) * m_numBuckets);
462 m_numFullBuckets = 0;
466 //------------------------------------------------------------------------
467 // HashTableBase::AddOrUpdate: adds a key-value pair to the hash table if
468 // the key does not already exist in the
469 // table, or updates the value if the key
473 // key - The key for which to add or update a value.
474 // value - The value.
477 // True if the value was added; false if it was updated.
478 bool AddOrUpdate(const TKey& key, const TValue& value)
480 unsigned hash = TKeyInfo::GetHashCode(key);
482 unsigned unused, index;
483 if (TryGetBucket(hash, key, &unused, &index))
485 m_buckets[index].m_value = value;
489 // If the load is greater than 0.8, resize the table before inserting.
490 if ((m_numFullBuckets * 5) >= (m_numBuckets * 4))
495 bool inserted = Insert(m_buckets, m_numBuckets, hash, key, value);
496 (assert(inserted), (void)inserted);
503 //------------------------------------------------------------------------
504 // HashTableBase::TryRemove: removes a key from the hash table and returns
505 // its value if the key exists in the table.
508 // key - The key to remove from the table.
509 // value - An output parameter that will hold the value for the removed
513 // True if the key was removed from the table; false otherwise.
514 bool TryRemove(const TKey& key, TValue* value)
516 unsigned hash = TKeyInfo::GetHashCode(key);
518 unsigned precedingIndexInChain, bucketIndex;
519 if (!TryGetBucket(hash, key, &precedingIndexInChain, &bucketIndex))
524 Bucket* bucket = &m_buckets[bucketIndex];
526 if (precedingIndexInChain != bucketIndex)
528 const unsigned mask = m_numBuckets - 1;
529 unsigned homeIndex = hash & mask;
532 if (bucket->m_nextOffset == 0)
538 unsigned nextIndexInChain = (bucketIndex + bucket->m_nextOffset) & mask;
539 nextOffset = (nextIndexInChain - precedingIndexInChain) & mask;
542 if (precedingIndexInChain == homeIndex)
544 m_buckets[precedingIndexInChain].m_firstOffset = nextOffset;
548 m_buckets[precedingIndexInChain].m_nextOffset = nextOffset;
552 bucket->m_isFull = false;
553 bucket->m_nextOffset = 0;
557 *value = bucket->m_value;
561 //------------------------------------------------------------------------
562 // HashTableBase::TryGetValue: retrieves the value for a key if the key
563 // exists in the table.
566 // key - The key to find from the table.
567 // value - An output parameter that will hold the value for the key.
570 // True if the key was found in the table; false otherwise.
571 bool TryGetValue(const TKey& key, TValue* value) const
573 unsigned unused, index;
574 if (!TryGetBucket(TKeyInfo::GetHashCode(key), key, &unused, &index))
579 *value = m_buckets[index].m_value;
583 //------------------------------------------------------------------------
584 // HashTableBase::Contains: returns true if a key exists in the table and
588 // key - The key to find from the table.
591 // True if the key was found in the table; false otherwise.
592 bool Contains(const TKey& key) const
594 unsigned unused, index;
595 return TryGetBucket(TKeyInfo::GetHashCode(key), key, &unused, &index);
599 //------------------------------------------------------------------------
600 // HashTable: a simple subclass of `HashTableBase` that always uses heap
601 // storage for its bucket array.
602 template <typename TKey, typename TValue, typename TKeyInfo = HashTableInfo<TKey>>
603 class HashTable final : public HashTableBase<TKey, TValue, TKeyInfo>
605 typedef HashTableBase<TKey, TValue, TKeyInfo> TBase;
607 static unsigned RoundUp(unsigned initialSize)
609 return 1 << genLog2(initialSize);
613 HashTable(Compiler* compiler) : TBase(compiler, nullptr, 0)
617 HashTable(Compiler* compiler, unsigned initialSize)
619 reinterpret_cast<typename TBase::Bucket*>(
620 compGetMem(compiler, RoundUp(initialSize) * sizeof(typename TBase::Bucket))),
621 RoundUp(initialSize))
626 //------------------------------------------------------------------------
627 // SmallHashTable: an alternative to `HashTable` that stores the initial
628 // bucket array inline. Most useful for situations where
629 // the number of key-value pairs that will be stored in
630 // the map at any given time falls below a certain
631 // threshold. Switches to heap storage once the initial
632 // inline storage is exhausted.
633 template <typename TKey, typename TValue, unsigned NumInlineBuckets = 8, typename TKeyInfo = HashTableInfo<TKey>>
634 class SmallHashTable final : public HashTableBase<TKey, TValue, TKeyInfo>
636 typedef HashTableBase<TKey, TValue, TKeyInfo> TBase;
640 RoundedNumInlineBuckets = 1 << ConstLog2<NumInlineBuckets>::value
643 typename TBase::Bucket m_inlineBuckets[RoundedNumInlineBuckets];
646 SmallHashTable(Compiler* compiler) : TBase(compiler, m_inlineBuckets, RoundedNumInlineBuckets)
651 #endif // _SMALLHASHTABLE_H_