From 262948a945acd16a6e355e981158e005c66dc2ba Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Sun, 31 May 2020 18:47:49 -0400 Subject: [PATCH] Rewrite HashSet's implementation based on Dictionary's (#37180) * Move HashSet to Corelib * Small style cleanups in Dictionary And factor out InsertionBehavior into its own file * Rewrite HashSet based on Dictionary's implementation This effectively deletes HashSet's data structure and replaces it with the one used by Dictionary, then updated for the differences (e.g. just a value rather than a key and a value). HashSet used to have the same implementation, but Dictionary has evolved significantly and HashSet hasn't; this brings them to basic parity on implementation. Based on perf tests, I veered away from Dictionary's implementation in a few places (e.g. a goto-based implementation in the core find method led to a significant regression for Int32-based Contains operations), and we should follow-up to understand whether Dictionary should be changed as well, or why there's a difference between the two. Functionally, bringing over Dictionary's implementation yields a few notable changes, namely that Remove and Clear no longer invalidate enumerations. The tests have been updated accordingly. * Make a corresponding cleanup change to Dictionary * Use HashSet in Corelib * Address PR feedback * Clean up HashSetEqualityComparer * Port Dictionary's comparer serialization test to HashSet * Address PR feedback --- .../src/System/RuntimeType.CoreCLR.cs | 27 +- .../src/System/Collections/Generic/BitHelper.cs | 0 .../System.Collections/src/Resources/Strings.resx | 3 - .../src/System.Collections.csproj | 5 +- .../src/System/Collections/Generic/HashSet.cs | 2070 -------------------- .../Collections/Generic/HashSetEqualityComparer.cs | 58 - ...ashSet.Generic.Tests.AsNonGenericIEnumerable.cs | 4 + .../tests/Generic/HashSet/HashSet.Generic.Tests.cs | 58 + .../src/System.Private.CoreLib.Shared.projitems | 6 + .../src/System/Collections/Generic/Dictionary.cs | 361 ++-- .../src/System/Collections/Generic/HashSet.cs | 1553 +++++++++++++++ .../Collections/Generic/HashSetEqualityComparer.cs | 78 + .../Collections/Generic/InsertionBehavior.cs | 27 + .../src/System/Globalization/DateTimeFormat.cs | 4 +- .../Globalization/DateTimeFormatInfoScanner.cs | 148 +- 15 files changed, 2003 insertions(+), 2399 deletions(-) rename src/libraries/{System.Collections => Common}/src/System/Collections/Generic/BitHelper.cs (100%) delete mode 100644 src/libraries/System.Collections/src/System/Collections/Generic/HashSet.cs delete mode 100644 src/libraries/System.Collections/src/System/Collections/Generic/HashSetEqualityComparer.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Collections/Generic/HashSet.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Collections/Generic/HashSetEqualityComparer.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Collections/Generic/InsertionBehavior.cs diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/RuntimeType.CoreCLR.cs b/src/coreclr/src/System.Private.CoreLib/src/System/RuntimeType.CoreCLR.cs index ddcf030..3215635 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/RuntimeType.CoreCLR.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/RuntimeType.CoreCLR.cs @@ -989,7 +989,7 @@ namespace System } else { - List al = new List(); + var al = new HashSet(); // Get all constraints Type[] constraints = declaringType.GetGenericParameterConstraints(); @@ -1003,31 +1003,16 @@ namespace System Type[] temp = constraint.GetInterfaces(); for (int j = 0; j < temp.Length; j++) - al.Add(temp[j] as RuntimeType); + al.Add((RuntimeType)temp[j]); } - // Remove duplicates - Dictionary ht = new Dictionary(); - for (int i = 0; i < al.Count; i++) + // Populate list, without duplicates + foreach (RuntimeType rt in al) { - RuntimeType constraint = al[i]!; - if (!ht.ContainsKey(constraint)) - ht[constraint] = constraint; - } - - RuntimeType[] interfaces = new RuntimeType[ht.Values.Count]; - ht.Values.CopyTo(interfaces, 0); - - // Populate link-list - for (int i = 0; i < interfaces.Length; i++) - { - if (filter.RequiresStringComparison()) + if (!filter.RequiresStringComparison() || filter.Match(RuntimeTypeHandle.GetUtf8Name(rt))) { - if (!filter.Match(RuntimeTypeHandle.GetUtf8Name(interfaces[i]))) - continue; + list.Add(rt); } - - list.Add(interfaces[i]); } } diff --git a/src/libraries/System.Collections/src/System/Collections/Generic/BitHelper.cs b/src/libraries/Common/src/System/Collections/Generic/BitHelper.cs similarity index 100% rename from src/libraries/System.Collections/src/System/Collections/Generic/BitHelper.cs rename to src/libraries/Common/src/System/Collections/Generic/BitHelper.cs diff --git a/src/libraries/System.Collections/src/Resources/Strings.resx b/src/libraries/System.Collections/src/Resources/Strings.resx index d9e1f70..d37809a 100644 --- a/src/libraries/System.Collections/src/Resources/Strings.resx +++ b/src/libraries/System.Collections/src/Resources/Strings.resx @@ -111,9 +111,6 @@ Only supported array types for CopyTo on BitArrays are Boolean[], Int32[] and Byte[]. - - HashSet capacity is too big. - Hashtable's capacity overflowed and went negative. Check load factor, capacity and the current size of the table. diff --git a/src/libraries/System.Collections/src/System.Collections.csproj b/src/libraries/System.Collections/src/System.Collections.csproj index 50eb24e..3595ca2 100644 --- a/src/libraries/System.Collections/src/System.Collections.csproj +++ b/src/libraries/System.Collections/src/System.Collections.csproj @@ -11,14 +11,11 @@ - - - @@ -33,6 +30,8 @@ + diff --git a/src/libraries/System.Collections/src/System/Collections/Generic/HashSet.cs b/src/libraries/System.Collections/src/System/Collections/Generic/HashSet.cs deleted file mode 100644 index ffe34e2..0000000 --- a/src/libraries/System.Collections/src/System/Collections/Generic/HashSet.cs +++ /dev/null @@ -1,2070 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System.Diagnostics; -using System.Diagnostics.CodeAnalysis; -using System.Runtime.CompilerServices; -using System.Runtime.Serialization; - -namespace System.Collections.Generic -{ - /// - /// Implementation notes: - /// This uses an array-based implementation similar to , using a buckets array - /// to map hash values to the Slots array. Items in the Slots array that hash to the same value - /// are chained together through the "next" indices. - /// - /// The capacity is always prime; so during resizing, the capacity is chosen as the next prime - /// greater than double the last capacity. - /// - /// The underlying data structures are lazily initialized. Because of the observation that, - /// in practice, hashtables tend to contain only a few elements, the initial capacity is - /// set very small (3 elements) unless the ctor with a collection is used. - /// - /// The +/- 1 modifications in methods that add, check for containment, etc allow us to - /// distinguish a hash code of 0 from an uninitialized bucket. This saves us from having to - /// reset each bucket to -1 when resizing. See Contains, for example. - /// - /// Set methods such as UnionWith, IntersectWith, ExceptWith, and SymmetricExceptWith modify - /// this set. - /// - /// Some operations can perform faster if we can assume "other" contains unique elements - /// according to this equality comparer. The only times this is efficient to check is if - /// other is a hashset. Note that checking that it's a hashset alone doesn't suffice; we - /// also have to check that the hashset is using the same equality comparer. If other - /// has a different equality comparer, it will have unique elements according to its own - /// equality comparer, but not necessarily according to ours. Therefore, to go these - /// optimized routes we check that other is a hashset using the same equality comparer. - /// - /// A HashSet with no elements has the properties of the empty set. (See IsSubset, etc. for - /// special empty set checks.) - /// - /// A couple of methods have a special case if other is this (e.g. SymmetricExceptWith). - /// If we didn't have these checks, we could be iterating over the set and modifying at - /// the same time. - /// - /// - [DebuggerTypeProxy(typeof(ICollectionDebugView<>))] - [DebuggerDisplay("Count = {Count}")] - [Serializable] - [System.Runtime.CompilerServices.TypeForwardedFrom("System.Core, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089")] - public class HashSet : ICollection, ISet, IReadOnlyCollection, IReadOnlySet, ISerializable, IDeserializationCallback - { - // store lower 31 bits of hash code - private const int Lower31BitMask = 0x7FFFFFFF; - // cutoff point, above which we won't do stackallocs. This corresponds to 100 integers. - private const int StackAllocThreshold = 100; - // when constructing a hashset from an existing collection, it may contain duplicates, - // so this is used as the max acceptable excess ratio of capacity to count. Note that - // this is only used on the ctor and not to automatically shrink if the hashset has, e.g, - // a lot of adds followed by removes. Users must explicitly shrink by calling TrimExcess. - // This is set to 3 because capacity is acceptable as 2x rounded up to nearest prime. - private const int ShrinkThreshold = 3; - - // constants for serialization - private const string CapacityName = "Capacity"; // Do not rename (binary serialization) - private const string ElementsName = "Elements"; // Do not rename (binary serialization) - private const string ComparerName = "Comparer"; // Do not rename (binary serialization) - private const string VersionName = "Version"; // Do not rename (binary serialization) - - private int[]? _buckets; - private Slot[] _slots = default!; // TODO-NULLABLE: This should be Slot[]?, but the resulting annotations causes GenPartialFacadeSource to blow up: error : Unable to cast object of type 'Microsoft.CodeAnalysis.CSharp.Syntax.CompilationUnitSyntax' to type 'Microsoft.CodeAnalysis.CSharp.Syntax.BaseTypeDeclarationSyntax' - private int _count; - private int _lastIndex; - private int _freeList; - private IEqualityComparer? _comparer; - private int _version; - - private SerializationInfo? _siInfo; // temporary variable needed during deserialization - - #region Constructors - - public HashSet() - : this((IEqualityComparer?)null) - { } - - public HashSet(IEqualityComparer? comparer) - { - if (comparer == EqualityComparer.Default) - { - comparer = null; - } - - _comparer = comparer; - _lastIndex = 0; - _count = 0; - _freeList = -1; - _version = 0; - } - - public HashSet(int capacity) - : this(capacity, null) - { } - - public HashSet(IEnumerable collection) - : this(collection, null) - { } - - /// - /// Implementation Notes: - /// Since resizes are relatively expensive (require rehashing), this attempts to minimize - /// the need to resize by setting the initial capacity based on size of collection. - /// - /// - /// - public HashSet(IEnumerable collection, IEqualityComparer? comparer) - : this(comparer) - { - if (collection == null) - { - throw new ArgumentNullException(nameof(collection)); - } - - var otherAsHashSet = collection as HashSet; - if (otherAsHashSet != null && AreEqualityComparersEqual(this, otherAsHashSet)) - { - CopyFrom(otherAsHashSet); - } - else - { - // to avoid excess resizes, first set size based on collection's count. Collection - // may contain duplicates, so call TrimExcess if resulting hashset is larger than - // threshold - ICollection? coll = collection as ICollection; - int suggestedCapacity = coll == null ? 0 : coll.Count; - Initialize(suggestedCapacity); - - UnionWith(collection); - - if (_count > 0 && _slots.Length / _count > ShrinkThreshold) - { - TrimExcess(); - } - } - } - - protected HashSet(SerializationInfo info, StreamingContext context) - { - // We can't do anything with the keys and values until the entire graph has been - // deserialized and we have a reasonable estimate that GetHashCode is not going to - // fail. For the time being, we'll just cache this. The graph is not valid until - // OnDeserialization has been called. - _siInfo = info; - } - - // Initializes the HashSet from another HashSet with the same element type and - // equality comparer. - private void CopyFrom(HashSet source) - { - int count = source._count; - if (count == 0) - { - // As well as short-circuiting on the rest of the work done, - // this avoids errors from trying to access otherAsHashSet._buckets - // or otherAsHashSet._slots when they aren't initialized. - return; - } - - int capacity = source._buckets!.Length; - int threshold = HashHelpers.ExpandPrime(count + 1); - - if (threshold >= capacity) - { - _buckets = (int[])source._buckets.Clone(); - _slots = (Slot[])source._slots.Clone(); - - _lastIndex = source._lastIndex; - _freeList = source._freeList; - } - else - { - int lastIndex = source._lastIndex; - Slot[] slots = source._slots; - Initialize(count); - int index = 0; - for (int i = 0; i < lastIndex; ++i) - { - int hashCode = slots[i].hashCode; - if (hashCode >= 0) - { - AddValue(index, hashCode, slots[i].value); - ++index; - } - } - Debug.Assert(index == count); - _lastIndex = index; - } - _count = count; - } - - public HashSet(int capacity, IEqualityComparer? comparer) - : this(comparer) - { - if (capacity < 0) - { - throw new ArgumentOutOfRangeException(nameof(capacity)); - } - - if (capacity > 0) - { - Initialize(capacity); - } - } - - #endregion - - #region ICollection methods - - /// - /// Add item to this hashset. This is the explicit implementation of the - /// interface. The other Add method returns bool indicating whether item was added. - /// - /// item to add - void ICollection.Add(T item) - { - AddIfNotPresent(item); - } - - /// - /// Remove all items from this set. This clears the elements but not the underlying - /// buckets and slots array. Follow this call by TrimExcess to release these. - /// - public void Clear() - { - if (_lastIndex > 0) - { - Debug.Assert(_buckets != null, "_buckets was null but _lastIndex > 0"); - - // clear the elements so that the gc can reclaim the references. - // clear only up to _lastIndex for _slots - Array.Clear(_slots, 0, _lastIndex); - Array.Clear(_buckets, 0, _buckets.Length); - _lastIndex = 0; - _count = 0; - _freeList = -1; - } - _version++; - } - - /// - /// Checks if this hashset contains the item - /// - /// item to check for containment - /// true if item contained; false if not - public bool Contains(T item) - { - int[]? buckets = _buckets; - - if (buckets != null) - { - int collisionCount = 0; - Slot[] slots = _slots; - IEqualityComparer? comparer = _comparer; - - if (comparer == null) - { - int hashCode = item == null ? 0 : InternalGetHashCode(item.GetHashCode()); - - if (typeof(T).IsValueType) - { - // see note at "HashSet" level describing why "- 1" appears in for loop - for (int i = buckets[hashCode % buckets.Length] - 1; i >= 0; i = slots[i].next) - { - if (slots[i].hashCode == hashCode && EqualityComparer.Default.Equals(slots[i].value, item)) - { - return true; - } - - if (collisionCount >= slots.Length) - { - // The chain of entries forms a loop, which means a concurrent update has happened. - throw new InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported); - } - collisionCount++; - } - } - else - { - // Object type: Shared Generic, EqualityComparer.Default won't devirtualize - // https://github.com/dotnet/runtime/issues/10050 - // So cache in a local rather than get EqualityComparer per loop iteration - EqualityComparer defaultComparer = EqualityComparer.Default; - - // see note at "HashSet" level describing why "- 1" appears in for loop - for (int i = buckets[hashCode % buckets.Length] - 1; i >= 0; i = slots[i].next) - { - if (slots[i].hashCode == hashCode && defaultComparer.Equals(slots[i].value, item)) - { - return true; - } - - if (collisionCount >= slots.Length) - { - // The chain of entries forms a loop, which means a concurrent update has happened. - throw new InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported); - } - collisionCount++; - } - } - } - else - { - int hashCode = item == null ? 0 : InternalGetHashCode(comparer.GetHashCode(item)); - - // see note at "HashSet" level describing why "- 1" appears in for loop - for (int i = buckets[hashCode % buckets.Length] - 1; i >= 0; i = slots[i].next) - { - if (slots[i].hashCode == hashCode && comparer.Equals(slots[i].value, item)) - { - return true; - } - - if (collisionCount >= slots.Length) - { - // The chain of entries forms a loop, which means a concurrent update has happened. - throw new InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported); - } - collisionCount++; - } - } - } - - // either _buckets is null or wasn't found - return false; - } - - /// - /// Copy items in this hashset to array, starting at arrayIndex - /// - /// array to add items to - /// index to start at - public void CopyTo(T[] array, int arrayIndex) - { - CopyTo(array, arrayIndex, _count); - } - - /// - /// Remove item from this hashset - /// - /// item to remove - /// true if removed; false if not (i.e. if the item wasn't in the HashSet) - public bool Remove(T item) - { - int hashCode; - int bucket; - int last = -1; - int collisionCount = 0; - int i; - Slot[] slots; - IEqualityComparer? comparer = _comparer; - - if (_buckets != null) - { - slots = _slots; - - if (comparer == null) - { - hashCode = item == null ? 0 : InternalGetHashCode(item.GetHashCode()); - bucket = hashCode % _buckets!.Length; - - if (typeof(T).IsValueType) - { - for (i = _buckets[bucket] - 1; i >= 0; last = i, i = slots[i].next) - { - if (slots[i].hashCode == hashCode && EqualityComparer.Default.Equals(slots[i].value, item)) - { - goto ReturnFound; - } - - if (collisionCount >= slots.Length) - { - // The chain of entries forms a loop, which means a concurrent update has happened. - throw new InvalidOperationException("SR.InvalidOperation_ConcurrentOperationsNotSupported"); - } - collisionCount++; - } - } - else - { - // Object type: Shared Generic, EqualityComparer.Default won't devirtualize - // https://github.com/dotnet/runtime/issues/10050 - // So cache in a local rather than get EqualityComparer per loop iteration - EqualityComparer defaultComparer = EqualityComparer.Default; - - for (i = _buckets[bucket] - 1; i >= 0; last = i, i = slots[i].next) - { - if (slots[i].hashCode == hashCode && defaultComparer.Equals(slots[i].value, item)) - { - goto ReturnFound; - } - - if (collisionCount >= slots.Length) - { - // The chain of entries forms a loop, which means a concurrent update has happened. - throw new InvalidOperationException("SR.InvalidOperation_ConcurrentOperationsNotSupported"); - } - collisionCount++; - } - } - } - else - { - hashCode = item == null ? 0 : InternalGetHashCode(comparer.GetHashCode(item)); - bucket = hashCode % _buckets!.Length; - - for (i = _buckets[bucket] - 1; i >= 0; last = i, i = slots[i].next) - { - if (slots[i].hashCode == hashCode && comparer.Equals(slots[i].value, item)) - { - goto ReturnFound; - } - - if (collisionCount >= slots.Length) - { - // The chain of entries forms a loop, which means a concurrent update has happened. - throw new InvalidOperationException("SR.InvalidOperation_ConcurrentOperationsNotSupported"); - } - collisionCount++; - } - } - } - // either _buckets is null or wasn't found - return false; - - ReturnFound: - if (last < 0) - { - // first iteration; update buckets - _buckets[bucket] = slots[i].next + 1; - } - else - { - // subsequent iterations; update 'next' pointers - slots[last].next = slots[i].next; - } - slots[i].hashCode = -1; - if (RuntimeHelpers.IsReferenceOrContainsReferences()) - { - slots[i].value = default!; - } - slots[i].next = _freeList; - - _count--; - _version++; - if (_count == 0) - { - _lastIndex = 0; - _freeList = -1; - } - else - { - _freeList = i; - } - return true; - } - - /// - /// Number of elements in this hashset - /// - public int Count - { - get { return _count; } - } - - /// - /// Whether this is readonly - /// - bool ICollection.IsReadOnly - { - get { return false; } - } - - #endregion - - #region IEnumerable methods - - public Enumerator GetEnumerator() - { - return new Enumerator(this); - } - - IEnumerator IEnumerable.GetEnumerator() - { - return new Enumerator(this); - } - - IEnumerator IEnumerable.GetEnumerator() - { - return new Enumerator(this); - } - - #endregion - - #region ISerializable methods - - public virtual void GetObjectData(SerializationInfo info, StreamingContext context) - { - if (info == null) - { - throw new ArgumentNullException(nameof(info)); - } - - info.AddValue(VersionName, _version); // need to serialize version to avoid problems with serializing while enumerating - info.AddValue(ComparerName, _comparer ?? EqualityComparer.Default, typeof(IEqualityComparer)); - info.AddValue(CapacityName, _buckets == null ? 0 : _buckets.Length); - - if (_buckets != null) - { - T[] array = new T[_count]; - CopyTo(array); - info.AddValue(ElementsName, array, typeof(T[])); - } - } - - #endregion - - #region IDeserializationCallback methods - - public virtual void OnDeserialization(object? sender) - { - if (_siInfo == null) - { - // It might be necessary to call OnDeserialization from a container if the - // container object also implements OnDeserialization. We can return immediately - // if this function is called twice. Note we set _siInfo to null at the end of this method. - return; - } - - int capacity = _siInfo.GetInt32(CapacityName); - _comparer = (IEqualityComparer)_siInfo.GetValue(ComparerName, typeof(IEqualityComparer))!; - _freeList = -1; - - if (capacity != 0) - { - _buckets = new int[capacity]; - _slots = new Slot[capacity]; - - T[]? array = (T[]?)_siInfo.GetValue(ElementsName, typeof(T[])); - - if (array == null) - { - throw new SerializationException(SR.Serialization_MissingKeys); - } - - // there are no resizes here because we already set capacity above - for (int i = 0; i < array.Length; i++) - { - AddIfNotPresent(array[i]); - } - } - else - { - _buckets = null; - } - - _version = _siInfo.GetInt32(VersionName); - _siInfo = null; - } - - #endregion - - #region HashSet methods - - /// - /// Add item to this HashSet. Returns bool indicating whether item was added (won't be - /// added if already present) - /// - /// - /// true if added, false if already present - public bool Add(T item) - { - return AddIfNotPresent(item); - } - - /// - /// Searches the set for a given value and returns the equal value it finds, if any. - /// - /// The value to search for. - /// The value from the set that the search found, or the default value of when the search yielded no match. - /// A value indicating whether the search was successful. - /// - /// This can be useful when you want to reuse a previously stored reference instead of - /// a newly constructed one (so that more sharing of references can occur) or to look up - /// a value that has more complete data than the value you currently have, although their - /// comparer functions indicate they are equal. - /// - public bool TryGetValue(T equalValue, [MaybeNullWhen(false)] out T actualValue) - { - if (_buckets != null) - { - int i = InternalIndexOf(equalValue); - if (i >= 0) - { - actualValue = _slots[i].value; - return true; - } - } - actualValue = default!; - return false; - } - - /// - /// Take the union of this HashSet with other. Modifies this set. - /// - /// Implementation note: GetSuggestedCapacity (to increase capacity in advance avoiding - /// multiple resizes ended up not being useful in practice; quickly gets to the - /// point where it's a wasteful check. - /// - /// enumerable with items to add - public void UnionWith(IEnumerable other) - { - if (other == null) - { - throw new ArgumentNullException(nameof(other)); - } - - foreach (T item in other) - { - AddIfNotPresent(item); - } - } - - /// - /// Takes the intersection of this set with other. Modifies this set. - /// - /// Implementation Notes: - /// We get better perf if other is a hashset using same equality comparer, because we - /// get constant contains check in other. Resulting cost is O(n1) to iterate over this. - /// - /// If we can't go above route, iterate over the other and mark intersection by checking - /// contains in this. Then loop over and delete any unmarked elements. Total cost is n2+n1. - /// - /// Attempts to return early based on counts alone, using the property that the - /// intersection of anything with the empty set is the empty set. - /// - /// enumerable with items to add - public void IntersectWith(IEnumerable other) - { - if (other == null) - { - throw new ArgumentNullException(nameof(other)); - } - - // intersection of anything with empty set is empty set, so return if count is 0 - if (_count == 0) - { - return; - } - - // set intersecting with itself is the same set - if (other == this) - { - return; - } - - // if other is empty, intersection is empty set; remove all elements and we're done - // can only figure this out if implements ICollection. (IEnumerable has no count) - ICollection? otherAsCollection = other as ICollection; - if (otherAsCollection != null) - { - if (otherAsCollection.Count == 0) - { - Clear(); - return; - } - - HashSet? otherAsSet = other as HashSet; - // faster if other is a hashset using same equality comparer; so check - // that other is a hashset using the same equality comparer. - if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet)) - { - IntersectWithHashSetWithSameEC(otherAsSet); - return; - } - } - - IntersectWithEnumerable(other); - } - - /// - /// Remove items in other from this set. Modifies this set. - /// - /// enumerable with items to remove - public void ExceptWith(IEnumerable other) - { - if (other == null) - { - throw new ArgumentNullException(nameof(other)); - } - - // this is already the empty set; return - if (_count == 0) - { - return; - } - - // special case if other is this; a set minus itself is the empty set - if (other == this) - { - Clear(); - return; - } - - // remove every element in other from this - foreach (T element in other) - { - Remove(element); - } - } - - /// - /// Takes symmetric difference (XOR) with other and this set. Modifies this set. - /// - /// enumerable with items to XOR - public void SymmetricExceptWith(IEnumerable other) - { - if (other == null) - { - throw new ArgumentNullException(nameof(other)); - } - - // if set is empty, then symmetric difference is other - if (_count == 0) - { - UnionWith(other); - return; - } - - // special case this; the symmetric difference of a set with itself is the empty set - if (other == this) - { - Clear(); - return; - } - - HashSet? otherAsSet = other as HashSet; - // If other is a HashSet, it has unique elements according to its equality comparer, - // but if they're using different equality comparers, then assumption of uniqueness - // will fail. So first check if other is a hashset using the same equality comparer; - // symmetric except is a lot faster and avoids bit array allocations if we can assume - // uniqueness - if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet)) - { - SymmetricExceptWithUniqueHashSet(otherAsSet); - } - else - { - SymmetricExceptWithEnumerable(other); - } - } - - /// - /// Checks if this is a subset of other. - /// - /// Implementation Notes: - /// The following properties are used up-front to avoid element-wise checks: - /// 1. If this is the empty set, then it's a subset of anything, including the empty set - /// 2. If other has unique elements according to this equality comparer, and this has more - /// elements than other, then it can't be a subset. - /// - /// Furthermore, if other is a hashset using the same equality comparer, we can use a - /// faster element-wise check. - /// - /// - /// true if this is a subset of other; false if not - public bool IsSubsetOf(IEnumerable other) - { - if (other == null) - { - throw new ArgumentNullException(nameof(other)); - } - - // The empty set is a subset of any set - if (_count == 0) - { - return true; - } - - // Set is always a subset of itself - if (other == this) - { - return true; - } - - HashSet? otherAsSet = other as HashSet; - // faster if other has unique elements according to this equality comparer; so check - // that other is a hashset using the same equality comparer. - if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet)) - { - // if this has more elements then it can't be a subset - if (_count > otherAsSet.Count) - { - return false; - } - - // already checked that we're using same equality comparer. simply check that - // each element in this is contained in other. - return IsSubsetOfHashSetWithSameEC(otherAsSet); - } - else - { - ElementCount result = CheckUniqueAndUnfoundElements(other, false); - return (result.uniqueCount == _count && result.unfoundCount >= 0); - } - } - - /// - /// Checks if this is a proper subset of other (i.e. strictly contained in) - /// - /// Implementation Notes: - /// The following properties are used up-front to avoid element-wise checks: - /// 1. If this is the empty set, then it's a proper subset of a set that contains at least - /// one element, but it's not a proper subset of the empty set. - /// 2. If other has unique elements according to this equality comparer, and this has >= - /// the number of elements in other, then this can't be a proper subset. - /// - /// Furthermore, if other is a hashset using the same equality comparer, we can use a - /// faster element-wise check. - /// - /// - /// true if this is a proper subset of other; false if not - public bool IsProperSubsetOf(IEnumerable other) - { - if (other == null) - { - throw new ArgumentNullException(nameof(other)); - } - - // no set is a proper subset of itself. - if (other == this) - { - return false; - } - - ICollection? otherAsCollection = other as ICollection; - if (otherAsCollection != null) - { - // no set is a proper subset of an empty set - if (otherAsCollection.Count == 0) - { - return false; - } - - // the empty set is a proper subset of anything but the empty set - if (_count == 0) - { - return otherAsCollection.Count > 0; - } - HashSet? otherAsSet = other as HashSet; - // faster if other is a hashset (and we're using same equality comparer) - if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet)) - { - if (_count >= otherAsSet.Count) - { - return false; - } - // this has strictly less than number of items in other, so the following - // check suffices for proper subset. - return IsSubsetOfHashSetWithSameEC(otherAsSet); - } - } - - ElementCount result = CheckUniqueAndUnfoundElements(other, false); - return (result.uniqueCount == _count && result.unfoundCount > 0); - } - - /// - /// Checks if this is a superset of other - /// - /// Implementation Notes: - /// The following properties are used up-front to avoid element-wise checks: - /// 1. If other has no elements (it's the empty set), then this is a superset, even if this - /// is also the empty set. - /// 2. If other has unique elements according to this equality comparer, and this has less - /// than the number of elements in other, then this can't be a superset - /// - /// - /// - /// true if this is a superset of other; false if not - public bool IsSupersetOf(IEnumerable other) - { - if (other == null) - { - throw new ArgumentNullException(nameof(other)); - } - - // a set is always a superset of itself - if (other == this) - { - return true; - } - - // try to fall out early based on counts - ICollection? otherAsCollection = other as ICollection; - if (otherAsCollection != null) - { - // if other is the empty set then this is a superset - if (otherAsCollection.Count == 0) - { - return true; - } - HashSet? otherAsSet = other as HashSet; - // try to compare based on counts alone if other is a hashset with - // same equality comparer - if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet)) - { - if (otherAsSet.Count > _count) - { - return false; - } - } - } - - return ContainsAllElements(other); - } - - /// - /// Checks if this is a proper superset of other (i.e. other strictly contained in this) - /// - /// Implementation Notes: - /// This is slightly more complicated than above because we have to keep track if there - /// was at least one element not contained in other. - /// - /// The following properties are used up-front to avoid element-wise checks: - /// 1. If this is the empty set, then it can't be a proper superset of any set, even if - /// other is the empty set. - /// 2. If other is an empty set and this contains at least 1 element, then this is a proper - /// superset. - /// 3. If other has unique elements according to this equality comparer, and other's count - /// is greater than or equal to this count, then this can't be a proper superset - /// - /// Furthermore, if other has unique elements according to this equality comparer, we can - /// use a faster element-wise check. - /// - /// - /// true if this is a proper superset of other; false if not - public bool IsProperSupersetOf(IEnumerable other) - { - if (other == null) - { - throw new ArgumentNullException(nameof(other)); - } - - // the empty set isn't a proper superset of any set. - if (_count == 0) - { - return false; - } - - // a set is never a strict superset of itself - if (other == this) - { - return false; - } - - ICollection? otherAsCollection = other as ICollection; - if (otherAsCollection != null) - { - // if other is the empty set then this is a superset - if (otherAsCollection.Count == 0) - { - // note that this has at least one element, based on above check - return true; - } - HashSet? otherAsSet = other as HashSet; - // faster if other is a hashset with the same equality comparer - if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet)) - { - if (otherAsSet.Count >= _count) - { - return false; - } - // now perform element check - return ContainsAllElements(otherAsSet); - } - } - // couldn't fall out in the above cases; do it the long way - ElementCount result = CheckUniqueAndUnfoundElements(other, true); - return (result.uniqueCount < _count && result.unfoundCount == 0); - } - - /// - /// Checks if this set overlaps other (i.e. they share at least one item) - /// - /// - /// true if these have at least one common element; false if disjoint - public bool Overlaps(IEnumerable other) - { - if (other == null) - { - throw new ArgumentNullException(nameof(other)); - } - - if (_count == 0) - { - return false; - } - - // set overlaps itself - if (other == this) - { - return true; - } - - foreach (T element in other) - { - if (Contains(element)) - { - return true; - } - } - return false; - } - - /// - /// Checks if this and other contain the same elements. This is set equality: - /// duplicates and order are ignored - /// - /// - /// - public bool SetEquals(IEnumerable other) - { - if (other == null) - { - throw new ArgumentNullException(nameof(other)); - } - - // a set is equal to itself - if (other == this) - { - return true; - } - - HashSet? otherAsSet = other as HashSet; - // faster if other is a hashset and we're using same equality comparer - if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet)) - { - // attempt to return early: since both contain unique elements, if they have - // different counts, then they can't be equal - if (_count != otherAsSet.Count) - { - return false; - } - - // already confirmed that the sets have the same number of distinct elements, so if - // one is a superset of the other then they must be equal - return ContainsAllElements(otherAsSet); - } - else - { - ICollection? otherAsCollection = other as ICollection; - if (otherAsCollection != null) - { - // if this count is 0 but other contains at least one element, they can't be equal - if (_count == 0 && otherAsCollection.Count > 0) - { - return false; - } - } - ElementCount result = CheckUniqueAndUnfoundElements(other, true); - return (result.uniqueCount == _count && result.unfoundCount == 0); - } - } - - public void CopyTo(T[] array) - { - CopyTo(array, 0, _count); - } - - public void CopyTo(T[] array, int arrayIndex, int count) - { - if (array == null) - { - throw new ArgumentNullException(nameof(array)); - } - - // check array index valid index into array - if (arrayIndex < 0) - { - throw new ArgumentOutOfRangeException(nameof(arrayIndex), arrayIndex, SR.ArgumentOutOfRange_NeedNonNegNum); - } - - // also throw if count less than 0 - if (count < 0) - { - throw new ArgumentOutOfRangeException(nameof(count), count, SR.ArgumentOutOfRange_NeedNonNegNum); - } - - // will array, starting at arrayIndex, be able to hold elements? Note: not - // checking arrayIndex >= array.Length (consistency with list of allowing - // count of 0; subsequent check takes care of the rest) - if (arrayIndex > array.Length || count > array.Length - arrayIndex) - { - throw new ArgumentException(SR.Arg_ArrayPlusOffTooSmall); - } - - int numCopied = 0; - for (int i = 0; i < _lastIndex && numCopied < count; i++) - { - if (_slots[i].hashCode >= 0) - { - array[arrayIndex + numCopied] = _slots[i].value; - numCopied++; - } - } - } - - /// - /// Remove elements that match specified predicate. Returns the number of elements removed - /// - /// - /// - public int RemoveWhere(Predicate match) - { - if (match == null) - { - throw new ArgumentNullException(nameof(match)); - } - - int numRemoved = 0; - for (int i = 0; i < _lastIndex; i++) - { - if (_slots[i].hashCode >= 0) - { - // cache value in case delegate removes it - T value = _slots[i].value; - if (match(value)) - { - // check again that remove actually removed it - if (Remove(value)) - { - numRemoved++; - } - } - } - } - return numRemoved; - } - - /// - /// Gets the IEqualityComparer that is used to determine equality of keys for - /// the HashSet. - /// - public IEqualityComparer Comparer - { - get - { - return _comparer ?? EqualityComparer.Default; - } - } - - /// - /// Ensures that the hash set can hold up to 'capacity' entries without any further expansion of its backing storage. - /// - public int EnsureCapacity(int capacity) - { - if (capacity < 0) - throw new ArgumentOutOfRangeException(nameof(capacity)); - int currentCapacity = _slots == null ? 0 : _slots.Length; - if (currentCapacity >= capacity) - return currentCapacity; - if (_buckets == null) - return Initialize(capacity); - - int newSize = HashHelpers.GetPrime(capacity); - SetCapacity(newSize); - return newSize; - } - - /// - /// Sets the capacity of this list to the size of the list (rounded up to nearest prime), - /// unless count is 0, in which case we release references. - /// - /// This method can be used to minimize a list's memory overhead once it is known that no - /// new elements will be added to the list. To completely clear a list and release all - /// memory referenced by the list, execute the following statements: - /// - /// list.Clear(); - /// list.TrimExcess(); - /// - public void TrimExcess() - { - Debug.Assert(_count >= 0, "_count is negative"); - - if (_count == 0) - { - // if count is zero, clear references - _buckets = null; - _slots = null!; - _version++; - } - else - { - Debug.Assert(_buckets != null, "_buckets was null but _count > 0"); - - // similar to IncreaseCapacity but moves down elements in case add/remove/etc - // caused fragmentation - int newSize = HashHelpers.GetPrime(_count); - Slot[] newSlots = new Slot[newSize]; - int[] newBuckets = new int[newSize]; - - // move down slots and rehash at the same time. newIndex keeps track of current - // position in newSlots array - int newIndex = 0; - for (int i = 0; i < _lastIndex; i++) - { - if (_slots[i].hashCode >= 0) - { - newSlots[newIndex] = _slots[i]; - - // rehash - int bucket = newSlots[newIndex].hashCode % newSize; - newSlots[newIndex].next = newBuckets[bucket] - 1; - newBuckets[bucket] = newIndex + 1; - - newIndex++; - } - } - - Debug.Assert(newSlots.Length <= _slots.Length, "capacity increased after TrimExcess"); - - _lastIndex = newIndex; - _slots = newSlots; - _buckets = newBuckets; - _freeList = -1; - } - } - - #endregion - - #region Helper methods - - /// - /// Used for deep equality of HashSet testing - /// - /// - public static IEqualityComparer> CreateSetComparer() - { - return new HashSetEqualityComparer(); - } - - /// - /// Initializes buckets and slots arrays. Uses suggested capacity by finding next prime - /// greater than or equal to capacity. - /// - /// - private int Initialize(int capacity) - { - Debug.Assert(_buckets == null, "Initialize was called but _buckets was non-null"); - - int size = HashHelpers.GetPrime(capacity); - - _buckets = new int[size]; - _slots = new Slot[size]; - return size; - } - - /// - /// Expand to new capacity. New capacity is next prime greater than or equal to suggested - /// size. This is called when the underlying array is filled. This performs no - /// defragmentation, allowing faster execution; note that this is reasonable since - /// AddIfNotPresent attempts to insert new elements in re-opened spots. - /// - private void IncreaseCapacity() - { - Debug.Assert(_buckets != null, "IncreaseCapacity called on a set with no elements"); - - int newSize = HashHelpers.ExpandPrime(_count); - if (newSize <= _count) - { - throw new ArgumentException(SR.Arg_HSCapacityOverflow); - } - - // Able to increase capacity; copy elements to larger array and rehash - SetCapacity(newSize); - } - - /// - /// Set the underlying buckets array to size newSize and rehash. Note that newSize - /// *must* be a prime. It is very likely that you want to call IncreaseCapacity() - /// instead of this method. - /// - private void SetCapacity(int newSize) - { - Debug.Assert(HashHelpers.IsPrime(newSize), "New size is not prime!"); - Debug.Assert(_buckets != null, "SetCapacity called on a set with no elements"); - - Slot[] newSlots = new Slot[newSize]; - if (_slots != null) - { - Array.Copy(_slots, newSlots, _lastIndex); - } - - int[] newBuckets = new int[newSize]; - for (int i = 0; i < _lastIndex; i++) - { - int hashCode = newSlots[i].hashCode; - if (hashCode >= 0) - { - int bucket = hashCode % newSize; - newSlots[i].next = newBuckets[bucket] - 1; - newBuckets[bucket] = i + 1; - } - } - _slots = newSlots; - _buckets = newBuckets; - } - - /// - /// Adds value to HashSet if not contained already - /// Returns true if added and false if already present - /// - /// value to find - /// - private bool AddIfNotPresent(T value) - { - if (_buckets == null) - { - Initialize(0); - } - - int hashCode; - int bucket; - int collisionCount = 0; - Slot[] slots = _slots; - - IEqualityComparer? comparer = _comparer; - - if (comparer == null) - { - hashCode = value == null ? 0 : InternalGetHashCode(value.GetHashCode()); - bucket = hashCode % _buckets!.Length; - - if (typeof(T).IsValueType) - { - for (int i = _buckets[bucket] - 1; i >= 0; i = slots[i].next) - { - if (slots[i].hashCode == hashCode && EqualityComparer.Default.Equals(slots[i].value, value)) - { - return false; - } - - if (collisionCount >= slots.Length) - { - // The chain of entries forms a loop, which means a concurrent update has happened. - throw new InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported); - } - collisionCount++; - } - } - else - { - // Object type: Shared Generic, EqualityComparer.Default won't devirtualize - // https://github.com/dotnet/runtime/issues/10050 - // So cache in a local rather than get EqualityComparer per loop iteration - EqualityComparer defaultComparer = EqualityComparer.Default; - - for (int i = _buckets[bucket] - 1; i >= 0; i = slots[i].next) - { - if (slots[i].hashCode == hashCode && defaultComparer.Equals(slots[i].value, value)) - { - return false; - } - - if (collisionCount >= slots.Length) - { - // The chain of entries forms a loop, which means a concurrent update has happened. - throw new InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported); - } - collisionCount++; - } - } - } - else - { - hashCode = value == null ? 0 : InternalGetHashCode(comparer.GetHashCode(value)); - bucket = hashCode % _buckets!.Length; - - for (int i = _buckets[bucket] - 1; i >= 0; i = slots[i].next) - { - if (slots[i].hashCode == hashCode && comparer.Equals(slots[i].value, value)) - { - return false; - } - - if (collisionCount >= slots.Length) - { - // The chain of entries forms a loop, which means a concurrent update has happened. - throw new InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported); - } - collisionCount++; - } - } - - int index; - if (_freeList >= 0) - { - index = _freeList; - _freeList = slots[index].next; - } - else - { - if (_lastIndex == slots.Length) - { - IncreaseCapacity(); - // this will change during resize - slots = _slots; - bucket = hashCode % _buckets.Length; - } - index = _lastIndex; - _lastIndex++; - } - slots[index].hashCode = hashCode; - slots[index].value = value; - slots[index].next = _buckets[bucket] - 1; - _buckets[bucket] = index + 1; - _count++; - _version++; - - return true; - } - - // Add value at known index with known hash code. Used only - // when constructing from another HashSet. - private void AddValue(int index, int hashCode, T value) - { - int bucket = hashCode % _buckets!.Length; - -#if DEBUG - IEqualityComparer comparer = _comparer ?? EqualityComparer.Default; - Debug.Assert(InternalGetHashCode(value, comparer) == hashCode); - for (int i = _buckets[bucket] - 1; i >= 0; i = _slots[i].next) - { - Debug.Assert(!comparer.Equals(_slots[i].value, value)); - } -#endif - - Debug.Assert(_freeList == -1); - _slots[index].hashCode = hashCode; - _slots[index].value = value; - _slots[index].next = _buckets[bucket] - 1; - _buckets[bucket] = index + 1; - } - - /// - /// Checks if this contains of other's elements. Iterates over other's elements and - /// returns false as soon as it finds an element in other that's not in this. - /// Used by SupersetOf, ProperSupersetOf, and SetEquals. - /// - /// - /// - private bool ContainsAllElements(IEnumerable other) - { - foreach (T element in other) - { - if (!Contains(element)) - { - return false; - } - } - return true; - } - - /// - /// Implementation Notes: - /// If other is a hashset and is using same equality comparer, then checking subset is - /// faster. Simply check that each element in this is in other. - /// - /// Note: if other doesn't use same equality comparer, then Contains check is invalid, - /// which is why callers must take are of this. - /// - /// If callers are concerned about whether this is a proper subset, they take care of that. - /// - /// - /// - /// - private bool IsSubsetOfHashSetWithSameEC(HashSet other) - { - foreach (T item in this) - { - if (!other.Contains(item)) - { - return false; - } - } - return true; - } - - /// - /// If other is a hashset that uses same equality comparer, intersect is much faster - /// because we can use other's Contains - /// - /// - private void IntersectWithHashSetWithSameEC(HashSet other) - { - for (int i = 0; i < _lastIndex; i++) - { - if (_slots[i].hashCode >= 0) - { - T item = _slots[i].value; - if (!other.Contains(item)) - { - Remove(item); - } - } - } - } - - /// - /// Iterate over other. If contained in this, mark an element in bit array corresponding to - /// its position in _slots. If anything is unmarked (in bit array), remove it. - /// - /// This attempts to allocate on the stack, if below StackAllocThreshold. - /// - /// - private unsafe void IntersectWithEnumerable(IEnumerable other) - { - Debug.Assert(_buckets != null, "_buckets shouldn't be null; callers should check first"); - - // keep track of current last index; don't want to move past the end of our bit array - // (could happen if another thread is modifying the collection) - int originalLastIndex = _lastIndex; - int intArrayLength = BitHelper.ToIntArrayLength(originalLastIndex); - - Span span = stackalloc int[StackAllocThreshold]; - BitHelper bitHelper = intArrayLength <= StackAllocThreshold ? - new BitHelper(span.Slice(0, intArrayLength), clear: true) : - new BitHelper(new int[intArrayLength], clear: false); - - // mark if contains: find index of in slots array and mark corresponding element in bit array - foreach (T item in other) - { - int index = InternalIndexOf(item); - if (index >= 0) - { - bitHelper.MarkBit(index); - } - } - - // if anything unmarked, remove it. Perf can be optimized here if BitHelper had a - // FindFirstUnmarked method. - for (int i = 0; i < originalLastIndex; i++) - { - if (_slots[i].hashCode >= 0 && !bitHelper.IsMarked(i)) - { - Remove(_slots[i].value); - } - } - } - - /// - /// Used internally by set operations which have to rely on bit array marking. This is like - /// Contains but returns index in slots array. - /// - /// - /// - private int InternalIndexOf(T item) - { - Debug.Assert(_buckets != null, "_buckets was null; callers should check first"); - - int[]? buckets = _buckets; - int collisionCount = 0; - Slot[] slots = _slots; - IEqualityComparer? comparer = _comparer; - - if (comparer == null) - { - int hashCode = item == null ? 0 : InternalGetHashCode(item.GetHashCode()); - - if (typeof(T).IsValueType) - { - // see note at "HashSet" level describing why "- 1" appears in for loop - for (int i = buckets[hashCode % buckets.Length] - 1; i >= 0; i = slots[i].next) - { - if (slots[i].hashCode == hashCode && EqualityComparer.Default.Equals(slots[i].value, item)) - { - return i; - } - - if (collisionCount >= slots.Length) - { - // The chain of entries forms a loop, which means a concurrent update has happened. - throw new InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported); - } - collisionCount++; - } - } - else - { - // Object type: Shared Generic, EqualityComparer.Default won't devirtualize - // https://github.com/dotnet/runtime/issues/10050 - // So cache in a local rather than get EqualityComparer per loop iteration - EqualityComparer defaultComparer = EqualityComparer.Default; - - // see note at "HashSet" level describing why "- 1" appears in for loop - for (int i = buckets[hashCode % buckets.Length] - 1; i >= 0; i = slots[i].next) - { - if (slots[i].hashCode == hashCode && defaultComparer.Equals(slots[i].value, item)) - { - return i; - } - - if (collisionCount >= slots.Length) - { - // The chain of entries forms a loop, which means a concurrent update has happened. - throw new InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported); - } - collisionCount++; - } - } - } - else - { - int hashCode = item == null ? 0 : InternalGetHashCode(comparer.GetHashCode(item)); - - // see note at "HashSet" level describing why "- 1" appears in for loop - for (int i = buckets[hashCode % buckets.Length] - 1; i >= 0; i = slots[i].next) - { - if (slots[i].hashCode == hashCode && comparer.Equals(slots[i].value, item)) - { - return i; - } - - if (collisionCount >= slots.Length) - { - // The chain of entries forms a loop, which means a concurrent update has happened. - throw new InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported); - } - collisionCount++; - } - } - // wasn't found - return -1; - } - - /// - /// if other is a set, we can assume it doesn't have duplicate elements, so use this - /// technique: if can't remove, then it wasn't present in this set, so add. - /// - /// As with other methods, callers take care of ensuring that other is a hashset using the - /// same equality comparer. - /// - /// - private void SymmetricExceptWithUniqueHashSet(HashSet other) - { - foreach (T item in other) - { - if (!Remove(item)) - { - AddIfNotPresent(item); - } - } - } - - /// - /// Implementation notes: - /// - /// Used for symmetric except when other isn't a HashSet. This is more tedious because - /// other may contain duplicates. HashSet technique could fail in these situations: - /// 1. Other has a duplicate that's not in this: HashSet technique would add then - /// remove it. - /// 2. Other has a duplicate that's in this: HashSet technique would remove then add it - /// back. - /// In general, its presence would be toggled each time it appears in other. - /// - /// This technique uses bit marking to indicate whether to add/remove the item. If already - /// present in collection, it will get marked for deletion. If added from other, it will - /// get marked as something not to remove. - /// - /// - /// - private unsafe void SymmetricExceptWithEnumerable(IEnumerable other) - { - int originalLastIndex = _lastIndex; - int intArrayLength = BitHelper.ToIntArrayLength(originalLastIndex); - - Span itemsToRemoveSpan = stackalloc int[StackAllocThreshold / 2]; - BitHelper itemsToRemove = intArrayLength <= StackAllocThreshold / 2 ? - new BitHelper(itemsToRemoveSpan.Slice(0, intArrayLength), clear: true) : - new BitHelper(new int[intArrayLength], clear: false); - - Span itemsAddedFromOtherSpan = stackalloc int[StackAllocThreshold / 2]; - BitHelper itemsAddedFromOther = intArrayLength <= StackAllocThreshold / 2 ? - new BitHelper(itemsAddedFromOtherSpan.Slice(0, intArrayLength), clear: true) : - new BitHelper(new int[intArrayLength], clear: false); - - foreach (T item in other) - { - int location = 0; - bool added = AddOrGetLocation(item, out location); - if (added) - { - // wasn't already present in collection; flag it as something not to remove - // *NOTE* if location is out of range, we should ignore. BitHelper will - // detect that it's out of bounds and not try to mark it. But it's - // expected that location could be out of bounds because adding the item - // will increase _lastIndex as soon as all the free spots are filled. - itemsAddedFromOther.MarkBit(location); - } - else - { - // already there...if not added from other, mark for remove. - // *NOTE* Even though BitHelper will check that location is in range, we want - // to check here. There's no point in checking items beyond originalLastIndex - // because they could not have been in the original collection - if (location < originalLastIndex && !itemsAddedFromOther.IsMarked(location)) - { - itemsToRemove.MarkBit(location); - } - } - } - - // if anything marked, remove it - for (int i = 0; i < originalLastIndex; i++) - { - if (itemsToRemove.IsMarked(i)) - { - Remove(_slots[i].value); - } - } - } - - /// - /// Add if not already in hashset. Returns an out param indicating index where added. This - /// is used by SymmetricExcept because it needs to know the following things: - /// - whether the item was already present in the collection or added from other - /// - where it's located (if already present, it will get marked for removal, otherwise - /// marked for keeping) - /// - /// - /// - /// - private bool AddOrGetLocation(T value, out int location) - { - Debug.Assert(_buckets != null, "_buckets is null, callers should have checked"); - - IEqualityComparer? comparer = _comparer; - int hashCode = InternalGetHashCode(value, comparer); - int bucket = hashCode % _buckets.Length; - int collisionCount = 0; - Slot[] slots = _slots; - for (int i = _buckets[bucket] - 1; i >= 0; i = slots[i].next) - { - if (slots[i].hashCode == hashCode && (comparer?.Equals(slots[i].value, value) ?? EqualityComparer.Default.Equals(slots[i].value, value))) - { - location = i; - return false; //already present - } - - if (collisionCount >= slots.Length) - { - // The chain of entries forms a loop, which means a concurrent update has happened. - throw new InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported); - } - collisionCount++; - } - int index; - if (_freeList >= 0) - { - index = _freeList; - _freeList = slots[index].next; - } - else - { - if (_lastIndex == slots.Length) - { - IncreaseCapacity(); - // this will change during resize - slots = _slots; - bucket = hashCode % _buckets.Length; - } - index = _lastIndex; - _lastIndex++; - } - slots[index].hashCode = hashCode; - slots[index].value = value; - slots[index].next = _buckets[bucket] - 1; - _buckets[bucket] = index + 1; - _count++; - _version++; - location = index; - return true; - } - - /// - /// Determines counts that can be used to determine equality, subset, and superset. This - /// is only used when other is an IEnumerable and not a HashSet. If other is a HashSet - /// these properties can be checked faster without use of marking because we can assume - /// other has no duplicates. - /// - /// The following count checks are performed by callers: - /// 1. Equals: checks if unfoundCount = 0 and uniqueFoundCount = _count; i.e. everything - /// in other is in this and everything in this is in other - /// 2. Subset: checks if unfoundCount >= 0 and uniqueFoundCount = _count; i.e. other may - /// have elements not in this and everything in this is in other - /// 3. Proper subset: checks if unfoundCount > 0 and uniqueFoundCount = _count; i.e - /// other must have at least one element not in this and everything in this is in other - /// 4. Proper superset: checks if unfound count = 0 and uniqueFoundCount strictly less - /// than _count; i.e. everything in other was in this and this had at least one element - /// not contained in other. - /// - /// An earlier implementation used delegates to perform these checks rather than returning - /// an ElementCount struct; however this was changed due to the perf overhead of delegates. - /// - /// - /// Allows us to finish faster for equals and proper superset - /// because unfoundCount must be 0. - /// - private unsafe ElementCount CheckUniqueAndUnfoundElements(IEnumerable other, bool returnIfUnfound) - { - ElementCount result; - - // need special case in case this has no elements. - if (_count == 0) - { - int numElementsInOther = 0; - foreach (T item in other) - { - numElementsInOther++; - // break right away, all we want to know is whether other has 0 or 1 elements - break; - } - result.uniqueCount = 0; - result.unfoundCount = numElementsInOther; - return result; - } - - Debug.Assert((_buckets != null) && (_count > 0), "_buckets was null but count greater than 0"); - - int originalLastIndex = _lastIndex; - int intArrayLength = BitHelper.ToIntArrayLength(originalLastIndex); - - Span span = stackalloc int[StackAllocThreshold]; - BitHelper bitHelper = intArrayLength <= StackAllocThreshold ? - new BitHelper(span.Slice(0, intArrayLength), clear: true) : - new BitHelper(new int[intArrayLength], clear: false); - - // count of items in other not found in this - int unfoundCount = 0; - // count of unique items in other found in this - int uniqueFoundCount = 0; - - foreach (T item in other) - { - int index = InternalIndexOf(item); - if (index >= 0) - { - if (!bitHelper.IsMarked(index)) - { - // item hasn't been seen yet - bitHelper.MarkBit(index); - uniqueFoundCount++; - } - } - else - { - unfoundCount++; - if (returnIfUnfound) - { - break; - } - } - } - - result.uniqueCount = uniqueFoundCount; - result.unfoundCount = unfoundCount; - return result; - } - - - /// - /// Internal method used for HashSetEqualityComparer. Compares set1 and set2 according - /// to specified comparer. - /// - /// Because items are hashed according to a specific equality comparer, we have to resort - /// to n^2 search if they're using different equality comparers. - /// - /// - /// - /// - /// - internal static bool HashSetEquals(HashSet? set1, HashSet? set2, IEqualityComparer comparer) - { - // handle null cases first - if (set1 == null) - { - return (set2 == null); - } - else if (set2 == null) - { - // set1 != null - return false; - } - - // all comparers are the same; this is faster - if (AreEqualityComparersEqual(set1, set2)) - { - if (set1.Count != set2.Count) - { - return false; - } - // suffices to check subset - foreach (T item in set2) - { - if (!set1.Contains(item)) - { - return false; - } - } - return true; - } - else - { // n^2 search because items are hashed according to their respective ECs - foreach (T set2Item in set2) - { - bool found = false; - foreach (T set1Item in set1) - { - if (comparer.Equals(set2Item, set1Item)) - { - found = true; - break; - } - } - if (!found) - { - return false; - } - } - return true; - } - } - - /// - /// Checks if equality comparers are equal. This is used for algorithms that can - /// speed up if it knows the other item has unique elements. I.e. if they're using - /// different equality comparers, then uniqueness assumption between sets break. - /// - /// - /// - /// - private static bool AreEqualityComparersEqual(HashSet set1, HashSet set2) - { - return set1.Comparer.Equals(set2.Comparer); - } - - /// - /// Workaround Comparers that throw ArgumentNullException for GetHashCode(null). - /// - /// - /// - /// hash code - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int InternalGetHashCode(T item, IEqualityComparer? comparer) - { - if (item == null) - { - return 0; - } - - int hashCode = comparer?.GetHashCode(item) ?? item.GetHashCode(); - return hashCode & Lower31BitMask; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private int InternalGetHashCode(int hashCode) - { - return hashCode & Lower31BitMask; - } - - #endregion - - // used for set checking operations (using enumerables) that rely on counting - internal struct ElementCount - { - internal int uniqueCount; - internal int unfoundCount; - } - - internal struct Slot - { - internal int hashCode; // Lower 31 bits of hash code, -1 if unused - internal int next; // Index of next entry, -1 if last - internal T value; - } - - public struct Enumerator : IEnumerator, IEnumerator - { - private readonly HashSet _set; - private int _index; - private readonly int _version; - [AllowNull] private T _current; - - internal Enumerator(HashSet set) - { - _set = set; - _index = 0; - _version = set._version; - _current = default; - } - - public void Dispose() - { - } - - public bool MoveNext() - { - if (_version != _set._version) - { - throw new InvalidOperationException(SR.InvalidOperation_EnumFailedVersion); - } - - while (_index < _set._lastIndex) - { - if (_set._slots[_index].hashCode >= 0) - { - _current = _set._slots[_index].value; - _index++; - return true; - } - _index++; - } - _index = _set._lastIndex + 1; - _current = default; - return false; - } - - public T Current - { - get - { - return _current; - } - } - - object? IEnumerator.Current - { - get - { - if (_index == 0 || _index == _set._lastIndex + 1) - { - throw new InvalidOperationException(SR.InvalidOperation_EnumOpCantHappen); - } - return Current; - } - } - - void IEnumerator.Reset() - { - if (_version != _set._version) - { - throw new InvalidOperationException(SR.InvalidOperation_EnumFailedVersion); - } - - _index = 0; - _current = default; - } - } - } -} diff --git a/src/libraries/System.Collections/src/System/Collections/Generic/HashSetEqualityComparer.cs b/src/libraries/System.Collections/src/System/Collections/Generic/HashSetEqualityComparer.cs deleted file mode 100644 index 8415e53..0000000 --- a/src/libraries/System.Collections/src/System/Collections/Generic/HashSetEqualityComparer.cs +++ /dev/null @@ -1,58 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -namespace System.Collections.Generic -{ - /// - /// Equality comparer for hashsets of hashsets - /// - /// - internal sealed class HashSetEqualityComparer : IEqualityComparer?> - { - private readonly IEqualityComparer _comparer; - - public HashSetEqualityComparer() - { - _comparer = EqualityComparer.Default; - } - - // using m_comparer to keep equals properties in tact; don't want to choose one of the comparers - public bool Equals(HashSet? x, HashSet? y) - { - return HashSet.HashSetEquals(x, y, _comparer); - } - - public int GetHashCode(HashSet? obj) - { - int hashCode = 0; - if (obj != null) - { - foreach (T t in obj) - { - if (t != null) - { - hashCode ^= (_comparer.GetHashCode(t) & 0x7FFFFFFF); - } - } - } // else returns hashcode of 0 for null hashsets - return hashCode; - } - - // Equals method for the comparer itself. - public override bool Equals(object? obj) - { - HashSetEqualityComparer? comparer = obj as HashSetEqualityComparer; - if (comparer == null) - { - return false; - } - return (_comparer == comparer._comparer); - } - - public override int GetHashCode() - { - return _comparer.GetHashCode(); - } - } -} diff --git a/src/libraries/System.Collections/tests/Generic/HashSet/HashSet.Generic.Tests.AsNonGenericIEnumerable.cs b/src/libraries/System.Collections/tests/Generic/HashSet/HashSet.Generic.Tests.AsNonGenericIEnumerable.cs index e2d736c..0cda6ff 100644 --- a/src/libraries/System.Collections/tests/Generic/HashSet/HashSet.Generic.Tests.AsNonGenericIEnumerable.cs +++ b/src/libraries/System.Collections/tests/Generic/HashSet/HashSet.Generic.Tests.AsNonGenericIEnumerable.cs @@ -19,6 +19,10 @@ namespace System.Collections.Tests protected override bool Enumerator_Current_UndefinedOperation_Throws => true; + protected override ModifyOperation ModifyEnumeratorThrows => PlatformDetection.IsNetFramework ? base.ModifyEnumeratorThrows : (base.ModifyEnumeratorAllowed & ~ModifyOperation.Remove); + + protected override ModifyOperation ModifyEnumeratorAllowed => PlatformDetection.IsNetFramework ? base.ModifyEnumeratorAllowed : ModifyOperation.Overwrite | ModifyOperation.Remove; + /// /// Returns a set of ModifyEnumerable delegates that modify the enumerable passed to them. /// diff --git a/src/libraries/System.Collections/tests/Generic/HashSet/HashSet.Generic.Tests.cs b/src/libraries/System.Collections/tests/Generic/HashSet/HashSet.Generic.Tests.cs index ef52730..eb9a920c 100644 --- a/src/libraries/System.Collections/tests/Generic/HashSet/HashSet.Generic.Tests.cs +++ b/src/libraries/System.Collections/tests/Generic/HashSet/HashSet.Generic.Tests.cs @@ -3,7 +3,9 @@ // See the LICENSE file in the project root for more information. using System.Collections.Generic; +using System.IO; using System.Linq; +using System.Runtime.Serialization.Formatters.Binary; using Xunit; namespace System.Collections.Tests @@ -17,6 +19,10 @@ namespace System.Collections.Tests protected override bool ResetImplemented => true; + protected override ModifyOperation ModifyEnumeratorThrows => PlatformDetection.IsNetFramework ? base.ModifyEnumeratorThrows : (base.ModifyEnumeratorAllowed & ~(ModifyOperation.Remove | ModifyOperation.Clear)); + + protected override ModifyOperation ModifyEnumeratorAllowed => PlatformDetection.IsNetFramework ? base.ModifyEnumeratorAllowed : ModifyOperation.Overwrite | ModifyOperation.Remove | ModifyOperation.Clear; + protected override ISet GenericISetFactory() { return new HashSet(); @@ -650,5 +656,57 @@ namespace System.Collections.Tests } #endregion + + #region Serialization + + [Fact] + public void ComparerSerialization() + { + // Strings switch between randomized and non-randomized comparers, + // however this should never be observable externally. + TestComparerSerialization(EqualityComparer.Default); + + // OrdinalCaseSensitiveComparer is internal and (de)serializes as OrdinalComparer + TestComparerSerialization(StringComparer.Ordinal, "System.OrdinalComparer"); + + // OrdinalIgnoreCaseComparer is internal and (de)serializes as OrdinalComparer + TestComparerSerialization(StringComparer.OrdinalIgnoreCase, "System.OrdinalComparer"); + TestComparerSerialization(StringComparer.CurrentCulture); + TestComparerSerialization(StringComparer.CurrentCultureIgnoreCase); + TestComparerSerialization(StringComparer.InvariantCulture); + TestComparerSerialization(StringComparer.InvariantCultureIgnoreCase); + + // Check other types while here, IEquatable valuetype, nullable valuetype, and non IEquatable object + TestComparerSerialization(EqualityComparer.Default); + TestComparerSerialization(EqualityComparer.Default); + TestComparerSerialization(EqualityComparer.Default); + + static void TestComparerSerialization(IEqualityComparer equalityComparer, string internalTypeName = null) + { + var bf = new BinaryFormatter(); + var s = new MemoryStream(); + + var dict = new HashSet(equalityComparer); + + Assert.Same(equalityComparer, dict.Comparer); + + bf.Serialize(s, dict); + s.Position = 0; + dict = (HashSet)bf.Deserialize(s); + + if (internalTypeName == null) + { + Assert.IsType(equalityComparer.GetType(), dict.Comparer); + } + else + { + Assert.Equal(internalTypeName, dict.Comparer.GetType().ToString()); + } + + Assert.True(equalityComparer.Equals(dict.Comparer)); + } + } + + #endregion } } diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 6576b093..92fb367 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -134,6 +134,8 @@ + + @@ -145,6 +147,7 @@ + @@ -1062,6 +1065,9 @@ Common\System\Collections\Generic\ReferenceEqualityComparer.cs + + Common\System\Collections\Generic\BitHelper.cs + Common\System\Diagnostics\CodeAnalysis\ExcludeFromCodeCoverageAttribute.cs diff --git a/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/Dictionary.cs b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/Dictionary.cs index 84ea4bf..bddfc67 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/Dictionary.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/Dictionary.cs @@ -11,43 +11,17 @@ using Internal.Runtime.CompilerServices; namespace System.Collections.Generic { - /// - /// Used internally to control behavior of insertion into a . - /// - internal enum InsertionBehavior : byte - { - /// - /// The default insertion behavior. - /// - None = 0, - - /// - /// Specifies that an existing entry with the same key should be overwritten if encountered. - /// - OverwriteExisting = 1, - - /// - /// Specifies that if an existing entry with the same key is encountered, an exception should be thrown. - /// - ThrowOnExisting = 2 - } - [DebuggerTypeProxy(typeof(IDictionaryDebugView<,>))] [DebuggerDisplay("Count = {Count}")] [Serializable] [TypeForwardedFrom("mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089")] public class Dictionary : IDictionary, IDictionary, IReadOnlyDictionary, ISerializable, IDeserializationCallback where TKey : notnull { - private struct Entry - { - public uint hashCode; - // 0-based index of next entry in chain: -1 means end of chain - // also encodes whether this entry _itself_ is part of the free list by changing sign and subtracting 3, - // so -2 means end of free list, -3 means index 0 but on free list, -4 means index 1 but on free list, etc. - public int next; - public TKey key; // Key of entry - public TValue value; // Value of entry - } + // constants for serialization + private const string VersionName = "Version"; // Do not rename (binary serialization) + private const string HashSizeName = "HashSize"; // Do not rename (binary serialization). Must save buckets.Length + private const string KeyValuePairsName = "KeyValuePairs"; // Do not rename (binary serialization) + private const string ComparerName = "Comparer"; // Do not rename (binary serialization) private int[]? _buckets; private Entry[]? _entries; @@ -63,12 +37,6 @@ namespace System.Collections.Generic private ValueCollection? _values; private const int StartOfFreeList = -3; - // constants for serialization - private const string VersionName = "Version"; // Do not rename (binary serialization) - private const string HashSizeName = "HashSize"; // Do not rename (binary serialization). Must save buckets.Length - private const string KeyValuePairsName = "KeyValuePairs"; // Do not rename (binary serialization) - private const string ComparerName = "Comparer"; // Do not rename (binary serialization) - public Dictionary() : this(0, null) { } public Dictionary(int capacity) : this(capacity, null) { } @@ -77,8 +45,16 @@ namespace System.Collections.Generic public Dictionary(int capacity, IEqualityComparer? comparer) { - if (capacity < 0) ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.capacity); - if (capacity > 0) Initialize(capacity); + if (capacity < 0) + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.capacity); + } + + if (capacity > 0) + { + Initialize(capacity); + } + if (comparer != null && comparer != EqualityComparer.Default) // first check for null to avoid forcing default comparer instantiation unnecessarily { _comparer = comparer; @@ -159,15 +135,15 @@ namespace System.Collections.Generic public KeyCollection Keys => _keys ??= new KeyCollection(this); - ICollection IDictionary.Keys => _keys ??= new KeyCollection(this); + ICollection IDictionary.Keys => Keys; - IEnumerable IReadOnlyDictionary.Keys => _keys ??= new KeyCollection(this); + IEnumerable IReadOnlyDictionary.Keys => Keys; public ValueCollection Values => _values ??= new ValueCollection(this); - ICollection IDictionary.Values => _values ??= new ValueCollection(this); + ICollection IDictionary.Values => Values; - IEnumerable IReadOnlyDictionary.Values => _values ??= new ValueCollection(this); + IEnumerable IReadOnlyDictionary.Values => Values; public TValue this[TKey key] { @@ -178,6 +154,7 @@ namespace System.Collections.Generic { return value; } + ThrowHelper.ThrowKeyNotFoundException(key); return default; } @@ -194,8 +171,8 @@ namespace System.Collections.Generic Debug.Assert(modified); // If there was an existing key and the Add failed, an exception will already have been thrown. } - void ICollection>.Add(KeyValuePair keyValuePair) - => Add(keyValuePair.Key, keyValuePair.Value); + void ICollection>.Add(KeyValuePair keyValuePair) => + Add(keyValuePair.Key, keyValuePair.Value); bool ICollection>.Contains(KeyValuePair keyValuePair) { @@ -204,6 +181,7 @@ namespace System.Collections.Generic { return true; } + return false; } @@ -215,6 +193,7 @@ namespace System.Collections.Generic Remove(keyValuePair.Key); return true; } + return false; } @@ -235,8 +214,8 @@ namespace System.Collections.Generic } } - public bool ContainsKey(TKey key) - => !Unsafe.IsNullRef(ref FindValue(key)); + public bool ContainsKey(TKey key) => + !Unsafe.IsNullRef(ref FindValue(key)); public bool ContainsValue(TValue value) { @@ -245,31 +224,38 @@ namespace System.Collections.Generic { for (int i = 0; i < _count; i++) { - if (entries![i].next >= -1 && entries[i].value == null) return true; + if (entries![i].next >= -1 && entries[i].value == null) + { + return true; + } } } - else + else if (typeof(TValue).IsValueType) { - if (typeof(TValue).IsValueType) + // ValueType: Devirtualize with EqualityComparer.Default intrinsic + for (int i = 0; i < _count; i++) { - // ValueType: Devirtualize with EqualityComparer.Default intrinsic - for (int i = 0; i < _count; i++) + if (entries![i].next >= -1 && EqualityComparer.Default.Equals(entries[i].value, value)) { - if (entries![i].next >= -1 && EqualityComparer.Default.Equals(entries[i].value, value)) return true; + return true; } } - else + } + else + { + // Object type: Shared Generic, EqualityComparer.Default won't devirtualize + // https://github.com/dotnet/runtime/issues/10050 + // So cache in a local rather than get EqualityComparer per loop iteration + EqualityComparer defaultComparer = EqualityComparer.Default; + for (int i = 0; i < _count; i++) { - // Object type: Shared Generic, EqualityComparer.Default won't devirtualize - // https://github.com/dotnet/runtime/issues/10050 - // So cache in a local rather than get EqualityComparer per loop iteration - EqualityComparer defaultComparer = EqualityComparer.Default; - for (int i = 0; i < _count; i++) + if (entries![i].next >= -1 && defaultComparer.Equals(entries[i].value, value)) { - if (entries![i].next >= -1 && defaultComparer.Equals(entries[i].value, value)) return true; + return true; } } } + return false; } @@ -301,11 +287,10 @@ namespace System.Collections.Generic } } - public Enumerator GetEnumerator() - => new Enumerator(this, Enumerator.KeyValuePair); + public Enumerator GetEnumerator() => new Enumerator(this, Enumerator.KeyValuePair); - IEnumerator> IEnumerable>.GetEnumerator() - => new Enumerator(this, Enumerator.KeyValuePair); + IEnumerator> IEnumerable>.GetEnumerator() => + new Enumerator(this, Enumerator.KeyValuePair); public virtual void GetObjectData(SerializationInfo info, StreamingContext context) { @@ -348,8 +333,7 @@ namespace System.Collections.Generic { // ValueType: Devirtualize with EqualityComparer.Default intrinsic - // Value in _buckets is 1-based; subtract 1 from i. We do it here so it fuses with the following conditional. - i--; + i--; // Value in _buckets is 1-based; subtract 1 from i. We do it here so it fuses with the following conditional. do { // Should be a while loop https://github.com/dotnet/runtime/issues/9422 @@ -369,6 +353,7 @@ namespace System.Collections.Generic collisionCount++; } while (collisionCount <= (uint)entries.Length); + // The chain of entries forms a loop; which means a concurrent update has happened. // Break out of the loop and throw, rather than looping forever. goto ConcurrentOperation; @@ -380,8 +365,7 @@ namespace System.Collections.Generic // So cache in a local rather than get EqualityComparer per loop iteration EqualityComparer defaultComparer = EqualityComparer.Default; - // Value in _buckets is 1-based; subtract 1 from i. We do it here so it fuses with the following conditional. - i--; + i--; // Value in _buckets is 1-based; subtract 1 from i. We do it here so it fuses with the following conditional. do { // Should be a while loop https://github.com/dotnet/runtime/issues/9422 @@ -401,6 +385,7 @@ namespace System.Collections.Generic collisionCount++; } while (collisionCount <= (uint)entries.Length); + // The chain of entries forms a loop; which means a concurrent update has happened. // Break out of the loop and throw, rather than looping forever. goto ConcurrentOperation; @@ -412,8 +397,7 @@ namespace System.Collections.Generic int i = GetBucket(hashCode); Entry[]? entries = _entries; uint collisionCount = 0; - // Value in _buckets is 1-based; subtract 1 from i. We do it here so it fuses with the following conditional. - i--; + i--; // Value in _buckets is 1-based; subtract 1 from i. We do it here so it fuses with the following conditional. do { // Should be a while loop https://github.com/dotnet/runtime/issues/9422 @@ -433,6 +417,7 @@ namespace System.Collections.Generic collisionCount++; } while (collisionCount <= (uint)entries.Length); + // The chain of entries forms a loop; which means a concurrent update has happened. // Break out of the loop and throw, rather than looping forever. goto ConcurrentOperation; @@ -490,8 +475,7 @@ namespace System.Collections.Generic uint collisionCount = 0; ref int bucket = ref GetBucket(hashCode); - // Value in _buckets is 1-based - int i = bucket - 1; + int i = bucket - 1; // Value in _buckets is 1-based if (comparer == null) { @@ -616,12 +600,12 @@ namespace System.Collections.Generic } } - bool updateFreeList = false; int index; if (_freeCount > 0) { index = _freeList; - updateFreeList = true; + Debug.Assert((StartOfFreeList - entries[_freeList].next) >= -1, "shouldn't overflow because `next` cannot underflow"); + _freeList = StartOfFreeList - entries[_freeList].next; _freeCount--; } else @@ -638,19 +622,10 @@ namespace System.Collections.Generic } ref Entry entry = ref entries![index]; - - if (updateFreeList) - { - Debug.Assert((StartOfFreeList - entries[_freeList].next) >= -1, "shouldn't overflow because `next` cannot underflow"); - - _freeList = StartOfFreeList - entries[_freeList].next; - } entry.hashCode = hashCode; - // Value in _buckets is 1-based - entry.next = bucket - 1; + entry.next = bucket - 1; // Value in _buckets is 1-based entry.key = key; - entry.value = value; - // Value in _buckets is 1-based + entry.value = value; // Value in _buckets is 1-based bucket = index + 1; _version++; @@ -699,6 +674,7 @@ namespace System.Collections.Generic { ThrowHelper.ThrowSerializationException(ExceptionResource.Serialization_NullKey); } + Add(array[i].Key, array[i].Value); } } @@ -711,8 +687,7 @@ namespace System.Collections.Generic HashHelpers.SerializationInfoTable.Remove(this); } - private void Resize() - => Resize(HashHelpers.ExpandPrime(_count), false); + private void Resize() => Resize(HashHelpers.ExpandPrime(_count), false); private void Resize(int newSize, bool forceNewHashCodes) { @@ -748,9 +723,7 @@ namespace System.Collections.Generic if (entries[i].next >= -1) { ref int bucket = ref GetBucket(entries[i].hashCode); - // Value in _buckets is 1-based - entries[i].next = bucket - 1; - // Value in _buckets is 1-based + entries[i].next = bucket - 1; // Value in _buckets is 1-based bucket = i + 1; } } @@ -758,11 +731,12 @@ namespace System.Collections.Generic _entries = entries; } - // The overload Remove(TKey key, out TValue value) is a copy of this method with one additional - // statement to copy the value for entry being removed into the output parameter. - // Code has been intentionally duplicated for performance reasons. public bool Remove(TKey key) { + // The overload Remove(TKey key, out TValue value) is a copy of this method with one additional + // statement to copy the value for entry being removed into the output parameter. + // Code has been intentionally duplicated for performance reasons. + if (key == null) { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key); @@ -776,8 +750,7 @@ namespace System.Collections.Generic ref int bucket = ref GetBucket(hashCode); Entry[]? entries = _entries; int last = -1; - // Value in buckets is 1-based - int i = bucket - 1; + int i = bucket - 1; // Value in buckets is 1-based while (i >= 0) { ref Entry entry = ref entries[i]; @@ -786,8 +759,7 @@ namespace System.Collections.Generic { if (last < 0) { - // Value in buckets is 1-based - bucket = entry.next + 1; + bucket = entry.next + 1; // Value in buckets is 1-based } else { @@ -795,17 +767,18 @@ namespace System.Collections.Generic } Debug.Assert((StartOfFreeList - _freeList) < 0, "shouldn't underflow because max hashtable length is MaxPrimeArrayLength = 0x7FEFFFFD(2146435069) _freelist underflow threshold 2147483646"); - entry.next = StartOfFreeList - _freeList; if (RuntimeHelpers.IsReferenceOrContainsReferences()) { entry.key = default!; } + if (RuntimeHelpers.IsReferenceOrContainsReferences()) { entry.value = default!; } + _freeList = i; _freeCount++; return true; @@ -826,11 +799,12 @@ namespace System.Collections.Generic return false; } - // This overload is a copy of the overload Remove(TKey key) with one additional - // statement to copy the value for entry being removed into the output parameter. - // Code has been intentionally duplicated for performance reasons. public bool Remove(TKey key, [MaybeNullWhen(false)] out TValue value) { + // This overload is a copy of the overload Remove(TKey key) with one additional + // statement to copy the value for entry being removed into the output parameter. + // Code has been intentionally duplicated for performance reasons. + if (key == null) { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key); @@ -844,8 +818,7 @@ namespace System.Collections.Generic ref int bucket = ref GetBucket(hashCode); Entry[]? entries = _entries; int last = -1; - // Value in buckets is 1-based - int i = bucket - 1; + int i = bucket - 1; // Value in buckets is 1-based while (i >= 0) { ref Entry entry = ref entries[i]; @@ -854,8 +827,7 @@ namespace System.Collections.Generic { if (last < 0) { - // Value in buckets is 1-based - bucket = entry.next + 1; + bucket = entry.next + 1; // Value in buckets is 1-based } else { @@ -865,17 +837,18 @@ namespace System.Collections.Generic value = entry.value; Debug.Assert((StartOfFreeList - _freeList) < 0, "shouldn't underflow because max hashtable length is MaxPrimeArrayLength = 0x7FEFFFFD(2146435069) _freelist underflow threshold 2147483646"); - entry.next = StartOfFreeList - _freeList; if (RuntimeHelpers.IsReferenceOrContainsReferences()) { entry.key = default!; } + if (RuntimeHelpers.IsReferenceOrContainsReferences()) { entry.value = default!; } + _freeList = i; _freeCount++; return true; @@ -893,7 +866,8 @@ namespace System.Collections.Generic } } } - value = default!; + + value = default; return false; } @@ -905,30 +879,45 @@ namespace System.Collections.Generic value = valRef; return true; } - value = default!; + + value = default; return false; } - public bool TryAdd(TKey key, TValue value) - => TryInsert(key, value, InsertionBehavior.None); + public bool TryAdd(TKey key, TValue value) => + TryInsert(key, value, InsertionBehavior.None); bool ICollection>.IsReadOnly => false; - void ICollection>.CopyTo(KeyValuePair[] array, int index) - => CopyTo(array, index); + void ICollection>.CopyTo(KeyValuePair[] array, int index) => + CopyTo(array, index); void ICollection.CopyTo(Array array, int index) { if (array == null) + { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.array); + } + if (array.Rank != 1) + { ThrowHelper.ThrowArgumentException(ExceptionResource.Arg_RankMultiDimNotSupported); + } + if (array.GetLowerBound(0) != 0) + { ThrowHelper.ThrowArgumentException(ExceptionResource.Arg_NonZeroLowerBound); + } + if ((uint)index > (uint)array.Length) + { ThrowHelper.ThrowIndexArgumentOutOfRange_NeedNonNegNumException(); + } + if (array.Length - index < Count) + { ThrowHelper.ThrowArgumentException(ExceptionResource.Arg_ArrayPlusOffTooSmall); + } if (array is KeyValuePair[] pairs) { @@ -972,8 +961,7 @@ namespace System.Collections.Generic } } - IEnumerator IEnumerable.GetEnumerator() - => new Enumerator(this, Enumerator.KeyValuePair); + IEnumerator IEnumerable.GetEnumerator() => new Enumerator(this, Enumerator.KeyValuePair); /// /// Ensures that the dictionary can hold up to 'capacity' entries without any further expansion of its backing storage @@ -981,13 +969,22 @@ namespace System.Collections.Generic public int EnsureCapacity(int capacity) { if (capacity < 0) + { ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.capacity); + } + int currentCapacity = _entries == null ? 0 : _entries.Length; if (currentCapacity >= capacity) + { return currentCapacity; + } + _version++; + if (_buckets == null) + { return Initialize(capacity); + } int newSize = HashHelpers.GetPrime(capacity); Resize(newSize, forceNewHashCodes: false); @@ -996,7 +993,8 @@ namespace System.Collections.Generic /// /// Sets the capacity of this dictionary to what it would be if it had been originally initialized with all its entries - /// + /// + /// /// This method can be used to minimize the memory overhead /// once it is known that no new elements will be added. /// @@ -1004,26 +1002,30 @@ namespace System.Collections.Generic /// /// dictionary.Clear(); /// dictionary.TrimExcess(); - /// - public void TrimExcess() - => TrimExcess(Count); + /// + public void TrimExcess() => TrimExcess(Count); /// /// Sets the capacity of this dictionary to hold up 'capacity' entries without any further expansion of its backing storage - /// + /// + /// /// This method can be used to minimize the memory overhead /// once it is known that no new elements will be added. - /// + /// public void TrimExcess(int capacity) { if (capacity < Count) + { ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.capacity); + } int newSize = HashHelpers.GetPrime(capacity); Entry[]? oldEntries = _entries; int currentCapacity = oldEntries == null ? 0 : oldEntries.Length; if (newSize >= currentCapacity) + { return; + } int oldCount = _count; _version++; @@ -1038,13 +1040,12 @@ namespace System.Collections.Generic ref Entry entry = ref entries![count]; entry = oldEntries[i]; ref int bucket = ref GetBucket(hashCode); - // Value in _buckets is 1-based - entry.next = bucket - 1; - // Value in _buckets is 1-based + entry.next = bucket - 1; // Value in _buckets is 1-based bucket = count + 1; count++; } } + _count = count; _freeCount = 0; } @@ -1057,9 +1058,9 @@ namespace System.Collections.Generic bool IDictionary.IsReadOnly => false; - ICollection IDictionary.Keys => (ICollection)Keys; + ICollection IDictionary.Keys => Keys; - ICollection IDictionary.Values => (ICollection)Values; + ICollection IDictionary.Values => Values; object? IDictionary.this[object key] { @@ -1073,6 +1074,7 @@ namespace System.Collections.Generic return value; } } + return null; } set @@ -1148,8 +1150,7 @@ namespace System.Collections.Generic return false; } - IDictionaryEnumerator IDictionary.GetEnumerator() - => new Enumerator(this, Enumerator.DictEntry); + IDictionaryEnumerator IDictionary.GetEnumerator() => new Enumerator(this, Enumerator.DictEntry); void IDictionary.Remove(object key) { @@ -1170,8 +1171,20 @@ namespace System.Collections.Generic #endif } - public struct Enumerator : IEnumerator>, - IDictionaryEnumerator + private struct Entry + { + public uint hashCode; + /// + /// 0-based index of next entry in chain: -1 means end of chain + /// also encodes whether this entry _itself_ is part of the free list by changing sign and subtracting 3, + /// so -2 means end of free list, -3 means index 0 but on free list, -4 means index 1 but on free list, etc. + /// + public int next; + public TKey key; // Key of entry + public TValue value; // Value of entry + } + + public struct Enumerator : IEnumerator>, IDictionaryEnumerator { private readonly Dictionary _dictionary; private readonly int _version; @@ -1218,9 +1231,7 @@ namespace System.Collections.Generic public KeyValuePair Current => _current; - public void Dispose() - { - } + public void Dispose() { } object? IEnumerator.Current { @@ -1235,10 +1246,8 @@ namespace System.Collections.Generic { return new DictionaryEntry(_current.Key, _current.Value); } - else - { - return new KeyValuePair(_current.Key, _current.Value); - } + + return new KeyValuePair(_current.Key, _current.Value); } } @@ -1305,11 +1314,11 @@ namespace System.Collections.Generic { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.dictionary); } + _dictionary = dictionary; } - public Enumerator GetEnumerator() - => new Enumerator(_dictionary); + public Enumerator GetEnumerator() => new Enumerator(_dictionary); public void CopyTo(TKey[] array, int index) { @@ -1340,14 +1349,14 @@ namespace System.Collections.Generic bool ICollection.IsReadOnly => true; - void ICollection.Add(TKey item) - => ThrowHelper.ThrowNotSupportedException(ExceptionResource.NotSupported_KeyCollectionSet); + void ICollection.Add(TKey item) => + ThrowHelper.ThrowNotSupportedException(ExceptionResource.NotSupported_KeyCollectionSet); - void ICollection.Clear() - => ThrowHelper.ThrowNotSupportedException(ExceptionResource.NotSupported_KeyCollectionSet); + void ICollection.Clear() => + ThrowHelper.ThrowNotSupportedException(ExceptionResource.NotSupported_KeyCollectionSet); - bool ICollection.Contains(TKey item) - => _dictionary.ContainsKey(item); + bool ICollection.Contains(TKey item) => + _dictionary.ContainsKey(item); bool ICollection.Remove(TKey item) { @@ -1355,24 +1364,36 @@ namespace System.Collections.Generic return false; } - IEnumerator IEnumerable.GetEnumerator() - => new Enumerator(_dictionary); + IEnumerator IEnumerable.GetEnumerator() => new Enumerator(_dictionary); - IEnumerator IEnumerable.GetEnumerator() - => new Enumerator(_dictionary); + IEnumerator IEnumerable.GetEnumerator() => new Enumerator(_dictionary); void ICollection.CopyTo(Array array, int index) { if (array == null) + { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.array); + } + if (array.Rank != 1) + { ThrowHelper.ThrowArgumentException(ExceptionResource.Arg_RankMultiDimNotSupported); + } + if (array.GetLowerBound(0) != 0) + { ThrowHelper.ThrowArgumentException(ExceptionResource.Arg_NonZeroLowerBound); + } + if ((uint)index > (uint)array.Length) + { ThrowHelper.ThrowIndexArgumentOutOfRange_NeedNonNegNumException(); + } + if (array.Length - index < _dictionary.Count) + { ThrowHelper.ThrowArgumentException(ExceptionResource.Arg_ArrayPlusOffTooSmall); + } if (array is TKey[] keys) { @@ -1421,9 +1442,7 @@ namespace System.Collections.Generic _currentKey = default; } - public void Dispose() - { - } + public void Dispose() { } public bool MoveNext() { @@ -1488,11 +1507,11 @@ namespace System.Collections.Generic { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.dictionary); } + _dictionary = dictionary; } - public Enumerator GetEnumerator() - => new Enumerator(_dictionary); + public Enumerator GetEnumerator() => new Enumerator(_dictionary); public void CopyTo(TValue[] array, int index) { @@ -1523,8 +1542,8 @@ namespace System.Collections.Generic bool ICollection.IsReadOnly => true; - void ICollection.Add(TValue item) - => ThrowHelper.ThrowNotSupportedException(ExceptionResource.NotSupported_ValueCollectionSet); + void ICollection.Add(TValue item) => + ThrowHelper.ThrowNotSupportedException(ExceptionResource.NotSupported_ValueCollectionSet); bool ICollection.Remove(TValue item) { @@ -1532,30 +1551,41 @@ namespace System.Collections.Generic return false; } - void ICollection.Clear() - => ThrowHelper.ThrowNotSupportedException(ExceptionResource.NotSupported_ValueCollectionSet); + void ICollection.Clear() => + ThrowHelper.ThrowNotSupportedException(ExceptionResource.NotSupported_ValueCollectionSet); - bool ICollection.Contains(TValue item) - => _dictionary.ContainsValue(item); + bool ICollection.Contains(TValue item) => _dictionary.ContainsValue(item); - IEnumerator IEnumerable.GetEnumerator() - => new Enumerator(_dictionary); + IEnumerator IEnumerable.GetEnumerator() => new Enumerator(_dictionary); - IEnumerator IEnumerable.GetEnumerator() - => new Enumerator(_dictionary); + IEnumerator IEnumerable.GetEnumerator() => new Enumerator(_dictionary); void ICollection.CopyTo(Array array, int index) { if (array == null) + { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.array); + } + if (array.Rank != 1) + { ThrowHelper.ThrowArgumentException(ExceptionResource.Arg_RankMultiDimNotSupported); + } + if (array.GetLowerBound(0) != 0) + { ThrowHelper.ThrowArgumentException(ExceptionResource.Arg_NonZeroLowerBound); + } + if ((uint)index > (uint)array.Length) + { ThrowHelper.ThrowIndexArgumentOutOfRange_NeedNonNegNumException(); + } + if (array.Length - index < _dictionary.Count) + { ThrowHelper.ThrowArgumentException(ExceptionResource.Arg_ArrayPlusOffTooSmall); + } if (array is TValue[] values) { @@ -1604,9 +1634,7 @@ namespace System.Collections.Generic _currentValue = default; } - public void Dispose() - { - } + public void Dispose() { } public bool MoveNext() { @@ -1651,6 +1679,7 @@ namespace System.Collections.Generic { ThrowHelper.ThrowInvalidOperationException_InvalidOperation_EnumFailedVersion(); } + _index = 0; _currentValue = default; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/HashSet.cs b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/HashSet.cs new file mode 100644 index 0000000..2fb8884 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/HashSet.cs @@ -0,0 +1,1553 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; +using System.Runtime.Serialization; + +using Internal.Runtime.CompilerServices; + +namespace System.Collections.Generic +{ + [DebuggerTypeProxy(typeof(ICollectionDebugView<>))] + [DebuggerDisplay("Count = {Count}")] + [Serializable] + [TypeForwardedFrom("System.Core, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089")] + public class HashSet : ICollection, ISet, IReadOnlyCollection, IReadOnlySet, ISerializable, IDeserializationCallback + { + // This uses the same array-based implementation as Dictionary. + + // Constants for serialization + private const string CapacityName = "Capacity"; // Do not rename (binary serialization) + private const string ElementsName = "Elements"; // Do not rename (binary serialization) + private const string ComparerName = "Comparer"; // Do not rename (binary serialization) + private const string VersionName = "Version"; // Do not rename (binary serialization) + + /// Cutoff point for stackallocs. This corresponds to the number of ints. + private const int StackAllocThreshold = 100; + + /// + /// When constructing a hashset from an existing collection, it may contain duplicates, + /// so this is used as the max acceptable excess ratio of capacity to count. Note that + /// this is only used on the ctor and not to automatically shrink if the hashset has, e.g, + /// a lot of adds followed by removes. Users must explicitly shrink by calling TrimExcess. + /// This is set to 3 because capacity is acceptable as 2x rounded up to nearest prime. + /// + private const int ShrinkThreshold = 3; + private const int StartOfFreeList = -3; + + private int[]? _buckets; + private Entry[]? _entries; +#if TARGET_64BIT + private ulong _fastModMultiplier; +#endif + private int _count; + private int _freeList; + private int _freeCount; + private int _version; + private IEqualityComparer? _comparer; + private SerializationInfo? _siInfo; // temporary variable needed during deserialization + + #region Constructors + + public HashSet() : this((IEqualityComparer?)null) { } + + public HashSet(IEqualityComparer? comparer) + { + if (comparer != null && comparer != EqualityComparer.Default) // first check for null to avoid forcing default comparer instantiation unnecessarily + { + _comparer = comparer; + } + + if (typeof(T) == typeof(string) && _comparer == null) + { + // To start, move off default comparer for string which is randomized. + _comparer = (IEqualityComparer)NonRandomizedStringEqualityComparer.Default; + } + } + + public HashSet(int capacity) : this(capacity, null) { } + + public HashSet(IEnumerable collection) : this(collection, null) { } + + public HashSet(IEnumerable collection, IEqualityComparer? comparer) : this(comparer) + { + if (collection == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.collection); + } + + if (collection is HashSet otherAsHashSet && EqualityComparersAreEqual(this, otherAsHashSet)) + { + ConstructFrom(otherAsHashSet); + } + else + { + // To avoid excess resizes, first set size based on collection's count. The collection may + // contain duplicates, so call TrimExcess if resulting HashSet is larger than the threshold. + if (collection is ICollection coll) + { + int count = coll.Count; + if (count > 0) + { + Initialize(count); + } + } + + UnionWith(collection); + + if (_count > 0 && _entries!.Length / _count > ShrinkThreshold) + { + TrimExcess(); + } + } + } + + public HashSet(int capacity, IEqualityComparer? comparer) : this(comparer) + { + if (capacity < 0) + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.capacity); + } + + if (capacity > 0) + { + Initialize(capacity); + } + } + + protected HashSet(SerializationInfo info, StreamingContext context) + { + // We can't do anything with the keys and values until the entire graph has been + // deserialized and we have a reasonable estimate that GetHashCode is not going to + // fail. For the time being, we'll just cache this. The graph is not valid until + // OnDeserialization has been called. + _siInfo = info; + } + + /// Initializes the HashSet from another HashSet with the same element type and equality comparer. + private void ConstructFrom(HashSet source) + { + if (source.Count == 0) + { + // As well as short-circuiting on the rest of the work done, + // this avoids errors from trying to access source._buckets + // or source._entries when they aren't initialized. + return; + } + + int capacity = source._buckets!.Length; + int threshold = HashHelpers.ExpandPrime(source.Count + 1); + + if (threshold >= capacity) + { + _buckets = (int[])source._buckets.Clone(); + _entries = (Entry[])source._entries!.Clone(); + _freeList = source._freeList; + _freeCount = source._freeCount; + _count = source._count; +#if TARGET_64BIT + _fastModMultiplier = source._fastModMultiplier; +#endif + } + else + { + Initialize(source.Count); + + Entry[]? entries = source._entries; + for (int i = 0; i < source._count; i++) + { + ref Entry entry = ref entries![i]; + if (entry.Next >= -1) + { + AddIfNotPresent(entry.Value, out _); + } + } + } + + Debug.Assert(Count == source.Count); + } + + #endregion + + #region ICollection methods + + void ICollection.Add(T item) => AddIfNotPresent(item, out _); + + /// Removes all elements from the object. + public void Clear() + { + int count = _count; + if (count > 0) + { + Debug.Assert(_buckets != null, "_buckets should be non-null"); + Debug.Assert(_entries != null, "_entries should be non-null"); + + Array.Clear(_buckets, 0, _buckets.Length); + _count = 0; + _freeList = -1; + _freeCount = 0; + Array.Clear(_entries, 0, count); + } + } + + /// Determines whether the contains the specified element. + /// The element to locate in the object. + /// true if the object contains the specified element; otherwise, false. + public bool Contains(T item) => FindItemIndex(item) >= 0; + + /// Gets the index of the item in , or -1 if it's not in the set. + private int FindItemIndex(T item) + { + int[]? buckets = _buckets; + if (buckets != null) + { + Entry[]? entries = _entries; + Debug.Assert(entries != null, "Expected _entries to be initialized"); + + uint collisionCount = 0; + IEqualityComparer? comparer = _comparer; + + if (comparer == null) + { + int hashCode = item != null ? item.GetHashCode() : 0; + if (typeof(T).IsValueType) + { + // ValueType: Devirtualize with EqualityComparer.Default intrinsic + int i = GetBucketRef(hashCode) - 1; // Value in _buckets is 1-based + while (i >= 0) + { + ref Entry entry = ref entries[i]; + if (entry.HashCode == hashCode && EqualityComparer.Default.Equals(entry.Value, item)) + { + return i; + } + i = entry.Next; + + collisionCount++; + if (collisionCount > (uint)entries.Length) + { + // The chain of entries forms a loop, which means a concurrent update has happened. + ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); + } + } + } + else + { + // Object type: Shared Generic, EqualityComparer.Default won't devirtualize (https://github.com/dotnet/runtime/issues/10050), + // so cache in a local rather than get EqualityComparer per loop iteration. + EqualityComparer defaultComparer = EqualityComparer.Default; + int i = GetBucketRef(hashCode) - 1; // Value in _buckets is 1-based + while (i >= 0) + { + ref Entry entry = ref entries[i]; + if (entry.HashCode == hashCode && defaultComparer.Equals(entry.Value, item)) + { + return i; + } + i = entry.Next; + + collisionCount++; + if (collisionCount > (uint)entries.Length) + { + // The chain of entries forms a loop, which means a concurrent update has happened. + ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); + } + } + } + } + else + { + int hashCode = item != null ? comparer.GetHashCode(item) : 0; + int i = GetBucketRef(hashCode) - 1; // Value in _buckets is 1-based + while (i >= 0) + { + ref Entry entry = ref entries[i]; + if (entry.HashCode == hashCode && comparer.Equals(entry.Value, item)) + { + return i; + } + i = entry.Next; + + collisionCount++; + if (collisionCount > (uint)entries.Length) + { + // The chain of entries forms a loop, which means a concurrent update has happened. + ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); + } + } + } + } + + return -1; + } + + /// Gets a reference to the specified hashcode's bucket, containing an index into . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private ref int GetBucketRef(int hashCode) + { + int[] buckets = _buckets!; +#if TARGET_64BIT + return ref buckets[HashHelpers.FastMod((uint)hashCode, (uint)buckets.Length, _fastModMultiplier)]; +#else + return ref buckets[(uint)hashCode % (uint)buckets.Length]; +#endif + } + + public bool Remove(T item) + { + if (_buckets != null) + { + Entry[]? entries = _entries; + Debug.Assert(entries != null, "entries should be non-null"); + + uint collisionCount = 0; + int last = -1; + int hashCode = item != null ? (_comparer?.GetHashCode(item) ?? item.GetHashCode()) : 0; + + ref int bucket = ref GetBucketRef(hashCode); + int i = bucket - 1; // Value in buckets is 1-based + + while (i >= 0) + { + ref Entry entry = ref entries[i]; + + if (entry.HashCode == hashCode && (_comparer?.Equals(entry.Value, item) ?? EqualityComparer.Default.Equals(entry.Value, item))) + { + if (last < 0) + { + bucket = entry.Next + 1; // Value in buckets is 1-based + } + else + { + entries[last].Next = entry.Next; + } + + Debug.Assert((StartOfFreeList - _freeList) < 0, "shouldn't underflow because max hashtable length is MaxPrimeArrayLength = 0x7FEFFFFD(2146435069) _freelist underflow threshold 2147483646"); + entry.Next = StartOfFreeList - _freeList; + + if (RuntimeHelpers.IsReferenceOrContainsReferences()) + { + entry.Value = default!; + } + + _freeList = i; + _freeCount++; + return true; + } + + last = i; + i = entry.Next; + + collisionCount++; + if (collisionCount > (uint)entries.Length) + { + // The chain of entries forms a loop; which means a concurrent update has happened. + // Break out of the loop and throw, rather than looping forever. + ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); + } + } + } + + return false; + } + + /// Gets the number of elements that are contained in the set. + public int Count => _count - _freeCount; + + bool ICollection.IsReadOnly => false; + + #endregion + + #region IEnumerable methods + + public Enumerator GetEnumerator() => new Enumerator(this); + + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + + #endregion + + #region ISerializable methods + + public virtual void GetObjectData(SerializationInfo info, StreamingContext context) + { + if (info == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.info); + } + + info.AddValue(VersionName, _version); // need to serialize version to avoid problems with serializing while enumerating + info.AddValue(ComparerName, _comparer ?? EqualityComparer.Default, typeof(IEqualityComparer)); + info.AddValue(CapacityName, _buckets == null ? 0 : _buckets.Length); + + if (_buckets != null) + { + var array = new T[Count]; + CopyTo(array); + info.AddValue(ElementsName, array, typeof(T[])); + } + } + + #endregion + + #region IDeserializationCallback methods + + public virtual void OnDeserialization(object? sender) + { + if (_siInfo == null) + { + // It might be necessary to call OnDeserialization from a container if the + // container object also implements OnDeserialization. We can return immediately + // if this function is called twice. Note we set _siInfo to null at the end of this method. + return; + } + + int capacity = _siInfo.GetInt32(CapacityName); + _comparer = (IEqualityComparer)_siInfo.GetValue(ComparerName, typeof(IEqualityComparer))!; + _freeList = -1; + _freeCount = 0; + + if (capacity != 0) + { + _buckets = new int[capacity]; + _entries = new Entry[capacity]; +#if TARGET_64BIT + _fastModMultiplier = HashHelpers.GetFastModMultiplier((uint)capacity); +#endif + + T[]? array = (T[]?)_siInfo.GetValue(ElementsName, typeof(T[])); + if (array == null) + { + ThrowHelper.ThrowSerializationException(ExceptionResource.Serialization_MissingKeys); + } + + // There are no resizes here because we already set capacity above. + for (int i = 0; i < array.Length; i++) + { + AddIfNotPresent(array[i], out _); + } + } + else + { + _buckets = null; + } + + _version = _siInfo.GetInt32(VersionName); + _siInfo = null; + } + + #endregion + + #region HashSet methods + + /// Adds the specified element to the . + /// The element to add to the set. + /// true if the element is added to the object; false if the element is already present. + public bool Add(T item) => AddIfNotPresent(item, out _); + + /// Searches the set for a given value and returns the equal value it finds, if any. + /// The value to search for. + /// The value from the set that the search found, or the default value of when the search yielded no match. + /// A value indicating whether the search was successful. + /// + /// This can be useful when you want to reuse a previously stored reference instead of + /// a newly constructed one (so that more sharing of references can occur) or to look up + /// a value that has more complete data than the value you currently have, although their + /// comparer functions indicate they are equal. + /// + public bool TryGetValue(T equalValue, [MaybeNullWhen(false)] out T actualValue) + { + if (_buckets != null) + { + int index = FindItemIndex(equalValue); + if (index >= 0) + { + actualValue = _entries![index].Value; + return true; + } + } + + actualValue = default; + return false; + } + + /// Modifies the current object to contain all elements that are present in itself, the specified collection, or both. + /// The collection to compare to the current object. + public void UnionWith(IEnumerable other) + { + if (other == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); + } + + foreach (T item in other) + { + AddIfNotPresent(item, out _); + } + } + + /// Modifies the current object to contain only elements that are present in that object and in the specified collection. + /// The collection to compare to the current object. + public void IntersectWith(IEnumerable other) + { + if (other == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); + } + + // Intersection of anything with empty set is empty set, so return if count is 0. + // Same if the set intersecting with itself is the same set. + if (Count == 0 || other == this) + { + return; + } + + // If other is known to be empty, intersection is empty set; remove all elements, and we're done. + if (other is ICollection otherAsCollection) + { + if (otherAsCollection.Count == 0) + { + Clear(); + return; + } + + // Faster if other is a hashset using same equality comparer; so check + // that other is a hashset using the same equality comparer. + if (other is HashSet otherAsSet && EqualityComparersAreEqual(this, otherAsSet)) + { + IntersectWithHashSetWithSameComparer(otherAsSet); + return; + } + } + + IntersectWithEnumerable(other); + } + + /// Removes all elements in the specified collection from the current object. + /// The collection to compare to the current object. + public void ExceptWith(IEnumerable other) + { + if (other == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); + } + + // This is already the empty set; return. + if (Count == 0) + { + return; + } + + // Special case if other is this; a set minus itself is the empty set. + if (other == this) + { + Clear(); + return; + } + + // Remove every element in other from this. + foreach (T element in other) + { + Remove(element); + } + } + + /// Modifies the current object to contain only elements that are present either in that object or in the specified collection, but not both. + /// The collection to compare to the current object. + public void SymmetricExceptWith(IEnumerable other) + { + if (other == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); + } + + // If set is empty, then symmetric difference is other. + if (Count == 0) + { + UnionWith(other); + return; + } + + // Special-case this; the symmetric difference of a set with itself is the empty set. + if (other == this) + { + Clear(); + return; + } + + // If other is a HashSet, it has unique elements according to its equality comparer, + // but if they're using different equality comparers, then assumption of uniqueness + // will fail. So first check if other is a hashset using the same equality comparer; + // symmetric except is a lot faster and avoids bit array allocations if we can assume + // uniqueness. + if (other is HashSet otherAsSet && EqualityComparersAreEqual(this, otherAsSet)) + { + SymmetricExceptWithUniqueHashSet(otherAsSet); + } + else + { + SymmetricExceptWithEnumerable(other); + } + } + + /// Determines whether a object is a subset of the specified collection. + /// The collection to compare to the current object. + /// true if the object is a subset of ; otherwise, false. + public bool IsSubsetOf(IEnumerable other) + { + if (other == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); + } + + // The empty set is a subset of any set, and a set is a subset of itself. + // Set is always a subset of itself + if (Count == 0 || other == this) + { + return true; + } + + // Faster if other has unique elements according to this equality comparer; so check + // that other is a hashset using the same equality comparer. + if (other is HashSet otherAsSet && EqualityComparersAreEqual(this, otherAsSet)) + { + // if this has more elements then it can't be a subset + if (Count > otherAsSet.Count) + { + return false; + } + + // already checked that we're using same equality comparer. simply check that + // each element in this is contained in other. + return IsSubsetOfHashSetWithSameComparer(otherAsSet); + } + + (int uniqueCount, int unfoundCount) = CheckUniqueAndUnfoundElements(other, returnIfUnfound: false); + return uniqueCount == Count && unfoundCount >= 0; + } + + /// Determines whether a object is a proper subset of the specified collection. + /// The collection to compare to the current object. + /// true if the object is a proper subset of ; otherwise, false. + public bool IsProperSubsetOf(IEnumerable other) + { + if (other == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); + } + + // No set is a proper subset of itself. + if (other == this) + { + return false; + } + + if (other is ICollection otherAsCollection) + { + // No set is a proper subset of an empty set. + if (otherAsCollection.Count == 0) + { + return false; + } + + // The empty set is a proper subset of anything but the empty set. + if (Count == 0) + { + return otherAsCollection.Count > 0; + } + + // Faster if other is a hashset (and we're using same equality comparer). + if (other is HashSet otherAsSet && EqualityComparersAreEqual(this, otherAsSet)) + { + if (Count >= otherAsSet.Count) + { + return false; + } + + // This has strictly less than number of items in other, so the following + // check suffices for proper subset. + return IsSubsetOfHashSetWithSameComparer(otherAsSet); + } + } + + (int uniqueCount, int unfoundCount) = CheckUniqueAndUnfoundElements(other, returnIfUnfound: false); + return uniqueCount == Count && unfoundCount > 0; + } + + /// Determines whether a object is a proper superset of the specified collection. + /// The collection to compare to the current object. + /// true if the object is a superset of ; otherwise, false. + public bool IsSupersetOf(IEnumerable other) + { + if (other == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); + } + + // A set is always a superset of itself. + if (other == this) + { + return true; + } + + // Try to fall out early based on counts. + if (other is ICollection otherAsCollection) + { + // If other is the empty set then this is a superset. + if (otherAsCollection.Count == 0) + { + return true; + } + + // Try to compare based on counts alone if other is a hashset with same equality comparer. + if (other is HashSet otherAsSet && + EqualityComparersAreEqual(this, otherAsSet) && + otherAsSet.Count > Count) + { + return false; + } + } + + return ContainsAllElements(other); + } + + /// Determines whether a object is a proper superset of the specified collection. + /// The collection to compare to the current object. + /// true if the object is a proper superset of ; otherwise, false. + public bool IsProperSupersetOf(IEnumerable other) + { + if (other == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); + } + + // The empty set isn't a proper superset of any set, and a set is never a strict superset of itself. + if (Count == 0 || other == this) + { + return false; + } + + if (other is ICollection otherAsCollection) + { + // If other is the empty set then this is a superset. + if (otherAsCollection.Count == 0) + { + // Note that this has at least one element, based on above check. + return true; + } + + // Faster if other is a hashset with the same equality comparer + if (other is HashSet otherAsSet && EqualityComparersAreEqual(this, otherAsSet)) + { + if (otherAsSet.Count >= Count) + { + return false; + } + + // Now perform element check. + return ContainsAllElements(otherAsSet); + } + } + + // Couldn't fall out in the above cases; do it the long way + (int uniqueCount, int unfoundCount) = CheckUniqueAndUnfoundElements(other, returnIfUnfound: true); + return uniqueCount < Count && unfoundCount == 0; + } + + /// Determines whether the current object and a specified collection share common elements. + /// The collection to compare to the current object. + /// true if the object and share at least one common element; otherwise, false. + public bool Overlaps(IEnumerable other) + { + if (other == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); + } + + if (Count == 0) + { + return false; + } + + // Set overlaps itself + if (other == this) + { + return true; + } + + foreach (T element in other) + { + if (Contains(element)) + { + return true; + } + } + + return false; + } + + /// Determines whether a object and the specified collection contain the same elements. + /// The collection to compare to the current object. + /// true if the object is equal to ; otherwise, false. + public bool SetEquals(IEnumerable other) + { + if (other == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); + } + + // A set is equal to itself. + if (other == this) + { + return true; + } + + // Faster if other is a hashset and we're using same equality comparer. + if (other is HashSet otherAsSet && EqualityComparersAreEqual(this, otherAsSet)) + { + // Attempt to return early: since both contain unique elements, if they have + // different counts, then they can't be equal. + if (Count != otherAsSet.Count) + { + return false; + } + + // Already confirmed that the sets have the same number of distinct elements, so if + // one is a superset of the other then they must be equal. + return ContainsAllElements(otherAsSet); + } + else + { + // If this count is 0 but other contains at least one element, they can't be equal. + if (Count == 0 && + other is ICollection otherAsCollection && + otherAsCollection.Count > 0) + { + return false; + } + + (int uniqueCount, int unfoundCount) = CheckUniqueAndUnfoundElements(other, returnIfUnfound: true); + return uniqueCount == Count && unfoundCount == 0; + } + } + + public void CopyTo(T[] array) => CopyTo(array, 0, Count); + + /// Copies the elements of a object to an array, starting at the specified array index. + /// The destination array. + /// The zero-based index in array at which copying begins. + public void CopyTo(T[] array, int arrayIndex) => CopyTo(array, arrayIndex, Count); + + public void CopyTo(T[] array, int arrayIndex, int count) + { + if (array == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.array); + } + + // Check array index valid index into array. + if (arrayIndex < 0) + { + throw new ArgumentOutOfRangeException(nameof(arrayIndex), arrayIndex, SR.ArgumentOutOfRange_NeedNonNegNum); + } + + // Also throw if count less than 0. + if (count < 0) + { + throw new ArgumentOutOfRangeException(nameof(count), count, SR.ArgumentOutOfRange_NeedNonNegNum); + } + + // Will the array, starting at arrayIndex, be able to hold elements? Note: not + // checking arrayIndex >= array.Length (consistency with list of allowing + // count of 0; subsequent check takes care of the rest) + if (arrayIndex > array.Length || count > array.Length - arrayIndex) + { + ThrowHelper.ThrowArgumentException(ExceptionResource.Arg_ArrayPlusOffTooSmall); + } + + Entry[]? entries = _entries; + for (int i = 0; i < _count && count != 0; i++) + { + ref Entry entry = ref entries![i]; + if (entry.Next >= -1) + { + array[arrayIndex++] = entry.Value; + count--; + } + } + } + + /// Removes all elements that match the conditions defined by the specified predicate from a collection. + public int RemoveWhere(Predicate match) + { + if (match == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.match); + } + + Entry[]? entries = _entries; + int numRemoved = 0; + for (int i = 0; i < _count; i++) + { + ref Entry entry = ref entries![i]; + if (entry.Next >= -1) + { + // Cache value in case delegate removes it + T value = entry.Value; + if (match(value)) + { + // Check again that remove actually removed it. + if (Remove(value)) + { + numRemoved++; + } + } + } + } + + return numRemoved; + } + + /// Gets the object that is used to determine equality for the values in the set. + public IEqualityComparer Comparer => + (_comparer == null || _comparer is NonRandomizedStringEqualityComparer) ? + EqualityComparer.Default : + _comparer; + + /// Ensures that this hash set can hold the specified number of elements without growing. + public int EnsureCapacity(int capacity) + { + if (capacity < 0) + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.capacity); + } + + int currentCapacity = _entries == null ? 0 : _entries.Length; + if (currentCapacity >= capacity) + { + return currentCapacity; + } + + if (_buckets == null) + { + return Initialize(capacity); + } + + int newSize = HashHelpers.GetPrime(capacity); + Resize(newSize, forceNewHashCodes: false); + return newSize; + } + + private void Resize() => Resize(HashHelpers.ExpandPrime(_count), forceNewHashCodes: false); + + private void Resize(int newSize, bool forceNewHashCodes) + { + // Value types never rehash + Debug.Assert(!forceNewHashCodes || !typeof(T).IsValueType); + Debug.Assert(_entries != null, "_entries should be non-null"); + Debug.Assert(newSize >= _entries.Length); + + var entries = new Entry[newSize]; + + int count = _count; + Array.Copy(_entries, entries, count); + + if (!typeof(T).IsValueType && forceNewHashCodes) + { + for (int i = 0; i < count; i++) + { + ref Entry entry = ref entries[i]; + if (entry.Next >= -1) + { + Debug.Assert(_comparer == null); + entry.HashCode = entry.Value != null ? entry.Value!.GetHashCode() : 0; + } + } + } + + // Assign member variables after both arrays allocated to guard against corruption from OOM if second fails + _buckets = new int[newSize]; +#if TARGET_64BIT + _fastModMultiplier = HashHelpers.GetFastModMultiplier((uint)newSize); +#endif + for (int i = 0; i < count; i++) + { + ref Entry entry = ref entries[i]; + if (entry.Next >= -1) + { + ref int bucket = ref GetBucketRef(entry.HashCode); + entry.Next = bucket - 1; // Value in _buckets is 1-based + bucket = i + 1; + } + } + + _entries = entries; + } + + /// + /// Sets the capacity of a object to the actual number of elements it contains, + /// rounded up to a nearby, implementation-specific value. + /// + public void TrimExcess() + { + int capacity = Count; + + int newSize = HashHelpers.GetPrime(capacity); + Entry[]? oldEntries = _entries; + int currentCapacity = oldEntries == null ? 0 : oldEntries.Length; + if (newSize >= currentCapacity) + { + return; + } + + int oldCount = _count; + _version++; + Initialize(newSize); + Entry[]? entries = _entries; + int count = 0; + for (int i = 0; i < oldCount; i++) + { + int hashCode = oldEntries![i].HashCode; // At this point, we know we have entries. + if (oldEntries[i].Next >= -1) + { + ref Entry entry = ref entries![count]; + entry = oldEntries[i]; + ref int bucket = ref GetBucketRef(hashCode); + entry.Next = bucket - 1; // Value in _buckets is 1-based + bucket = count + 1; + count++; + } + } + + _count = capacity; + _freeCount = 0; + } + + #endregion + + #region Helper methods + + /// Returns an object that can be used for equality testing of a object. + public static IEqualityComparer> CreateSetComparer() => new HashSetEqualityComparer(); + + /// + /// Initializes buckets and slots arrays. Uses suggested capacity by finding next prime + /// greater than or equal to capacity. + /// + private int Initialize(int capacity) + { + int size = HashHelpers.GetPrime(capacity); + var buckets = new int[size]; + var entries = new Entry[size]; + + // Assign member variables after both arrays are allocated to guard against corruption from OOM if second fails. + _freeList = -1; + _buckets = buckets; + _entries = entries; +#if TARGET_64BIT + _fastModMultiplier = HashHelpers.GetFastModMultiplier((uint)size); +#endif + + return size; + } + + /// Adds the specified element to the set if it's not already contained. + /// The element to add to the set. + /// The index into of the element. + /// true if the element is added to the object; false if the element is already present. + private bool AddIfNotPresent(T value, out int location) + { + if (_buckets == null) + { + Initialize(0); + } + Debug.Assert(_buckets != null); + + Entry[]? entries = _entries; + Debug.Assert(entries != null, "expected entries to be non-null"); + + IEqualityComparer? comparer = _comparer; + int hashCode; + + uint collisionCount = 0; + ref int bucket = ref Unsafe.NullRef(); + + if (comparer == null) + { + hashCode = value != null ? value.GetHashCode() : 0; + bucket = ref GetBucketRef(hashCode); + int i = bucket - 1; // Value in _buckets is 1-based + if (typeof(T).IsValueType) + { + // ValueType: Devirtualize with EqualityComparer.Default intrinsic + while (i >= 0) + { + ref Entry entry = ref entries[i]; + if (entry.HashCode == hashCode && EqualityComparer.Default.Equals(entry.Value, value)) + { + location = i; + return false; + } + i = entry.Next; + + collisionCount++; + if (collisionCount > (uint)entries.Length) + { + // The chain of entries forms a loop, which means a concurrent update has happened. + ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); + } + } + } + else + { + // Object type: Shared Generic, EqualityComparer.Default won't devirtualize (https://github.com/dotnet/runtime/issues/10050), + // so cache in a local rather than get EqualityComparer per loop iteration. + EqualityComparer defaultComparer = EqualityComparer.Default; + while (i >= 0) + { + ref Entry entry = ref entries[i]; + if (entry.HashCode == hashCode && defaultComparer.Equals(entry.Value, value)) + { + location = i; + return false; + } + i = entry.Next; + + collisionCount++; + if (collisionCount > (uint)entries.Length) + { + // The chain of entries forms a loop, which means a concurrent update has happened. + ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); + } + } + } + } + else + { + hashCode = value != null ? comparer.GetHashCode(value) : 0; + bucket = ref GetBucketRef(hashCode); + int i = bucket - 1; // Value in _buckets is 1-based + while (i >= 0) + { + ref Entry entry = ref entries[i]; + if (entry.HashCode == hashCode && comparer.Equals(entry.Value, value)) + { + location = i; + return false; + } + i = entry.Next; + + collisionCount++; + if (collisionCount > (uint)entries.Length) + { + // The chain of entries forms a loop, which means a concurrent update has happened. + ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); + } + } + } + + int index; + if (_freeCount > 0) + { + index = _freeList; + _freeCount--; + Debug.Assert((StartOfFreeList - entries![_freeList].Next) >= -1, "shouldn't overflow because `next` cannot underflow"); + _freeList = StartOfFreeList - entries[_freeList].Next; + } + else + { + int count = _count; + if (count == entries.Length) + { + Resize(); + bucket = ref GetBucketRef(hashCode); + } + index = count; + _count = count + 1; + entries = _entries; + } + + { + ref Entry entry = ref entries![index]; + entry.HashCode = hashCode; + entry.Next = bucket - 1; // Value in _buckets is 1-based + entry.Value = value; + bucket = index + 1; + _version++; + location = index; + } + + // Value types never rehash + if (!typeof(T).IsValueType && collisionCount > HashHelpers.HashCollisionThreshold && comparer is NonRandomizedStringEqualityComparer) + { + // If we hit the collision threshold we'll need to switch to the comparer which is using randomized string hashing + // i.e. EqualityComparer.Default. + _comparer = null; + Resize(entries.Length, forceNewHashCodes: true); + location = FindItemIndex(value); + Debug.Assert(location >= 0); + } + + return true; + } + + /// + /// Checks if this contains of other's elements. Iterates over other's elements and + /// returns false as soon as it finds an element in other that's not in this. + /// Used by SupersetOf, ProperSupersetOf, and SetEquals. + /// + private bool ContainsAllElements(IEnumerable other) + { + foreach (T element in other) + { + if (!Contains(element)) + { + return false; + } + } + + return true; + } + + /// + /// Implementation Notes: + /// If other is a hashset and is using same equality comparer, then checking subset is + /// faster. Simply check that each element in this is in other. + /// + /// Note: if other doesn't use same equality comparer, then Contains check is invalid, + /// which is why callers must take are of this. + /// + /// If callers are concerned about whether this is a proper subset, they take care of that. + /// + internal bool IsSubsetOfHashSetWithSameComparer(HashSet other) + { + foreach (T item in this) + { + if (!other.Contains(item)) + { + return false; + } + } + + return true; + } + + /// + /// If other is a hashset that uses same equality comparer, intersect is much faster + /// because we can use other's Contains + /// + private void IntersectWithHashSetWithSameComparer(HashSet other) + { + Entry[]? entries = _entries; + for (int i = 0; i < _count; i++) + { + ref Entry entry = ref entries![i]; + if (entry.Next >= -1) + { + T item = entry.Value; + if (!other.Contains(item)) + { + Remove(item); + } + } + } + } + + /// + /// Iterate over other. If contained in this, mark an element in bit array corresponding to + /// its position in _slots. If anything is unmarked (in bit array), remove it. + /// + /// This attempts to allocate on the stack, if below StackAllocThreshold. + /// + private unsafe void IntersectWithEnumerable(IEnumerable other) + { + Debug.Assert(_buckets != null, "_buckets shouldn't be null; callers should check first"); + + // Keep track of current last index; don't want to move past the end of our bit array + // (could happen if another thread is modifying the collection). + int originalCount = _count; + int intArrayLength = BitHelper.ToIntArrayLength(originalCount); + + Span span = stackalloc int[StackAllocThreshold]; + BitHelper bitHelper = intArrayLength <= StackAllocThreshold ? + new BitHelper(span.Slice(0, intArrayLength), clear: true) : + new BitHelper(new int[intArrayLength], clear: false); + + // Mark if contains: find index of in slots array and mark corresponding element in bit array. + foreach (T item in other) + { + int index = FindItemIndex(item); + if (index >= 0) + { + bitHelper.MarkBit(index); + } + } + + // If anything unmarked, remove it. Perf can be optimized here if BitHelper had a + // FindFirstUnmarked method. + for (int i = 0; i < originalCount; i++) + { + ref Entry entry = ref _entries![i]; + if (entry.Next >= -1 && !bitHelper.IsMarked(i)) + { + Remove(entry.Value); + } + } + } + + /// + /// if other is a set, we can assume it doesn't have duplicate elements, so use this + /// technique: if can't remove, then it wasn't present in this set, so add. + /// + /// As with other methods, callers take care of ensuring that other is a hashset using the + /// same equality comparer. + /// + /// + private void SymmetricExceptWithUniqueHashSet(HashSet other) + { + foreach (T item in other) + { + if (!Remove(item)) + { + AddIfNotPresent(item, out _); + } + } + } + + /// + /// Implementation notes: + /// + /// Used for symmetric except when other isn't a HashSet. This is more tedious because + /// other may contain duplicates. HashSet technique could fail in these situations: + /// 1. Other has a duplicate that's not in this: HashSet technique would add then + /// remove it. + /// 2. Other has a duplicate that's in this: HashSet technique would remove then add it + /// back. + /// In general, its presence would be toggled each time it appears in other. + /// + /// This technique uses bit marking to indicate whether to add/remove the item. If already + /// present in collection, it will get marked for deletion. If added from other, it will + /// get marked as something not to remove. + /// + /// + /// + private unsafe void SymmetricExceptWithEnumerable(IEnumerable other) + { + int originalCount = _count; + int intArrayLength = BitHelper.ToIntArrayLength(originalCount); + + Span itemsToRemoveSpan = stackalloc int[StackAllocThreshold / 2]; + BitHelper itemsToRemove = intArrayLength <= StackAllocThreshold / 2 ? + new BitHelper(itemsToRemoveSpan.Slice(0, intArrayLength), clear: true) : + new BitHelper(new int[intArrayLength], clear: false); + + Span itemsAddedFromOtherSpan = stackalloc int[StackAllocThreshold / 2]; + BitHelper itemsAddedFromOther = intArrayLength <= StackAllocThreshold / 2 ? + new BitHelper(itemsAddedFromOtherSpan.Slice(0, intArrayLength), clear: true) : + new BitHelper(new int[intArrayLength], clear: false); + + foreach (T item in other) + { + int location; + if (AddIfNotPresent(item, out location)) + { + // wasn't already present in collection; flag it as something not to remove + // *NOTE* if location is out of range, we should ignore. BitHelper will + // detect that it's out of bounds and not try to mark it. But it's + // expected that location could be out of bounds because adding the item + // will increase _lastIndex as soon as all the free spots are filled. + itemsAddedFromOther.MarkBit(location); + } + else + { + // already there...if not added from other, mark for remove. + // *NOTE* Even though BitHelper will check that location is in range, we want + // to check here. There's no point in checking items beyond originalCount + // because they could not have been in the original collection + if (location < originalCount && !itemsAddedFromOther.IsMarked(location)) + { + itemsToRemove.MarkBit(location); + } + } + } + + // if anything marked, remove it + for (int i = 0; i < originalCount; i++) + { + if (itemsToRemove.IsMarked(i)) + { + Remove(_entries![i].Value); + } + } + } + + /// + /// Determines counts that can be used to determine equality, subset, and superset. This + /// is only used when other is an IEnumerable and not a HashSet. If other is a HashSet + /// these properties can be checked faster without use of marking because we can assume + /// other has no duplicates. + /// + /// The following count checks are performed by callers: + /// 1. Equals: checks if unfoundCount = 0 and uniqueFoundCount = _count; i.e. everything + /// in other is in this and everything in this is in other + /// 2. Subset: checks if unfoundCount >= 0 and uniqueFoundCount = _count; i.e. other may + /// have elements not in this and everything in this is in other + /// 3. Proper subset: checks if unfoundCount > 0 and uniqueFoundCount = _count; i.e + /// other must have at least one element not in this and everything in this is in other + /// 4. Proper superset: checks if unfound count = 0 and uniqueFoundCount strictly less + /// than _count; i.e. everything in other was in this and this had at least one element + /// not contained in other. + /// + /// An earlier implementation used delegates to perform these checks rather than returning + /// an ElementCount struct; however this was changed due to the perf overhead of delegates. + /// + /// + /// Allows us to finish faster for equals and proper superset + /// because unfoundCount must be 0. + private unsafe (int UniqueCount, int UnfoundCount) CheckUniqueAndUnfoundElements(IEnumerable other, bool returnIfUnfound) + { + // Need special case in case this has no elements. + if (_count == 0) + { + int numElementsInOther = 0; + foreach (T item in other) + { + numElementsInOther++; + break; // break right away, all we want to know is whether other has 0 or 1 elements + } + + return (UniqueCount: 0, UnfoundCount: numElementsInOther); + } + + Debug.Assert((_buckets != null) && (_count > 0), "_buckets was null but count greater than 0"); + + int originalCount = _count; + int intArrayLength = BitHelper.ToIntArrayLength(originalCount); + + Span span = stackalloc int[StackAllocThreshold]; + BitHelper bitHelper = intArrayLength <= StackAllocThreshold ? + new BitHelper(span.Slice(0, intArrayLength), clear: true) : + new BitHelper(new int[intArrayLength], clear: false); + + int unfoundCount = 0; // count of items in other not found in this + int uniqueFoundCount = 0; // count of unique items in other found in this + + foreach (T item in other) + { + int index = FindItemIndex(item); + if (index >= 0) + { + if (!bitHelper.IsMarked(index)) + { + // Item hasn't been seen yet. + bitHelper.MarkBit(index); + uniqueFoundCount++; + } + } + else + { + unfoundCount++; + if (returnIfUnfound) + { + break; + } + } + } + + return (uniqueFoundCount, unfoundCount); + } + + /// + /// Checks if equality comparers are equal. This is used for algorithms that can + /// speed up if it knows the other item has unique elements. I.e. if they're using + /// different equality comparers, then uniqueness assumption between sets break. + /// + internal static bool EqualityComparersAreEqual(HashSet set1, HashSet set2) => set1.Comparer.Equals(set2.Comparer); + +#endregion + + private struct Entry + { + public int HashCode; + /// + /// 0-based index of next entry in chain: -1 means end of chain + /// also encodes whether this entry _itself_ is part of the free list by changing sign and subtracting 3, + /// so -2 means end of free list, -3 means index 0 but on free list, -4 means index 1 but on free list, etc. + /// + public int Next; + public T Value; + } + + public struct Enumerator : IEnumerator + { + private readonly HashSet _hashSet; + private readonly int _version; + private int _index; + private T _current; + + internal Enumerator(HashSet hashSet) + { + _hashSet = hashSet; + _version = hashSet._version; + _index = 0; + _current = default!; + } + + public bool MoveNext() + { + if (_version != _hashSet._version) + { + ThrowHelper.ThrowInvalidOperationException_InvalidOperation_EnumFailedVersion(); + } + + // Use unsigned comparison since we set index to dictionary.count+1 when the enumeration ends. + // dictionary.count+1 could be negative if dictionary.count is int.MaxValue + while ((uint)_index < (uint)_hashSet._count) + { + ref Entry entry = ref _hashSet._entries![_index++]; + if (entry.Next >= -1) + { + _current = entry.Value; + return true; + } + } + + _index = _hashSet._count + 1; + _current = default!; + return false; + } + + public T Current => _current; + + public void Dispose() { } + + object? IEnumerator.Current + { + get + { + if (_index == 0 || (_index == _hashSet._count + 1)) + { + ThrowHelper.ThrowInvalidOperationException_InvalidOperation_EnumOpCantHappen(); + } + + return _current; + } + } + + void IEnumerator.Reset() + { + if (_version != _hashSet._version) + { + ThrowHelper.ThrowInvalidOperationException_InvalidOperation_EnumFailedVersion(); + } + + _index = 0; + _current = default!; + } + } + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/HashSetEqualityComparer.cs b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/HashSetEqualityComparer.cs new file mode 100644 index 0000000..6390be8 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/HashSetEqualityComparer.cs @@ -0,0 +1,78 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Collections.Generic +{ + /// Equality comparer for hashsets of hashsets + internal sealed class HashSetEqualityComparer : IEqualityComparer?> + { + public bool Equals(HashSet? x, HashSet? y) + { + // If they're the exact same instance, they're equal. + if (ReferenceEquals(x, y)) + { + return true; + } + + // They're not both null, so if either is null, they're not equal. + if (x == null || y == null) + { + return false; + } + + EqualityComparer defaultComparer = EqualityComparer.Default; + + // If both sets use the same comparer, they're equal if they're the same + // size and one is a "subset" of the other. + if (HashSet.EqualityComparersAreEqual(x, y)) + { + return x.Count == y.Count && y.IsSubsetOfHashSetWithSameComparer(x); + } + + // Otherwise, do an O(N^2) match. + foreach (T yi in y) + { + bool found = false; + foreach (T xi in x) + { + if (defaultComparer.Equals(yi, xi)) + { + found = true; + break; + } + } + + if (!found) + { + return false; + } + } + + return true; + } + + public int GetHashCode(HashSet? obj) + { + int hashCode = 0; // default to 0 for null/empty set + + if (obj != null) + { + foreach (T t in obj) + { + if (t != null) + { + hashCode ^= t.GetHashCode(); // same hashcode as as default comparer + } + } + } + + return hashCode; + } + + // Equals method for the comparer itself. + public override bool Equals(object? obj) => obj is HashSetEqualityComparer; + + public override int GetHashCode() => EqualityComparer.Default.GetHashCode(); + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/InsertionBehavior.cs b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/InsertionBehavior.cs new file mode 100644 index 0000000..32b4881 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/InsertionBehavior.cs @@ -0,0 +1,27 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Collections.Generic +{ + /// + /// Used internally to control behavior of insertion into a or . + /// + internal enum InsertionBehavior : byte + { + /// + /// The default insertion behavior. + /// + None = 0, + + /// + /// Specifies that an existing entry with the same key should be overwritten if encountered. + /// + OverwriteExisting = 1, + + /// + /// Specifies that if an existing entry with the same key is encountered, an exception should be thrown. + /// + ThrowOnExisting = 2 + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/DateTimeFormat.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/DateTimeFormat.cs index ae48789..ff9e0a7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/DateTimeFormat.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/DateTimeFormat.cs @@ -653,8 +653,8 @@ namespace System if (isJapaneseCalendar && !LocalAppContextSwitches.FormatJapaneseFirstYearAsANumber && year == 1 && - ((i + tokenLen < format.Length && format[i + tokenLen] == DateTimeFormatInfoScanner.CJKYearSuff[0]) || - (i + tokenLen < format.Length - 1 && format[i + tokenLen] == '\'' && format[i + tokenLen + 1] == DateTimeFormatInfoScanner.CJKYearSuff[0]))) + ((i + tokenLen < format.Length && format[i + tokenLen] == DateTimeFormatInfoScanner.CJKYearSuff) || + (i + tokenLen < format.Length - 1 && format[i + tokenLen] == '\'' && format[i + tokenLen + 1] == DateTimeFormatInfoScanner.CJKYearSuff))) { // We are formatting a Japanese date with year equals 1 and the year number is followed by the year sign \u5e74 // In Japanese dates, the first year in the era is not formatted as a number 1 instead it is formatted as \u5143 which means diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/DateTimeFormatInfoScanner.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/DateTimeFormatInfoScanner.cs index f1b382e..d730734 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/DateTimeFormatInfoScanner.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/DateTimeFormatInfoScanner.cs @@ -91,55 +91,26 @@ namespace System.Globalization internal const char IgnorableSymbolChar = '\xe001'; // Known CJK suffix - internal const string CJKYearSuff = "\u5e74"; - internal const string CJKMonthSuff = "\u6708"; - internal const string CJKDaySuff = "\u65e5"; + internal const char CJKYearSuff = '\u5e74'; + internal const char CJKMonthSuff = '\u6708'; + internal const char CJKDaySuff = '\u65e5'; - internal const string KoreanYearSuff = "\ub144"; - internal const string KoreanMonthSuff = "\uc6d4"; - internal const string KoreanDaySuff = "\uc77c"; + internal const char KoreanYearSuff = '\ub144'; + internal const char KoreanMonthSuff = '\uc6d4'; + internal const char KoreanDaySuff = '\uc77c'; - internal const string KoreanHourSuff = "\uc2dc"; - internal const string KoreanMinuteSuff = "\ubd84"; - internal const string KoreanSecondSuff = "\ucd08"; + internal const char KoreanHourSuff = '\uc2dc'; + internal const char KoreanMinuteSuff = '\ubd84'; + internal const char KoreanSecondSuff = '\ucd08'; - internal const string CJKHourSuff = "\u6642"; - internal const string ChineseHourSuff = "\u65f6"; + internal const char CJKHourSuff = '\u6642'; + internal const char ChineseHourSuff = '\u65f6'; - internal const string CJKMinuteSuff = "\u5206"; - internal const string CJKSecondSuff = "\u79d2"; + internal const char CJKMinuteSuff = '\u5206'; + internal const char CJKSecondSuff = '\u79d2'; - // The collection fo date words & postfix. + // The collection for date words & postfix. internal List m_dateWords = new List(); - // Hashtable for the known words. - private static volatile Dictionary? s_knownWords; - - private static Dictionary KnownWords => - s_knownWords ??= - new Dictionary(16) - { - // Add known words into the hash table. - - // Skip these special symbols. - { "/", string.Empty }, - { "-", string.Empty }, - { ".", string.Empty }, - - // Skip known CJK suffixes. - { CJKYearSuff, string.Empty }, - { CJKMonthSuff, string.Empty }, - { CJKDaySuff, string.Empty }, - { KoreanYearSuff, string.Empty }, - { KoreanMonthSuff, string.Empty }, - { KoreanDaySuff, string.Empty }, - { KoreanHourSuff, string.Empty }, - { KoreanMinuteSuff, string.Empty }, - { KoreanSecondSuff, string.Empty }, - { CJKHourSuff, string.Empty }, - { ChineseHourSuff, string.Empty }, - { CJKMinuteSuff, string.Empty }, - { CJKSecondSuff, string.Empty } - }; //////////////////////////////////////////////////////////////////////////// // @@ -203,43 +174,68 @@ namespace System.Globalization //////////////////////////////////////////////////////////////////////////// internal void AddDateWordOrPostfix(string? formatPostfix, string str) { - if (str.Length > 0) + if (str.Length == 0) { - // Some cultures use . like an abbreviation - if (str.Equals(".")) + return; + } + + if (str.Length == 1) + { + switch (str[0]) { - AddIgnorableSymbols("."); - return; + // Some cultures use . like an abbreviation + case '.': + AddIgnorableSymbols("."); + return; + + // Skip these special symbols. + case '/': + case '-': + return; + + // Skip known CJK suffixes. + case CJKYearSuff: + case CJKMonthSuff: + case CJKDaySuff: + case KoreanYearSuff: + case KoreanMonthSuff: + case KoreanDaySuff: + case KoreanHourSuff: + case KoreanMinuteSuff: + case KoreanSecondSuff: + case CJKHourSuff: + case ChineseHourSuff: + case CJKMinuteSuff: + case CJKSecondSuff: + return; } + } - if (!KnownWords.TryGetValue(str, out _)) + m_dateWords ??= new List(); + + if (formatPostfix == "MMMM") + { + // Add the word into the ArrayList as "\xfffe" + real month postfix. + string temp = MonthPostfixChar + str; + if (!m_dateWords.Contains(temp)) + { + m_dateWords.Add(temp); + } + } + else + { + if (!m_dateWords.Contains(str)) { - m_dateWords ??= new List(); + m_dateWords.Add(str); + } - if (formatPostfix == "MMMM") - { - // Add the word into the ArrayList as "\xfffe" + real month postfix. - string temp = MonthPostfixChar + str; - if (!m_dateWords.Contains(temp)) - { - m_dateWords.Add(temp); - } - } - else + if (str[^1] == '.') + { + // Old version ignore the trailing dot in the date words. Support this as well. + string strWithoutDot = str[0..^1]; + if (!m_dateWords.Contains(strWithoutDot)) { - if (!m_dateWords.Contains(str)) - { - m_dateWords.Add(str); - } - if (str[^1] == '.') - { - // Old version ignore the trailing dot in the date words. Support this as well. - string strWithoutDot = str[0..^1]; - if (!m_dateWords.Contains(strWithoutDot)) - { - m_dateWords.Add(strWithoutDot); - } - } + m_dateWords.Add(strWithoutDot); } } } @@ -685,8 +681,8 @@ namespace System.Globalization // we don't need the UseDigitPrefixInTokens since it is slower. switch (array[i][index]) { - case '\x6708': // CJKMonthSuff - case '\xc6d4': // KoreanMonthSuff + case CJKMonthSuff: + case KoreanMonthSuff: return false; } } @@ -697,7 +693,7 @@ namespace System.Globalization // Starting with Windows 8, the CJK months for some cultures looks like: "1' \x6708'" // instead of just "1\x6708" if (array[i][index] == '\'' && array[i][index + 1] == ' ' && - array[i][index + 2] == '\x6708' && array[i][index + 3] == '\'') + array[i][index + 2] == CJKMonthSuff && array[i][index + 3] == '\'') { return false; } -- 2.7.4