Add a SearchValues ProbabilisticMap implementation that uses an ASCII fast path ...
author Miha Zupan <mihazupan.zupan1@gmail.com>
Wed, 19 Jul 2023 22:08:01 +0000 (00:08 +0200)
committer GitHub <noreply@github.com>
Wed, 19 Jul 2023 22:08:01 +0000 (18:08 -0400)
* Add a SearchValues ProbabilisticMap implementation that uses an ASCII fast path

* Add comments and asserts around IOptimizations selection

* Unused using

src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs
src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticCharSearchValues.cs
src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs [new file with mode: 0644]
src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs

index b15194e..a7b6331 100644 (file)
     <Compile Include="$(MSBuildThisFileDirectory)System\Index.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Reflection\Emit\ILGenerator.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\BitVector256.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\ProbabilisticWithAsciiCharSearchValues.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\SingleCharSearchValues.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\SingleByteSearchValues.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any2ByteSearchValues.cs" />
index 4ce70d7..4f81a9b 100644 (file)
@@ -67,8 +67,7 @@ namespace System.Buffers
 
                 if (value > 127)
                 {
-                    // The values were modified concurrent with the call to SearchValues.Create
-                    ThrowHelper.ThrowInvalidOperationException_InvalidOperation_EnumFailedVersion();
+                    continue;
                 }
 
                 lookupLocal.Set(value);
index fc7b012..5ec79a3 100644 (file)
@@ -3,7 +3,6 @@
 
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
-using System.Runtime.Intrinsics;
 
 namespace System.Buffers
 {
@@ -14,16 +13,6 @@ namespace System.Buffers
 
         public ProbabilisticCharSearchValues(scoped ReadOnlySpan<char> values)
         {
-            if (Vector128.IsHardwareAccelerated && values.Length < 8)
-            {
-                // ProbabilisticMap does a Span.Contains check to confirm potential matches.
-                // If we have fewer than 8 values, pad them with existing ones to make the verification faster.
-                Span<char> newValues = stackalloc char[8];
-                newValues.Fill(values[0]);
-                values.CopyTo(newValues);
-                values = newValues;
-            }
-
             _values = new string(values);
             _map = new ProbabilisticMap(_values);
         }
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs
new file mode 100644 (file)
index 0000000..065f2cd
--- /dev/null
@@ -0,0 +1,213 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Wasm;
+using System.Runtime.Intrinsics.X86;
+
+namespace System.Buffers
+{
+    internal sealed class ProbabilisticWithAsciiCharSearchValues<TOptimizations> : SearchValues<char>
+        where TOptimizations : struct, IndexOfAnyAsciiSearcher.IOptimizations
+    {
+        // Bitmap produced by IndexOfAnyAsciiSearcher.ComputeBitmap from the search values; consumed by the vectorized ASCII path.
+        // Not readonly because it is passed by 'ref' to the vectorized searcher.
+        private Vector256<byte> _asciiBitmap;
+        // Bitwise complement of '_asciiBitmap'. Searched with 'Negate' so that the vectorized search stops at
+        // an ASCII character that is in the set, or at any non-ASCII character. Also passed by 'ref'.
+        private Vector256<byte> _inverseAsciiBitmap;
+        // Map built over '_values'; used by ContainsCore and by the non-ASCII fallback searches. Passed by 'ref', hence not readonly.
+        private ProbabilisticMap _map;
+        // String copy of the 'values' span passed to the constructor.
+        private readonly string _values;
+
+        // Precomputes the ASCII bitmap, its complement, and the ProbabilisticMap for 'values'.
+        // The asserts document the preconditions: vectorization must be supported and at least one value must be in [0, 127].
+        public ProbabilisticWithAsciiCharSearchValues(scoped ReadOnlySpan<char> values)
+        {
+            Debug.Assert(IndexOfAnyAsciiSearcher.IsVectorizationSupported);
+            Debug.Assert(values.ContainsAnyInRange((char)0, (char)127));
+
+            // The second 'out' result of ComputeBitmap is not needed here and is discarded.
+            // NOTE(review): presumably non-ASCII values are skipped by ComputeBitmap (see the 'value > 127' hunk in this commit) - confirm.
+            IndexOfAnyAsciiSearcher.ComputeBitmap(values, out _asciiBitmap, out _);
+            _inverseAsciiBitmap = ~_asciiBitmap;
+
+            _values = new string(values);
+            _map = new ProbabilisticMap(_values);
+        }
+
+        // Returns a freshly allocated array containing the characters of '_values'.
+        internal override char[] GetValues() => _values.ToCharArray();
+
+        // Membership test for a single char, delegated to ProbabilisticMap.Contains.
+        // '_map' is reinterpreted as a 'ref uint' because that is the shape ProbabilisticMap.Contains accepts here;
+        // NOTE(review): presumably this points at the first uint of the map's storage - confirm against ProbabilisticMap.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override bool ContainsCore(char value) =>
+            ProbabilisticMap.Contains(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), _values, value);
+
+        // Forward search for the first char of 'span' that is in the set.
+        // Phase 1 (vectorized, only when supported and the span has at least Vector128<short>.Count chars):
+        //   skip ASCII chars that are not in the set, stopping at an ASCII match or at the first non-ASCII char.
+        // Phase 2: if phase 1 stopped at a non-ASCII char (or was not run), search the remainder with the ProbabilisticMap.
+        // A negative result from either phase is returned unchanged.
+        internal override int IndexOfAny(ReadOnlySpan<char> span)
+        {
+            // Number of leading chars consumed by the ASCII phase; added back to the fallback's result.
+            int offset = 0;
+
+            if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
+            {
+                // We are using IndexOfAnyAsciiSearcher to search for the first ASCII character in the set, or any non-ASCII character.
+                // We do this by inverting the bitmap and using the opposite search function (Negate instead of DontNegate).
+
+                // If the bitmap we're using contains a 0, we have to use 'Ssse3AndWasmHandleZeroInNeedle' when running on X86 and WASM.
+                // Everything else should use 'Default'. 'TOptimizations' specifies whether '_asciiBitmap' contains a 0.
+                // Since we're using the inverse bitmap in this case, we have to use 'Ssse3AndWasmHandleZeroInNeedle' iff we're
+                // running on X86/WASM and 'TOptimizations' is 'Default' (as that means that the inverse bitmap definitely has a 0).
+                Debug.Assert((_asciiBitmap[0] & 1) != (_inverseAsciiBitmap[0] & 1));
+
+                if ((Ssse3.IsSupported || PackedSimd.IsSupported) && typeof(TOptimizations) == typeof(IndexOfAnyAsciiSearcher.Default))
+                {
+                    Debug.Assert((_inverseAsciiBitmap[0] & 1) == 1, "The inverse bitmap did not contain a 0.");
+
+                    offset = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Ssse3AndWasmHandleZeroInNeedle>(
+                        ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
+                        span.Length,
+                        ref _inverseAsciiBitmap);
+                }
+                else
+                {
+                    Debug.Assert(!(Ssse3.IsSupported || PackedSimd.IsSupported) || (_inverseAsciiBitmap[0] & 1) == 0,
+                        "The inverse bitmap contained a 0, but we're not using Ssse3AndWasmHandleZeroInNeedle.");
+
+                    offset = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Default>(
+                        ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
+                        span.Length,
+                        ref _inverseAsciiBitmap);
+                }
+
+                // If we've reached the end of the span or stopped at an ASCII character, we've found the result.
+                // The unsigned comparison also catches a negative 'offset' (it wraps to a large uint), so a
+                // "nothing found" result from the vectorized search is returned as-is without indexing into the span.
+                if ((uint)offset >= (uint)span.Length || char.IsAscii(span[offset]))
+                {
+                    return offset;
+                }
+
+                // Fall back to using the ProbabilisticMap.
+                // The slice starts at the non-ASCII char we stopped on, so that char is re-checked by the map.
+                span = span.Slice(offset);
+            }
+
+            int index = ProbabilisticMap.IndexOfAny(
+                ref Unsafe.As<ProbabilisticMap, uint>(ref _map),
+                ref MemoryMarshal.GetReference(span),
+                span.Length,
+                _values);
+
+            if (index >= 0)
+            {
+                // We found a match. Account for the number of ASCII characters we've skipped previously.
+                index += offset;
+            }
+
+            return index;
+        }
+
+        // Forward search for the first char of 'span' that is NOT in the set.
+        // The vectorized phase uses the non-inverted '_asciiBitmap' with 'Negate', so 'TOptimizations' applies directly.
+        // If it stops at a non-ASCII char, the rest of the span is checked with a simple per-char loop over '_values'.
+        // A negative result from either phase is returned unchanged.
+        internal override int IndexOfAnyExcept(ReadOnlySpan<char> span)
+        {
+            // Number of leading chars consumed by the ASCII phase; added back to the fallback's result.
+            int offset = 0;
+
+            if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
+            {
+                // Do a regular IndexOfAnyExcept for the ASCII characters. The search will stop if we encounter a non-ASCII char.
+                offset = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, TOptimizations>(
+                    ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
+                    span.Length,
+                    ref _asciiBitmap);
+
+                // If we've reached the end of the span or stopped at an ASCII character, we've found the result.
+                // The unsigned comparison also catches a negative 'offset', which is then returned as-is.
+                if ((uint)offset >= (uint)span.Length || char.IsAscii(span[offset]))
+                {
+                    return offset;
+                }
+
+                // Fall back to a simple char-by-char search.
+                // The slice starts at the non-ASCII char we stopped on, so that char is re-checked by the loop.
+                span = span.Slice(offset);
+            }
+
+            int index = ProbabilisticMap.IndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(
+                ref MemoryMarshal.GetReference(span),
+                span.Length,
+                _values);
+
+            if (index >= 0)
+            {
+                // We found a match. Account for the number of ASCII characters we've skipped previously.
+                index += offset;
+            }
+
+            return index;
+        }
+
+        // Backward search for the last char of 'span' that is in the set.
+        // Mirrors IndexOfAny: a vectorized pass over the inverse bitmap first, then the ProbabilisticMap
+        // if that pass stopped at a non-ASCII char. Because the fallback searches a prefix of the original span,
+        // its result needs no offset adjustment.
+        internal override int LastIndexOfAny(ReadOnlySpan<char> span)
+        {
+            if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
+            {
+                // We are using IndexOfAnyAsciiSearcher to search for the last ASCII character in the set, or any non-ASCII character.
+                // We do this by inverting the bitmap and using the opposite search function (Negate instead of DontNegate).
+
+                // If the bitmap we're using contains a 0, we have to use 'Ssse3AndWasmHandleZeroInNeedle' when running on X86 and WASM.
+                // Everything else should use 'Default'. 'TOptimizations' specifies whether '_asciiBitmap' contains a 0.
+                // Since we're using the inverse bitmap in this case, we have to use 'Ssse3AndWasmHandleZeroInNeedle' iff we're
+                // running on X86/WASM and 'TOptimizations' is 'Default' (as that means that the inverse bitmap definitely has a 0).
+                Debug.Assert((_asciiBitmap[0] & 1) != (_inverseAsciiBitmap[0] & 1));
+
+                int offset;
+
+                if ((Ssse3.IsSupported || PackedSimd.IsSupported) && typeof(TOptimizations) == typeof(IndexOfAnyAsciiSearcher.Default))
+                {
+                    Debug.Assert((_inverseAsciiBitmap[0] & 1) == 1, "The inverse bitmap did not contain a 0.");
+
+                    offset = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Ssse3AndWasmHandleZeroInNeedle>(
+                        ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
+                        span.Length,
+                        ref _inverseAsciiBitmap);
+                }
+                else
+                {
+                    Debug.Assert(!(Ssse3.IsSupported || PackedSimd.IsSupported) || (_inverseAsciiBitmap[0] & 1) == 0,
+                        "The inverse bitmap contained a 0, but we're not using Ssse3AndWasmHandleZeroInNeedle.");
+
+                    offset = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Default>(
+                        ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
+                        span.Length,
+                        ref _inverseAsciiBitmap);
+                }
+
+                // If we've reached the end of the span or stopped at an ASCII character, we've found the result.
+                // The unsigned comparison also catches a negative 'offset', which is then returned as-is.
+                if ((uint)offset >= (uint)span.Length || char.IsAscii(span[offset]))
+                {
+                    return offset;
+                }
+
+                // Fall back to using the ProbabilisticMap.
+                // 'offset + 1' keeps the non-ASCII char we stopped on as the last element of the prefix.
+                span = span.Slice(0, offset + 1);
+            }
+
+            return ProbabilisticMap.LastIndexOfAny(
+                ref Unsafe.As<ProbabilisticMap, uint>(ref _map),
+                ref MemoryMarshal.GetReference(span),
+                span.Length,
+                _values);
+        }
+
+        // Backward search for the last char of 'span' that is NOT in the set.
+        // Mirrors IndexOfAnyExcept: a vectorized pass over '_asciiBitmap' first, then a simple per-char loop
+        // over the remaining prefix if that pass stopped at a non-ASCII char. The prefix shares its start with
+        // the original span, so the fallback's result needs no offset adjustment.
+        internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span)
+        {
+            if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
+            {
+                // Do a regular LastIndexOfAnyExcept for the ASCII characters. The search will stop if we encounter a non-ASCII char.
+                int offset = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, TOptimizations>(
+                    ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
+                    span.Length,
+                    ref _asciiBitmap);
+
+                // If we've reached the end of the span or stopped at an ASCII character, we've found the result.
+                // The unsigned comparison also catches a negative 'offset', which is then returned as-is.
+                if ((uint)offset >= (uint)span.Length || char.IsAscii(span[offset]))
+                {
+                    return offset;
+                }
+
+                // Fall back to a simple char-by-char search.
+                // 'offset + 1' keeps the non-ASCII char we stopped on as the last element of the prefix.
+                span = span.Slice(0, offset + 1);
+            }
+
+            return ProbabilisticMap.LastIndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(
+                ref MemoryMarshal.GetReference(span),
+                span.Length,
+                _values);
+        }
+    }
+}
index 9070e51..b7fee2b 100644 (file)
@@ -140,7 +140,29 @@ namespace System.Buffers
                 return new Latin1CharSearchValues(values);
             }
 
-            return new ProbabilisticCharSearchValues(values);
+            scoped ReadOnlySpan<char> probabilisticValues = values;
+
+            if (Vector128.IsHardwareAccelerated && values.Length < 8)
+            {
+                // ProbabilisticMap does a Span.Contains check to confirm potential matches.
+                // If we have fewer than 8 values, pad them with existing ones to make the verification faster.
+                Span<char> newValues = stackalloc char[8];
+                newValues.Fill(values[0]);
+                values.CopyTo(newValues);
+                probabilisticValues = newValues;
+            }
+
+            if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && minInclusive < 128)
+            {
+                // If we have both ASCII and non-ASCII characters, use an implementation that
+                // does an optimistic ASCII fast-path and then falls back to the ProbabilisticMap.
+
+                return (Ssse3.IsSupported || PackedSimd.IsSupported) && probabilisticValues.Contains('\0')
+                    ? new ProbabilisticWithAsciiCharSearchValues<IndexOfAnyAsciiSearcher.Ssse3AndWasmHandleZeroInNeedle>(probabilisticValues)
+                    : new ProbabilisticWithAsciiCharSearchValues<IndexOfAnyAsciiSearcher.Default>(probabilisticValues);
+            }
+
+            return new ProbabilisticCharSearchValues(probabilisticValues);
         }
 
         private static bool TryGetSingleRange<T>(ReadOnlySpan<T> values, out T minInclusive, out T maxInclusive)