Avoid bloom filter checks for IndexOfAnyExcept in ProbabilisticMap (#85203)
author Miha Zupan <mihazupan.zupan1@gmail.com>
Mon, 5 Jun 2023 14:07:12 +0000 (16:07 +0200)
committer GitHub <noreply@github.com>
Mon, 5 Jun 2023 14:07:12 +0000 (10:07 -0400)
src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticCharSearchValues.cs
src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticMap.cs

index a61f9b7..fc7b012 100644 (file)
@@ -34,30 +34,16 @@ namespace System.Buffers
         internal override bool ContainsCore(char value) =>
             ProbabilisticMap.Contains(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), _values, value);
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal override int IndexOfAny(ReadOnlySpan<char> span) =>
-            IndexOfAny<IndexOfAnyAsciiSearcher.DontNegate>(ref MemoryMarshal.GetReference(span), span.Length);
+            ProbabilisticMap.IndexOfAny(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), ref MemoryMarshal.GetReference(span), span.Length, _values);
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal override int IndexOfAnyExcept(ReadOnlySpan<char> span) =>
-            IndexOfAny<IndexOfAnyAsciiSearcher.Negate>(ref MemoryMarshal.GetReference(span), span.Length);
+            ProbabilisticMap.IndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(ref MemoryMarshal.GetReference(span), span.Length, _values);
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal override int LastIndexOfAny(ReadOnlySpan<char> span) =>
-            LastIndexOfAny<IndexOfAnyAsciiSearcher.DontNegate>(ref MemoryMarshal.GetReference(span), span.Length);
+            ProbabilisticMap.LastIndexOfAny(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), ref MemoryMarshal.GetReference(span), span.Length, _values);
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span) =>
-            LastIndexOfAny<IndexOfAnyAsciiSearcher.Negate>(ref MemoryMarshal.GetReference(span), span.Length);
-
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        private int IndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength)
-            where TNegator : struct, IndexOfAnyAsciiSearcher.INegator =>
-            ProbabilisticMap.IndexOfAny<TNegator>(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), ref searchSpace, searchSpaceLength, _values);
-
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        private int LastIndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength)
-            where TNegator : struct, IndexOfAnyAsciiSearcher.INegator =>
-            ProbabilisticMap.LastIndexOfAny<TNegator>(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), ref searchSpace, searchSpaceLength, _values);
+            ProbabilisticMap.LastIndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(ref MemoryMarshal.GetReference(span), span.Length, _values);
     }
 }
index 564e65f..8a0d653 100644 (file)
@@ -215,116 +215,96 @@ namespace System.Buffers
                 || (searchSpaceLength < 20 && searchSpaceLength < (valuesLength >> 1));
         }
 
-        public static int IndexOfAny(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength) =>
-            IndexOfAny<SpanHelpers.DontNegate<char>>(ref searchSpace, searchSpaceLength, ref values, valuesLength);
-
-        public static int IndexOfAnyExcept(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength) =>
-            IndexOfAny<SpanHelpers.Negate<char>>(ref searchSpace, searchSpaceLength, ref values, valuesLength);
-
-        public static int LastIndexOfAny(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength) =>
-            LastIndexOfAny<SpanHelpers.DontNegate<char>>(ref searchSpace, searchSpaceLength, ref values, valuesLength);
-
-        public static int LastIndexOfAnyExcept(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength) =>
-            LastIndexOfAny<SpanHelpers.Negate<char>>(ref searchSpace, searchSpaceLength, ref values, valuesLength);
-
-        private static int IndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
-            where TNegator : struct, SpanHelpers.INegator<char>
+        public static int IndexOfAny(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
         {
             var valuesSpan = new ReadOnlySpan<char>(ref values, valuesLength);
 
             // If the search space is relatively short compared to the needle, do a simple O(n * m) search.
             if (ShouldUseSimpleLoop(searchSpaceLength, valuesLength))
             {
-                ref char searchSpaceEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength);
-                ref char cur = ref searchSpace;
+                return IndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.DontNegate>(ref searchSpace, searchSpaceLength, valuesSpan);
+            }
 
-                while (!Unsafe.AreSame(ref cur, ref searchSpaceEnd))
-                {
-                    char c = cur;
-                    if (TNegator.NegateIfNeeded(Contains(valuesSpan, c)))
-                    {
-                        return (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref cur) / sizeof(char));
-                    }
+            if (IndexOfAnyAsciiSearcher.TryIndexOfAny(ref searchSpace, searchSpaceLength, valuesSpan, out int index))
+            {
+                return index;
+            }
 
-                    cur = ref Unsafe.Add(ref cur, 1);
-                }
+            return ProbabilisticIndexOfAny(ref searchSpace, searchSpaceLength, ref values, valuesLength);
+        }
 
-                return -1;
-            }
+        public static int IndexOfAnyExcept(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
+        {
+            var valuesSpan = new ReadOnlySpan<char>(ref values, valuesLength);
 
-            if (typeof(TNegator) == typeof(SpanHelpers.DontNegate<char>)
-                ? IndexOfAnyAsciiSearcher.TryIndexOfAny(ref searchSpace, searchSpaceLength, valuesSpan, out int index)
-                : IndexOfAnyAsciiSearcher.TryIndexOfAnyExcept(ref searchSpace, searchSpaceLength, valuesSpan, out index))
+            if (IndexOfAnyAsciiSearcher.IsVectorizationSupported &&
+                !ShouldUseSimpleLoop(searchSpaceLength, valuesLength) &&
+                IndexOfAnyAsciiSearcher.TryIndexOfAnyExcept(ref searchSpace, searchSpaceLength, valuesSpan, out int index))
             {
                 return index;
             }
 
-            return ProbabilisticIndexOfAny<TNegator>(ref searchSpace, searchSpaceLength, ref values, valuesLength);
+            return IndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(ref searchSpace, searchSpaceLength, valuesSpan);
         }
 
-        private static int LastIndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
-            where TNegator : struct, SpanHelpers.INegator<char>
+        public static int LastIndexOfAny(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
         {
             var valuesSpan = new ReadOnlySpan<char>(ref values, valuesLength);
 
             // If the search space is relatively short compared to the needle, do a simple O(n * m) search.
             if (ShouldUseSimpleLoop(searchSpaceLength, valuesLength))
             {
-                for (int i = searchSpaceLength - 1; i >= 0; i--)
-                {
-                    char c = Unsafe.Add(ref searchSpace, i);
-                    if (TNegator.NegateIfNeeded(Contains(valuesSpan, c)))
-                    {
-                        return i;
-                    }
-                }
+                return LastIndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.DontNegate>(ref searchSpace, searchSpaceLength, valuesSpan);
+            }
 
-                return -1;
+            if (IndexOfAnyAsciiSearcher.TryLastIndexOfAny(ref searchSpace, searchSpaceLength, valuesSpan, out int index))
+            {
+                return index;
             }
 
-            if (typeof(TNegator) == typeof(SpanHelpers.DontNegate<char>)
-                ? IndexOfAnyAsciiSearcher.TryLastIndexOfAny(ref searchSpace, searchSpaceLength, valuesSpan, out int index)
-                : IndexOfAnyAsciiSearcher.TryLastIndexOfAnyExcept(ref searchSpace, searchSpaceLength, valuesSpan, out index))
+            return ProbabilisticLastIndexOfAny(ref searchSpace, searchSpaceLength, ref values, valuesLength);
+        }
+
+        public static int LastIndexOfAnyExcept(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
+        {
+            var valuesSpan = new ReadOnlySpan<char>(ref values, valuesLength);
+
+            if (IndexOfAnyAsciiSearcher.IsVectorizationSupported &&
+                !ShouldUseSimpleLoop(searchSpaceLength, valuesLength) &&
+                IndexOfAnyAsciiSearcher.TryLastIndexOfAnyExcept(ref searchSpace, searchSpaceLength, valuesSpan, out int index))
             {
                 return index;
             }
 
-            return ProbabilisticLastIndexOfAny<TNegator>(ref searchSpace, searchSpaceLength, ref values, valuesLength);
+            return LastIndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(ref searchSpace, searchSpaceLength, valuesSpan);
         }
 
         [MethodImpl(MethodImplOptions.NoInlining)]
-        private static int ProbabilisticIndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
-            where TNegator : struct, SpanHelpers.INegator<char>
+        private static int ProbabilisticIndexOfAny(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
         {
             var valuesSpan = new ReadOnlySpan<char>(ref values, valuesLength);
 
             var map = new ProbabilisticMap(valuesSpan);
             ref uint charMap = ref Unsafe.As<ProbabilisticMap, uint>(ref map);
 
-            return typeof(TNegator) == typeof(SpanHelpers.DontNegate<char>)
-                ? IndexOfAny<IndexOfAnyAsciiSearcher.DontNegate>(ref charMap, ref searchSpace, searchSpaceLength, valuesSpan)
-                : IndexOfAny<IndexOfAnyAsciiSearcher.Negate>(ref charMap, ref searchSpace, searchSpaceLength, valuesSpan);
+            return IndexOfAny(ref charMap, ref searchSpace, searchSpaceLength, valuesSpan);
         }
 
         [MethodImpl(MethodImplOptions.NoInlining)]
-        private static int ProbabilisticLastIndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
-            where TNegator : struct, SpanHelpers.INegator<char>
+        private static int ProbabilisticLastIndexOfAny(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
         {
             var valuesSpan = new ReadOnlySpan<char>(ref values, valuesLength);
 
             var map = new ProbabilisticMap(valuesSpan);
             ref uint charMap = ref Unsafe.As<ProbabilisticMap, uint>(ref map);
 
-            return typeof(TNegator) == typeof(SpanHelpers.DontNegate<char>)
-                ? LastIndexOfAny<IndexOfAnyAsciiSearcher.DontNegate>(ref charMap, ref searchSpace, searchSpaceLength, valuesSpan)
-                : LastIndexOfAny<IndexOfAnyAsciiSearcher.Negate>(ref charMap, ref searchSpace, searchSpaceLength, valuesSpan);
+            return LastIndexOfAny(ref charMap, ref searchSpace, searchSpaceLength, valuesSpan);
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal static int IndexOfAny<TNegator>(ref uint charMap, ref char searchSpace, int searchSpaceLength, ReadOnlySpan<char> values)
-            where TNegator : struct, IndexOfAnyAsciiSearcher.INegator
+        internal static int IndexOfAny(ref uint charMap, ref char searchSpace, int searchSpaceLength, ReadOnlySpan<char> values)
         {
-            if ((Sse41.IsSupported || AdvSimd.Arm64.IsSupported) && typeof(TNegator) == typeof(IndexOfAnyAsciiSearcher.DontNegate) && searchSpaceLength >= 16)
+            if ((Sse41.IsSupported || AdvSimd.Arm64.IsSupported) && searchSpaceLength >= 16)
             {
                 return IndexOfAnyVectorized(ref charMap, ref searchSpace, searchSpaceLength, values);
             }
@@ -335,7 +315,7 @@ namespace System.Buffers
             while (!Unsafe.AreSame(ref cur, ref searchSpaceEnd))
             {
                 int ch = cur;
-                if (TNegator.NegateIfNeeded(Contains(ref charMap, values, ch)))
+                if (Contains(ref charMap, values, ch))
                 {
                     return (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref cur) / sizeof(char));
                 }
@@ -347,13 +327,12 @@ namespace System.Buffers
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal static int LastIndexOfAny<TNegator>(ref uint charMap, ref char searchSpace, int searchSpaceLength, ReadOnlySpan<char> values)
-            where TNegator : struct, IndexOfAnyAsciiSearcher.INegator
+        internal static int LastIndexOfAny(ref uint charMap, ref char searchSpace, int searchSpaceLength, ReadOnlySpan<char> values)
         {
             for (int i = searchSpaceLength - 1; i >= 0; i--)
             {
                 int ch = Unsafe.Add(ref searchSpace, i);
-                if (TNegator.NegateIfNeeded(Contains(ref charMap, values, ch)))
+                if (Contains(ref charMap, values, ch))
                 {
                     return i;
                 }
@@ -473,5 +452,42 @@ namespace System.Buffers
 
             return -1;
         }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static int IndexOfAnySimpleLoop<TNegator>(ref char searchSpace, int searchSpaceLength, ReadOnlySpan<char> values)
+            where TNegator : struct, IndexOfAnyAsciiSearcher.INegator
+        { // Forward linear scan: returns the index of the first char whose (TNegator-adjusted) membership test in 'values' is true, or -1.
+            ref char searchSpaceEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength); // One-past-the-end marker; only compared by reference below.
+            ref char cur = ref searchSpace; // Cursor that advances one char per iteration.
+
+            while (!Unsafe.AreSame(ref cur, ref searchSpaceEnd)) // A zero-length search space skips the loop entirely.
+            {
+                char c = cur;
+                if (TNegator.NegateIfNeeded(Contains(values, c))) // The *Except callers in this change pass the Negate negator; the non-Except ones pass DontNegate.
+                {
+                    return (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref cur) / sizeof(char)); // Byte distance from the start converted to a char index.
+                }
+
+                cur = ref Unsafe.Add(ref cur, 1);
+            }
+
+            return -1; // Reached the end without a match.
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static int LastIndexOfAnySimpleLoop<TNegator>(ref char searchSpace, int searchSpaceLength, ReadOnlySpan<char> values)
+            where TNegator : struct, IndexOfAnyAsciiSearcher.INegator
+        { // Backward linear scan: returns the index of the last char whose (TNegator-adjusted) membership test in 'values' is true, or -1.
+            for (int i = searchSpaceLength - 1; i >= 0; i--) // Walk from the final element down to index 0; a zero-length search space skips the loop.
+            {
+                char c = Unsafe.Add(ref searchSpace, i);
+                if (TNegator.NegateIfNeeded(Contains(values, c))) // The *Except callers in this change pass the Negate negator; the non-Except ones pass DontNegate.
+                {
+                    return i; // First hit while scanning backwards is the last match in the search space.
+                }
+            }
+
+            return -1; // No element matched.
+        }
     }
 }