From 68c87d6694983ac47168007156e389f9a9f6c3fb Mon Sep 17 00:00:00 2001 From: Ben Adams Date: Mon, 22 Oct 2018 21:32:37 +0100 Subject: [PATCH] Additionally Vectorize string.IndexOfAny for value lengths 2,3,4,5 (dotnet/coreclr#19790) * Vectorize string.IndexOfAny * Vectorize string.IndexOfAny [4,5] * Feedback * Call order preference Commit migrated from https://github.com/dotnet/coreclr/commit/018df68ca8c15356a345adc81ef74d1f246df04c --- .../src/System/MemoryExtensions.cs | 137 +++++++ .../src/System/SpanHelpers.Char.cs | 418 ++++++++++++++++++++- .../src/System/String.Searching.cs | 67 +--- 3 files changed, 554 insertions(+), 68 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs index 1f5206b..abd4dbc 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs @@ -486,6 +486,12 @@ namespace System Unsafe.As(ref value0), Unsafe.As(ref value1), span.Length); + if (typeof(T) == typeof(char)) + return SpanHelpers.IndexOfAny( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + Unsafe.As(ref value0), + Unsafe.As(ref value1), + span.Length); return SpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), value0, value1, span.Length); } @@ -508,6 +514,13 @@ namespace System Unsafe.As(ref value1), Unsafe.As(ref value2), span.Length); + if (typeof(T) == typeof(char)) + return SpanHelpers.IndexOfAny( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + Unsafe.As(ref value0), + Unsafe.As(ref value1), + Unsafe.As(ref value2), + span.Length); return SpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), value0, value1, value2, span.Length); } @@ -522,11 +535,67 @@ namespace System where T : IEquatable { if (typeof(T) == typeof(byte)) + { return SpanHelpers.IndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref Unsafe.As(ref MemoryMarshal.GetReference(values)), values.Length); + } + if (typeof(T) == typeof(char)) + { + ref var valueRef = ref Unsafe.As(ref MemoryMarshal.GetReference(values)); + if (values.Length == 5) + { + // Length 5 is a common length for FileSystemName expression (", <, >, *, ?) and in preference to 2 as it has an explicit overload + return SpanHelpers.IndexOfAny( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + valueRef, + Unsafe.Add(ref valueRef, 1), + Unsafe.Add(ref valueRef, 2), + Unsafe.Add(ref valueRef, 3), + Unsafe.Add(ref valueRef, 4), + span.Length); + } + else if (values.Length == 2) + { + // Length 2 is a common length for simple wildcards (*, ?), directory separators (/, \), quotes (", '), brackets, etc + return SpanHelpers.IndexOfAny( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + valueRef, + Unsafe.Add(ref valueRef, 1), + span.Length); + } + else if (values.Length == 4) + { + // Length 4 before 3 as 3 has an explicit overload + return SpanHelpers.IndexOfAny( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + valueRef, + Unsafe.Add(ref valueRef, 1), + Unsafe.Add(ref valueRef, 2), + Unsafe.Add(ref valueRef, 3), + span.Length); + } + else if (values.Length == 3) + { + return SpanHelpers.IndexOfAny( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + valueRef, + Unsafe.Add(ref valueRef, 1), + Unsafe.Add(ref valueRef, 2), + span.Length); + } + else if (values.Length == 1) + { + // Length 1 last, as ctoring a ReadOnlySpan to call this overload for a single value + // is already throwing away a bunch of performance vs just calling IndexOf + return SpanHelpers.IndexOf( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + valueRef, + span.Length); + } + } return SpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), span.Length, ref MemoryMarshal.GetReference(values), values.Length); } @@ -547,6 +616,12 @@ namespace System Unsafe.As(ref value0), Unsafe.As(ref value1), span.Length); + if (typeof(T) == typeof(char)) + return SpanHelpers.IndexOfAny( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + Unsafe.As(ref value0), + Unsafe.As(ref value1), + span.Length); return SpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), value0, value1, span.Length); } @@ -569,6 +644,13 @@ namespace System Unsafe.As(ref value1), Unsafe.As(ref value2), span.Length); + if (typeof(T) == typeof(char)) + return SpanHelpers.IndexOfAny( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + Unsafe.As(ref value0), + Unsafe.As(ref value1), + Unsafe.As(ref value2), + span.Length); return SpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), value0, value1, value2, span.Length); } @@ -589,6 +671,61 @@ namespace System ref Unsafe.As(ref MemoryMarshal.GetReference(values)), values.Length); + if (typeof(T) == typeof(char)) + { + ref var valueRef = ref Unsafe.As(ref MemoryMarshal.GetReference(values)); + if (values.Length == 5) + { + // Length 5 is a common length for FileSystemName expression (", <, >, *, ?) and in preference to 2 as it has an explicit overload + return SpanHelpers.IndexOfAny( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + valueRef, + Unsafe.Add(ref valueRef, 1), + Unsafe.Add(ref valueRef, 2), + Unsafe.Add(ref valueRef, 3), + Unsafe.Add(ref valueRef, 4), + span.Length); + } + else if (values.Length == 2) + { + // Length 2 is a common length for simple wildcards (*, ?), directory separators (/, \), quotes (", '), brackets, etc + return SpanHelpers.IndexOfAny( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + valueRef, + Unsafe.Add(ref valueRef, 1), + span.Length); + } + else if (values.Length == 4) + { + // Length 4 before 3 as 3 has an explicit overload + return SpanHelpers.IndexOfAny( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + valueRef, + Unsafe.Add(ref valueRef, 1), + Unsafe.Add(ref valueRef, 2), + Unsafe.Add(ref valueRef, 3), + span.Length); + } + else if (values.Length == 3) + { + return SpanHelpers.IndexOfAny( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + valueRef, + Unsafe.Add(ref valueRef, 1), + Unsafe.Add(ref valueRef, 2), + span.Length); + } + else if (values.Length == 1) + { + // Length 1 last, as ctoring a ReadOnlySpan to call this overload for a single value + // is already throwing away a bunch of performance vs just calling IndexOf + return SpanHelpers.IndexOf( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + valueRef, + span.Length); + } + } + return SpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), span.Length, ref MemoryMarshal.GetReference(values), values.Length); } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 02df69f..40d0575 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -189,13 +189,13 @@ namespace System { length -= 4; - if (*pCh == value) + if (pCh[0] == value) goto Found; - if (*(pCh + 1) == value) + if (pCh[1] == value) goto Found1; - if (*(pCh + 2) == value) + if (pCh[2] == value) goto Found2; - if (*(pCh + 3) == value) + if (pCh[3] == value) goto Found3; pCh += 4; @@ -203,12 +203,12 @@ namespace System while (length > 0) { - length -= 1; + length--; - if (*pCh == value) + if (pCh[0] == value) goto Found; - pCh += 1; + pCh++; } // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow @@ -257,6 +257,410 @@ namespace System } } + public static unsafe int IndexOfAny(ref char searchSpace, char value0, char value1, int length) + { + Debug.Assert(length >= 0); + + fixed (char* pChars = &searchSpace) + { + char* pCh = pChars; + char* pEndCh = pCh + length; + + if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) + { + // Figure out how many characters to read sequentially until we are vector aligned + // This is equivalent to: + // unaligned = ((int)pCh % Unsafe.SizeOf>()) / elementsPerByte + // length = (Vector.Count - unaligned) % Vector.Count + const int elementsPerByte = sizeof(ushort) / sizeof(byte); + int unaligned = ((int)pCh & (Unsafe.SizeOf>() - 1)) / elementsPerByte; + length = (Vector.Count - unaligned) & (Vector.Count - 1); + } + + SequentialScan: + while (length >= 4) + { + length -= 4; + + if (pCh[0] == value0 || pCh[0] == value1) + goto Found; + if (pCh[1] == value0 || pCh[1] == value1) + goto Found1; + if (pCh[2] == value0 || pCh[2] == value1) + goto Found2; + if (pCh[3] == value0 || pCh[3] == value1) + goto Found3; + + pCh += 4; + } + + while (length > 0) + { + length--; + + if (pCh[0] == value0 || pCh[0] == value1) + goto Found; + + pCh++; + } + + // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow + // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated. + if (Vector.IsHardwareAccelerated && pCh < pEndCh) + { + // Get the highest multiple of Vector.Count that is within the search space. + // That will be how many times we iterate in the loop below. + // This is equivalent to: length = Vector.Count * ((int)(pEndCh - pCh) / Vector.Count) + length = (int)((pEndCh - pCh) & ~(Vector.Count - 1)); + + // Get comparison Vector + Vector values0 = new Vector(value0); + Vector values1 = new Vector(value1); + + while (length > 0) + { + // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh is always vector aligned + Debug.Assert(((int)pCh & (Unsafe.SizeOf>() - 1)) == 0); + Vector vData = Unsafe.Read>(pCh); + var vMatches = Vector.BitwiseOr( + Vector.Equals(vData, values0), + Vector.Equals(vData, values1)); + if (Vector.Zero.Equals(vMatches)) + { + pCh += Vector.Count; + length -= Vector.Count; + continue; + } + // Find offset of first match + return (int)(pCh - pChars) + LocateFirstFoundChar(vMatches); + } + + if (pCh < pEndCh) + { + length = (int)(pEndCh - pCh); + goto SequentialScan; + } + } + + return -1; + Found3: + pCh++; + Found2: + pCh++; + Found1: + pCh++; + Found: + return (int)(pCh - pChars); + } + } + + public static unsafe int IndexOfAny(ref char searchSpace, char value0, char value1, char value2, int length) + { + Debug.Assert(length >= 0); + + fixed (char* pChars = &searchSpace) + { + char* pCh = pChars; + char* pEndCh = pCh + length; + + if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) + { + // Figure out how many characters to read sequentially until we are vector aligned + // This is equivalent to: + // unaligned = ((int)pCh % Unsafe.SizeOf>()) / elementsPerByte + // length = (Vector.Count - unaligned) % Vector.Count + const int elementsPerByte = sizeof(ushort) / sizeof(byte); + int unaligned = ((int)pCh & (Unsafe.SizeOf>() - 1)) / elementsPerByte; + length = (Vector.Count - unaligned) & (Vector.Count - 1); + } + + SequentialScan: + while (length >= 4) + { + length -= 4; + + if (pCh[0] == value0 || pCh[0] == value1 || pCh[0] == value2) + goto Found; + if (pCh[1] == value0 || pCh[1] == value1 || pCh[1] == value2) + goto Found1; + if (pCh[2] == value0 || pCh[2] == value1 || pCh[2] == value2) + goto Found2; + if (pCh[3] == value0 || pCh[3] == value1 || pCh[3] == value2) + goto Found3; + + pCh += 4; + } + + while (length > 0) + { + length--; + + if (pCh[0] == value0 || pCh[0] == value1 || pCh[0] == value2) + goto Found; + + pCh++; + } + + // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow + // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated. + if (Vector.IsHardwareAccelerated && pCh < pEndCh) + { + // Get the highest multiple of Vector.Count that is within the search space. + // That will be how many times we iterate in the loop below. + // This is equivalent to: length = Vector.Count * ((int)(pEndCh - pCh) / Vector.Count) + length = (int)((pEndCh - pCh) & ~(Vector.Count - 1)); + + // Get comparison Vector + Vector values0 = new Vector(value0); + Vector values1 = new Vector(value1); + Vector values2 = new Vector(value2); + + while (length > 0) + { + // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh is always vector aligned + Debug.Assert(((int)pCh & (Unsafe.SizeOf>() - 1)) == 0); + Vector vData = Unsafe.Read>(pCh); + var vMatches = Vector.BitwiseOr( + Vector.BitwiseOr( + Vector.Equals(vData, values0), + Vector.Equals(vData, values1)), + Vector.Equals(vData, values2)); + + if (Vector.Zero.Equals(vMatches)) + { + pCh += Vector.Count; + length -= Vector.Count; + continue; + } + // Find offset of first match + return (int)(pCh - pChars) + LocateFirstFoundChar(vMatches); + } + + if (pCh < pEndCh) + { + length = (int)(pEndCh - pCh); + goto SequentialScan; + } + } + return -1; + Found3: + pCh++; + Found2: + pCh++; + Found1: + pCh++; + Found: + return (int)(pCh - pChars); + } + } + + public static unsafe int IndexOfAny(ref char searchSpace, char value0, char value1, char value2, char value3, int length) + { + Debug.Assert(length >= 0); + + fixed (char* pChars = &searchSpace) + { + char* pCh = pChars; + char* pEndCh = pCh + length; + + if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) + { + // Figure out how many characters to read sequentially until we are vector aligned + // This is equivalent to: + // unaligned = ((int)pCh % Unsafe.SizeOf>()) / elementsPerByte + // length = (Vector.Count - unaligned) % Vector.Count + const int elementsPerByte = sizeof(ushort) / sizeof(byte); + int unaligned = ((int)pCh & (Unsafe.SizeOf>() - 1)) / elementsPerByte; + length = (Vector.Count - unaligned) & (Vector.Count - 1); + } + + SequentialScan: + while (length >= 4) + { + length -= 4; + + if (pCh[0] == value0 || pCh[0] == value1 || pCh[0] == value2 || pCh[0] == value3) + goto Found; + if (pCh[1] == value0 || pCh[1] == value1 || pCh[1] == value2 || pCh[1] == value3) + goto Found1; + if (pCh[2] == value0 || pCh[2] == value1 || pCh[2] == value2 || pCh[2] == value3) + goto Found2; + if (pCh[3] == value0 || pCh[3] == value1 || pCh[3] == value2 || pCh[3] == value3) + goto Found3; + + pCh += 4; + } + + while (length > 0) + { + length--; + + if (pCh[0] == value0 || pCh[0] == value1 || pCh[0] == value2 || pCh[0] == value3) + goto Found; + + pCh++; + } + + // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow + // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated. + if (Vector.IsHardwareAccelerated && pCh < pEndCh) + { + // Get the highest multiple of Vector.Count that is within the search space. + // That will be how many times we iterate in the loop below. + // This is equivalent to: length = Vector.Count * ((int)(pEndCh - pCh) / Vector.Count) + length = (int)((pEndCh - pCh) & ~(Vector.Count - 1)); + + // Get comparison Vector + Vector values0 = new Vector(value0); + Vector values1 = new Vector(value1); + Vector values2 = new Vector(value2); + Vector values3 = new Vector(value3); + + while (length > 0) + { + // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh is always vector aligned + Debug.Assert(((int)pCh & (Unsafe.SizeOf>() - 1)) == 0); + Vector vData = Unsafe.Read>(pCh); + var vMatches = Vector.BitwiseOr( + Vector.BitwiseOr( + Vector.BitwiseOr(Vector.Equals(vData, values0), Vector.Equals(vData, values1)), + Vector.Equals(vData, values2)), + Vector.Equals(vData, values3)); + + if (Vector.Zero.Equals(vMatches)) + { + pCh += Vector.Count; + length -= Vector.Count; + continue; + } + // Find offset of first match + return (int)(pCh - pChars) + LocateFirstFoundChar(vMatches); + } + + if (pCh < pEndCh) + { + length = (int)(pEndCh - pCh); + goto SequentialScan; + } + } + + return -1; + Found3: + pCh++; + Found2: + pCh++; + Found1: + pCh++; + Found: + return (int)(pCh - pChars); + } + } + + public static unsafe int IndexOfAny(ref char searchSpace, char value0, char value1, char value2, char value3, char value4, int length) + { + Debug.Assert(length >= 0); + + fixed (char* pChars = &searchSpace) + { + char* pCh = pChars; + char* pEndCh = pCh + length; + + if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) + { + // Figure out how many characters to read sequentially until we are vector aligned + // This is equivalent to: + // unaligned = ((int)pCh % Unsafe.SizeOf>()) / elementsPerByte + // length = (Vector.Count - unaligned) % Vector.Count + const int elementsPerByte = sizeof(ushort) / sizeof(byte); + int unaligned = ((int)pCh & (Unsafe.SizeOf>() - 1)) / elementsPerByte; + length = (Vector.Count - unaligned) & (Vector.Count - 1); + } + + SequentialScan: + while (length >= 4) + { + length -= 4; + + if (pCh[0] == value0 || pCh[0] == value1 || pCh[0] == value2 || pCh[0] == value3 || pCh[0] == value4) + goto Found; + if (pCh[1] == value0 || pCh[1] == value1 || pCh[1] == value2 || pCh[1] == value3 || pCh[1] == value4) + goto Found1; + if (pCh[2] == value0 || pCh[2] == value1 || pCh[2] == value2 || pCh[2] == value3 || pCh[2] == value4) + goto Found2; + if (pCh[3] == value0 || pCh[3] == value1 || pCh[3] == value2 || pCh[3] == value3 || pCh[3] == value4) + goto Found3; + + pCh += 4; + } + + while (length > 0) + { + length--; + + if (pCh[0] == value0 || pCh[0] == value1 || pCh[0] == value2 || pCh[0] == value3 || pCh[0] == value4) + goto Found; + + pCh++; + } + + // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow + // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated. + if (Vector.IsHardwareAccelerated && pCh < pEndCh) + { + // Get the highest multiple of Vector.Count that is within the search space. + // That will be how many times we iterate in the loop below. + // This is equivalent to: length = Vector.Count * ((int)(pEndCh - pCh) / Vector.Count) + length = (int)((pEndCh - pCh) & ~(Vector.Count - 1)); + + // Get comparison Vector + Vector values0 = new Vector(value0); + Vector values1 = new Vector(value1); + Vector values2 = new Vector(value2); + Vector values3 = new Vector(value3); + Vector values4 = new Vector(value4); + + while (length > 0) + { + // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh is always vector aligned + Debug.Assert(((int)pCh & (Unsafe.SizeOf>() - 1)) == 0); + Vector vData = Unsafe.Read>(pCh); + var vMatches = Vector.BitwiseOr( + Vector.BitwiseOr( + Vector.BitwiseOr( + Vector.BitwiseOr(Vector.Equals(vData, values0), Vector.Equals(vData, values1)), + Vector.Equals(vData, values2)), + Vector.Equals(vData, values3)), + Vector.Equals(vData, values4)); + + if (Vector.Zero.Equals(vMatches)) + { + pCh += Vector.Count; + length -= Vector.Count; + continue; + } + // Find offset of first match + return (int)(pCh - pChars) + LocateFirstFoundChar(vMatches); + } + + if (pCh < pEndCh) + { + length = (int)(pEndCh - pCh); + goto SequentialScan; + } + } + + return -1; + Found3: + pCh++; + Found2: + pCh++; + Found1: + pCh++; + Found: + return (int)(pCh - pChars); + } + } + public static unsafe int LastIndexOf(ref char searchSpace, char value, int length) { Debug.Assert(length >= 0); diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/String.Searching.cs index 7660c52..e288b2c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Searching.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Searching.cs @@ -99,78 +99,23 @@ namespace System if ((uint)count > (uint)(Length - startIndex)) throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count); - if (anyOf.Length == 2) + if (anyOf.Length > 0 && anyOf.Length <= 5) { - // Very common optimization for directory separators (/, \), quotes (", '), brackets, etc - return IndexOfAny(anyOf[0], anyOf[1], startIndex, count); + // The ReadOnlySpan.IndexOfAny extension is vectorized for values of 1 - 5 in length + var result = new ReadOnlySpan(ref Unsafe.Add(ref _firstChar, startIndex), count).IndexOfAny(anyOf); + return result == -1 ? result : result + startIndex; } - else if (anyOf.Length == 3) - { - return IndexOfAny(anyOf[0], anyOf[1], anyOf[2], startIndex, count); - } - else if (anyOf.Length > 3) + else if (anyOf.Length > 5) { + // Use Probabilistic Map return IndexOfCharArray(anyOf, startIndex, count); } - else if (anyOf.Length == 1) - { - return IndexOf(anyOf[0], startIndex, count); - } else // anyOf.Length == 0 { return -1; } } - private unsafe int IndexOfAny(char value1, char value2, int startIndex, int count) - { - fixed (char* pChars = &_firstChar) - { - char* pCh = pChars + startIndex; - - while (count > 0) - { - char c = *pCh; - - if (c == value1 || c == value2) - return (int)(pCh - pChars); - - // Possibly reads outside of count and can include null terminator - // Handled in the return logic - c = *(pCh + 1); - - if (c == value1 || c == value2) - return (count == 1 ? -1 : (int)(pCh - pChars) + 1); - - pCh += 2; - count -= 2; - } - - return -1; - } - } - - private unsafe int IndexOfAny(char value1, char value2, char value3, int startIndex, int count) - { - fixed (char* pChars = &_firstChar) - { - char* pCh = pChars + startIndex; - - while (count > 0) - { - char c = *pCh; - - if (c == value1 || c == value2 || c == value3) - return (int)(pCh - pChars); - - pCh++; - count--; - } - - return -1; - } - } - private unsafe int IndexOfCharArray(char[] anyOf, int startIndex, int count) { // use probabilistic map, see InitializeProbabilisticMap -- 2.7.4