Additionally Vectorize string.IndexOfAny for value lengths 2,3,4,5 (#19790)
authorBen Adams <thundercat@illyriad.co.uk>
Mon, 22 Oct 2018 20:32:37 +0000 (21:32 +0100)
committerTanner Gooding <tagoo@outlook.com>
Mon, 22 Oct 2018 20:32:37 +0000 (13:32 -0700)
* Vectorize string.IndexOfAny

* Vectorize string.IndexOfAny [4,5]

* Feedback

* Call order preference

src/System.Private.CoreLib/shared/System/MemoryExtensions.cs
src/System.Private.CoreLib/shared/System/SpanHelpers.Char.cs
src/System.Private.CoreLib/shared/System/String.Searching.cs

index 1f5206b..abd4dbc 100644 (file)
@@ -486,6 +486,12 @@ namespace System
                     Unsafe.As<T, byte>(ref value0),
                     Unsafe.As<T, byte>(ref value1),
                     span.Length);
+            if (typeof(T) == typeof(char))
+                return SpanHelpers.IndexOfAny(
+                    ref Unsafe.As<T, char>(ref MemoryMarshal.GetReference(span)),
+                    Unsafe.As<T, char>(ref value0),
+                    Unsafe.As<T, char>(ref value1),
+                    span.Length);
 
             return SpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), value0, value1, span.Length);
         }
@@ -508,6 +514,13 @@ namespace System
                     Unsafe.As<T, byte>(ref value1),
                     Unsafe.As<T, byte>(ref value2),
                     span.Length);
+            if (typeof(T) == typeof(char))
+                return SpanHelpers.IndexOfAny(
+                    ref Unsafe.As<T, char>(ref MemoryMarshal.GetReference(span)),
+                    Unsafe.As<T, char>(ref value0),
+                    Unsafe.As<T, char>(ref value1),
+                    Unsafe.As<T, char>(ref value2),
+                    span.Length);
 
             return SpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), value0, value1, value2, span.Length);
         }
@@ -522,11 +535,67 @@ namespace System
             where T : IEquatable<T>
         {
             if (typeof(T) == typeof(byte))
+            {
                 return SpanHelpers.IndexOfAny(
                     ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(span)),
                     span.Length,
                     ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(values)),
                     values.Length);
+            }
+            if (typeof(T) == typeof(char))
+            {
+                ref var valueRef = ref Unsafe.As<T, char>(ref MemoryMarshal.GetReference(values));
+                if (values.Length == 5)
+                {
+                    // Length 5 is a common length for FileSystemName expression (", <, >, *, ?) and in preference to 2 as it has an explicit overload
+                    return SpanHelpers.IndexOfAny(
+                        ref Unsafe.As<T, char>(ref MemoryMarshal.GetReference(span)),
+                        valueRef,
+                        Unsafe.Add(ref valueRef, 1),
+                        Unsafe.Add(ref valueRef, 2),
+                        Unsafe.Add(ref valueRef, 3),
+                        Unsafe.Add(ref valueRef, 4),
+                        span.Length);
+                }
+                else if (values.Length == 2)
+                {
+                    // Length 2 is a common length for simple wildcards (*, ?),  directory separators (/, \), quotes (", '), brackets, etc
+                    return SpanHelpers.IndexOfAny(
+                        ref Unsafe.As<T, char>(ref MemoryMarshal.GetReference(span)),
+                        valueRef,
+                        Unsafe.Add(ref valueRef, 1),
+                        span.Length);
+                }
+                else if (values.Length == 4)
+                {
+                    // Length 4 before 3 as 3 has an explicit overload
+                    return SpanHelpers.IndexOfAny(
+                        ref Unsafe.As<T, char>(ref MemoryMarshal.GetReference(span)),
+                        valueRef,
+                        Unsafe.Add(ref valueRef, 1),
+                        Unsafe.Add(ref valueRef, 2),
+                        Unsafe.Add(ref valueRef, 3),
+                        span.Length);
+                }
+                else if (values.Length == 3)
+                {
+                    return SpanHelpers.IndexOfAny(
+                        ref Unsafe.As<T, char>(ref MemoryMarshal.GetReference(span)),
+                        valueRef,
+                        Unsafe.Add(ref valueRef, 1),
+                        Unsafe.Add(ref valueRef, 2),
+                        span.Length);
+                }
+                else if (values.Length == 1)
+                {
+                    // Length 1 last, as ctoring a ReadOnlySpan to call this overload for a single value
+                    // is already throwing away a bunch of performance vs just calling IndexOf
+                    return SpanHelpers.IndexOf(
+                        ref Unsafe.As<T, char>(ref MemoryMarshal.GetReference(span)),
+                        valueRef,
+                        span.Length);
+                }
+            }
 
             return SpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), span.Length, ref MemoryMarshal.GetReference(values), values.Length);
         }
@@ -547,6 +616,12 @@ namespace System
                     Unsafe.As<T, byte>(ref value0),
                     Unsafe.As<T, byte>(ref value1),
                     span.Length);
+            if (typeof(T) == typeof(char))
+                return SpanHelpers.IndexOfAny(
+                    ref Unsafe.As<T, char>(ref MemoryMarshal.GetReference(span)),
+                    Unsafe.As<T, char>(ref value0),
+                    Unsafe.As<T, char>(ref value1),
+                    span.Length);
 
             return SpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), value0, value1, span.Length);
         }
@@ -569,6 +644,13 @@ namespace System
                     Unsafe.As<T, byte>(ref value1),
                     Unsafe.As<T, byte>(ref value2),
                     span.Length);
+            if (typeof(T) == typeof(char))
+                return SpanHelpers.IndexOfAny(
+                    ref Unsafe.As<T, char>(ref MemoryMarshal.GetReference(span)),
+                    Unsafe.As<T, char>(ref value0),
+                    Unsafe.As<T, char>(ref value1),
+                    Unsafe.As<T, char>(ref value2),
+                    span.Length);
 
             return SpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), value0, value1, value2, span.Length);
         }
@@ -589,6 +671,61 @@ namespace System
                     ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(values)),
                     values.Length);
 
+            if (typeof(T) == typeof(char))
+            {
+                ref var valueRef = ref Unsafe.As<T, char>(ref MemoryMarshal.GetReference(values));
+                if (values.Length == 5)
+                {
+                    // Length 5 is a common length for FileSystemName expression (", <, >, *, ?) and in preference to 2 as it has an explicit overload
+                    return SpanHelpers.IndexOfAny(
+                        ref Unsafe.As<T, char>(ref MemoryMarshal.GetReference(span)),
+                        valueRef,
+                        Unsafe.Add(ref valueRef, 1),
+                        Unsafe.Add(ref valueRef, 2),
+                        Unsafe.Add(ref valueRef, 3),
+                        Unsafe.Add(ref valueRef, 4),
+                        span.Length);
+                }
+                else if (values.Length == 2)
+                {
+                    // Length 2 is a common length for simple wildcards (*, ?),  directory separators (/, \), quotes (", '), brackets, etc
+                    return SpanHelpers.IndexOfAny(
+                        ref Unsafe.As<T, char>(ref MemoryMarshal.GetReference(span)),
+                        valueRef,
+                        Unsafe.Add(ref valueRef, 1),
+                        span.Length);
+                }
+                else if (values.Length == 4)
+                {
+                    // Length 4 before 3 as 3 has an explicit overload
+                    return SpanHelpers.IndexOfAny(
+                        ref Unsafe.As<T, char>(ref MemoryMarshal.GetReference(span)),
+                        valueRef,
+                        Unsafe.Add(ref valueRef, 1),
+                        Unsafe.Add(ref valueRef, 2),
+                        Unsafe.Add(ref valueRef, 3),
+                        span.Length);
+                }
+                else if (values.Length == 3)
+                {
+                    return SpanHelpers.IndexOfAny(
+                        ref Unsafe.As<T, char>(ref MemoryMarshal.GetReference(span)),
+                        valueRef,
+                        Unsafe.Add(ref valueRef, 1),
+                        Unsafe.Add(ref valueRef, 2),
+                        span.Length);
+                }
+                else if (values.Length == 1)
+                {
+                    // Length 1 last, as ctoring a ReadOnlySpan to call this overload for a single value
+                    // is already throwing away a bunch of performance vs just calling IndexOf
+                    return SpanHelpers.IndexOf(
+                        ref Unsafe.As<T, char>(ref MemoryMarshal.GetReference(span)),
+                        valueRef,
+                        span.Length);
+                }
+            }
+
             return SpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), span.Length, ref MemoryMarshal.GetReference(values), values.Length);
         }
 
index 02df69f..40d0575 100644 (file)
@@ -189,13 +189,13 @@ namespace System
                 {
                     length -= 4;
 
-                    if (*pCh == value)
+                    if (pCh[0] == value)
                         goto Found;
-                    if (*(pCh + 1) == value)
+                    if (pCh[1] == value)
                         goto Found1;
-                    if (*(pCh + 2) == value)
+                    if (pCh[2] == value)
                         goto Found2;
-                    if (*(pCh + 3) == value)
+                    if (pCh[3] == value)
                         goto Found3;
 
                     pCh += 4;
@@ -203,12 +203,12 @@ namespace System
 
                 while (length > 0)
                 {
-                    length -= 1;
+                    length--;
 
-                    if (*pCh == value)
+                    if (pCh[0] == value)
                         goto Found;
 
-                    pCh += 1;
+                    pCh++;
                 }
 
                 // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
@@ -257,6 +257,410 @@ namespace System
             }
         }
 
+        public static unsafe int IndexOfAny(ref char searchSpace, char value0, char value1, int length)
+        {
+            Debug.Assert(length >= 0);
+
+            fixed (char* pChars = &searchSpace)
+            {
+                char* pCh = pChars;
+                char* pEndCh = pCh + length;
+
+                if (Vector.IsHardwareAccelerated && length >= Vector<ushort>.Count * 2)
+                {
+                    // Figure out how many characters to read sequentially until we are vector aligned
+                    // This is equivalent to:
+                    //         unaligned = ((int)pCh % Unsafe.SizeOf<Vector<ushort>>()) / elementsPerByte
+                    //         length = (Vector<ushort>.Count - unaligned) % Vector<ushort>.Count
+                    const int elementsPerByte = sizeof(ushort) / sizeof(byte);
+                    int unaligned = ((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) / elementsPerByte;
+                    length = (Vector<ushort>.Count - unaligned) & (Vector<ushort>.Count - 1);
+                }
+
+            SequentialScan:
+                while (length >= 4)
+                {
+                    length -= 4;
+
+                    if (pCh[0] == value0 || pCh[0] == value1)
+                        goto Found;
+                    if (pCh[1] == value0 || pCh[1] == value1)
+                        goto Found1;
+                    if (pCh[2] == value0 || pCh[2] == value1)
+                        goto Found2;
+                    if (pCh[3] == value0 || pCh[3] == value1)
+                        goto Found3;
+
+                    pCh += 4;
+                }
+
+                while (length > 0)
+                {
+                    length--;
+
+                    if (pCh[0] == value0 || pCh[0] == value1)
+                        goto Found;
+
+                    pCh++;
+                }
+
+                // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
+                // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated.
+                if (Vector.IsHardwareAccelerated && pCh < pEndCh)
+                {
+                    // Get the highest multiple of Vector<ushort>.Count that is within the search space.
+                    // That will be how many times we iterate in the loop below.
+                    // This is equivalent to: length = Vector<ushort>.Count * ((int)(pEndCh - pCh) / Vector<ushort>.Count)
+                    length = (int)((pEndCh - pCh) & ~(Vector<ushort>.Count - 1));
+
+                    // Get comparison Vector
+                    Vector<ushort> values0 = new Vector<ushort>(value0);
+                    Vector<ushort> values1 = new Vector<ushort>(value1);
+
+                    while (length > 0)
+                    {
+                        // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh is always vector aligned
+                        Debug.Assert(((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) == 0);
+                        Vector<ushort> vData = Unsafe.Read<Vector<ushort>>(pCh);
+                        var vMatches = Vector.BitwiseOr(
+                                        Vector.Equals(vData, values0),
+                                        Vector.Equals(vData, values1));
+                        if (Vector<ushort>.Zero.Equals(vMatches))
+                        {
+                            pCh += Vector<ushort>.Count;
+                            length -= Vector<ushort>.Count;
+                            continue;
+                        }
+                        // Find offset of first match
+                        return (int)(pCh - pChars) + LocateFirstFoundChar(vMatches);
+                    }
+
+                    if (pCh < pEndCh)
+                    {
+                        length = (int)(pEndCh - pCh);
+                        goto SequentialScan;
+                    }
+                }
+
+                return -1;
+            Found3:
+                pCh++;
+            Found2:
+                pCh++;
+            Found1:
+                pCh++;
+            Found:
+                return (int)(pCh - pChars);
+            }
+        }
+
+        public static unsafe int IndexOfAny(ref char searchSpace, char value0, char value1, char value2, int length)
+        {
+            Debug.Assert(length >= 0);
+
+            fixed (char* pChars = &searchSpace)
+            {
+                char* pCh = pChars;
+                char* pEndCh = pCh + length;
+
+                if (Vector.IsHardwareAccelerated && length >= Vector<ushort>.Count * 2)
+                {
+                    // Figure out how many characters to read sequentially until we are vector aligned
+                    // This is equivalent to:
+                    //         unaligned = ((int)pCh % Unsafe.SizeOf<Vector<ushort>>()) / elementsPerByte
+                    //         length = (Vector<ushort>.Count - unaligned) % Vector<ushort>.Count
+                    const int elementsPerByte = sizeof(ushort) / sizeof(byte);
+                    int unaligned = ((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) / elementsPerByte;
+                    length = (Vector<ushort>.Count - unaligned) & (Vector<ushort>.Count - 1);
+                }
+
+            SequentialScan:
+                while (length >= 4)
+                {
+                    length -= 4;
+
+                    if (pCh[0] == value0 || pCh[0] == value1 || pCh[0] == value2)
+                        goto Found;
+                    if (pCh[1] == value0 || pCh[1] == value1 || pCh[1] == value2)
+                        goto Found1;
+                    if (pCh[2] == value0 || pCh[2] == value1 || pCh[2] == value2)
+                        goto Found2;
+                    if (pCh[3] == value0 || pCh[3] == value1 || pCh[3] == value2)
+                        goto Found3;
+
+                    pCh += 4;
+                }
+
+                while (length > 0)
+                {
+                    length--;
+
+                    if (pCh[0] == value0 || pCh[0] == value1 || pCh[0] == value2)
+                        goto Found;
+
+                    pCh++;
+                }
+
+                // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
+                // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated.
+                if (Vector.IsHardwareAccelerated && pCh < pEndCh)
+                {
+                    // Get the highest multiple of Vector<ushort>.Count that is within the search space.
+                    // That will be how many times we iterate in the loop below.
+                    // This is equivalent to: length = Vector<ushort>.Count * ((int)(pEndCh - pCh) / Vector<ushort>.Count)
+                    length = (int)((pEndCh - pCh) & ~(Vector<ushort>.Count - 1));
+
+                    // Get comparison Vector
+                    Vector<ushort> values0 = new Vector<ushort>(value0);
+                    Vector<ushort> values1 = new Vector<ushort>(value1);
+                    Vector<ushort> values2 = new Vector<ushort>(value2);
+
+                    while (length > 0)
+                    {
+                        // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh is always vector aligned
+                        Debug.Assert(((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) == 0);
+                        Vector<ushort> vData = Unsafe.Read<Vector<ushort>>(pCh);
+                        var vMatches = Vector.BitwiseOr(
+                                        Vector.BitwiseOr(
+                                            Vector.Equals(vData, values0),
+                                            Vector.Equals(vData, values1)),
+                                        Vector.Equals(vData, values2));
+
+                        if (Vector<ushort>.Zero.Equals(vMatches))
+                        {
+                            pCh += Vector<ushort>.Count;
+                            length -= Vector<ushort>.Count;
+                            continue;
+                        }
+                        // Find offset of first match
+                        return (int)(pCh - pChars) + LocateFirstFoundChar(vMatches);
+                    }
+
+                    if (pCh < pEndCh)
+                    {
+                        length = (int)(pEndCh - pCh);
+                        goto SequentialScan;
+                    }
+                }
+                return -1;
+            Found3:
+                pCh++;
+            Found2:
+                pCh++;
+            Found1:
+                pCh++;
+            Found:
+                return (int)(pCh - pChars);
+            }
+        }
+
+        public static unsafe int IndexOfAny(ref char searchSpace, char value0, char value1, char value2, char value3, int length)
+        {
+            Debug.Assert(length >= 0);
+
+            fixed (char* pChars = &searchSpace)
+            {
+                char* pCh = pChars;
+                char* pEndCh = pCh + length;
+
+                if (Vector.IsHardwareAccelerated && length >= Vector<ushort>.Count * 2)
+                {
+                    // Figure out how many characters to read sequentially until we are vector aligned
+                    // This is equivalent to:
+                    //         unaligned = ((int)pCh % Unsafe.SizeOf<Vector<ushort>>()) / elementsPerByte
+                    //         length = (Vector<ushort>.Count - unaligned) % Vector<ushort>.Count
+                    const int elementsPerByte = sizeof(ushort) / sizeof(byte);
+                    int unaligned = ((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) / elementsPerByte;
+                    length = (Vector<ushort>.Count - unaligned) & (Vector<ushort>.Count - 1);
+                }
+
+            SequentialScan:
+                while (length >= 4)
+                {
+                    length -= 4;
+
+                    if (pCh[0] == value0 || pCh[0] == value1 || pCh[0] == value2 || pCh[0] == value3)
+                        goto Found;
+                    if (pCh[1] == value0 || pCh[1] == value1 || pCh[1] == value2 || pCh[1] == value3)
+                        goto Found1;
+                    if (pCh[2] == value0 || pCh[2] == value1 || pCh[2] == value2 || pCh[2] == value3)
+                        goto Found2;
+                    if (pCh[3] == value0 || pCh[3] == value1 || pCh[3] == value2 || pCh[3] == value3)
+                        goto Found3;
+
+                    pCh += 4;
+                }
+
+                while (length > 0)
+                {
+                    length--;
+
+                    if (pCh[0] == value0 || pCh[0] == value1 || pCh[0] == value2 || pCh[0] == value3)
+                        goto Found;
+
+                    pCh++;
+                }
+
+                // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
+                // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated.
+                if (Vector.IsHardwareAccelerated && pCh < pEndCh)
+                {
+                    // Get the highest multiple of Vector<ushort>.Count that is within the search space.
+                    // That will be how many times we iterate in the loop below.
+                    // This is equivalent to: length = Vector<ushort>.Count * ((int)(pEndCh - pCh) / Vector<ushort>.Count)
+                    length = (int)((pEndCh - pCh) & ~(Vector<ushort>.Count - 1));
+
+                    // Get comparison Vector
+                    Vector<ushort> values0 = new Vector<ushort>(value0);
+                    Vector<ushort> values1 = new Vector<ushort>(value1);
+                    Vector<ushort> values2 = new Vector<ushort>(value2);
+                    Vector<ushort> values3 = new Vector<ushort>(value3);
+
+                    while (length > 0)
+                    {
+                        // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh is always vector aligned
+                        Debug.Assert(((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) == 0);
+                        Vector<ushort> vData = Unsafe.Read<Vector<ushort>>(pCh);
+                        var vMatches = Vector.BitwiseOr(
+                                            Vector.BitwiseOr(
+                                                Vector.BitwiseOr(Vector.Equals(vData, values0), Vector.Equals(vData, values1)),
+                                                Vector.Equals(vData, values2)),
+                                            Vector.Equals(vData, values3));
+
+                        if (Vector<ushort>.Zero.Equals(vMatches))
+                        {
+                            pCh += Vector<ushort>.Count;
+                            length -= Vector<ushort>.Count;
+                            continue;
+                        }
+                        // Find offset of first match
+                        return (int)(pCh - pChars) + LocateFirstFoundChar(vMatches);
+                    }
+
+                    if (pCh < pEndCh)
+                    {
+                        length = (int)(pEndCh - pCh);
+                        goto SequentialScan;
+                    }
+                }
+
+                return -1;
+            Found3:
+                pCh++;
+            Found2:
+                pCh++;
+            Found1:
+                pCh++;
+            Found:
+                return (int)(pCh - pChars);
+            }
+        }
+
+        public static unsafe int IndexOfAny(ref char searchSpace, char value0, char value1, char value2, char value3, char value4, int length)
+        {
+            Debug.Assert(length >= 0);
+
+            fixed (char* pChars = &searchSpace)
+            {
+                char* pCh = pChars;
+                char* pEndCh = pCh + length;
+
+                if (Vector.IsHardwareAccelerated && length >= Vector<ushort>.Count * 2)
+                {
+                    // Figure out how many characters to read sequentially until we are vector aligned
+                    // This is equivalent to:
+                    //         unaligned = ((int)pCh % Unsafe.SizeOf<Vector<ushort>>()) / elementsPerByte
+                    //         length = (Vector<ushort>.Count - unaligned) % Vector<ushort>.Count
+                    const int elementsPerByte = sizeof(ushort) / sizeof(byte);
+                    int unaligned = ((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) / elementsPerByte;
+                    length = (Vector<ushort>.Count - unaligned) & (Vector<ushort>.Count - 1);
+                }
+
+            SequentialScan:
+                while (length >= 4)
+                {
+                    length -= 4;
+
+                    if (pCh[0] == value0 || pCh[0] == value1 || pCh[0] == value2 || pCh[0] == value3 || pCh[0] == value4)
+                        goto Found;
+                    if (pCh[1] == value0 || pCh[1] == value1 || pCh[1] == value2 || pCh[1] == value3 || pCh[1] == value4)
+                        goto Found1;
+                    if (pCh[2] == value0 || pCh[2] == value1 || pCh[2] == value2 || pCh[2] == value3 || pCh[2] == value4)
+                        goto Found2;
+                    if (pCh[3] == value0 || pCh[3] == value1 || pCh[3] == value2 || pCh[3] == value3 || pCh[3] == value4)
+                        goto Found3;
+
+                    pCh += 4;
+                }
+
+                while (length > 0)
+                {
+                    length--;
+
+                    if (pCh[0] == value0 || pCh[0] == value1 || pCh[0] == value2 || pCh[0] == value3 || pCh[0] == value4)
+                        goto Found;
+
+                    pCh++;
+                }
+
+                // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
+                // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated.
+                if (Vector.IsHardwareAccelerated && pCh < pEndCh)
+                {
+                    // Get the highest multiple of Vector<ushort>.Count that is within the search space.
+                    // That will be how many times we iterate in the loop below.
+                    // This is equivalent to: length = Vector<ushort>.Count * ((int)(pEndCh - pCh) / Vector<ushort>.Count)
+                    length = (int)((pEndCh - pCh) & ~(Vector<ushort>.Count - 1));
+
+                    // Get comparison Vector
+                    Vector<ushort> values0 = new Vector<ushort>(value0);
+                    Vector<ushort> values1 = new Vector<ushort>(value1);
+                    Vector<ushort> values2 = new Vector<ushort>(value2);
+                    Vector<ushort> values3 = new Vector<ushort>(value3);
+                    Vector<ushort> values4 = new Vector<ushort>(value4);
+
+                    while (length > 0)
+                    {
+                        // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh is always vector aligned
+                        Debug.Assert(((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) == 0);
+                        Vector<ushort> vData = Unsafe.Read<Vector<ushort>>(pCh);
+                        var vMatches = Vector.BitwiseOr(
+                                            Vector.BitwiseOr(
+                                                Vector.BitwiseOr(
+                                                    Vector.BitwiseOr(Vector.Equals(vData, values0), Vector.Equals(vData, values1)),
+                                                    Vector.Equals(vData, values2)),
+                                                Vector.Equals(vData, values3)),
+                                            Vector.Equals(vData, values4));
+
+                        if (Vector<ushort>.Zero.Equals(vMatches))
+                        {
+                            pCh += Vector<ushort>.Count;
+                            length -= Vector<ushort>.Count;
+                            continue;
+                        }
+                        // Find offset of first match
+                        return (int)(pCh - pChars) + LocateFirstFoundChar(vMatches);
+                    }
+
+                    if (pCh < pEndCh)
+                    {
+                        length = (int)(pEndCh - pCh);
+                        goto SequentialScan;
+                    }
+                }
+
+                return -1;
+            Found3:
+                pCh++;
+            Found2:
+                pCh++;
+            Found1:
+                pCh++;
+            Found:
+                return (int)(pCh - pChars);
+            }
+        }
+
         public static unsafe int LastIndexOf(ref char searchSpace, char value, int length)
         {
             Debug.Assert(length >= 0);
index 7660c52..e288b2c 100644 (file)
@@ -99,78 +99,23 @@ namespace System
             if ((uint)count > (uint)(Length - startIndex))
                 throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
 
-            if (anyOf.Length == 2)
+            if (anyOf.Length > 0 && anyOf.Length <= 5)
             {
-                // Very common optimization for directory separators (/, \), quotes (", '), brackets, etc
-                return IndexOfAny(anyOf[0], anyOf[1], startIndex, count);
+                // The ReadOnlySpan.IndexOfAny extension is vectorized for values of 1 - 5 in length
+                var result = new ReadOnlySpan<char>(ref Unsafe.Add(ref _firstChar, startIndex), count).IndexOfAny(anyOf);
+                return result == -1 ? result : result + startIndex;
             }
-            else if (anyOf.Length == 3)
-            {
-                return IndexOfAny(anyOf[0], anyOf[1], anyOf[2], startIndex, count);
-            }
-            else if (anyOf.Length > 3)
+            else if (anyOf.Length > 5)
             {
+                // Use Probabilistic Map
                 return IndexOfCharArray(anyOf, startIndex, count);
             }
-            else if (anyOf.Length == 1)
-            {
-                return IndexOf(anyOf[0], startIndex, count);
-            }
             else // anyOf.Length == 0
             {
                 return -1;
             }
         }
 
-        private unsafe int IndexOfAny(char value1, char value2, int startIndex, int count)
-        {
-            fixed (char* pChars = &_firstChar)
-            {
-                char* pCh = pChars + startIndex;
-
-                while (count > 0)
-                {
-                    char c = *pCh;
-
-                    if (c == value1 || c == value2)
-                        return (int)(pCh - pChars);
-
-                    // Possibly reads outside of count and can include null terminator
-                    // Handled in the return logic
-                    c = *(pCh + 1);
-
-                    if (c == value1 || c == value2)
-                        return (count == 1 ? -1 : (int)(pCh - pChars) + 1);
-
-                    pCh += 2;
-                    count -= 2;
-                }
-
-                return -1;
-            }
-        }
-
-        private unsafe int IndexOfAny(char value1, char value2, char value3, int startIndex, int count)
-        {
-            fixed (char* pChars = &_firstChar)
-            {
-                char* pCh = pChars + startIndex;
-
-                while (count > 0)
-                {
-                    char c = *pCh;
-
-                    if (c == value1 || c == value2 || c == value3)
-                        return (int)(pCh - pChars);
-
-                    pCh++;
-                    count--;
-                }
-
-                return -1;
-            }
-        }
-
         private unsafe int IndexOfCharArray(char[] anyOf, int startIndex, int count)
         {
             // use probabilistic map, see InitializeProbabilisticMap