Delete duplicative fallback paths from Ascii (#90390)
authorStephen Toub <stoub@microsoft.com>
Fri, 11 Aug 2023 17:21:20 +0000 (13:21 -0400)
committerGitHub <noreply@github.com>
Fri, 11 Aug 2023 17:21:20 +0000 (13:21 -0400)
* Delete duplicative fallback paths from Ascii

GetIndexOfFirstNonAsciiChar has four different paths it takes: one for Vector512/256, one for Ssse2/AdvSimd, one for Vector128, and one for everything else. But the Vector512/256 path is capable of both Vector128 and scalar, which obsoletes the default fallback case which uses `Vector<T>` and a scalar path.  We can entirely delete the "_Default" path, and consolidate the calls to the Vector512/256/128/scalar path.

GetIndexOfFirstNonAsciiByte is similar.  Its "_Default" doesn't use `Vector<T>`, but it's still duplicative and deletable.

* Address PR feedback

src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs

index 6b4eb2e3e13909ff78045685f6a010562e6be983..ba07b985b6705842e3d4fc9dcc1655b2abf296ec 100644 (file)
@@ -96,26 +96,21 @@ namespace System.Text
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint bufferLength)
         {
-            // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized
-            // code below. This has two benefits: (a) we can take advantage of specific instructions like
-            // pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while
+            // If 256/512-bit aren't supported but SSE2 is supported, use those specific intrinsics instead of
+            // the generic vectorized code. This has two benefits: (a) we can take advantage of specific instructions
+            // like pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while
             // this method is running.
 
-            if (Vector512.IsHardwareAccelerated || Vector256.IsHardwareAccelerated)
-            {
-                return GetIndexOfFirstNonAsciiByte_Vector(pBuffer, bufferLength);
-            }
-            else if (Sse2.IsSupported || (AdvSimd.IsSupported && BitConverter.IsLittleEndian))
+            if (!Vector512.IsHardwareAccelerated &&
+                !Vector256.IsHardwareAccelerated &&
+                (Sse2.IsSupported || AdvSimd.IsSupported))
             {
                 return GetIndexOfFirstNonAsciiByte_Intrinsified(pBuffer, bufferLength);
             }
-            else if (Vector128.IsHardwareAccelerated)
-            {
-                return GetIndexOfFirstNonAsciiByte_Vector(pBuffer, bufferLength);
-            }
             else
             {
-                return GetIndexOfFirstNonAsciiByte_Default(pBuffer, bufferLength);
+                // Handles Vector512, Vector256, Vector128, and scalar.
+                return GetIndexOfFirstNonAsciiByte_Vector(pBuffer, bufferLength);
             }
         }
 
@@ -134,7 +129,6 @@ namespace System.Text
 
             if (Vector512.IsHardwareAccelerated && bufferLength >= 2 * (uint)Vector512<byte>.Count)
             {
-
                 if (Vector512.Load(pBuffer).ExtractMostSignificantBits() == 0)
                 {
                     // The first several elements of the input buffer were ASCII. Bump up the pointer to the
@@ -171,7 +165,6 @@ namespace System.Text
             }
             else if (Vector256.IsHardwareAccelerated && bufferLength >= 2 * (uint)Vector256<byte>.Count)
             {
-
                 if (Vector256.Load(pBuffer).ExtractMostSignificantBits() == 0)
                 {
                     // The first several elements of the input buffer were ASCII. Bump up the pointer to the
@@ -208,7 +201,6 @@ namespace System.Text
             }
             else if (Vector128.IsHardwareAccelerated && bufferLength >= 2 * (uint)Vector128<byte>.Count)
             {
-
                 if (!VectorContainsNonAsciiChar(Vector128.Load(pBuffer)))
                 {
                     // The first several elements of the input buffer were ASCII. Bump up the pointer to the
@@ -339,105 +331,6 @@ namespace System.Text
             goto Finish;
         }
 
-        private static unsafe nuint GetIndexOfFirstNonAsciiByte_Default(byte* pBuffer, nuint bufferLength)
-        {
-            // Squirrel away the original buffer reference. This method works by determining the exact
-            // byte reference where non-ASCII data begins, so we need this base value to perform the
-            // final subtraction at the end of the method to get the index into the original buffer.
-
-            byte* pOriginalBuffer = pBuffer;
-
-            // We're going to perform unaligned reads, so prefer 32-bit reads instead of 64-bit reads.
-            // This also allows us to perform more optimized bit twiddling tricks to count the number of ASCII bytes.
-
-            uint currentUInt32;
-
-            // Try reading 64 bits at a time in a loop.
-
-            for (; bufferLength >= 8; bufferLength -= 8)
-            {
-                currentUInt32 = Unsafe.ReadUnaligned<uint>(pBuffer);
-                uint nextUInt32 = Unsafe.ReadUnaligned<uint>(pBuffer + 4);
-
-                if (!AllBytesInUInt32AreAscii(currentUInt32 | nextUInt32))
-                {
-                    // One of these two values contains non-ASCII bytes.
-                    // Figure out which one it is, then put it in 'current' so that we can drain the ASCII bytes.
-
-                    if (AllBytesInUInt32AreAscii(currentUInt32))
-                    {
-                        currentUInt32 = nextUInt32;
-                        pBuffer += 4;
-                    }
-
-                    goto FoundNonAsciiData;
-                }
-
-                pBuffer += 8; // consumed 8 ASCII bytes
-            }
-
-            // From this point forward we don't need to update bufferLength.
-            // Try reading 32 bits.
-
-            if ((bufferLength & 4) != 0)
-            {
-                currentUInt32 = Unsafe.ReadUnaligned<uint>(pBuffer);
-                if (!AllBytesInUInt32AreAscii(currentUInt32))
-                {
-                    goto FoundNonAsciiData;
-                }
-
-                pBuffer += 4;
-            }
-
-            // Try reading 16 bits.
-
-            if ((bufferLength & 2) != 0)
-            {
-                currentUInt32 = Unsafe.ReadUnaligned<ushort>(pBuffer);
-                if (!AllBytesInUInt32AreAscii(currentUInt32))
-                {
-                    if (!BitConverter.IsLittleEndian)
-                    {
-                        currentUInt32 <<= 16;
-                    }
-                    goto FoundNonAsciiData;
-                }
-
-                pBuffer += 2;
-            }
-
-            // Try reading 8 bits
-
-            if ((bufferLength & 1) != 0)
-            {
-                // If the buffer contains non-ASCII data, the comparison below will fail, and
-                // we'll end up not incrementing the buffer reference.
-
-                if (*(sbyte*)pBuffer >= 0)
-                {
-                    pBuffer++;
-                }
-            }
-
-        Finish:
-
-            nuint totalNumBytesRead = (nuint)pBuffer - (nuint)pOriginalBuffer;
-            return totalNumBytesRead;
-
-        FoundNonAsciiData:
-
-            Debug.Assert(!AllBytesInUInt32AreAscii(currentUInt32), "Shouldn't have reached this point if we have an all-ASCII input.");
-
-            // The method being called doesn't bother looking at whether the high byte is ASCII. There are only
-            // two scenarios: (a) either one of the earlier bytes is not ASCII and the search terminates before
-            // we get to the high byte; or (b) all of the earlier bytes are ASCII, so the high byte must be
-            // non-ASCII. In both cases we only care about the low 24 bits.
-
-            pBuffer += CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiData(currentUInt32);
-            goto Finish;
-        }
-
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static bool ContainsNonAsciiByte_Sse2(uint sseMask)
         {
@@ -817,22 +710,21 @@ namespace System.Text
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal static unsafe nuint GetIndexOfFirstNonAsciiChar(char* pBuffer, nuint bufferLength /* in chars */)
         {
-            // If SSE2/ASIMD is supported, use those specific intrinsics instead of the generic vectorized
-            // code below. This has two benefits: (a) we can take advantage of specific instructions like
-            // pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while
+            // If 256/512-bit aren't supported but SSE2/ASIMD is supported, use those specific intrinsics instead of
+            // the generic vectorized code. This has two benefits: (a) we can take advantage of specific instructions
+            // like pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while
             // this method is running.
 
-            if (Vector512.IsHardwareAccelerated || Vector256.IsHardwareAccelerated)
-            {
-                return GetIndexOfFirstNonAsciiChar_Vector(pBuffer, bufferLength);
-            }
-            else if (Sse2.IsSupported || (AdvSimd.IsSupported && BitConverter.IsLittleEndian))
+            if (!Vector512.IsHardwareAccelerated &&
+                !Vector256.IsHardwareAccelerated &&
+                (Sse2.IsSupported || AdvSimd.IsSupported))
             {
                 return GetIndexOfFirstNonAsciiChar_Intrinsified(pBuffer, bufferLength);
             }
             else
             {
-                return GetIndexOfFirstNonAsciiChar_Default(pBuffer, bufferLength);
+                // Handles Vector512, Vector256, Vector128, and scalar.
+                return GetIndexOfFirstNonAsciiChar_Vector(pBuffer, bufferLength);
             }
         }
 
@@ -1039,142 +931,6 @@ namespace System.Text
             goto Finish;
         }
 
-        private static unsafe nuint GetIndexOfFirstNonAsciiChar_Default(char* pBuffer, nuint bufferLength /* in chars */)
-        {
-            // Squirrel away the original buffer reference.This method works by determining the exact
-            // char reference where non-ASCII data begins, so we need this base value to perform the
-            // final subtraction at the end of the method to get the index into the original buffer.
-
-            char* pOriginalBuffer = pBuffer;
-
-#if SYSTEM_PRIVATE_CORELIB
-            Debug.Assert(bufferLength <= nuint.MaxValue / sizeof(char));
-#endif
-
-            // Before we drain off char-by-char, try a generic vectorized loop.
-            // Only run the loop if we have at least two vectors we can pull out.
-
-            if (Vector.IsHardwareAccelerated && bufferLength >= 2 * (uint)Vector<ushort>.Count)
-            {
-                uint SizeOfVectorInChars = (uint)Vector<ushort>.Count; // JIT will make this a const
-                uint SizeOfVectorInBytes = (uint)Vector<byte>.Count; // JIT will make this a const
-
-                Vector<ushort> maxAscii = new Vector<ushort>(0x007F);
-
-                if (Vector.LessThanOrEqualAll(Unsafe.ReadUnaligned<Vector<ushort>>(pBuffer), maxAscii))
-                {
-                    // The first several elements of the input buffer were ASCII. Bump up the pointer to the
-                    // next aligned boundary, then perform aligned reads from here on out until we find non-ASCII
-                    // data or we approach the end of the buffer. It's possible we'll reread data; this is ok.
-
-                    char* pFinalVectorReadPos = pBuffer + bufferLength - SizeOfVectorInChars;
-                    pBuffer = (char*)(((nuint)pBuffer + SizeOfVectorInBytes) & ~(nuint)(SizeOfVectorInBytes - 1));
-
-#if DEBUG
-                    long numCharsRead = pBuffer - pOriginalBuffer;
-                    Debug.Assert(0 < numCharsRead && numCharsRead <= SizeOfVectorInChars, "We should've made forward progress of at least one char.");
-                    Debug.Assert((nuint)numCharsRead <= bufferLength, "We shouldn't have read past the end of the input buffer.");
-#endif
-
-                    Debug.Assert(pBuffer <= pFinalVectorReadPos, "Should be able to read at least one vector.");
-
-                    do
-                    {
-                        Debug.Assert((nuint)pBuffer % SizeOfVectorInBytes == 0, "Vector read should be aligned.");
-                        if (Vector.GreaterThanAny(Unsafe.Read<Vector<ushort>>(pBuffer), maxAscii))
-                        {
-                            break; // found non-ASCII data
-                        }
-                        pBuffer += SizeOfVectorInChars;
-                    } while (pBuffer <= pFinalVectorReadPos);
-
-                    // Adjust the remaining buffer length for the number of elements we just consumed.
-
-                    bufferLength -= ((nuint)pBuffer - (nuint)pOriginalBuffer) / sizeof(char);
-                }
-            }
-
-            // At this point, the buffer length wasn't enough to perform a vectorized search, or we did perform
-            // a vectorized search and encountered non-ASCII data. In either case go down a non-vectorized code
-            // path to drain any remaining ASCII chars.
-            //
-            // We're going to perform unaligned reads, so prefer 32-bit reads instead of 64-bit reads.
-            // This also allows us to perform more optimized bit twiddling tricks to count the number of ASCII chars.
-
-            uint currentUInt32;
-
-            // Try reading 64 bits at a time in a loop.
-
-            for (; bufferLength >= 4; bufferLength -= 4) // 64 bits = 4 * 16-bit chars
-            {
-                currentUInt32 = Unsafe.ReadUnaligned<uint>(pBuffer);
-                uint nextUInt32 = Unsafe.ReadUnaligned<uint>(pBuffer + 4 / sizeof(char));
-
-                if (!AllCharsInUInt32AreAscii(currentUInt32 | nextUInt32))
-                {
-                    // One of these two values contains non-ASCII chars.
-                    // Figure out which one it is, then put it in 'current' so that we can drain the ASCII chars.
-
-                    if (AllCharsInUInt32AreAscii(currentUInt32))
-                    {
-                        currentUInt32 = nextUInt32;
-                        pBuffer += 2;
-                    }
-
-                    goto FoundNonAsciiData;
-                }
-
-                pBuffer += 4; // consumed 4 ASCII chars
-            }
-
-            // From this point forward we don't need to keep track of the remaining buffer length.
-            // Try reading 32 bits.
-
-            if ((bufferLength & 2) != 0) // 32 bits = 2 * 16-bit chars
-            {
-                currentUInt32 = Unsafe.ReadUnaligned<uint>(pBuffer);
-                if (!AllCharsInUInt32AreAscii(currentUInt32))
-                {
-                    goto FoundNonAsciiData;
-                }
-
-                pBuffer += 2;
-            }
-
-            // Try reading 16 bits.
-            // No need to try an 8-bit read after this since we're working with chars.
-
-            if ((bufferLength & 1) != 0)
-            {
-                // If the buffer contains non-ASCII data, the comparison below will fail, and
-                // we'll end up not incrementing the buffer reference.
-
-                if (*pBuffer <= 0x007F)
-                {
-                    pBuffer++;
-                }
-            }
-
-        Finish:
-
-            nuint totalNumBytesRead = (nuint)pBuffer - (nuint)pOriginalBuffer;
-            Debug.Assert(totalNumBytesRead % sizeof(char) == 0, "Total number of bytes read should be even since we're working with chars.");
-            return totalNumBytesRead / sizeof(char); // convert byte count -> char count before returning
-
-        FoundNonAsciiData:
-
-            Debug.Assert(!AllCharsInUInt32AreAscii(currentUInt32), "Shouldn't have reached this point if we have an all-ASCII input.");
-
-            // We don't bother looking at the second char - only the first char.
-
-            if (FirstCharInUInt32IsAscii(currentUInt32))
-            {
-                pBuffer++;
-            }
-
-            goto Finish;
-        }
-
         private static unsafe nuint GetIndexOfFirstNonAsciiChar_Intrinsified(char* pBuffer, nuint bufferLength /* in chars */)
         {
             // This method contains logic optimized using vector instructions for both x64 and Arm64.