[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint bufferLength)
{
- // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized
- // code below. This has two benefits: (a) we can take advantage of specific instructions like
- // pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while
+ // If 256/512-bit aren't supported but SSE2 is supported, use those specific intrinsics instead of
+ // the generic vectorized code. This has two benefits: (a) we can take advantage of specific instructions
+ // like pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while
// this method is running.
- if (Vector512.IsHardwareAccelerated || Vector256.IsHardwareAccelerated)
- {
- return GetIndexOfFirstNonAsciiByte_Vector(pBuffer, bufferLength);
- }
- else if (Sse2.IsSupported || (AdvSimd.IsSupported && BitConverter.IsLittleEndian))
+ if (!Vector512.IsHardwareAccelerated &&
+ !Vector256.IsHardwareAccelerated &&
+ (Sse2.IsSupported || AdvSimd.IsSupported))
{
return GetIndexOfFirstNonAsciiByte_Intrinsified(pBuffer, bufferLength);
}
- else if (Vector128.IsHardwareAccelerated)
- {
- return GetIndexOfFirstNonAsciiByte_Vector(pBuffer, bufferLength);
- }
else
{
- return GetIndexOfFirstNonAsciiByte_Default(pBuffer, bufferLength);
+ // Handles Vector512, Vector256, Vector128, and scalar.
+ return GetIndexOfFirstNonAsciiByte_Vector(pBuffer, bufferLength);
}
}
if (Vector512.IsHardwareAccelerated && bufferLength >= 2 * (uint)Vector512<byte>.Count)
{
-
if (Vector512.Load(pBuffer).ExtractMostSignificantBits() == 0)
{
// The first several elements of the input buffer were ASCII. Bump up the pointer to the
}
else if (Vector256.IsHardwareAccelerated && bufferLength >= 2 * (uint)Vector256<byte>.Count)
{
-
if (Vector256.Load(pBuffer).ExtractMostSignificantBits() == 0)
{
// The first several elements of the input buffer were ASCII. Bump up the pointer to the
}
else if (Vector128.IsHardwareAccelerated && bufferLength >= 2 * (uint)Vector128<byte>.Count)
{
-
if (!VectorContainsNonAsciiChar(Vector128.Load(pBuffer)))
{
// The first several elements of the input buffer were ASCII. Bump up the pointer to the
goto Finish;
}
- private static unsafe nuint GetIndexOfFirstNonAsciiByte_Default(byte* pBuffer, nuint bufferLength)
- {
- // Squirrel away the original buffer reference. This method works by determining the exact
- // byte reference where non-ASCII data begins, so we need this base value to perform the
- // final subtraction at the end of the method to get the index into the original buffer.
-
- byte* pOriginalBuffer = pBuffer;
-
- // We're going to perform unaligned reads, so prefer 32-bit reads instead of 64-bit reads.
- // This also allows us to perform more optimized bit twiddling tricks to count the number of ASCII bytes.
-
- uint currentUInt32;
-
- // Try reading 64 bits at a time in a loop.
-
- for (; bufferLength >= 8; bufferLength -= 8)
- {
- currentUInt32 = Unsafe.ReadUnaligned<uint>(pBuffer);
- uint nextUInt32 = Unsafe.ReadUnaligned<uint>(pBuffer + 4);
-
- if (!AllBytesInUInt32AreAscii(currentUInt32 | nextUInt32))
- {
- // One of these two values contains non-ASCII bytes.
- // Figure out which one it is, then put it in 'current' so that we can drain the ASCII bytes.
-
- if (AllBytesInUInt32AreAscii(currentUInt32))
- {
- currentUInt32 = nextUInt32;
- pBuffer += 4;
- }
-
- goto FoundNonAsciiData;
- }
-
- pBuffer += 8; // consumed 8 ASCII bytes
- }
-
- // From this point forward we don't need to update bufferLength.
- // Try reading 32 bits.
-
- if ((bufferLength & 4) != 0)
- {
- currentUInt32 = Unsafe.ReadUnaligned<uint>(pBuffer);
- if (!AllBytesInUInt32AreAscii(currentUInt32))
- {
- goto FoundNonAsciiData;
- }
-
- pBuffer += 4;
- }
-
- // Try reading 16 bits.
-
- if ((bufferLength & 2) != 0)
- {
- currentUInt32 = Unsafe.ReadUnaligned<ushort>(pBuffer);
- if (!AllBytesInUInt32AreAscii(currentUInt32))
- {
- if (!BitConverter.IsLittleEndian)
- {
- currentUInt32 <<= 16;
- }
- goto FoundNonAsciiData;
- }
-
- pBuffer += 2;
- }
-
- // Try reading 8 bits
-
- if ((bufferLength & 1) != 0)
- {
- // If the buffer contains non-ASCII data, the comparison below will fail, and
- // we'll end up not incrementing the buffer reference.
-
- if (*(sbyte*)pBuffer >= 0)
- {
- pBuffer++;
- }
- }
-
- Finish:
-
- nuint totalNumBytesRead = (nuint)pBuffer - (nuint)pOriginalBuffer;
- return totalNumBytesRead;
-
- FoundNonAsciiData:
-
- Debug.Assert(!AllBytesInUInt32AreAscii(currentUInt32), "Shouldn't have reached this point if we have an all-ASCII input.");
-
- // The method being called doesn't bother looking at whether the high byte is ASCII. There are only
- // two scenarios: (a) either one of the earlier bytes is not ASCII and the search terminates before
- // we get to the high byte; or (b) all of the earlier bytes are ASCII, so the high byte must be
- // non-ASCII. In both cases we only care about the low 24 bits.
-
- pBuffer += CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiData(currentUInt32);
- goto Finish;
- }
-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool ContainsNonAsciiByte_Sse2(uint sseMask)
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static unsafe nuint GetIndexOfFirstNonAsciiChar(char* pBuffer, nuint bufferLength /* in chars */)
{
- // If SSE2/ASIMD is supported, use those specific intrinsics instead of the generic vectorized
- // code below. This has two benefits: (a) we can take advantage of specific instructions like
- // pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while
+ // If 256/512-bit aren't supported but SSE2/ASIMD is supported, use those specific intrinsics instead of
+ // the generic vectorized code. This has two benefits: (a) we can take advantage of specific instructions
+ // like pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while
// this method is running.
- if (Vector512.IsHardwareAccelerated || Vector256.IsHardwareAccelerated)
- {
- return GetIndexOfFirstNonAsciiChar_Vector(pBuffer, bufferLength);
- }
- else if (Sse2.IsSupported || (AdvSimd.IsSupported && BitConverter.IsLittleEndian))
+ if (!Vector512.IsHardwareAccelerated &&
+ !Vector256.IsHardwareAccelerated &&
+ (Sse2.IsSupported || AdvSimd.IsSupported))
{
return GetIndexOfFirstNonAsciiChar_Intrinsified(pBuffer, bufferLength);
}
else
{
- return GetIndexOfFirstNonAsciiChar_Default(pBuffer, bufferLength);
+ // Handles Vector512, Vector256, Vector128, and scalar.
+ return GetIndexOfFirstNonAsciiChar_Vector(pBuffer, bufferLength);
}
}
goto Finish;
}
- private static unsafe nuint GetIndexOfFirstNonAsciiChar_Default(char* pBuffer, nuint bufferLength /* in chars */)
- {
- // Squirrel away the original buffer reference.This method works by determining the exact
- // char reference where non-ASCII data begins, so we need this base value to perform the
- // final subtraction at the end of the method to get the index into the original buffer.
-
- char* pOriginalBuffer = pBuffer;
-
-#if SYSTEM_PRIVATE_CORELIB
- Debug.Assert(bufferLength <= nuint.MaxValue / sizeof(char));
-#endif
-
- // Before we drain off char-by-char, try a generic vectorized loop.
- // Only run the loop if we have at least two vectors we can pull out.
-
- if (Vector.IsHardwareAccelerated && bufferLength >= 2 * (uint)Vector<ushort>.Count)
- {
- uint SizeOfVectorInChars = (uint)Vector<ushort>.Count; // JIT will make this a const
- uint SizeOfVectorInBytes = (uint)Vector<byte>.Count; // JIT will make this a const
-
- Vector<ushort> maxAscii = new Vector<ushort>(0x007F);
-
- if (Vector.LessThanOrEqualAll(Unsafe.ReadUnaligned<Vector<ushort>>(pBuffer), maxAscii))
- {
- // The first several elements of the input buffer were ASCII. Bump up the pointer to the
- // next aligned boundary, then perform aligned reads from here on out until we find non-ASCII
- // data or we approach the end of the buffer. It's possible we'll reread data; this is ok.
-
- char* pFinalVectorReadPos = pBuffer + bufferLength - SizeOfVectorInChars;
- pBuffer = (char*)(((nuint)pBuffer + SizeOfVectorInBytes) & ~(nuint)(SizeOfVectorInBytes - 1));
-
-#if DEBUG
- long numCharsRead = pBuffer - pOriginalBuffer;
- Debug.Assert(0 < numCharsRead && numCharsRead <= SizeOfVectorInChars, "We should've made forward progress of at least one char.");
- Debug.Assert((nuint)numCharsRead <= bufferLength, "We shouldn't have read past the end of the input buffer.");
-#endif
-
- Debug.Assert(pBuffer <= pFinalVectorReadPos, "Should be able to read at least one vector.");
-
- do
- {
- Debug.Assert((nuint)pBuffer % SizeOfVectorInBytes == 0, "Vector read should be aligned.");
- if (Vector.GreaterThanAny(Unsafe.Read<Vector<ushort>>(pBuffer), maxAscii))
- {
- break; // found non-ASCII data
- }
- pBuffer += SizeOfVectorInChars;
- } while (pBuffer <= pFinalVectorReadPos);
-
- // Adjust the remaining buffer length for the number of elements we just consumed.
-
- bufferLength -= ((nuint)pBuffer - (nuint)pOriginalBuffer) / sizeof(char);
- }
- }
-
- // At this point, the buffer length wasn't enough to perform a vectorized search, or we did perform
- // a vectorized search and encountered non-ASCII data. In either case go down a non-vectorized code
- // path to drain any remaining ASCII chars.
- //
- // We're going to perform unaligned reads, so prefer 32-bit reads instead of 64-bit reads.
- // This also allows us to perform more optimized bit twiddling tricks to count the number of ASCII chars.
-
- uint currentUInt32;
-
- // Try reading 64 bits at a time in a loop.
-
- for (; bufferLength >= 4; bufferLength -= 4) // 64 bits = 4 * 16-bit chars
- {
- currentUInt32 = Unsafe.ReadUnaligned<uint>(pBuffer);
- uint nextUInt32 = Unsafe.ReadUnaligned<uint>(pBuffer + 4 / sizeof(char));
-
- if (!AllCharsInUInt32AreAscii(currentUInt32 | nextUInt32))
- {
- // One of these two values contains non-ASCII chars.
- // Figure out which one it is, then put it in 'current' so that we can drain the ASCII chars.
-
- if (AllCharsInUInt32AreAscii(currentUInt32))
- {
- currentUInt32 = nextUInt32;
- pBuffer += 2;
- }
-
- goto FoundNonAsciiData;
- }
-
- pBuffer += 4; // consumed 4 ASCII chars
- }
-
- // From this point forward we don't need to keep track of the remaining buffer length.
- // Try reading 32 bits.
-
- if ((bufferLength & 2) != 0) // 32 bits = 2 * 16-bit chars
- {
- currentUInt32 = Unsafe.ReadUnaligned<uint>(pBuffer);
- if (!AllCharsInUInt32AreAscii(currentUInt32))
- {
- goto FoundNonAsciiData;
- }
-
- pBuffer += 2;
- }
-
- // Try reading 16 bits.
- // No need to try an 8-bit read after this since we're working with chars.
-
- if ((bufferLength & 1) != 0)
- {
- // If the buffer contains non-ASCII data, the comparison below will fail, and
- // we'll end up not incrementing the buffer reference.
-
- if (*pBuffer <= 0x007F)
- {
- pBuffer++;
- }
- }
-
- Finish:
-
- nuint totalNumBytesRead = (nuint)pBuffer - (nuint)pOriginalBuffer;
- Debug.Assert(totalNumBytesRead % sizeof(char) == 0, "Total number of bytes read should be even since we're working with chars.");
- return totalNumBytesRead / sizeof(char); // convert byte count -> char count before returning
-
- FoundNonAsciiData:
-
- Debug.Assert(!AllCharsInUInt32AreAscii(currentUInt32), "Shouldn't have reached this point if we have an all-ASCII input.");
-
- // We don't bother looking at the second char - only the first char.
-
- if (FirstCharInUInt32IsAscii(currentUInt32))
- {
- pBuffer++;
- }
-
- goto Finish;
- }
-
private static unsafe nuint GetIndexOfFirstNonAsciiChar_Intrinsified(char* pBuffer, nuint bufferLength /* in chars */)
{
// This method contains logic optimized using vector instructions for both x64 and Arm64.