Solve recently introduce ASCII regression (#80709)
authorAdam Sitnik <adam.sitnik@gmail.com>
Tue, 17 Jan 2023 21:24:08 +0000 (22:24 +0100)
committerGitHub <noreply@github.com>
Tue, 17 Jan 2023 21:24:08 +0000 (22:24 +0100)
* call the helper methods directly to reduce overhead (important for small inputs)

* inline the helper that was supposed to be public into it's only caller

src/libraries/System.Private.CoreLib/src/System/Text/ASCIIEncoding.cs
src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs
src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs
src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs
src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs
src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs

index fb2c0c7..ee4bf68 100644 (file)
@@ -193,8 +193,7 @@ namespace System.Text
             {
                 // Unrecognized fallback mechanism - count chars manually.
 
-                int firstNonAsciiIndex = Ascii.GetIndexOfFirstNonAsciiChar(new ReadOnlySpan<char>(pChars, charsLength));
-                byteCount = firstNonAsciiIndex < 0 ? charsLength : firstNonAsciiIndex;
+                byteCount = (int)Ascii.GetIndexOfFirstNonAsciiChar(pChars, (uint)charsLength);
             }
 
             charsConsumed = byteCount;
@@ -355,8 +354,10 @@ namespace System.Text
         [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetBytesCommon
         private protected sealed override unsafe int GetBytesFast(char* pChars, int charsLength, byte* pBytes, int bytesLength, out int charsConsumed)
         {
-            Ascii.FromUtf16(new ReadOnlySpan<char>(pChars, charsLength), new Span<byte>(pBytes, bytesLength), out charsConsumed);
-            return charsConsumed;
+            int bytesWritten = (int)Ascii.NarrowUtf16ToAscii(pChars, pBytes, (uint)Math.Min(charsLength, bytesLength));
+
+            charsConsumed = bytesWritten;
+            return bytesWritten;
         }
 
         private protected sealed override unsafe int GetBytesWithFallback(ReadOnlySpan<char> chars, int originalCharsLength, Span<byte> bytes, int originalBytesLength, EncoderNLS? encoder)
@@ -513,8 +514,7 @@ namespace System.Text
             {
                 // Unrecognized fallback mechanism - count bytes manually.
 
-                int indexOfFirstNonAscii = Ascii.GetIndexOfFirstNonAsciiByte(new ReadOnlySpan<byte>(pBytes, bytesLength));
-                charCount = indexOfFirstNonAscii < 0 ? bytesLength : indexOfFirstNonAscii;
+                charCount = (int)Ascii.GetIndexOfFirstNonAsciiByte(pBytes, (uint)bytesLength);
             }
 
             bytesConsumed = charCount;
@@ -627,7 +627,7 @@ namespace System.Text
         [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetCharsCommon
         private protected sealed override unsafe int GetCharsFast(byte* pBytes, int bytesLength, char* pChars, int charsLength, out int bytesConsumed)
         {
-            Ascii.ToUtf16(new ReadOnlySpan<byte>(pBytes, bytesLength), new Span<char>(pChars, charsLength), out bytesConsumed);
+            bytesConsumed = (int)Ascii.WidenAsciiToUtf16(pBytes, pChars, (uint)Math.Min(charsLength, bytesLength));
             return bytesConsumed;
         }
 
index 86818e4..2d2ec7a 100644 (file)
@@ -81,7 +81,7 @@ namespace System.Text
         /// </summary>
         /// <returns>An ASCII byte is defined as 0x00 - 0x7F, inclusive.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint bufferLength)
+        internal static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint bufferLength)
         {
             // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized
             // code below. This has two benefits: (a) we can take advantage of specific instructions like
@@ -617,7 +617,7 @@ namespace System.Text
         /// </summary>
         /// <returns>An ASCII char is defined as 0x0000 - 0x007F, inclusive.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static unsafe nuint GetIndexOfFirstNonAsciiChar(char* pBuffer, nuint bufferLength /* in chars */)
+        internal static unsafe nuint GetIndexOfFirstNonAsciiChar(char* pBuffer, nuint bufferLength /* in chars */)
         {
             // If SSE2/ASIMD is supported, use those specific intrinsics instead of the generic vectorized
             // code below. This has two benefits: (a) we can take advantage of specific instructions like
@@ -1152,7 +1152,7 @@ namespace System.Text
         /// or once <paramref name="elementCount"/> elements have been converted. Returns the total number
         /// of elements that were able to be converted.
         /// </summary>
-        private static unsafe nuint NarrowUtf16ToAscii(char* pUtf16Buffer, byte* pAsciiBuffer, nuint elementCount)
+        internal static unsafe nuint NarrowUtf16ToAscii(char* pUtf16Buffer, byte* pAsciiBuffer, nuint elementCount)
         {
             nuint currentOffset = 0;
 
@@ -1576,7 +1576,7 @@ namespace System.Text
         /// or once <paramref name="elementCount"/> elements have been converted. Returns the total number
         /// of elements that were able to be converted.
         /// </summary>
-        private static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buffer, nuint elementCount)
+        internal static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buffer, nuint elementCount)
         {
             // Intrinsified in mono interpreter
             nuint currentOffset = 0;
index fb8b928..8934bd9 100644 (file)
@@ -9,66 +9,50 @@ namespace System.Text
     public static partial class Ascii
     {
         /// <summary>
-        /// Returns the index of the first non-ASCII byte in a buffer.
+        /// Determines whether the provided value contains only ASCII bytes.
         /// </summary>
-        /// <param name="buffer">The buffer to scan.</param>
-        /// <returns>The index in <paramref name="buffer"/> where the first non-ASCII
-        /// byte appears, or -1 if the buffer contains only ASCII bytes.</returns>
-        internal static unsafe int GetIndexOfFirstNonAsciiByte(ReadOnlySpan<byte> buffer)
+        /// <param name="value">The value to inspect.</param>
+        /// <returns>True if <paramref name="value"/> contains only ASCII bytes or is
+        /// empty; False otherwise.</returns>
+        public static unsafe bool IsValid(ReadOnlySpan<byte> value)
         {
-            if (buffer.IsEmpty)
+            if (value.IsEmpty)
             {
-                return -1;
+                return true;
             }
 
-            nuint bufferLength = (uint)buffer.Length;
-            fixed (byte* pBuffer = &MemoryMarshal.GetReference(buffer))
+            nuint bufferLength = (uint)value.Length;
+            fixed (byte* pBuffer = &MemoryMarshal.GetReference(value))
             {
                 nuint idxOfFirstNonAsciiElement = GetIndexOfFirstNonAsciiByte(pBuffer, bufferLength);
                 Debug.Assert(idxOfFirstNonAsciiElement <= bufferLength);
-                return (idxOfFirstNonAsciiElement == bufferLength) ? -1 : (int)idxOfFirstNonAsciiElement;
+                return idxOfFirstNonAsciiElement == bufferLength;
             }
         }
 
         /// <summary>
-        /// Returns the index of the first non-ASCII char in a buffer.
+        /// Determines whether the provided value contains only ASCII chars.
         /// </summary>
-        /// <param name="buffer">The buffer to scan.</param>
-        /// <returns>The index in <paramref name="buffer"/> where the first non-ASCII
-        /// char appears, or -1 if the buffer contains only ASCII char.</returns>
-        internal static unsafe int GetIndexOfFirstNonAsciiChar(ReadOnlySpan<char> buffer)
+        /// <param name="value">The value to inspect.</param>
+        /// <returns>True if <paramref name="value"/> contains only ASCII chars or is
+        /// empty; False otherwise.</returns>
+        public static unsafe bool IsValid(ReadOnlySpan<char> value)
         {
-            if (buffer.IsEmpty)
+            if (value.IsEmpty)
             {
-                return -1;
+                return true;
             }
 
-            nuint bufferLength = (uint)buffer.Length;
-            fixed (char* pBuffer = &MemoryMarshal.GetReference(buffer))
+            nuint bufferLength = (uint)value.Length;
+            fixed (char* pBuffer = &MemoryMarshal.GetReference(value))
             {
                 nuint idxOfFirstNonAsciiElement = GetIndexOfFirstNonAsciiChar(pBuffer, bufferLength);
                 Debug.Assert(idxOfFirstNonAsciiElement <= bufferLength);
-                return (idxOfFirstNonAsciiElement == bufferLength) ? -1 : (int)idxOfFirstNonAsciiElement;
+                return idxOfFirstNonAsciiElement == bufferLength;
             }
         }
 
         /// <summary>
-        /// Determines whether the provided value contains only ASCII bytes.
-        /// </summary>
-        /// <param name="value">The value to inspect.</param>
-        /// <returns>True if <paramref name="value"/> contains only ASCII bytes or is
-        /// empty; False otherwise.</returns>
-        public static unsafe bool IsValid(ReadOnlySpan<byte> value) => GetIndexOfFirstNonAsciiByte(value) < 0;
-
-        /// <summary>
-        /// Determines whether the provided value contains only ASCII chars.
-        /// </summary>
-        /// <param name="value">The value to inspect.</param>
-        /// <returns>True if <paramref name="value"/> contains only ASCII chars or is
-        /// empty; False otherwise.</returns>
-        public static unsafe bool IsValid(ReadOnlySpan<char> value) => GetIndexOfFirstNonAsciiChar(value) < 0;
-
-        /// <summary>
         /// Determines whether the provided value is ASCII byte.
         /// </summary>
         /// <param name="value">The value to inspect.</param>
index ae3d8b5..199d494 100644 (file)
@@ -29,8 +29,7 @@ namespace System.Text.Unicode
             // First, we'll handle the common case of all-ASCII. If this is able to
             // consume the entire buffer, we'll skip the remainder of this method's logic.
 
-            int firstNonAsciiIndex = Ascii.GetIndexOfFirstNonAsciiChar(new ReadOnlySpan<char>(pInputBuffer, inputLength));
-            int numAsciiCharsConsumedJustNow = firstNonAsciiIndex < 0 ? inputLength : firstNonAsciiIndex;
+            int numAsciiCharsConsumedJustNow = (int)Ascii.GetIndexOfFirstNonAsciiChar(pInputBuffer, (uint)inputLength);
             Debug.Assert(0 <= numAsciiCharsConsumedJustNow && numAsciiCharsConsumedJustNow <= inputLength);
 
             pInputBuffer += (uint)numAsciiCharsConsumedJustNow;
index b1c908b..6dc91d8 100644 (file)
@@ -26,23 +26,25 @@ namespace System.Text.Unicode
             Debug.Assert(pOutputBuffer != null || outputCharsRemaining == 0, "Destination length must be zero if destination buffer pointer is null.");
 
             // First, try vectorized conversion.
-            OperationStatus status = Ascii.ToUtf16(new ReadOnlySpan<byte>(pInputBuffer, inputLength), new Span<char>(pOutputBuffer, outputCharsRemaining), out int bytesConsumed);
+            {
+                nuint numElementsConverted = Ascii.WidenAsciiToUtf16(pInputBuffer, pOutputBuffer, (uint)Math.Min(inputLength, outputCharsRemaining));
 
-            pInputBuffer += bytesConsumed;
-            pOutputBuffer += bytesConsumed;
+                pInputBuffer += numElementsConverted;
+                pOutputBuffer += numElementsConverted;
 
-            // Quick check - did we just end up consuming the entire input buffer?
-            // If so, short-circuit the remainder of the method.
+                // Quick check - did we just end up consuming the entire input buffer?
+                // If so, short-circuit the remainder of the method.
 
-            if (status == OperationStatus.Done)
-            {
-                pInputBufferRemaining = pInputBuffer;
-                pOutputBufferRemaining = pOutputBuffer;
-                return OperationStatus.Done;
-            }
+                if ((int)numElementsConverted == inputLength)
+                {
+                    pInputBufferRemaining = pInputBuffer;
+                    pOutputBufferRemaining = pOutputBuffer;
+                    return OperationStatus.Done;
+                }
 
-            inputLength -= bytesConsumed;
-            outputCharsRemaining -= bytesConsumed;
+                inputLength -= (int)numElementsConverted;
+                outputCharsRemaining -= (int)numElementsConverted;
+            }
 
             if (inputLength < sizeof(uint))
             {
@@ -846,23 +848,23 @@ namespace System.Text.Unicode
             // First, try vectorized conversion.
 
             {
-                OperationStatus status = Ascii.FromUtf16(new ReadOnlySpan<char>(pInputBuffer, inputLength), new Span<byte>(pOutputBuffer, outputBytesRemaining), out int charsConsumed);
+                nuint numElementsConverted = Ascii.NarrowUtf16ToAscii(pInputBuffer, pOutputBuffer, (uint)Math.Min(inputLength, outputBytesRemaining));
 
-                pInputBuffer += charsConsumed;
-                pOutputBuffer += charsConsumed;
+                pInputBuffer += numElementsConverted;
+                pOutputBuffer += numElementsConverted;
 
                 // Quick check - did we just end up consuming the entire input buffer?
                 // If so, short-circuit the remainder of the method.
 
-                if (status == OperationStatus.Done)
+                if ((int)numElementsConverted == inputLength)
                 {
                     pInputBufferRemaining = pInputBuffer;
                     pOutputBufferRemaining = pOutputBuffer;
                     return OperationStatus.Done;
                 }
 
-                inputLength -= charsConsumed;
-                outputBytesRemaining -= charsConsumed;
+                inputLength -= (int)numElementsConverted;
+                outputBytesRemaining -= (int)numElementsConverted;
             }
 
             if (inputLength < CharsPerDWord)
index 8e08a4a..91bd846 100644 (file)
@@ -27,19 +27,20 @@ namespace System.Text.Unicode
             Debug.Assert(pInputBuffer != null || inputLength == 0, "Input length must be zero if input buffer pointer is null.");
 
             // First, try to drain off as many ASCII bytes as we can from the beginning.
-            int indexOfFirstNonAscii = Ascii.GetIndexOfFirstNonAsciiByte(new ReadOnlySpan<byte>(pInputBuffer, inputLength));
+            nuint numAsciiBytesCounted = Ascii.GetIndexOfFirstNonAsciiByte(pInputBuffer, (uint)inputLength);
+            pInputBuffer += numAsciiBytesCounted;
+
             // Quick check - did we just end up consuming the entire input buffer?
             // If so, short-circuit the remainder of the method.
-            if (indexOfFirstNonAscii < 0)
+
+            inputLength -= (int)numAsciiBytesCounted;
+            if (inputLength == 0)
             {
                 utf16CodeUnitCountAdjustment = 0;
                 scalarCountAdjustment = 0;
-                return pInputBuffer + inputLength;
+                return pInputBuffer;
             }
 
-            pInputBuffer += indexOfFirstNonAscii;
-            inputLength -= indexOfFirstNonAscii;
-
 #if DEBUG
             // Keep these around for final validation at the end of the method.
             byte* pOriginalInputBuffer = pInputBuffer;