Improve Guid parsing performance for "D", "N", "B", and "P" (#55792)
authorStephen Toub <stoub@microsoft.com>
Thu, 22 Jul 2021 18:12:17 +0000 (14:12 -0400)
committerGitHub <noreply@github.com>
Thu, 22 Jul 2021 18:12:17 +0000 (14:12 -0400)
* Improve Guid parsing for "D", "N", "B", and "P"

* Update src/libraries/System.Private.CoreLib/src/System/Guid.cs

src/libraries/System.Private.CoreLib/src/System/Guid.cs

index fa7d6ac..041bc5c 100644 (file)
@@ -197,6 +197,13 @@ namespace System
             {
                 return Unsafe.As<GuidResult, Guid>(ref Unsafe.AsRef(in this));
             }
+
+            public void ReverseAbcEndianness()
+            {
+                _a = BinaryPrimitives.ReverseEndianness(_a);
+                _b = BinaryPrimitives.ReverseEndianness(_b);
+                _c = BinaryPrimitives.ReverseEndianness(_c);
+            }
         }
 
         // Creates a new guid based on the value in the string.  The value is made up
@@ -367,12 +374,6 @@ namespace System
             };
         }
 
-        // Two helpers used for parsing components:
-        // - Number.TryParseUInt32HexNumberStyle(..., NumberStyles.AllowHexSpecifier, ...)
-        //       Used when we expect the entire provided span to be filled with and only with hex digits and no overflow is possible
-        // - TryParseHex
-        //       Used when the component may have an optional '+' and "0x" prefix, when it may overflow, etc.
-
         private static bool TryParseExactB(ReadOnlySpan<char> guidString, ref GuidResult result)
         {
             // e.g. "{d85b1407-351d-4694-9392-03acc5870eb1}"
@@ -390,57 +391,86 @@ namespace System
         {
             // e.g. "d85b1407-351d-4694-9392-03acc5870eb1"
 
-            // Compat notes due to the previous implementation's implementation details.
-            // - Components may begin with "0x" or "0x+", but the expected length of each component
-            //   needs to include those prefixes, e.g. a four digit component could be "1234" or
-            //   "0x34" or "+0x4" or "+234", but not "0x1234" nor "+1234" nor "+0x1234".
-            // - "0X" is valid instead of "0x"
-
-            if ((uint)guidString.Length != 36)
+            if ((uint)guidString.Length != 36 || guidString[8] != '-' || guidString[13] != '-' || guidString[18] != '-' || guidString[23] != '-')
             {
-                result.SetFailure(overflow: false, nameof(SR.Format_GuidInvLen));
+                result.SetFailure(overflow: false, guidString.Length != 36 ? nameof(SR.Format_GuidInvLen) : nameof(SR.Format_GuidDashes));
                 return false;
             }
 
-            if (guidString[8] != '-' || guidString[13] != '-' || guidString[18] != '-' || guidString[23] != '-')
-            {
-                result.SetFailure(overflow: false, nameof(SR.Format_GuidDashes));
-                return false;
+            Span<byte> bytes = MemoryMarshal.AsBytes(new Span<GuidResult>(ref result, 1));
+            uint invalidIfFF = 0;
+            bytes[0] = DecodeByte(guidString[6],   guidString[7],  ref invalidIfFF);
+            bytes[1] = DecodeByte(guidString[4],   guidString[5],  ref invalidIfFF);
+            bytes[2] = DecodeByte(guidString[2],   guidString[3],  ref invalidIfFF);
+            bytes[3] = DecodeByte(guidString[0],   guidString[1],  ref invalidIfFF);
+            bytes[4] = DecodeByte(guidString[11],  guidString[12], ref invalidIfFF);
+            bytes[5] = DecodeByte(guidString[9],   guidString[10], ref invalidIfFF);
+            bytes[6] = DecodeByte(guidString[16],  guidString[17], ref invalidIfFF);
+            bytes[7] = DecodeByte(guidString[14],  guidString[15], ref invalidIfFF);
+            bytes[8] = DecodeByte(guidString[19],  guidString[20], ref invalidIfFF);
+            bytes[9] = DecodeByte(guidString[21],  guidString[22], ref invalidIfFF);
+            bytes[10] = DecodeByte(guidString[24], guidString[25], ref invalidIfFF);
+            bytes[11] = DecodeByte(guidString[26], guidString[27], ref invalidIfFF);
+            bytes[12] = DecodeByte(guidString[28], guidString[29], ref invalidIfFF);
+            bytes[13] = DecodeByte(guidString[30], guidString[31], ref invalidIfFF);
+            bytes[14] = DecodeByte(guidString[32], guidString[33], ref invalidIfFF);
+            bytes[15] = DecodeByte(guidString[34], guidString[35], ref invalidIfFF);
+
+            if (invalidIfFF != 0xFF)
+            {
+                if (!BitConverter.IsLittleEndian)
+                {
+                    result.ReverseAbcEndianness();
+                }
+
+                return true;
             }
 
-            if (TryParseHex(guidString.Slice(0, 8), out result._a) && // _a
-                TryParseHex(guidString.Slice(9, 4), out uint uintTmp)) // _b
+            // The 'D' format has some undesirable behavior leftover from its original implementation:
+            // - Components may begin with "0x" and/or "+", but the expected length of each component
+            //   needs to include those prefixes, e.g. a four digit component could be "1234" or
+            //   "0x34" or "+0x4" or "+234", but not "0x1234" nor "+1234" nor "+0x1234".
+            // - "0X" is valid instead of "0x"
+            // We continue to support these but expect them to be incredibly rare.  As such, we
+            // optimize for correctly formed strings where all the digits are valid hex, and only
+            // fall back to supporting these other forms if parsing fails.
+            if (guidString.IndexOfAny('X', 'x', '+') != -1 && TryCompatParsing(guidString, ref result))
             {
-                result._b = (ushort)uintTmp;
+                return true;
+            }
 
-                if (TryParseHex(guidString.Slice(14, 4), out uintTmp)) // _c
-                {
-                    result._c = (ushort)uintTmp;
+            result.SetFailure(overflow: false, nameof(SR.Format_GuidInvalidChar));
+            return false;
 
-                    if (TryParseHex(guidString.Slice(19, 4), out uintTmp)) // _d, _e
+            static bool TryCompatParsing(ReadOnlySpan<char> guidString, ref GuidResult result)
+            {
+                if (TryParseHex(guidString.Slice(0, 8), out result._a) && // _a
+                    TryParseHex(guidString.Slice(9, 4), out uint uintTmp)) // _b
+                {
+                    result._b = (ushort)uintTmp;
+                    if (TryParseHex(guidString.Slice(14, 4), out uintTmp)) // _c
                     {
-                        // _d, _e must be stored as a big-endian ushort
-                        result._de = BitConverter.IsLittleEndian ? BinaryPrimitives.ReverseEndianness((ushort)uintTmp) : (ushort)uintTmp;
-
-                        if (TryParseHex(guidString.Slice(24, 4), out uintTmp)) // _f, _g
+                        result._c = (ushort)uintTmp;
+                        if (TryParseHex(guidString.Slice(19, 4), out uintTmp)) // _d, _e
                         {
-                            // _f, _g must be stored as a big-endian ushort
-                            result._fg = BitConverter.IsLittleEndian ? BinaryPrimitives.ReverseEndianness((ushort)uintTmp) : (ushort)uintTmp;
-
-                            if (Number.TryParseUInt32HexNumberStyle(guidString.Slice(28, 8), NumberStyles.AllowHexSpecifier, out uintTmp) == Number.ParsingStatus.OK) // _h, _i, _j, _k
+                            result._de = BitConverter.IsLittleEndian ? BinaryPrimitives.ReverseEndianness((ushort)uintTmp) : (ushort)uintTmp;
+                            if (TryParseHex(guidString.Slice(24, 4), out uintTmp)) // _f, _g
                             {
-                                // _h, _i, _j, _k must be stored as a big-endian uint
-                                result._hijk = BitConverter.IsLittleEndian ? BinaryPrimitives.ReverseEndianness(uintTmp) : uintTmp;
-
-                                return true;
+                                result._fg = BitConverter.IsLittleEndian ? BinaryPrimitives.ReverseEndianness((ushort)uintTmp) : (ushort)uintTmp;
+
+                                // Unlike the other components, this one never allowed 0x or +, so we can parse it as straight hex.
+                                if (Number.TryParseUInt32HexNumberStyle(guidString.Slice(28, 8), NumberStyles.AllowHexSpecifier, out uintTmp) == Number.ParsingStatus.OK) // _h, _i, _j, _k
+                                {
+                                    result._hijk = BitConverter.IsLittleEndian ? BinaryPrimitives.ReverseEndianness(uintTmp) : uintTmp;
+                                    return true;
+                                }
                             }
                         }
                     }
                 }
-            }
 
-            result.SetFailure(overflow: false, nameof(SR.Format_GuidInvalidChar));
-            return false;
+                return false;
+            }
         }
 
         private static bool TryParseExactN(ReadOnlySpan<char> guidString, ref GuidResult result)
@@ -453,28 +483,33 @@ namespace System
                 return false;
             }
 
-            if (Number.TryParseUInt32HexNumberStyle(guidString.Slice(0, 8), NumberStyles.AllowHexSpecifier, out result._a) == Number.ParsingStatus.OK && // _a
-                Number.TryParseUInt32HexNumberStyle(guidString.Slice(8, 8), NumberStyles.AllowHexSpecifier, out uint uintTmp) == Number.ParsingStatus.OK) // _b, _c
-            {
-                // _b, _c are independently in machine-endian order
-                if (BitConverter.IsLittleEndian) { uintTmp = BitOperations.RotateRight(uintTmp, 16); }
-                result._bc = uintTmp;
-
-                if (Number.TryParseUInt32HexNumberStyle(guidString.Slice(16, 8), NumberStyles.AllowHexSpecifier, out uintTmp) == Number.ParsingStatus.OK) // _d, _e, _f, _g
+            Span<byte> bytes = MemoryMarshal.AsBytes(new Span<GuidResult>(ref result, 1));
+            uint invalidIfFF = 0;
+            bytes[0] = DecodeByte(guidString[6], guidString[7], ref invalidIfFF);
+            bytes[1] = DecodeByte(guidString[4], guidString[5], ref invalidIfFF);
+            bytes[2] = DecodeByte(guidString[2], guidString[3], ref invalidIfFF);
+            bytes[3] = DecodeByte(guidString[0], guidString[1], ref invalidIfFF);
+            bytes[4] = DecodeByte(guidString[10], guidString[11], ref invalidIfFF);
+            bytes[5] = DecodeByte(guidString[8], guidString[9], ref invalidIfFF);
+            bytes[6] = DecodeByte(guidString[14], guidString[15], ref invalidIfFF);
+            bytes[7] = DecodeByte(guidString[12], guidString[13], ref invalidIfFF);
+            bytes[8] = DecodeByte(guidString[16], guidString[17], ref invalidIfFF);
+            bytes[9] = DecodeByte(guidString[18], guidString[19], ref invalidIfFF);
+            bytes[10] = DecodeByte(guidString[20], guidString[21], ref invalidIfFF);
+            bytes[11] = DecodeByte(guidString[22], guidString[23], ref invalidIfFF);
+            bytes[12] = DecodeByte(guidString[24], guidString[25], ref invalidIfFF);
+            bytes[13] = DecodeByte(guidString[26], guidString[27], ref invalidIfFF);
+            bytes[14] = DecodeByte(guidString[28], guidString[29], ref invalidIfFF);
+            bytes[15] = DecodeByte(guidString[30], guidString[31], ref invalidIfFF);
+
+            if (invalidIfFF != 0xFF)
+            {
+                if (!BitConverter.IsLittleEndian)
                 {
-                    // _d, _e, _f, _g must be stored as a big-endian uint
-                    if (BitConverter.IsLittleEndian) { uintTmp = BinaryPrimitives.ReverseEndianness(uintTmp); }
-                    result._defg = uintTmp;
-
-                    if (Number.TryParseUInt32HexNumberStyle(guidString.Slice(24, 8), NumberStyles.AllowHexSpecifier, out uintTmp) == Number.ParsingStatus.OK) // _h, _i, _j, _k
-                    {
-                        // _h, _i, _j, _k must be stored as big-endian uint
-                        if (BitConverter.IsLittleEndian) { uintTmp = BinaryPrimitives.ReverseEndianness(uintTmp); }
-                        result._hijk = uintTmp;
-
-                        return true;
-                    }
+                    result.ReverseAbcEndianness();
                 }
+
+                return true;
             }
 
             result.SetFailure(overflow: false, nameof(SR.Format_GuidInvalidChar));
@@ -659,6 +694,24 @@ namespace System
             return true;
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static byte DecodeByte(char ch1, char ch2, ref uint invalidIfFF)
+        {
+            // TODO https://github.com/dotnet/runtime/issues/13464:
+            // Replace the Unsafe.Add with HexConverter.FromChar once the bounds checks are eliminated.
+
+            uint h1 = 0xFF;
+            if (ch1 < HexConverter.CharToHexLookup.Length)
+                h1 = Unsafe.Add(ref MemoryMarshal.GetReference(HexConverter.CharToHexLookup), ch1);
+
+            uint h2 = 0xFF;
+            if (ch2 < HexConverter.CharToHexLookup.Length)
+                h2 = Unsafe.Add(ref MemoryMarshal.GetReference(HexConverter.CharToHexLookup), ch2);
+
+            invalidIfFF |= h1 | h2;
+            return (byte)(h1 << 4 | h2);
+        }
+
         private static bool TryParseHex(ReadOnlySpan<char> guidString, out ushort result, ref bool overflow)
         {
             bool success = TryParseHex(guidString, out uint tmp, ref overflow);