Optimize CheckIriUnicodeRange (#31860)
authorMiha Zupan <mihazupan.zupan1@gmail.com>
Tue, 18 Feb 2020 17:36:29 +0000 (18:36 +0100)
committerGitHub <noreply@github.com>
Tue, 18 Feb 2020 17:36:29 +0000 (18:36 +0100)
* Optimize CheckIriUnicodeRange

* Improve clarity of if check in CheckIriUnicodeRange

Co-Authored-By: Stephen Toub <stoub@microsoft.com>
* Improve clarity of if check in CheckIriUnicodeRange

* Optimize range checks that are equivalent mod 2^16

* Invert escape boolean condition

Co-authored-by: Stephen Toub <stoub@microsoft.com>
src/libraries/System.Private.Uri/src/System/IriHelper.cs
src/libraries/System.Private.Uri/src/System/Uri.cs
src/libraries/System.Private.Uri/src/System/UriHelper.cs

index c2d6a0b91703c456d8fb81ce8154b8f6acbb870b..9f6ec186d915fba2bf3e5c18415b587cce608be4 100644 (file)
@@ -3,7 +3,6 @@
 // See the LICENSE file in the project root for more information.
 
 using System.Diagnostics;
-using System.Runtime.InteropServices;
 using System.Text;
 
 namespace System
@@ -12,6 +11,7 @@ namespace System
     {
         //
         // Checks if provided non surrogate char lies in iri range
+        // This method implements the ABNF checks per https://tools.ietf.org/html/rfc3987#section-2.2
         //
         internal static bool CheckIriUnicodeRange(char unicode, bool isQuery)
         {
@@ -25,58 +25,27 @@ namespace System
         // Check if highSurr and lowSurr are a surrogate pair then
         // it checks if the combined char is in the range
         // Takes in isQuery because iri restrictions for query are different
+        // This method implements the ABNF checks per https://tools.ietf.org/html/rfc3987#section-2.2
         //
-        internal static bool CheckIriUnicodeRange(char highSurr, char lowSurr, ref bool surrogatePair, bool isQuery)
+        internal static bool CheckIriUnicodeRange(char highSurr, char lowSurr, out bool isSurrogatePair, bool isQuery)
         {
-            bool inRange = false;
-            surrogatePair = false;
-
             Debug.Assert(char.IsHighSurrogate(highSurr));
 
-            if (char.IsSurrogatePair(highSurr, lowSurr))
+            if (Rune.TryCreate(highSurr, lowSurr, out Rune rune))
             {
-                surrogatePair = true;
-                ReadOnlySpan<char> chars = stackalloc char[2] { highSurr, lowSurr };
-                string surrPair = new string(chars);
-                if (((string.CompareOrdinal(surrPair, "\U00010000") >= 0)
-                        && (string.CompareOrdinal(surrPair, "\U0001FFFD") <= 0)) ||
-                    ((string.CompareOrdinal(surrPair, "\U00020000") >= 0)
-                        && (string.CompareOrdinal(surrPair, "\U0002FFFD") <= 0)) ||
-                    ((string.CompareOrdinal(surrPair, "\U00030000") >= 0)
-                        && (string.CompareOrdinal(surrPair, "\U0003FFFD") <= 0)) ||
-                    ((string.CompareOrdinal(surrPair, "\U00040000") >= 0)
-                        && (string.CompareOrdinal(surrPair, "\U0004FFFD") <= 0)) ||
-                    ((string.CompareOrdinal(surrPair, "\U00050000") >= 0)
-                        && (string.CompareOrdinal(surrPair, "\U0005FFFD") <= 0)) ||
-                    ((string.CompareOrdinal(surrPair, "\U00060000") >= 0)
-                        && (string.CompareOrdinal(surrPair, "\U0006FFFD") <= 0)) ||
-                    ((string.CompareOrdinal(surrPair, "\U00070000") >= 0)
-                        && (string.CompareOrdinal(surrPair, "\U0007FFFD") <= 0)) ||
-                    ((string.CompareOrdinal(surrPair, "\U00080000") >= 0)
-                        && (string.CompareOrdinal(surrPair, "\U0008FFFD") <= 0)) ||
-                    ((string.CompareOrdinal(surrPair, "\U00090000") >= 0)
-                        && (string.CompareOrdinal(surrPair, "\U0009FFFD") <= 0)) ||
-                    ((string.CompareOrdinal(surrPair, "\U000A0000") >= 0)
-                        && (string.CompareOrdinal(surrPair, "\U000AFFFD") <= 0)) ||
-                    ((string.CompareOrdinal(surrPair, "\U000B0000") >= 0)
-                        && (string.CompareOrdinal(surrPair, "\U000BFFFD") <= 0)) ||
-                    ((string.CompareOrdinal(surrPair, "\U000C0000") >= 0)
-                        && (string.CompareOrdinal(surrPair, "\U000CFFFD") <= 0)) ||
-                    ((string.CompareOrdinal(surrPair, "\U000D0000") >= 0)
-                        && (string.CompareOrdinal(surrPair, "\U000DFFFD") <= 0)) ||
-                    ((string.CompareOrdinal(surrPair, "\U000E1000") >= 0)
-                        && (string.CompareOrdinal(surrPair, "\U000EFFFD") <= 0)) ||
-                    (isQuery &&
-                        (((string.CompareOrdinal(surrPair, "\U000F0000") >= 0)
-                            && (string.CompareOrdinal(surrPair, "\U000FFFFD") <= 0)) ||
-                            ((string.CompareOrdinal(surrPair, "\U00100000") >= 0)
-                            && (string.CompareOrdinal(surrPair, "\U0010FFFD") <= 0)))))
-                {
-                    inRange = true;
-                }
+                isSurrogatePair = true;
+
+                // U+xxFFFE..U+xxFFFF is always private use for all planes, so we exclude it.
+                // U+E0000..U+E0FFF is disallowed per the 'ucschar' definition in the ABNF.
+                // U+F0000 and above are only allowed for 'iprivate' per the ABNF (isQuery = true).
+
+                return ((rune.Value & 0xFFFF) < 0xFFFE)
+                    && ((uint)(rune.Value - 0xE0000) >= (0xE1000 - 0xE0000))
+                    && (isQuery || rune.Value < 0xF0000);
             }
 
-            return inRange;
+            isSurrogatePair = false;
+            return false;
         }
 
         //
@@ -150,7 +119,7 @@ namespace System
                             int startSeq = next;
                             int byteCount = 1;
                             // lazy initialization of max size, will reuse the array for next sequences
-                            if ((object?)bytes == null)
+                            if (bytes is null)
                                 bytes = new byte[end - next];
 
                             bytes[0] = (byte)ch;
@@ -220,7 +189,7 @@ namespace System
                 {
                     // unicode
 
-                    bool escape;
+                    bool isInIriUnicodeRange;
                     bool surrogatePair = false;
 
                     char ch2 = '\0';
@@ -228,14 +197,22 @@ namespace System
                     if ((char.IsHighSurrogate(ch)) && (next + 1 < end))
                     {
                         ch2 = pInput[next + 1];
-                        escape = !CheckIriUnicodeRange(ch, ch2, ref surrogatePair, component == UriComponents.Query);
+                        isInIriUnicodeRange = CheckIriUnicodeRange(ch, ch2, out surrogatePair, component == UriComponents.Query);
                     }
                     else
                     {
-                        escape = !CheckIriUnicodeRange(ch, component == UriComponents.Query);
+                        isInIriUnicodeRange = CheckIriUnicodeRange(ch, component == UriComponents.Query);
                     }
 
-                    if (escape)
+                    if (isInIriUnicodeRange)
+                    {
+                        dest.Append(ch);
+                        if (surrogatePair)
+                        {
+                            dest.Append(ch2);
+                        }
+                    }
+                    else
                     {
                         Rune rune;
                         if (surrogatePair)
@@ -255,14 +232,6 @@ namespace System
                             UriHelper.EscapeAsciiChar(b, ref dest);
                         }
                     }
-                    else
-                    {
-                        dest.Append(ch);
-                        if (surrogatePair)
-                        {
-                            dest.Append(ch2);
-                        }
-                    }
 
                     if (surrogatePair)
                     {
index 9fe6e9ed9a0d66269f8a92795a55ae09937ddc7e..e8d936349be31f606eecf99b433cb27b7e66db11 100644 (file)
@@ -4435,8 +4435,7 @@ namespace System
                         {
                             if ((i + 1) < end)
                             {
-                                bool surrPair = false;
-                                valid = IriHelper.CheckIriUnicodeRange(c, str[i + 1], ref surrPair, true);
+                                valid = IriHelper.CheckIriUnicodeRange(c, str[i + 1], out _, true);
                             }
                         }
                         else
index 915931fb620c251d83c61ca8e22aa759b07c8a67..95e2c0537bad2a79ef083fd9a81f7a30a9b5d605 100644 (file)
@@ -575,12 +575,12 @@ namespace System
                     if (iriParsing)
                     {
                         if (!isHighSurr)
+                        {
                             inIriRange = IriHelper.CheckIriUnicodeRange(unescapedChars[j], isQuery);
+                        }
                         else
                         {
-                            bool surrPair = false;
-                            inIriRange = IriHelper.CheckIriUnicodeRange(unescapedChars[j], unescapedChars[j + 1],
-                                                                   ref surrPair, isQuery);
+                            inIriRange = IriHelper.CheckIriUnicodeRange(unescapedChars[j], unescapedChars[j + 1], out _, isQuery);
                         }
                     }