From b00f349611915f2fbc543e445fa1c9636047fa1d Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Tue, 18 Feb 2020 18:36:29 +0100 Subject: [PATCH] Optimize CheckIriUnicodeRange (#31860) * Optimize CheckIriUnicodeRange * Improve clarity of if check in CheckIriUnicodeRange Co-Authored-By: Stephen Toub * Improve clarity of if check in CheckIriUnicodeRange * Optimize range checks that are equivalent mod 2^16 * Invert escape boolean condition Co-authored-by: Stephen Toub --- .../System.Private.Uri/src/System/IriHelper.cs | 87 +++++++--------------- src/libraries/System.Private.Uri/src/System/Uri.cs | 3 +- .../System.Private.Uri/src/System/UriHelper.cs | 6 +- 3 files changed, 32 insertions(+), 64 deletions(-) diff --git a/src/libraries/System.Private.Uri/src/System/IriHelper.cs b/src/libraries/System.Private.Uri/src/System/IriHelper.cs index c2d6a0b..9f6ec18 100644 --- a/src/libraries/System.Private.Uri/src/System/IriHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/IriHelper.cs @@ -3,7 +3,6 @@ // See the LICENSE file in the project root for more information. using System.Diagnostics; -using System.Runtime.InteropServices; using System.Text; namespace System @@ -12,6 +11,7 @@ namespace System { // // Checks if provided non surrogate char lies in iri range + // This method implements the ABNF checks per https://tools.ietf.org/html/rfc3987#section-2.2 // internal static bool CheckIriUnicodeRange(char unicode, bool isQuery) { @@ -25,58 +25,27 @@ namespace System // Check if highSurr and lowSurr are a surrogate pair then // it checks if the combined char is in the range // Takes in isQuery because iri restrictions for query are different + // This method implements the ABNF checks per https://tools.ietf.org/html/rfc3987#section-2.2 // - internal static bool CheckIriUnicodeRange(char highSurr, char lowSurr, ref bool surrogatePair, bool isQuery) + internal static bool CheckIriUnicodeRange(char highSurr, char lowSurr, out bool isSurrogatePair, bool isQuery) { - bool inRange = false; - surrogatePair = false; - Debug.Assert(char.IsHighSurrogate(highSurr)); - if (char.IsSurrogatePair(highSurr, lowSurr)) + if (Rune.TryCreate(highSurr, lowSurr, out Rune rune)) { - surrogatePair = true; - ReadOnlySpan chars = stackalloc char[2] { highSurr, lowSurr }; - string surrPair = new string(chars); - if (((string.CompareOrdinal(surrPair, "\U00010000") >= 0) - && (string.CompareOrdinal(surrPair, "\U0001FFFD") <= 0)) || - ((string.CompareOrdinal(surrPair, "\U00020000") >= 0) - && (string.CompareOrdinal(surrPair, "\U0002FFFD") <= 0)) || - ((string.CompareOrdinal(surrPair, "\U00030000") >= 0) - && (string.CompareOrdinal(surrPair, "\U0003FFFD") <= 0)) || - ((string.CompareOrdinal(surrPair, "\U00040000") >= 0) - && (string.CompareOrdinal(surrPair, "\U0004FFFD") <= 0)) || - ((string.CompareOrdinal(surrPair, "\U00050000") >= 0) - && (string.CompareOrdinal(surrPair, "\U0005FFFD") <= 0)) || - ((string.CompareOrdinal(surrPair, "\U00060000") >= 0) - && (string.CompareOrdinal(surrPair, "\U0006FFFD") <= 0)) || - ((string.CompareOrdinal(surrPair, "\U00070000") >= 0) - && (string.CompareOrdinal(surrPair, "\U0007FFFD") <= 0)) || - ((string.CompareOrdinal(surrPair, "\U00080000") >= 0) - && (string.CompareOrdinal(surrPair, "\U0008FFFD") <= 0)) || - ((string.CompareOrdinal(surrPair, "\U00090000") >= 0) - && (string.CompareOrdinal(surrPair, "\U0009FFFD") <= 0)) || - ((string.CompareOrdinal(surrPair, "\U000A0000") >= 0) - && (string.CompareOrdinal(surrPair, "\U000AFFFD") <= 0)) || - ((string.CompareOrdinal(surrPair, "\U000B0000") >= 0) - && (string.CompareOrdinal(surrPair, "\U000BFFFD") <= 0)) || - ((string.CompareOrdinal(surrPair, "\U000C0000") >= 0) - && (string.CompareOrdinal(surrPair, "\U000CFFFD") <= 0)) || - ((string.CompareOrdinal(surrPair, "\U000D0000") >= 0) - && (string.CompareOrdinal(surrPair, "\U000DFFFD") <= 0)) || - ((string.CompareOrdinal(surrPair, "\U000E1000") >= 0) - && (string.CompareOrdinal(surrPair, "\U000EFFFD") <= 0)) || - (isQuery && - (((string.CompareOrdinal(surrPair, "\U000F0000") >= 0) - && (string.CompareOrdinal(surrPair, "\U000FFFFD") <= 0)) || - ((string.CompareOrdinal(surrPair, "\U00100000") >= 0) - && (string.CompareOrdinal(surrPair, "\U0010FFFD") <= 0))))) - { - inRange = true; - } + isSurrogatePair = true; + + // U+xxFFFE..U+xxFFFF is always private use for all planes, so we exclude it. + // U+E0000..U+E0FFF is disallowed per the 'ucschar' definition in the ABNF. + // U+F0000 and above are only allowed for 'iprivate' per the ABNF (isQuery = true). + + return ((rune.Value & 0xFFFF) < 0xFFFE) + && ((uint)(rune.Value - 0xE0000) >= (0xE1000 - 0xE0000)) + && (isQuery || rune.Value < 0xF0000); } - return inRange; + isSurrogatePair = false; + return false; } // @@ -150,7 +119,7 @@ namespace System int startSeq = next; int byteCount = 1; // lazy initialization of max size, will reuse the array for next sequences - if ((object?)bytes == null) + if (bytes is null) bytes = new byte[end - next]; bytes[0] = (byte)ch; @@ -220,7 +189,7 @@ namespace System { // unicode - bool escape; + bool isInIriUnicodeRange; bool surrogatePair = false; char ch2 = '\0'; @@ -228,14 +197,22 @@ namespace System if ((char.IsHighSurrogate(ch)) && (next + 1 < end)) { ch2 = pInput[next + 1]; - escape = !CheckIriUnicodeRange(ch, ch2, ref surrogatePair, component == UriComponents.Query); + isInIriUnicodeRange = CheckIriUnicodeRange(ch, ch2, out surrogatePair, component == UriComponents.Query); } else { - escape = !CheckIriUnicodeRange(ch, component == UriComponents.Query); + isInIriUnicodeRange = CheckIriUnicodeRange(ch, component == UriComponents.Query); } - if (escape) + if (isInIriUnicodeRange) + { + dest.Append(ch); + if (surrogatePair) + { + dest.Append(ch2); + } + } + else { Rune rune; if (surrogatePair) @@ -255,14 +232,6 @@ namespace System UriHelper.EscapeAsciiChar(b, ref dest); } } - else - { - dest.Append(ch); - if (surrogatePair) - { - dest.Append(ch2); - } - } if (surrogatePair) { diff --git a/src/libraries/System.Private.Uri/src/System/Uri.cs b/src/libraries/System.Private.Uri/src/System/Uri.cs index 9fe6e9e..e8d9363 100644 --- a/src/libraries/System.Private.Uri/src/System/Uri.cs +++ b/src/libraries/System.Private.Uri/src/System/Uri.cs @@ -4435,8 +4435,7 @@ namespace System { if ((i + 1) < end) { - bool surrPair = false; - valid = IriHelper.CheckIriUnicodeRange(c, str[i + 1], ref surrPair, true); + valid = IriHelper.CheckIriUnicodeRange(c, str[i + 1], out _, true); } } else diff --git a/src/libraries/System.Private.Uri/src/System/UriHelper.cs b/src/libraries/System.Private.Uri/src/System/UriHelper.cs index 915931f..95e2c05 100644 --- a/src/libraries/System.Private.Uri/src/System/UriHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/UriHelper.cs @@ -575,12 +575,12 @@ namespace System if (iriParsing) { if (!isHighSurr) + { inIriRange = IriHelper.CheckIriUnicodeRange(unescapedChars[j], isQuery); + } else { - bool surrPair = false; - inIriRange = IriHelper.CheckIriUnicodeRange(unescapedChars[j], unescapedChars[j + 1], - ref surrPair, isQuery); + inIriRange = IriHelper.CheckIriUnicodeRange(unescapedChars[j], unescapedChars[j + 1], out _, isQuery); } } -- 2.7.4