From d4f630f0a63769faf96289eaa33c185def9f4220 Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Tue, 19 Jun 2018 18:50:38 -0700 Subject: [PATCH] Avoid extra allocation in IDNA (dotnet/coreclr#18551) * Avoid extra allocation in IDNA * cache the indexed value in StringBuilder Commit migrated from https://github.com/dotnet/coreclr/commit/5d34cc6642c3f8d406f0c4bdf74b7be26b1aa0df --- .../src/System/Globalization/CharUnicodeInfo.cs | 41 +++++++++++++++++++--- .../src/System/Globalization/IdnMapping.cs | 17 +++++---- 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs index 0cd8429..4acb67e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs @@ -13,6 +13,7 @@ //////////////////////////////////////////////////////////////////////////// using System.Diagnostics; +using System.Text; namespace System.Globalization { @@ -29,6 +30,7 @@ namespace System.Globalization internal const char HIGH_SURROGATE_END = '\udbff'; internal const char LOW_SURROGATE_START = '\udc00'; internal const char LOW_SURROGATE_END = '\udfff'; + internal const int HIGH_SURROGATE_RANGE = 0x3FF; internal const int UNICODE_CATEGORY_OFFSET = 0; internal const int BIDI_CATEGORY_OFFSET = 1; @@ -56,10 +58,10 @@ namespace System.Globalization if (index < s.Length - 1) { int temp1 = (int)s[index] - HIGH_SURROGATE_START; - if (temp1 >= 0 && temp1 <= 0x3ff) + if (temp1 >= 0 && temp1 <= HIGH_SURROGATE_RANGE) { int temp2 = (int)s[index + 1] - LOW_SURROGATE_START; - if (temp2 >= 0 && temp2 <= 0x3ff) + if (temp2 >= 0 && temp2 <= HIGH_SURROGATE_RANGE) { // Convert the surrogate to UTF32 and get the result. 
return ((temp1 * 0x400) + temp2 + UNICODE_PLANE01_START); @@ -68,6 +70,29 @@ namespace System.Globalization } return ((int)s[index]); } + + internal static int InternalConvertToUtf32(StringBuilder s, int index) + { + Debug.Assert(s != null, "s != null"); + Debug.Assert(index >= 0 && index < s.Length, "index < s.Length"); + + int c = (int)s[index]; + if (index < s.Length - 1) + { + int temp1 = c - HIGH_SURROGATE_START; + if (temp1 >= 0 && temp1 <= HIGH_SURROGATE_RANGE) + { + int temp2 = (int)s[index + 1] - LOW_SURROGATE_START; + if (temp2 >= 0 && temp2 <= HIGH_SURROGATE_RANGE) + { + // Convert the surrogate to UTF32 and get the result. + return ((temp1 * 0x400) + temp2 + UNICODE_PLANE01_START); + } + } + } + return c; + } + //////////////////////////////////////////////////////////////////////// // // Convert a character or a surrogate pair starting at index of string s @@ -99,10 +124,10 @@ namespace System.Globalization if (index < s.Length - 1) { int temp1 = (int)s[index] - HIGH_SURROGATE_START; - if (temp1 >= 0 && temp1 <= 0x3ff) + if (temp1 >= 0 && temp1 <= HIGH_SURROGATE_RANGE) { int temp2 = (int)s[index + 1] - LOW_SURROGATE_START; - if (temp2 >= 0 && temp2 <= 0x3ff) + if (temp2 >= 0 && temp2 <= HIGH_SURROGATE_RANGE) { // Convert the surrogate to UTF32 and get the result. charLength++; @@ -368,6 +393,14 @@ namespace System.Globalization return ((BidiCategory) InternalGetCategoryValue(InternalConvertToUtf32(s, index), BIDI_CATEGORY_OFFSET)); } + internal static BidiCategory GetBidiCategory(StringBuilder s, int index) + { + Debug.Assert(s != null, "s can not be null"); + Debug.Assert(index >= 0 && index < s.Length, "invalid index"); ; + + return ((BidiCategory) InternalGetCategoryValue(InternalConvertToUtf32(s, index), BIDI_CATEGORY_OFFSET)); + } + //////////////////////////////////////////////////////////////////////// // // Get the Unicode category of the character starting at index. If the character is in BMP, charLength will return 1. 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs index 8c9c2de..e732ffa 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs @@ -281,7 +281,7 @@ namespace System.Globalization // Need to validate entire string length, 1 shorter if last char wasn't a dot if (unicode.Length > c_defaultNameLimit - (IsDot(unicode[unicode.Length - 1]) ? 0 : 1)) - throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize, + throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize, c_defaultNameLimit - (IsDot(unicode[unicode.Length - 1]) ? 0 : 1)), nameof(unicode)); // If last char wasn't a dot we need to check for trailing - @@ -521,7 +521,7 @@ namespace System.Globalization // Throw if we're too long if (output.Length > c_defaultNameLimit - (IsDot(unicode[unicode.Length-1]) ? 0 : 1)) - throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize, + throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize, c_defaultNameLimit - (IsDot(unicode[unicode.Length-1]) ? 0 : 1)), nameof(unicode)); // Return our output string return output.ToString(); @@ -603,7 +603,7 @@ namespace System.Globalization // Throw if we're too long if (ascii.Length > c_defaultNameLimit - (IsDot(ascii[ascii.Length-1]) ? 0 : 1)) - throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize, + throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize, c_defaultNameLimit - (IsDot(ascii[ascii.Length-1]) ? 
0 : 1)), nameof(ascii)); // output stringbuilder @@ -637,7 +637,7 @@ namespace System.Globalization throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii)); // See if this section's ASCII or ACE - if (ascii.Length < c_strAcePrefix.Length + iAfterLastDot || + if (ascii.Length < c_strAcePrefix.Length + iAfterLastDot || string.Compare(ascii, iAfterLastDot, c_strAcePrefix, 0, c_strAcePrefix.Length, StringComparison.OrdinalIgnoreCase) != 0) { // Its ASCII, copy it @@ -715,7 +715,7 @@ namespace System.Globalization i += (int)(digit * w); int t = k <= bias ? c_tmin : k >= bias + c_tmax ? c_tmax : k - bias; - if (digit < t) + if (digit < t) break; Debug.Assert(c_punycodeBase != t, "[IdnMapping.punycode_decode]Expected t != c_punycodeBase (36)"); if (w > c_maxint / (c_punycodeBase - t)) @@ -777,7 +777,7 @@ namespace System.Globalization bool bRightToLeft = false; // Check for RTL. If right-to-left, then 1st & last chars must be RTL - BidiCategory eBidi = CharUnicodeInfo.GetBidiCategory(output.ToString(), iOutputAfterLastDot); + BidiCategory eBidi = CharUnicodeInfo.GetBidiCategory(output, iOutputAfterLastDot); if (eBidi == BidiCategory.RightToLeft || eBidi == BidiCategory.RightToLeftArabic) { // It has to be right to left. 
@@ -788,11 +788,11 @@ namespace System.Globalization for (int iTest = iOutputAfterLastDot; iTest < output.Length; iTest++) { // This might happen if we run into a pair - if (Char.IsLowSurrogate(output.ToString(), iTest)) + if (Char.IsLowSurrogate(output[iTest])) continue; // Check to see if its LTR - eBidi = CharUnicodeInfo.GetBidiCategory(output.ToString(), iTest); + eBidi = CharUnicodeInfo.GetBidiCategory(output, iTest); if ((bRightToLeft && eBidi == BidiCategory.LeftToRight) || (!bRightToLeft && (eBidi == BidiCategory.RightToLeft || eBidi == BidiCategory.RightToLeftArabic))) throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(ascii)); @@ -897,6 +897,5 @@ namespace System.Globalization // 0-25 map to a-z or A-Z return (char)(d + 'a'); } - } } -- 2.7.4