Avoid extra allocation in IDNA (dotnet/coreclr#18551)
author Tarek Mahmoud Sayed <tarekms@microsoft.com>
Wed, 20 Jun 2018 01:50:38 +0000 (18:50 -0700)
committer GitHub <noreply@github.com>
Wed, 20 Jun 2018 01:50:38 +0000 (18:50 -0700)
* Avoid extra allocation in IDNA

* cache the indexed value in StringBuilder

Commit migrated from https://github.com/dotnet/coreclr/commit/5d34cc6642c3f8d406f0c4bdf74b7be26b1aa0df

src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs
src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs

index 0cd8429..4acb67e 100644 (file)
@@ -13,6 +13,7 @@
 ////////////////////////////////////////////////////////////////////////////
 
 using System.Diagnostics;
+using System.Text;
 
 namespace System.Globalization
 {
@@ -29,6 +30,7 @@ namespace System.Globalization
         internal const char HIGH_SURROGATE_END = '\udbff';
         internal const char LOW_SURROGATE_START = '\udc00';
         internal const char LOW_SURROGATE_END = '\udfff';
+        internal const int  HIGH_SURROGATE_RANGE = 0x3FF;
 
         internal const int UNICODE_CATEGORY_OFFSET = 0;
         internal const int BIDI_CATEGORY_OFFSET = 1;
@@ -56,10 +58,10 @@ namespace System.Globalization
             if (index < s.Length - 1)
             {
                 int temp1 = (int)s[index] - HIGH_SURROGATE_START;
-                if (temp1 >= 0 && temp1 <= 0x3ff)
+                if (temp1 >= 0 && temp1 <= HIGH_SURROGATE_RANGE)
                 {
                     int temp2 = (int)s[index + 1] - LOW_SURROGATE_START;
-                    if (temp2 >= 0 && temp2 <= 0x3ff)
+                    if (temp2 >= 0 && temp2 <= HIGH_SURROGATE_RANGE)
                     {
                         // Convert the surrogate to UTF32 and get the result.
                         return ((temp1 * 0x400) + temp2 + UNICODE_PLANE01_START);
@@ -68,6 +70,29 @@ namespace System.Globalization
             }
             return ((int)s[index]);
         }
+
+        internal static int InternalConvertToUtf32(StringBuilder s, int index)
+        {
+            Debug.Assert(s != null, "s != null");
+            Debug.Assert(index >= 0 && index < s.Length, "index < s.Length");
+
+            int c = (int)s[index];
+            if (index < s.Length - 1)
+            {
+                int temp1 = c - HIGH_SURROGATE_START;
+                if (temp1 >= 0 && temp1 <= HIGH_SURROGATE_RANGE)
+                {
+                    int temp2 = (int)s[index + 1] - LOW_SURROGATE_START;
+                    if (temp2 >= 0 && temp2 <= HIGH_SURROGATE_RANGE)
+                    {
+                        // Convert the surrogate to UTF32 and get the result.
+                        return ((temp1 * 0x400) + temp2 + UNICODE_PLANE01_START);
+                    }
+                }
+            }
+            return c;
+        }
+
         ////////////////////////////////////////////////////////////////////////
         //
         // Convert a character or a surrogate pair starting at index of string s
@@ -99,10 +124,10 @@ namespace System.Globalization
             if (index < s.Length - 1)
             {
                 int temp1 = (int)s[index] - HIGH_SURROGATE_START;
-                if (temp1 >= 0 && temp1 <= 0x3ff)
+                if (temp1 >= 0 && temp1 <= HIGH_SURROGATE_RANGE)
                 {
                     int temp2 = (int)s[index + 1] - LOW_SURROGATE_START;
-                    if (temp2 >= 0 && temp2 <= 0x3ff)
+                    if (temp2 >= 0 && temp2 <= HIGH_SURROGATE_RANGE)
                     {
                         // Convert the surrogate to UTF32 and get the result.
                         charLength++;
@@ -368,6 +393,14 @@ namespace System.Globalization
             return ((BidiCategory) InternalGetCategoryValue(InternalConvertToUtf32(s, index), BIDI_CATEGORY_OFFSET));
         }
 
+        internal static BidiCategory GetBidiCategory(StringBuilder s, int index)
+        {
+            Debug.Assert(s != null, "s can not be null");
+            Debug.Assert(index >= 0 && index < s.Length, "invalid index"); ;
+
+            return ((BidiCategory) InternalGetCategoryValue(InternalConvertToUtf32(s, index), BIDI_CATEGORY_OFFSET));
+        }
+
         ////////////////////////////////////////////////////////////////////////
         //
         // Get the Unicode category of the character starting at index.  If the character is in BMP, charLength will return 1.
index 8c9c2de..e732ffa 100644 (file)
@@ -281,7 +281,7 @@ namespace System.Globalization
 
             // Need to validate entire string length, 1 shorter if last char wasn't a dot
             if (unicode.Length > c_defaultNameLimit - (IsDot(unicode[unicode.Length - 1]) ? 0 : 1))
-                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize, 
+                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize,
                                                         c_defaultNameLimit - (IsDot(unicode[unicode.Length - 1]) ? 0 : 1)), nameof(unicode));
 
             // If last char wasn't a dot we need to check for trailing -
@@ -521,7 +521,7 @@ namespace System.Globalization
 
             // Throw if we're too long
             if (output.Length > c_defaultNameLimit - (IsDot(unicode[unicode.Length-1]) ? 0 : 1))
-                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize, 
+                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize,
                                                 c_defaultNameLimit - (IsDot(unicode[unicode.Length-1]) ? 0 : 1)), nameof(unicode));
             // Return our output string
             return output.ToString();
@@ -603,7 +603,7 @@ namespace System.Globalization
 
             // Throw if we're too long
             if (ascii.Length > c_defaultNameLimit - (IsDot(ascii[ascii.Length-1]) ? 0 : 1))
-                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize, 
+                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize,
                                             c_defaultNameLimit - (IsDot(ascii[ascii.Length-1]) ? 0 : 1)), nameof(ascii));
 
             // output stringbuilder
@@ -637,7 +637,7 @@ namespace System.Globalization
                     throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii));
 
                 // See if this section's ASCII or ACE
-                if (ascii.Length < c_strAcePrefix.Length + iAfterLastDot || 
+                if (ascii.Length < c_strAcePrefix.Length + iAfterLastDot ||
                     string.Compare(ascii, iAfterLastDot, c_strAcePrefix, 0, c_strAcePrefix.Length, StringComparison.OrdinalIgnoreCase) != 0)
                 {
                     // Its ASCII, copy it
@@ -715,7 +715,7 @@ namespace System.Globalization
 
                             i += (int)(digit * w);
                             int t = k <= bias ? c_tmin : k >= bias + c_tmax ? c_tmax : k - bias;
-                            if (digit < t) 
+                            if (digit < t)
                                 break;
                             Debug.Assert(c_punycodeBase != t, "[IdnMapping.punycode_decode]Expected t != c_punycodeBase (36)");
                             if (w > c_maxint / (c_punycodeBase - t))
@@ -777,7 +777,7 @@ namespace System.Globalization
                     bool bRightToLeft = false;
 
                     // Check for RTL.  If right-to-left, then 1st & last chars must be RTL
-                    BidiCategory eBidi = CharUnicodeInfo.GetBidiCategory(output.ToString(), iOutputAfterLastDot);
+                    BidiCategory eBidi = CharUnicodeInfo.GetBidiCategory(output, iOutputAfterLastDot);
                     if (eBidi == BidiCategory.RightToLeft || eBidi == BidiCategory.RightToLeftArabic)
                     {
                         // It has to be right to left.
@@ -788,11 +788,11 @@ namespace System.Globalization
                     for (int iTest = iOutputAfterLastDot; iTest < output.Length; iTest++)
                     {
                         // This might happen if we run into a pair
-                        if (Char.IsLowSurrogate(output.ToString(), iTest)) 
+                        if (Char.IsLowSurrogate(output[iTest]))
                             continue;
 
                         // Check to see if its LTR
-                        eBidi = CharUnicodeInfo.GetBidiCategory(output.ToString(), iTest);
+                        eBidi = CharUnicodeInfo.GetBidiCategory(output, iTest);
                         if ((bRightToLeft && eBidi == BidiCategory.LeftToRight) ||
                             (!bRightToLeft && (eBidi == BidiCategory.RightToLeft || eBidi == BidiCategory.RightToLeftArabic)))
                             throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(ascii));
@@ -897,6 +897,5 @@ namespace System.Globalization
             //  0-25 map to a-z or A-Z
             return (char)(d + 'a');
         }
-        
     }
 }