Share ICU culture init for unix/win with validation (#46660)
authorAdeel Mujahid <3840695+am11@users.noreply.github.com>
Fri, 8 Jan 2021 17:50:00 +0000 (19:50 +0200)
committerGitHub <noreply@github.com>
Fri, 8 Jan 2021 17:50:00 +0000 (09:50 -0800)
src/libraries/System.Globalization/tests/CultureInfo/GetCultureInfo.cs
src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.Icu.cs
src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.Unix.cs
src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.Windows.cs

index 8ddfe93..cde9df0 100644 (file)
@@ -10,22 +10,77 @@ namespace System.Globalization.Tests
     {
         public static bool PlatformSupportsFakeCulture => !PlatformDetection.IsWindows || (PlatformDetection.WindowsVersion >= 10 && !PlatformDetection.IsNetFramework);
 
+        public static IEnumerable<object[]> GetCultureInfoTestData()
+        {
+            yield return new object[] { "en" };
+            yield return new object[] { "en-US" };
+            yield return new object[] { "ja-JP" };
+            yield return new object[] { "ar-SA" };
+            yield return new object[] { "xx-XX" };
+            yield return new object[] { "de-AT-1901" };
+            yield return new object[] { "zh-Hans" };
+            yield return new object[] { "zh-Hans-HK" };
+            yield return new object[] { "zh-Hans-MO" };
+            yield return new object[] { "zh-Hans-TW" };
+            yield return new object[] { "zh-Hant" };
+            yield return new object[] { "zh-Hant-CN" };
+            yield return new object[] { "zh-Hant-SG" };
+
+            if (PlatformDetection.IsIcuGlobalization)
+            {
+                yield return new object[] { "x\u0000X-Yy", "x" }; // Null byte
+                yield return new object[] { "sgn-BE-FR" };
+                yield return new object[] { "zh-min-nan", "nan" };
+                yield return new object[] { "zh-cmn", "zh-CMN" };
+                yield return new object[] { "zh-CMN-HANS" };
+                yield return new object[] { "zh-cmn-Hant", "zh-CMN-HANT" };
+                yield return new object[] { "zh-gan", "gan" };
+                yield return new object[] { "zh-Hans-CN" };
+                yield return new object[] { "zh-Hans-SG" };
+                yield return new object[] { "zh-Hant-HK" };
+                yield return new object[] { "zh-Hant-MO" };
+                yield return new object[] { "zh-Hant-TW" };
+                yield return new object[] { "zh-yue", "yue" };
+                yield return new object[] { "zh-wuu", "wuu" };
+            }
+            else
+            {
+                yield return new object[] { "sgn-BE-FR", "sgn-BE-fr" };
+                yield return new object[] { "zh-Hans-CN", "zh-CN" };
+                yield return new object[] { "zh-Hans-SG", "zh-SG" };
+                yield return new object[] { "zh-Hant-HK", "zh-HK" };
+                yield return new object[] { "zh-Hant-MO", "zh-MO" };
+                yield return new object[] { "zh-Hant-TW", "zh-TW" };
+            }
+        }
+
         [ConditionalTheory(nameof(PlatformSupportsFakeCulture))]
-        [InlineData("en")]
-        [InlineData("en-US")]
-        [InlineData("ja-JP")]
-        [InlineData("ar-SA")]
-        [InlineData("xx-XX")]
-        public void GetCultureInfo(string name)
+        [MemberData(nameof(GetCultureInfoTestData))]
+        public void GetCultureInfo(string name, string expected = null)
         {
-            Assert.Equal(name, CultureInfo.GetCultureInfo(name).Name);
-            Assert.Equal(name, CultureInfo.GetCultureInfo(name, predefinedOnly: false).Name);
+            if (expected == null) expected = name;
+            Assert.Equal(expected, CultureInfo.GetCultureInfo(name).Name);
+            Assert.Equal(expected, CultureInfo.GetCultureInfo(name, predefinedOnly: false).Name);
         }
 
         [ConditionalTheory(nameof(PlatformSupportsFakeCulture))]
+        [InlineData("z")]
         [InlineData("en@US")]
         [InlineData("\uFFFF")]
+        [InlineData("\u0080")]
+        [InlineData("-foo")]
+        [InlineData("foo-")]
+        [InlineData("/foo")]
+        [InlineData("_bar")]
+        [InlineData("bar_")]
+        [InlineData("bar/")]
+        [InlineData("foo__bar")]
+        [InlineData("foo--bar")]
+        [InlineData("foo-_bar")]
+        [InlineData("foo_-bar")]
+        [InlineData("foo/bar")]
         [InlineData("/")]
+        [InlineData("0123456789012345678901234567890123456789012345678901234567890123456789012345678901234")] // > 85 characters
         public void TestInvalidCultureNames(string name)
         {
             Assert.Throws<CultureNotFoundException>(() => CultureInfo.GetCultureInfo(name));
index 855d11a..8be3854 100644 (file)
@@ -13,6 +13,69 @@ namespace System.Globalization
         private const int ICU_ULOC_KEYWORD_AND_VALUES_CAPACITY = 100; // max size of keyword or value
         private const int ICU_ULOC_FULLNAME_CAPACITY = 157;           // max size of locale name
 
+        /// <summary>
+        /// This method uses the sRealName field (which is initialized by the constructor before this is called) to
+        /// initialize the rest of the state of CultureData based on the underlying OS globalization library.
+        /// </summary>
+        private bool InitIcuCultureDataCore()
+        {
+            Debug.Assert(_sRealName != null);
+            Debug.Assert(!GlobalizationMode.Invariant);
+
+            const string ICU_COLLATION_KEYWORD = "@collation=";
+            string realNameBuffer = _sRealName;
+
+            // Basic validation
+            if (!IsValidCultureName(realNameBuffer, out var index))
+            {
+                return false;
+            }
+
+            // Replace _ (alternate sort) with @collation= for ICU
+            ReadOnlySpan<char> alternateSortName = default;
+            if (index > 0)
+            {
+                alternateSortName = realNameBuffer.AsSpan(index + 1);
+                realNameBuffer = string.Concat(realNameBuffer.AsSpan(0, index), ICU_COLLATION_KEYWORD, alternateSortName);
+            }
+
+            // Get the locale name from ICU
+            if (!GetLocaleName(realNameBuffer, out _sWindowsName))
+            {
+                return false; // fail
+            }
+
+            // Replace the ICU collation keyword with an _
+            Debug.Assert(_sWindowsName != null);
+            index = _sWindowsName.IndexOf(ICU_COLLATION_KEYWORD, StringComparison.Ordinal);
+            if (index >= 0)
+            {
+                _sName = string.Concat(_sWindowsName.AsSpan(0, index), "_", alternateSortName);
+            }
+            else
+            {
+                _sName = _sWindowsName;
+            }
+            _sRealName = _sName;
+
+            _iLanguage = LCID;
+            if (_iLanguage == 0)
+            {
+                _iLanguage = CultureInfo.LOCALE_CUSTOM_UNSPECIFIED;
+            }
+
+            _bNeutral = TwoLetterISOCountryName.Length == 0;
+
+            _sSpecificCulture = _bNeutral ? IcuLocaleData.GetSpecificCultureName(_sRealName) : _sRealName;
+
+            // Remove the sort from sName unless custom culture
+            if (index > 0 && !_bNeutral && !IsCustomCultureId(_iLanguage))
+            {
+                _sName = _sWindowsName.Substring(0, index);
+            }
+            return true;
+        }
+
         internal static unsafe bool GetLocaleName(string localeName, out string? windowsName)
         {
             // Get the locale name from ICU
@@ -342,5 +405,59 @@ namespace System.Globalization
             Debug.Assert(!GlobalizationMode.UseNls);
             return IcuLocaleData.GetConsoleUICulture(cultureName);
         }
+
+        /// <summary>
+        /// Implementation of culture name validation.
+        /// </summary>
+        /// <remarks>
+        /// This is a fast approximate implementation based on BCP47 spec. It covers only parts of
+        /// the spec; such that, when it returns false, the input is definitely in incorrect format.
+        /// However, it returns true for some characters which are not allowed by the spec. It also
+        /// returns true for some inputs where spec specifies the lengths of subtags, but we are not
+        /// validating subtags individually to keep algorithm's computational complexity at O(n).
+        ///
+        /// Rules of implementation:
+        /// * Allow only letters, digits, - and '_' or \0 (NULL is for backward compatibility).
+        /// * Allow input length of zero (for invariant culture) or otherwise greater than 1 and less than or equal LocaleNameMaxLength.
+        /// * Disallow input that starts or ends with '-' or '_'.
+        /// * Disallow input that has any combination of consecutive '-' or '_'.
+        /// * Disallow input that has multiple '_'.
+        /// </remarks>
+        private static bool IsValidCultureName(string subject, out int indexOfUnderscore)
+        {
+            indexOfUnderscore = -1;
+
+            if (subject.Length == 0) return true; // Invariant Culture
+            if (subject.Length == 1 || subject.Length > LocaleNameMaxLength) return false;
+
+            bool seenUnderscore = false;
+            for (int i = 0; i < subject.Length; ++i)
+            {
+                char c = subject[i];
+
+                if ((uint)(c - 'A') <= ('Z' - 'A') || (uint)(c - 'a') <= ('z' - 'a') || (uint)(c - '0') <= ('9' - '0') || c == '\0')
+                {
+                    continue;
+                }
+
+                if (c == '_' || c == '-')
+                {
+                    if (i == 0 || i == subject.Length - 1) return false;
+                    if (subject[i - 1] == '_' || subject[i - 1] == '-') return false;
+                    if (c == '_')
+                    {
+                        if (seenUnderscore) return false; // only one _ is allowed
+                        seenUnderscore = true;
+                        indexOfUnderscore = i;
+                    }
+                }
+                else
+                {
+                    return false;
+                }
+            }
+
+            return true;
+        }
     }
 }
index 5d35cfc..5db6bcf 100644 (file)
@@ -7,75 +7,7 @@ namespace System.Globalization
 {
     internal partial class CultureData
     {
-        private const string ICU_COLLATION_KEYWORD = "@collation=";
-
-        /// <summary>
-        /// This method uses the sRealName field (which is initialized by the constructor before this is called) to
-        /// initialize the rest of the state of CultureData based on the underlying OS globalization library.
-        /// </summary>
-        private bool InitCultureDataCore()
-        {
-            Debug.Assert(_sRealName != null);
-            Debug.Assert(!GlobalizationMode.Invariant);
-
-            string realNameBuffer = _sRealName;
-
-            // Basic validation
-            if (realNameBuffer.Contains('@'))
-            {
-                return false; // don't allow ICU variants to come in directly
-            }
-
-            // Replace _ (alternate sort) with @collation= for ICU
-            ReadOnlySpan<char> alternateSortName = default;
-            int index = realNameBuffer.IndexOf('_');
-            if (index > 0)
-            {
-                if (index >= (realNameBuffer.Length - 1) // must have characters after _
-                    || realNameBuffer.IndexOf('_', index + 1) >= 0) // only one _ allowed
-                {
-                    return false; // fail
-                }
-                alternateSortName = realNameBuffer.AsSpan(index + 1);
-                realNameBuffer = string.Concat(realNameBuffer.AsSpan(0, index), ICU_COLLATION_KEYWORD, alternateSortName);
-            }
-
-            // Get the locale name from ICU
-            if (!GetLocaleName(realNameBuffer, out _sWindowsName))
-            {
-                return false; // fail
-            }
-
-            // Replace the ICU collation keyword with an _
-            Debug.Assert(_sWindowsName != null);
-            index = _sWindowsName.IndexOf(ICU_COLLATION_KEYWORD, StringComparison.Ordinal);
-            if (index >= 0)
-            {
-                _sName = string.Concat(_sWindowsName.AsSpan(0, index), "_", alternateSortName);
-            }
-            else
-            {
-                _sName = _sWindowsName;
-            }
-            _sRealName = _sName;
-
-            _iLanguage = LCID;
-            if (_iLanguage == 0)
-            {
-                _iLanguage = CultureInfo.LOCALE_CUSTOM_UNSPECIFIED;
-            }
-
-            _bNeutral = TwoLetterISOCountryName.Length == 0;
-
-            _sSpecificCulture = _bNeutral ? IcuLocaleData.GetSpecificCultureName(_sRealName) : _sRealName;
-
-            // Remove the sort from sName unless custom culture
-            if (index > 0 && !_bNeutral && !IsCustomCultureId(_iLanguage))
-            {
-                _sName = _sWindowsName.Substring(0, index);
-            }
-            return true;
-        }
+        private bool InitCultureDataCore() => InitIcuCultureDataCore();
 
         private void InitUserOverride(bool useUserOverride)
         {
index 2c77cf6..cb21ae6 100644 (file)
@@ -42,6 +42,11 @@ namespace System.Globalization
         /// </summary>
         private unsafe bool InitCultureDataCore()
         {
+            if (!ShouldUseUserOverrideNlsData)
+            {
+                return InitIcuCultureDataCore();
+            }
+
             Debug.Assert(!GlobalizationMode.Invariant);
 
             int result;
@@ -63,8 +68,6 @@ namespace System.Globalization
             realNameBuffer = _sRealName;
 
             // Check for neutrality, don't expect to fail
-            // (buffer has our name in it, so we don't have to do the gc. stuff)
-
             result = GetLocaleInfoEx(realNameBuffer, Interop.Kernel32.LOCALE_INEUTRAL | Interop.Kernel32.LOCALE_RETURN_NUMBER, pBuffer, sizeof(int) / sizeof(char));
             if (result == 0)
             {