From d905f67f12c6b2eed918894e0642ec972a1d9fec Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Sun, 19 Mar 2017 16:19:56 -0700 Subject: [PATCH] Invariant globalization (#10264) * Invariant Globalization Work * Convert the testing Exceptions to asserts * Remove un-needed comment * Fix typos * Fix unrelated typo * Address the PR feedback * More feedback addressing * More feedback addressing * Fix Linux break * More feedback addressing * cleanup --- src/corefx/System.Globalization.Native/icushim.cpp | 16 +- src/mscorlib/System.Private.CoreLib.csproj | 5 + .../Interop.Collation.cs | 2 + .../System.Globalization.Native/Interop.ICU.cs | 16 + src/mscorlib/src/SR.cs | 15 + src/mscorlib/src/System/AppDomainSetup.cs | 2 +- src/mscorlib/src/System/CLRConfig.cs | 15 + .../src/System/Globalization/CalendarData.Unix.cs | 4 + .../System/Globalization/CalendarData.Windows.cs | 13 + .../src/System/Globalization/CalendarData.cs | 2 + .../System/Globalization/CompareInfo.Invariant.cs | 238 +++++++ .../src/System/Globalization/CompareInfo.Unix.cs | 62 +- .../System/Globalization/CompareInfo.Windows.cs | 54 +- .../src/System/Globalization/CompareInfo.cs | 201 ++++-- .../src/System/Globalization/CultureData.Unix.cs | 14 +- .../System/Globalization/CultureData.Windows.cs | 10 +- .../src/System/Globalization/CultureData.cs | 297 +++++--- .../src/System/Globalization/CultureInfo.Unix.cs | 3 + .../System/Globalization/CultureInfo.Windows.cs | 6 + .../System/Globalization/GlobalizationMode.Unix.cs | 24 + .../Globalization/GlobalizationMode.Windows.cs | 14 + .../src/System/Globalization/GlobalizationMode.cs | 12 + .../src/System/Globalization/IdnMapping.Unix.cs | 6 + .../src/System/Globalization/IdnMapping.Windows.cs | 8 + .../src/System/Globalization/IdnMapping.cs | 745 +++++++++++++++++++++ .../System/Globalization/JapaneseCalendar.Unix.cs | 7 + .../src/System/Globalization/TextInfo.Unix.cs | 21 +- .../src/System/Globalization/TextInfo.Windows.cs | 88 ++- 
src/mscorlib/src/System/Globalization/TextInfo.cs | 132 +++- src/mscorlib/src/System/Text/Normalization.Unix.cs | 15 + .../src/System/Text/Normalization.Windows.cs | 14 + src/mscorlib/src/System/TimeZoneInfo.Unix.cs | 6 + src/vm/CMakeLists.txt | 1 + src/vm/clrconfignative.cpp | 21 + src/vm/clrconfignative.h | 14 + src/vm/ecalllist.h | 4 + src/vm/mscorlib.cpp | 1 + 37 files changed, 1850 insertions(+), 258 deletions(-) create mode 100644 src/mscorlib/src/Interop/Unix/System.Globalization.Native/Interop.ICU.cs create mode 100644 src/mscorlib/src/System/Globalization/CompareInfo.Invariant.cs create mode 100644 src/mscorlib/src/System/Globalization/GlobalizationMode.Unix.cs create mode 100644 src/mscorlib/src/System/Globalization/GlobalizationMode.Windows.cs create mode 100644 src/mscorlib/src/System/Globalization/GlobalizationMode.cs create mode 100644 src/vm/clrconfignative.cpp create mode 100644 src/vm/clrconfignative.h diff --git a/src/corefx/System.Globalization.Native/icushim.cpp b/src/corefx/System.Globalization.Native/icushim.cpp index 1c2187b..f8f6dd3 100644 --- a/src/corefx/System.Globalization.Native/icushim.cpp +++ b/src/corefx/System.Globalization.Native/icushim.cpp @@ -170,9 +170,11 @@ bool FindLibWithMajorMinorSubVersion(int* majorVer, int* minorVer, int* subVer) return false; } -// This function is ran at the end of dlopen for the current shared library -__attribute__((constructor)) -void InitializeICUShim() +// GlobalizationNative_LoadICU +// This method get called from the managed side during the globalization initialization. 
+// This method shouldn't get called at all if we are running in globalization invariant mode +// return 0 if failed to load ICU and 1 otherwise +extern "C" int32_t GlobalizationNative_LoadICU() { int majorVer = -1; int minorVer = -1; @@ -185,8 +187,7 @@ void InitializeICUShim() !FindLibWithMajorVersion(&majorVer)) { // No usable ICU version found - fprintf(stderr, "No usable version of the ICU libraries was found\n"); - abort(); + return 0; } char symbolName[128]; @@ -211,8 +212,7 @@ void InitializeICUShim() sprintf(symbolName, "u_strlen%s", symbolVersion); if (dlsym(libicuuc, symbolName) == nullptr) { - fprintf(stderr, "ICU libraries use unknown symbol versioning\n"); - abort(); + return 0; } } } @@ -227,6 +227,8 @@ void InitializeICUShim() FOR_ALL_ICU_FUNCTIONS #undef PER_FUNCTION_BLOCK + + return 1; } __attribute__((destructor)) diff --git a/src/mscorlib/System.Private.CoreLib.csproj b/src/mscorlib/System.Private.CoreLib.csproj index d8382b2..be0b410 100644 --- a/src/mscorlib/System.Private.CoreLib.csproj +++ b/src/mscorlib/System.Private.CoreLib.csproj @@ -560,6 +560,7 @@ + @@ -572,6 +573,7 @@ + @@ -613,6 +615,7 @@ + @@ -620,6 +623,7 @@ + @@ -635,6 +639,7 @@ + diff --git a/src/mscorlib/src/Interop/Unix/System.Globalization.Native/Interop.Collation.cs b/src/mscorlib/src/Interop/Unix/System.Globalization.Native/Interop.Collation.cs index 79aedd7..5cd2ee5 100644 --- a/src/mscorlib/src/Interop/Unix/System.Globalization.Native/Interop.Collation.cs +++ b/src/mscorlib/src/Interop/Unix/System.Globalization.Native/Interop.Collation.cs @@ -60,6 +60,8 @@ internal static partial class Interop protected override bool ReleaseHandle() { + Debug.Assert(!GlobalizationMode.Invariant); + CloseSortHandle(handle); SetHandle(IntPtr.Zero); return true; diff --git a/src/mscorlib/src/Interop/Unix/System.Globalization.Native/Interop.ICU.cs b/src/mscorlib/src/Interop/Unix/System.Globalization.Native/Interop.ICU.cs new file mode 100644 index 0000000..c690884 --- /dev/null +++ 
b/src/mscorlib/src/Interop/Unix/System.Globalization.Native/Interop.ICU.cs @@ -0,0 +1,16 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Runtime.InteropServices; +using System.Runtime.CompilerServices; + +internal static partial class Interop +{ + internal static partial class GlobalizationInterop + { + [DllImport(Libraries.GlobalizationInterop, EntryPoint = "GlobalizationNative_LoadICU")] + internal static extern int LoadICU(); + } +} diff --git a/src/mscorlib/src/SR.cs b/src/mscorlib/src/SR.cs index c11743a..7ef57ec 100644 --- a/src/mscorlib/src/SR.cs +++ b/src/mscorlib/src/SR.cs @@ -186,16 +186,31 @@ internal static class SR get { return Environment.GetResourceString("Argument_EmptyDecString"); } } + public static string Argument_IdnBadBidi + { + get { return Environment.GetResourceString("Argument_IdnBadBidi"); } + } + public static string Argument_IdnBadLabelSize { get { return Environment.GetResourceString("Argument_IdnBadLabelSize"); } } + public static string Argument_IdnBadNameSize + { + get { return Environment.GetResourceString("Argument_IdnBadNameSize"); } + } + public static string Argument_IdnBadPunycode { get { return Environment.GetResourceString("Argument_IdnBadPunycode"); } } + public static string Argument_IdnBadStd3 + { + get { return Environment.GetResourceString("Argument_IdnBadStd3"); } + } + public static string Argument_IdnIllegalName { get { return Environment.GetResourceString("Argument_IdnIllegalName"); } diff --git a/src/mscorlib/src/System/AppDomainSetup.cs b/src/mscorlib/src/System/AppDomainSetup.cs index a8a1d09..34e6cb6 100644 --- a/src/mscorlib/src/System/AppDomainSetup.cs +++ b/src/mscorlib/src/System/AppDomainSetup.cs @@ -268,7 +268,7 @@ namespace System #if !PLATFORM_UNIX trim = 8; #else - // For Unix platform, trim the first 7 charcaters 
only. + // For Unix platform, trim the first 7 characters only. // Trimming the first 8 characters will cause // the root path separator to be trimmed away, // and the absolute local path becomes a relative local path. diff --git a/src/mscorlib/src/System/CLRConfig.cs b/src/mscorlib/src/System/CLRConfig.cs index 98fe6f3..d97922f 100644 --- a/src/mscorlib/src/System/CLRConfig.cs +++ b/src/mscorlib/src/System/CLRConfig.cs @@ -9,6 +9,21 @@ using System.Security; namespace System { + // CLRConfig is mainly reading the config switch values. this is used when we cannot use the AppContext class + // one example, is using the context switch in the globalization code which require to read the switch very + // early even before the appdomain get initialized. + // In general AppContext should be used instead of CLRConfig if there is no reason prevent that. + internal class CLRConfig + { + internal static bool GetBoolValue(string switchName) + { + return GetConfigBoolValue(switchName); + } + + [DllImport(JitHelpers.QCall, CharSet = CharSet.Unicode)] + [SuppressUnmanagedCodeSecurity] + private extern static bool GetConfigBoolValue(string configSwitchName); + } } // namespace System // file CLRConfig diff --git a/src/mscorlib/src/System/Globalization/CalendarData.Unix.cs b/src/mscorlib/src/System/Globalization/CalendarData.Unix.cs index 19c81f1..319f66a 100644 --- a/src/mscorlib/src/System/Globalization/CalendarData.Unix.cs +++ b/src/mscorlib/src/System/Globalization/CalendarData.Unix.cs @@ -66,6 +66,8 @@ namespace System.Globalization // Call native side to figure out which calendars are allowed internal static int GetCalendars(string localeName, bool useUserOverride, CalendarId[] calendars) { + Debug.Assert(!GlobalizationMode.Invariant); + // NOTE: there are no 'user overrides' on Linux int count = Interop.GlobalizationInterop.GetCalendars(localeName, calendars, calendars.Length); @@ -88,6 +90,8 @@ namespace System.Globalization private static bool GetCalendarInfo(string 
localeName, CalendarId calendarId, CalendarDataType dataType, out string calendarString) { + Debug.Assert(!GlobalizationMode.Invariant); + return Interop.CallStringMethod( (locale, calId, type, stringBuilder) => Interop.GlobalizationInterop.GetCalendarInfo( diff --git a/src/mscorlib/src/System/Globalization/CalendarData.Windows.cs b/src/mscorlib/src/System/Globalization/CalendarData.Windows.cs index 4565dff..206d077 100644 --- a/src/mscorlib/src/System/Globalization/CalendarData.Windows.cs +++ b/src/mscorlib/src/System/Globalization/CalendarData.Windows.cs @@ -22,6 +22,8 @@ namespace System.Globalization { private bool LoadCalendarDataFromSystem(String localeName, CalendarId calendarId) { + Debug.Assert(!GlobalizationMode.Invariant); + bool ret = true; uint useOverrides = this.bUseUserOverrides ? 0 : CAL_NOUSEROVERRIDE; @@ -116,6 +118,11 @@ namespace System.Globalization // Get native two digit year max internal static int GetTwoDigitYearMax(CalendarId calendarId) { + if (GlobalizationMode.Invariant) + { + return Invariant.iTwoDigitYearMax; + } + int twoDigitYearMax = -1; if (!CallGetCalendarInfoEx(null, calendarId, (uint)CAL_ITWODIGITYEARMAX, out twoDigitYearMax)) @@ -129,6 +136,8 @@ namespace System.Globalization // Call native side to figure out which calendars are allowed internal static int GetCalendars(String localeName, bool useUserOverride, CalendarId[] calendars) { + Debug.Assert(!GlobalizationMode.Invariant); + EnumCalendarsData data = new EnumCalendarsData(); data.userOverride = 0; data.calendars = new IntList(); @@ -173,6 +182,8 @@ namespace System.Globalization private static bool SystemSupportsTaiwaneseCalendar() { + Debug.Assert(!GlobalizationMode.Invariant); + string data; // Taiwanese calendar get listed as one of the optional zh-TW calendars only when having zh-TW UI return CallGetCalendarInfoEx("zh-TW", CalendarId.TAIWAN, CAL_SCALNAME, out data); @@ -474,6 +485,8 @@ namespace System.Globalization private static unsafe String 
GetUserDefaultLocaleName() { + Debug.Assert(!GlobalizationMode.Invariant); + const int LOCALE_NAME_MAX_LENGTH = 85; const uint LOCALE_SNAME = 0x0000005c; const string LOCALE_NAME_USER_DEFAULT = null; diff --git a/src/mscorlib/src/System/Globalization/CalendarData.cs b/src/mscorlib/src/System/Globalization/CalendarData.cs index 3a1e827..0991149 100644 --- a/src/mscorlib/src/System/Globalization/CalendarData.cs +++ b/src/mscorlib/src/System/Globalization/CalendarData.cs @@ -101,6 +101,8 @@ namespace System.Globalization { this.bUseUserOverrides = bUseUserOverrides; + Debug.Assert(!GlobalizationMode.Invariant); + if (!LoadCalendarDataFromSystem(localeName, calendarId)) { Debug.Assert(false, "[CalendarData] LoadCalendarDataFromSystem call isn't expected to fail for calendar " + calendarId + " locale " + localeName); diff --git a/src/mscorlib/src/System/Globalization/CompareInfo.Invariant.cs b/src/mscorlib/src/System/Globalization/CompareInfo.Invariant.cs new file mode 100644 index 0000000..2a20de7 --- /dev/null +++ b/src/mscorlib/src/System/Globalization/CompareInfo.Invariant.cs @@ -0,0 +1,238 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Diagnostics; +using System.Diagnostics.Contracts; + +namespace System.Globalization +{ + public partial class CompareInfo + { + internal static unsafe int InvariantIndexOf(string source, string value, int startIndex, int count, bool ignoreCase) + { + Debug.Assert(source != null); + Debug.Assert(value != null); + Debug.Assert(startIndex >= 0 && startIndex < source.Length); + + fixed (char* pSource = source) fixed (char* pValue = value) + { + char* pSrc = &pSource[startIndex]; + int index = InvariantFindString(pSrc, count, pValue, value.Length, ignoreCase, start : true); + if (index >= 0) + { + return index + startIndex; + } + return -1; + } + } + + internal static unsafe int InvariantLastIndexOf(string source, string value, int startIndex, int count, bool ignoreCase) + { + Debug.Assert(source != null); + Debug.Assert(value != null); + Debug.Assert(startIndex >= 0 && startIndex < source.Length); + + fixed (char* pSource = source) fixed (char* pValue = value) + { + char* pSrc = &pSource[startIndex - count + 1]; + int index = InvariantFindString(pSrc, count, pValue, value.Length, ignoreCase, start : false); + if (index >= 0) + { + return index + startIndex - count + 1; + } + return -1; + } + } + + private static unsafe int InvariantFindString(char* source, int sourceCount, char* value, int valueCount, bool ignoreCase, bool start) + { + int ctrSource = 0; // index value into source + int ctrValue = 0; // index value into value + char sourceChar; // Character for case lookup in source + char valueChar; // Character for case lookup in value + int lastSourceStart; + + Debug.Assert(source != null); + Debug.Assert(value != null); + Debug.Assert(sourceCount >= 0); + Debug.Assert(valueCount >= 0); + + if (valueCount == 0) + { + return start ? 
0 : sourceCount - 1; + } + + if (sourceCount < valueCount) + { + return -1; + } + + if (start) + { + lastSourceStart = sourceCount - valueCount; + if (ignoreCase) + { + char firstValueChar = InvariantToUpper(value[0]); + for (ctrSource = 0; ctrSource <= lastSourceStart; ctrSource++) + { + sourceChar = InvariantToUpper(source[ctrSource]); + if (sourceChar != firstValueChar) + { + continue; + } + + for (ctrValue = 1; ctrValue < valueCount; ctrValue++) + { + sourceChar = InvariantToUpper(source[ctrSource + ctrValue]); + valueChar = InvariantToUpper(value[ctrValue]); + + if (sourceChar != valueChar) + { + break; + } + } + + if (ctrValue == valueCount) + { + return ctrSource; + } + } + } + else + { + char firstValueChar = value[0]; + for (ctrSource = 0; ctrSource <= lastSourceStart; ctrSource++) + { + sourceChar = source[ctrSource]; + if (sourceChar != firstValueChar) + { + continue; + } + + for (ctrValue = 1; ctrValue < valueCount; ctrValue++) + { + sourceChar = source[ctrSource + ctrValue]; + valueChar = value[ctrValue]; + + if (sourceChar != valueChar) + { + break; + } + } + + if (ctrValue == valueCount) + { + return ctrSource; + } + } + } + } + else + { + lastSourceStart = sourceCount - valueCount; + if (ignoreCase) + { + char firstValueChar = InvariantToUpper(value[0]); + for (ctrSource = lastSourceStart; ctrSource >= 0; ctrSource--) + { + sourceChar = InvariantToUpper(source[ctrSource]); + if (sourceChar != firstValueChar) + { + continue; + } + for (ctrValue = 1; ctrValue < valueCount; ctrValue++) + { + sourceChar = InvariantToUpper(source[ctrSource + ctrValue]); + valueChar = InvariantToUpper(value[ctrValue]); + + if (sourceChar != valueChar) + { + break; + } + } + + if (ctrValue == valueCount) + { + return ctrSource; + } + } + } + else + { + char firstValueChar = value[0]; + for (ctrSource = lastSourceStart; ctrSource >= 0; ctrSource--) + { + sourceChar = source[ctrSource]; + if (sourceChar != firstValueChar) + { + continue; + } + + for (ctrValue = 1; ctrValue < 
valueCount; ctrValue++) + { + sourceChar = source[ctrSource + ctrValue]; + valueChar = value[ctrValue]; + + if (sourceChar != valueChar) + { + break; + } + } + + if (ctrValue == valueCount) + { + return ctrSource; + } + } + } + } + + return -1; + } + + private static char InvariantToUpper(char c) + { + return (uint)(c - 'a') <= (uint)('z' - 'a') ? (char)(c - 0x20) : c; + } + + private unsafe SortKey InvariantCreateSortKey(string source, CompareOptions options) + { + if (source == null) { throw new ArgumentNullException(nameof(source)); } + Contract.EndContractBlock(); + + if ((options & ValidSortkeyCtorMaskOffFlags) != 0) + { + throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options)); + } + + byte [] keyData; + if (source.Length == 0) + { + keyData = Array.Empty(); + } + else + { + // In the invariant mode, all string comparisons are done as ordinal so when generating the sort keys we generate it according to this fact + keyData = new byte[source.Length * sizeof(char)]; + + fixed (char* pChar = source) fixed (byte* pByte = keyData) + { + if ((options & (CompareOptions.IgnoreCase | CompareOptions.OrdinalIgnoreCase)) != 0) + { + short *pShort = (short *) pByte; + for (int i=0; i> 24), diff --git a/src/mscorlib/src/System/Globalization/CompareInfo.Windows.cs b/src/mscorlib/src/System/Globalization/CompareInfo.Windows.cs index 0c87d9f..d20bb9f 100644 --- a/src/mscorlib/src/System/Globalization/CompareInfo.Windows.cs +++ b/src/mscorlib/src/System/Globalization/CompareInfo.Windows.cs @@ -12,24 +12,24 @@ namespace System.Globalization { public partial class CompareInfo { - internal unsafe CompareInfo(CultureInfo culture) - { - _name = culture._name; - InitSort(culture); - } - private unsafe void InitSort(CultureInfo culture) { _sortName = culture.SortName; - const uint LCMAP_SORTHANDLE = 0x20000000; - _name = culture._name; _sortName = culture.SortName; - IntPtr handle; - int ret = Interop.Kernel32.LCMapStringEx(_sortName, LCMAP_SORTHANDLE, null, 0, &handle, 
IntPtr.Size, null, null, IntPtr.Zero); - _sortHandle = ret > 0 ? handle : IntPtr.Zero; + if (_invariantMode) + { + _sortHandle = IntPtr.Zero; + } + else + { + const uint LCMAP_SORTHANDLE = 0x20000000; + IntPtr handle; + int ret = Interop.Kernel32.LCMapStringEx(_sortName, LCMAP_SORTHANDLE, null, 0, &handle, IntPtr.Size, null, null, IntPtr.Zero); + _sortHandle = ret > 0 ? handle : IntPtr.Zero; + } } private static unsafe int FindStringOrdinal( @@ -41,6 +41,8 @@ namespace System.Globalization int cchValue, bool bIgnoreCase) { + Debug.Assert(!GlobalizationMode.Invariant); + fixed (char* pSource = stringSource) fixed (char* pValue = value) { @@ -55,16 +57,20 @@ namespace System.Globalization } } - internal static int IndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase) + internal static int IndexOfOrdinalCore(string source, string value, int startIndex, int count, bool ignoreCase) { + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(source != null); Debug.Assert(value != null); return FindStringOrdinal(FIND_FROMSTART, source, startIndex, count, value, value.Length, ignoreCase); } - internal static int LastIndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase) + internal static int LastIndexOfOrdinalCore(string source, string value, int startIndex, int count, bool ignoreCase) { + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(source != null); Debug.Assert(value != null); @@ -73,6 +79,8 @@ namespace System.Globalization private unsafe int GetHashCodeOfStringCore(string source, CompareOptions options) { + Debug.Assert(!_invariantMode); + Debug.Assert(source != null); Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0); @@ -103,12 +111,16 @@ namespace System.Globalization private static unsafe int CompareStringOrdinalIgnoreCase(char* string1, int count1, char* string2, int count2) { + Debug.Assert(!GlobalizationMode.Invariant); + // Use the OS to 
compare and then convert the result to expected value by subtracting 2 return Interop.Kernel32.CompareStringOrdinal(string1, count1, string2, count2, true) - 2; } private unsafe int CompareString(string string1, int offset1, int length1, string string2, int offset2, int length2, CompareOptions options) { + Debug.Assert(!_invariantMode); + Debug.Assert(string1 != null); Debug.Assert(string2 != null); Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0); @@ -150,6 +162,8 @@ namespace System.Globalization int cchValue, int *pcchFound) { + Debug.Assert(!_invariantMode); + string localeName = _sortHandle != IntPtr.Zero ? null : _sortName; fixed (char* pLocaleName = localeName) @@ -175,6 +189,8 @@ namespace System.Globalization internal unsafe int IndexOfCore(String source, String target, int startIndex, int count, CompareOptions options, int* matchLengthPtr) { + Debug.Assert(!_invariantMode); + Debug.Assert(source != null); Debug.Assert(target != null); Debug.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0); @@ -216,6 +232,8 @@ namespace System.Globalization private unsafe int LastIndexOfCore(string source, string target, int startIndex, int count, CompareOptions options) { + Debug.Assert(!_invariantMode); + Debug.Assert(!string.IsNullOrEmpty(source)); Debug.Assert(target != null); Debug.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0); @@ -245,6 +263,8 @@ namespace System.Globalization private unsafe bool StartsWith(string source, string prefix, CompareOptions options) { + Debug.Assert(!_invariantMode); + Debug.Assert(!string.IsNullOrEmpty(source)); Debug.Assert(!string.IsNullOrEmpty(prefix)); Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0); @@ -255,6 +275,8 @@ namespace System.Globalization private unsafe bool EndsWith(string source, string suffix, CompareOptions options) { + Debug.Assert(!_invariantMode); + Debug.Assert(!string.IsNullOrEmpty(source)); 
Debug.Assert(!string.IsNullOrEmpty(suffix)); Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0); @@ -352,6 +374,8 @@ namespace System.Globalization private unsafe SortKey CreateSortKey(String source, CompareOptions options) { + Debug.Assert(!_invariantMode); + if (source == null) { throw new ArgumentNullException(nameof(source)); } Contract.EndContractBlock(); @@ -397,6 +421,8 @@ namespace System.Globalization private static unsafe bool IsSortable(char* text, int length) { + Debug.Assert(!GlobalizationMode.Invariant); + return Interop.Kernel32.IsNLSDefinedString(Interop.Kernel32.COMPARE_STRING, 0, IntPtr.Zero, text, length); } @@ -440,6 +466,8 @@ namespace System.Globalization private unsafe SortVersion GetSortVersion() { + Debug.Assert(!_invariantMode); + Interop.Kernel32.NlsVersionInfoEx nlsVersion = new Interop.Kernel32.NlsVersionInfoEx(); Interop.Kernel32.GetNLSVersionEx(Interop.Kernel32.COMPARE_STRING, _sortName, &nlsVersion); return new SortVersion( diff --git a/src/mscorlib/src/System/Globalization/CompareInfo.cs b/src/mscorlib/src/System/Globalization/CompareInfo.cs index 94cfcad..285a81d 100644 --- a/src/mscorlib/src/System/Globalization/CompareInfo.cs +++ b/src/mscorlib/src/System/Globalization/CompareInfo.cs @@ -64,13 +64,23 @@ namespace System.Globalization // locale, which is what SCOMPAREINFO does. 
[OptionalField(VersionAdded = 2)] - private String _name; // The name used to construct this CompareInfo + private string _name; // The name used to construct this CompareInfo [NonSerialized] - private String _sortName; // The name that defines our behavior + private string _sortName; // The name that defines our behavior [OptionalField(VersionAdded = 3)] private SortVersion _sortVersion; + // _invariantMode is defined for the perf reason as accessing the instance field is faster than access the static property GlobalizationMode.Invariant + [NonSerialized] + private readonly bool _invariantMode = GlobalizationMode.Invariant; + + internal CompareInfo(CultureInfo culture) + { + _name = culture._name; + InitSort(culture); + } + /*=================================GetCompareInfo========================== **Action: Get the CompareInfo constructed from the data table in the specified assembly for the specified culture. ** Warning: The assembly versioning mechanism is dead! @@ -111,7 +121,7 @@ namespace System.Globalization ** ArgumentException if name is invalid. ============================================================================*/ // Assembly constructor should be deprecated, we don't act on the assembly information any more - public static CompareInfo GetCompareInfo(String name, Assembly assembly) + public static CompareInfo GetCompareInfo(string name, Assembly assembly) { if (name == null || assembly == null) { @@ -157,7 +167,7 @@ namespace System.Globalization ** ArgumentException if name is invalid. 
============================================================================*/ - public static CompareInfo GetCompareInfo(String name) + public static CompareInfo GetCompareInfo(string name) { if (name == null) { @@ -170,6 +180,10 @@ namespace System.Globalization public static unsafe bool IsSortable(char ch) { + if (GlobalizationMode.Invariant) + { + return true; + } char *pChar = &ch; return IsSortable(pChar, 1); } @@ -182,11 +196,16 @@ namespace System.Globalization throw new ArgumentNullException(nameof(text)); } - if (0 == text.Length) + if (text.Length == 0) { // A zero length string is not invalid, but it is also not sortable. return (false); } + + if (GlobalizationMode.Invariant) + { + return true; + } fixed (char *pChar = text) { @@ -236,7 +255,7 @@ namespace System.Globalization // //////////////////////////////////////////////////////////////////////// - public virtual String Name + public virtual string Name { get { @@ -261,12 +280,12 @@ namespace System.Globalization // //////////////////////////////////////////////////////////////////////// - public virtual int Compare(String string1, String string2) + public virtual int Compare(string string1, string string2) { return (Compare(string1, string2, CompareOptions.None)); } - public unsafe virtual int Compare(String string1, String string2, CompareOptions options) + public unsafe virtual int Compare(string string1, string string2, CompareOptions options) { if (options == CompareOptions.OrdinalIgnoreCase) { @@ -280,6 +299,7 @@ namespace System.Globalization { throw new ArgumentException(SR.Argument_CompareOptionOrdinal, nameof(options)); } + return String.CompareOrdinal(string1, string2); } @@ -303,6 +323,14 @@ namespace System.Globalization return (1); // non-null > null } + if (_invariantMode) + { + if ((options & CompareOptions.IgnoreCase) != 0) + return CompareOrdinalIgnoreCase(string1, 0, string1.Length, string2, 0, string2.Length); + + return String.CompareOrdinal(string1, string2); + } + return 
CompareString(string1, 0, string1.Length, string2, 0, string2.Length, options); } @@ -320,26 +348,26 @@ namespace System.Globalization //////////////////////////////////////////////////////////////////////// - public unsafe virtual int Compare(String string1, int offset1, int length1, String string2, int offset2, int length2) + public unsafe virtual int Compare(string string1, int offset1, int length1, string string2, int offset2, int length2) { return Compare(string1, offset1, length1, string2, offset2, length2, 0); } - public unsafe virtual int Compare(String string1, int offset1, String string2, int offset2, CompareOptions options) + public virtual int Compare(string string1, int offset1, string string2, int offset2, CompareOptions options) { return Compare(string1, offset1, string1 == null ? 0 : string1.Length - offset1, string2, offset2, string2 == null ? 0 : string2.Length - offset2, options); } - public unsafe virtual int Compare(String string1, int offset1, String string2, int offset2) + public virtual int Compare(string string1, int offset1, string string2, int offset2) { return Compare(string1, offset1, string2, offset2, 0); } - public unsafe virtual int Compare(String string1, int offset1, int length1, String string2, int offset2, int length2, CompareOptions options) + public virtual int Compare(string string1, int offset1, int length1, string string2, int offset2, int length2, CompareOptions options) { if (options == CompareOptions.OrdinalIgnoreCase) { @@ -400,6 +428,15 @@ namespace System.Globalization return CompareOrdinal(string1, offset1, length1, string2, offset2, length2); } + + if (_invariantMode) + { + if ((options & CompareOptions.IgnoreCase) != 0) + return CompareOrdinalIgnoreCase(string1, offset1, length1, string2, offset2, length2); + + return CompareOrdinal(string1, offset1, length1, string2, offset2, length2); + } + return CompareString(string1, offset1, length1, string2, offset2, length2, options); @@ -417,7 +454,7 @@ namespace 
System.Globalization } // - // CompareOrdinalIgnoreCase compare two string oridnally with ignoring the case. + // CompareOrdinalIgnoreCase compare two string ordinally with ignoring the case. // it assumes the strings are Ascii string till we hit non Ascii character in strA or strB and then we continue the comparison by // calling the OS. // @@ -434,7 +471,10 @@ namespace System.Globalization char* a = ap + indexA; char* b = bp + indexB; - while (length != 0 && (*a <= 0x80) && (*b <= 0x80)) + // in InvariantMode we support all range and not only the ascii characters. + char maxChar = (char) (GlobalizationMode.Invariant ? 0xFFFF : 0x80); + + while (length != 0 && (*a <= maxChar) && (*b <= maxChar)) { int charA = *a; int charB = *b; @@ -450,7 +490,7 @@ namespace System.Globalization if ((uint)(charA - 'a') <= (uint)('z' - 'a')) charA -= 0x20; if ((uint)(charB - 'a') <= (uint)('z' - 'a')) charB -= 0x20; - //Return the (case-insensitive) difference between them. + // Return the (case-insensitive) difference between them. if (charA != charB) return charA - charB; @@ -462,6 +502,8 @@ namespace System.Globalization if (length == 0) return lengthA - lengthB; + Debug.Assert(!GlobalizationMode.Invariant); + range -= length; return CompareStringOrdinalIgnoreCase(a, lengthA - range, b, lengthB - range); @@ -476,7 +518,7 @@ namespace System.Globalization // String.Empty, true is returned. // //////////////////////////////////////////////////////////////////////// - public unsafe virtual bool IsPrefix(String source, String prefix, CompareOptions options) + public virtual bool IsPrefix(string source, string prefix, CompareOptions options) { if (source == null || prefix == null) { @@ -510,10 +552,15 @@ namespace System.Globalization throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options)); } + if (_invariantMode) + { + return source.StartsWith(prefix, (options & CompareOptions.IgnoreCase) != 0 ? 
StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal); + } + return StartsWith(source, prefix, options); } - public virtual bool IsPrefix(String source, String prefix) + public virtual bool IsPrefix(string source, string prefix) { return (IsPrefix(source, prefix, 0)); } @@ -526,7 +573,7 @@ namespace System.Globalization // String.Empty, true is returned. // //////////////////////////////////////////////////////////////////////// - public unsafe virtual bool IsSuffix(String source, String suffix, CompareOptions options) + public virtual bool IsSuffix(string source, string suffix, CompareOptions options) { if (source == null || suffix == null) { @@ -560,11 +607,16 @@ namespace System.Globalization throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options)); } + if (_invariantMode) + { + return source.EndsWith(suffix, (options & CompareOptions.IgnoreCase) != 0 ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal); + } + return EndsWith(source, suffix, options); } - public virtual bool IsSuffix(String source, String suffix) + public virtual bool IsSuffix(string source, string suffix) { return (IsSuffix(source, suffix, 0)); } @@ -583,7 +635,7 @@ namespace System.Globalization //////////////////////////////////////////////////////////////////////// - public unsafe virtual int IndexOf(String source, char value) + public virtual int IndexOf(string source, char value) { if (source == null) throw new ArgumentNullException(nameof(source)); @@ -593,7 +645,7 @@ namespace System.Globalization } - public unsafe virtual int IndexOf(String source, String value) + public virtual int IndexOf(string source, string value) { if (source == null) throw new ArgumentNullException(nameof(source)); @@ -603,7 +655,7 @@ namespace System.Globalization } - public unsafe virtual int IndexOf(String source, char value, CompareOptions options) + public virtual int IndexOf(string source, char value, CompareOptions options) { if (source == null) throw new 
ArgumentNullException(nameof(source)); @@ -613,7 +665,7 @@ namespace System.Globalization } - public unsafe virtual int IndexOf(String source, String value, CompareOptions options) + public virtual int IndexOf(string source, string value, CompareOptions options) { if (source == null) throw new ArgumentNullException(nameof(source)); @@ -622,7 +674,7 @@ namespace System.Globalization return IndexOf(source, value, 0, source.Length, options); } - public unsafe virtual int IndexOf(String source, char value, int startIndex) + public virtual int IndexOf(string source, char value, int startIndex) { if (source == null) throw new ArgumentNullException(nameof(source)); @@ -631,7 +683,7 @@ namespace System.Globalization return IndexOf(source, value, startIndex, source.Length - startIndex, CompareOptions.None); } - public unsafe virtual int IndexOf(String source, String value, int startIndex) + public virtual int IndexOf(string source, string value, int startIndex) { if (source == null) throw new ArgumentNullException(nameof(source)); @@ -640,7 +692,7 @@ namespace System.Globalization return IndexOf(source, value, startIndex, source.Length - startIndex, CompareOptions.None); } - public unsafe virtual int IndexOf(String source, char value, int startIndex, CompareOptions options) + public virtual int IndexOf(string source, char value, int startIndex, CompareOptions options) { if (source == null) throw new ArgumentNullException(nameof(source)); @@ -650,7 +702,7 @@ namespace System.Globalization } - public unsafe virtual int IndexOf(String source, String value, int startIndex, CompareOptions options) + public virtual int IndexOf(string source, string value, int startIndex, CompareOptions options) { if (source == null) throw new ArgumentNullException(nameof(source)); @@ -660,18 +712,18 @@ namespace System.Globalization } - public unsafe virtual int IndexOf(String source, char value, int startIndex, int count) + public virtual int IndexOf(string source, char value, int startIndex, 
int count) { return IndexOf(source, value, startIndex, count, CompareOptions.None); } - public unsafe virtual int IndexOf(String source, String value, int startIndex, int count) + public virtual int IndexOf(string source, string value, int startIndex, int count) { return IndexOf(source, value, startIndex, count, CompareOptions.None); } - public unsafe virtual int IndexOf(String source, char value, int startIndex, int count, CompareOptions options) + public unsafe virtual int IndexOf(string source, char value, int startIndex, int count, CompareOptions options) { // Validate inputs if (source == null) @@ -693,12 +745,15 @@ namespace System.Globalization // Ordinal can't be selected with other flags if ((options & ValidIndexMaskOffFlags) != 0 && (options != CompareOptions.Ordinal)) throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options)); + + if (_invariantMode) + return IndexOfOrdinal(source, new string(value, 1), startIndex, count, ignoreCase: (options & (CompareOptions.IgnoreCase | CompareOptions.OrdinalIgnoreCase)) != 0); return IndexOfCore(source, new string(value, 1), startIndex, count, options, null); } - public unsafe virtual int IndexOf(String source, String value, int startIndex, int count, CompareOptions options) + public unsafe virtual int IndexOf(string source, string value, int startIndex, int count, CompareOptions options) { // Validate inputs if (source == null) @@ -741,9 +796,22 @@ namespace System.Globalization if ((options & ValidIndexMaskOffFlags) != 0 && (options != CompareOptions.Ordinal)) throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options)); + if (_invariantMode) + return IndexOfOrdinal(source, value, startIndex, count, ignoreCase: (options & (CompareOptions.IgnoreCase | CompareOptions.OrdinalIgnoreCase)) != 0); + return IndexOfCore(source, value, startIndex, count, options, null); } + internal int IndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase) + { + if (_invariantMode) + { + 
return InvariantIndexOf(source, value, startIndex, count, ignoreCase); + } + + return IndexOfOrdinalCore(source, value, startIndex, count, ignoreCase); + } + //////////////////////////////////////////////////////////////////////// // // LastIndexOf @@ -758,19 +826,18 @@ namespace System.Globalization //////////////////////////////////////////////////////////////////////// - public unsafe virtual int LastIndexOf(String source, char value) + public virtual int LastIndexOf(String source, char value) { if (source == null) throw new ArgumentNullException(nameof(source)); Contract.EndContractBlock(); // Can't start at negative index, so make sure we check for the length == 0 case. - return LastIndexOf(source, value, source.Length - 1, - source.Length, CompareOptions.None); + return LastIndexOf(source, value, source.Length - 1, source.Length, CompareOptions.None); } - public virtual int LastIndexOf(String source, String value) + public virtual int LastIndexOf(string source, string value) { if (source == null) throw new ArgumentNullException(nameof(source)); @@ -782,7 +849,7 @@ namespace System.Globalization } - public virtual int LastIndexOf(String source, char value, CompareOptions options) + public virtual int LastIndexOf(string source, char value, CompareOptions options) { if (source == null) throw new ArgumentNullException(nameof(source)); @@ -793,53 +860,52 @@ namespace System.Globalization source.Length, options); } - public unsafe virtual int LastIndexOf(String source, String value, CompareOptions options) + public virtual int LastIndexOf(string source, string value, CompareOptions options) { if (source == null) throw new ArgumentNullException(nameof(source)); Contract.EndContractBlock(); // Can't start at negative index, so make sure we check for the length == 0 case. 
- return LastIndexOf(source, value, source.Length - 1, - source.Length, options); + return LastIndexOf(source, value, source.Length - 1, source.Length, options); } - public unsafe virtual int LastIndexOf(String source, char value, int startIndex) + public virtual int LastIndexOf(string source, char value, int startIndex) { return LastIndexOf(source, value, startIndex, startIndex + 1, CompareOptions.None); } - public unsafe virtual int LastIndexOf(String source, String value, int startIndex) + public virtual int LastIndexOf(string source, string value, int startIndex) { return LastIndexOf(source, value, startIndex, startIndex + 1, CompareOptions.None); } - public unsafe virtual int LastIndexOf(String source, char value, int startIndex, CompareOptions options) + public virtual int LastIndexOf(string source, char value, int startIndex, CompareOptions options) { return LastIndexOf(source, value, startIndex, startIndex + 1, options); } - public unsafe virtual int LastIndexOf(String source, String value, int startIndex, CompareOptions options) + public virtual int LastIndexOf(string source, string value, int startIndex, CompareOptions options) { return LastIndexOf(source, value, startIndex, startIndex + 1, options); } - public unsafe virtual int LastIndexOf(String source, char value, int startIndex, int count) + public virtual int LastIndexOf(string source, char value, int startIndex, int count) { return LastIndexOf(source, value, startIndex, count, CompareOptions.None); } - public unsafe virtual int LastIndexOf(String source, String value, int startIndex, int count) + public virtual int LastIndexOf(string source, string value, int startIndex, int count) { return LastIndexOf(source, value, startIndex, count, CompareOptions.None); } - public unsafe virtual int LastIndexOf(String source, char value, int startIndex, int count, CompareOptions options) + public virtual int LastIndexOf(string source, char value, int startIndex, int count, CompareOptions options) { // Verify 
Arguments if (source == null) @@ -878,11 +944,14 @@ namespace System.Globalization return source.LastIndexOf(value.ToString(), startIndex, count, StringComparison.OrdinalIgnoreCase); } + if (_invariantMode) + return InvariantLastIndexOf(source, new string(value, 1), startIndex, count, (options & (CompareOptions.IgnoreCase | CompareOptions.OrdinalIgnoreCase)) != 0); + return LastIndexOfCore(source, value.ToString(), startIndex, count, options); } - public unsafe virtual int LastIndexOf(String source, String value, int startIndex, int count, CompareOptions options) + public virtual int LastIndexOf(string source, string value, int startIndex, int count, CompareOptions options) { // Verify Arguments if (source == null) @@ -927,9 +996,22 @@ namespace System.Globalization return LastIndexOfOrdinal(source, value, startIndex, count, ignoreCase: true); } + if (_invariantMode) + return InvariantLastIndexOf(source, value, startIndex, count, (options & (CompareOptions.IgnoreCase | CompareOptions.OrdinalIgnoreCase)) != 0); + return LastIndexOfCore(source, value, startIndex, count, options); } + internal int LastIndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase) + { + if (_invariantMode) + { + return InvariantLastIndexOf(source, value, startIndex, count, ignoreCase); + } + + return LastIndexOfOrdinalCore(source, value, startIndex, count, ignoreCase); + } + //////////////////////////////////////////////////////////////////////// // // GetSortKey @@ -937,14 +1019,20 @@ namespace System.Globalization // Gets the SortKey for the given string with the given options. 
// //////////////////////////////////////////////////////////////////////// - public unsafe virtual SortKey GetSortKey(String source, CompareOptions options) + public virtual SortKey GetSortKey(string source, CompareOptions options) { + if (_invariantMode) + return InvariantCreateSortKey(source, options); + return CreateSortKey(source, options); } - public unsafe virtual SortKey GetSortKey(String source) + public virtual SortKey GetSortKey(string source) { + if (_invariantMode) + return InvariantCreateSortKey(source, CompareOptions.None); + return CreateSortKey(source, CompareOptions.None); } @@ -1065,7 +1153,7 @@ namespace System.Globalization // CompareInfo. // //////////////////////////////////////////////////////////////////////// - public override String ToString() + public override string ToString() { return ("CompareInfo - " + this.Name); } @@ -1076,7 +1164,18 @@ namespace System.Globalization { if (_sortVersion == null) { - _sortVersion = GetSortVersion(); + if (_invariantMode) + { + _sortVersion = new SortVersion(0, CultureInfo.LOCALE_INVARIANT, new Guid(0, 0, 0, 0, 0, 0, 0, + (byte) (CultureInfo.LOCALE_INVARIANT >> 24), + (byte) ((CultureInfo.LOCALE_INVARIANT & 0x00FF0000) >> 16), + (byte) ((CultureInfo.LOCALE_INVARIANT & 0x0000FF00) >> 8), + (byte) (CultureInfo.LOCALE_INVARIANT & 0xFF))); + } + else + { + _sortVersion = GetSortVersion(); + } } return _sortVersion; diff --git a/src/mscorlib/src/System/Globalization/CultureData.Unix.cs b/src/mscorlib/src/System/Globalization/CultureData.Unix.cs index 210f772..4f685de 100644 --- a/src/mscorlib/src/System/Globalization/CultureData.Unix.cs +++ b/src/mscorlib/src/System/Globalization/CultureData.Unix.cs @@ -27,7 +27,9 @@ namespace System.Globalization private unsafe bool InitCultureData() { Debug.Assert(_sRealName != null); - + + Debug.Assert(!GlobalizationMode.Invariant); + string alternateSortName = string.Empty; string realNameBuffer = _sRealName; @@ -120,6 +122,8 @@ namespace System.Globalization private 
string GetLocaleInfo(LocaleStringData type) { + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(_sWindowsName != null, "[CultureData.GetLocaleInfo] Expected _sWindowsName to be populated already"); return GetLocaleInfo(_sWindowsName, type); } @@ -153,6 +157,8 @@ namespace System.Globalization private int GetLocaleInfo(LocaleNumberData type) { + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(_sWindowsName != null, "[CultureData.GetLocaleInfo(LocaleNumberData)] Expected _sWindowsName to be populated already"); switch (type) @@ -296,11 +302,15 @@ namespace System.Globalization private static string LCIDToLocaleName(int culture) { + Debug.Assert(!GlobalizationMode.Invariant); + return LocaleData.LCIDToLocaleName(culture); } private static int LocaleNameToLCID(string cultureName) { + Debug.Assert(!GlobalizationMode.Invariant); + int lcid = LocaleData.GetLocaleDataNumericPart(cultureName, LocaleDataParts.Lcid); return lcid == -1 ? CultureInfo.LOCALE_CUSTOM_UNSPECIFIED : lcid; } @@ -349,6 +359,8 @@ namespace System.Globalization private static CultureInfo[] EnumCultures(CultureTypes types) { + Debug.Assert(!GlobalizationMode.Invariant); + if ((types & (CultureTypes.NeutralCultures | CultureTypes.SpecificCultures)) == 0) { return Array.Empty(); diff --git a/src/mscorlib/src/System/Globalization/CultureData.Windows.cs b/src/mscorlib/src/System/Globalization/CultureData.Windows.cs index 6ce5293..6d2678b 100644 --- a/src/mscorlib/src/System/Globalization/CultureData.Windows.cs +++ b/src/mscorlib/src/System/Globalization/CultureData.Windows.cs @@ -62,7 +62,7 @@ namespace System.Globalization /// private unsafe bool InitCultureData() { - const int LOCALE_NAME_MAX_LENGTH = 85; + Debug.Assert(!GlobalizationMode.Invariant); const uint LOCALE_ILANGUAGE = 0x00000001; const uint LOCALE_INEUTRAL = 0x00000071; @@ -193,6 +193,8 @@ namespace System.Globalization internal static unsafe int GetLocaleInfoEx(string lpLocaleName, uint lcType, char* lpLCData, int 
cchData) { + Debug.Assert(!GlobalizationMode.Invariant); + return Interop.Kernel32.GetLocaleInfoEx(lpLocaleName, lcType, (IntPtr)lpLCData, cchData); } @@ -665,11 +667,15 @@ namespace System.Globalization private static int LocaleNameToLCID(string cultureName) { + Debug.Assert(!GlobalizationMode.Invariant); + return Interop.Kernel32.LocaleNameToLCID(cultureName, Interop.Kernel32.LOCALE_ALLOW_NEUTRAL_NAMES); } private static unsafe string LCIDToLocaleName(int culture) { + Debug.Assert(!GlobalizationMode.Invariant); + char *pBuffer = stackalloc char[Interop.Kernel32.LOCALE_NAME_MAX_LENGTH + 1]; // +1 for the null termination int length = Interop.Kernel32.LCIDToLocaleName(culture, pBuffer, Interop.Kernel32.LOCALE_NAME_MAX_LENGTH + 1, Interop.Kernel32.LOCALE_ALLOW_NEUTRAL_NAMES); @@ -718,6 +724,8 @@ namespace System.Globalization private static CultureInfo[] EnumCultures(CultureTypes types) { + Debug.Assert(!GlobalizationMode.Invariant); + uint flags = 0; #pragma warning disable 618 diff --git a/src/mscorlib/src/System/Globalization/CultureData.cs b/src/mscorlib/src/System/Globalization/CultureData.cs index 8e17654..0dcebf4 100644 --- a/src/mscorlib/src/System/Globalization/CultureData.cs +++ b/src/mscorlib/src/System/Globalization/CultureData.cs @@ -53,6 +53,7 @@ namespace System.Globalization // internal partial class CultureData { + private const int LOCALE_NAME_MAX_LENGTH = 85; private const int undef = -1; // Override flag @@ -159,7 +160,7 @@ namespace System.Globalization // Region Name to Culture Name mapping table // (In future would be nice to be in registry or something) - //Using a property so we avoid creating the dictionary untill we need it + //Using a property so we avoid creating the dictionary until we need it private static StringStringDictionary RegionNames { get @@ -429,10 +430,125 @@ namespace System.Globalization types &= (~CultureTypes.WindowsOnlyCultures); } + if (GlobalizationMode.Invariant) + { + // in invariant mode we always return invariant 
culture only from the enumeration + return new CultureInfo[1] { new CultureInfo("") }; + } + #pragma warning restore 618 return EnumCultures(types); } + private static CultureData CreateCultureWithInvariantData() + { + // Make a new culturedata + CultureData invariant = new CultureData(); + + // Basics + // Note that we override the resources since this IS NOT supposed to change (by definition) + invariant._bUseOverrides = false; + invariant._sRealName = ""; // Name you passed in (ie: en-US, en, or de-DE_phoneb) + invariant._sWindowsName = ""; // Name OS thinks the object is (ie: de-DE_phoneb, or en-US (even if en was passed in)) + + // Identity + invariant._sName = ""; // locale name (ie: en-us) + invariant._sParent = ""; // Parent name (which may be a custom locale/culture) + invariant._bNeutral = false; // Flags for the culture (ie: neutral or not right now) + invariant._sEnglishDisplayName = "Invariant Language (Invariant Country)"; // English pretty name for this locale + invariant._sNativeDisplayName = "Invariant Language (Invariant Country)"; // Native pretty name for this locale + invariant._sSpecificCulture = ""; // The culture name to be used in CultureInfo.CreateSpecificCulture() + + // Language + invariant._sISO639Language = "iv"; // ISO 639 Language Name + invariant._sISO639Language2 = "ivl"; // 3 char ISO 639 lang name 2 + invariant._sLocalizedLanguage = "Invariant Language"; // Display name for this Language + invariant._sEnglishLanguage = "Invariant Language"; // English name for this language + invariant._sNativeLanguage = "Invariant Language"; // Native name of this language + invariant._sAbbrevLang = "IVL"; // abbreviated language name (Windows Language Name) + invariant._sConsoleFallbackName = ""; // The culture name for the console fallback UI culture + invariant._iInputLanguageHandle = 0x07F; // input language handle + + // Region + invariant._sRegionName = "IV"; // (RegionInfo) + invariant._sEnglishCountry = "Invariant Country"; // english 
country name (RegionInfo) + invariant._sNativeCountry = "Invariant Country"; // native country name (Windows Only) + invariant._sISO3166CountryName = "IV"; // (RegionInfo), ie: US + invariant._sISO3166CountryName2 = "ivc"; // 3 char ISO 3166 country name 2 2(RegionInfo) + invariant._iGeoId = 244; // GeoId (Windows Only) + + // Numbers + invariant._sPositiveSign = "+"; // positive sign + invariant._sNegativeSign = "-"; // negative sign + invariant._iDigits = 2; // number of fractional digits + invariant._iNegativeNumber = 1; // negative number format + invariant._waGrouping = new int[] { 3 }; // grouping of digits + invariant._sDecimalSeparator = "."; // decimal separator + invariant._sThousandSeparator = ","; // thousands separator + invariant._sNaN = "NaN"; // Not a Number + invariant._sPositiveInfinity = "Infinity"; // + Infinity + invariant._sNegativeInfinity = "-Infinity"; // - Infinity + + // Percent + invariant._iNegativePercent = 0; // Negative Percent (0-3) + invariant._iPositivePercent = 0; // Positive Percent (0-11) + invariant._sPercent = "%"; // Percent (%) symbol + invariant._sPerMille = "\x2030"; // PerMille symbol + + // Currency + invariant._sCurrency = "\x00a4"; // local monetary symbol: for international monetary symbol + invariant._sIntlMonetarySymbol = "XDR"; // international monetary symbol (RegionInfo) + invariant._sEnglishCurrency = "International Monetary Fund"; // English name for this currency (Windows Only) + invariant._sNativeCurrency = "International Monetary Fund"; // Native name for this currency (Windows Only) + invariant._iCurrencyDigits = 2; // # local monetary fractional digits + invariant._iCurrency = 0; // positive currency format + invariant._iNegativeCurrency = 0; // negative currency format + invariant._waMonetaryGrouping = new int[] { 3 }; // monetary grouping of digits + invariant._sMonetaryDecimal = "."; // monetary decimal separator + invariant._sMonetaryThousand = ","; // monetary thousands separator + + // Misc + 
invariant._iMeasure = 0; // system of measurement 0=metric, 1=US (RegionInfo) + invariant._sListSeparator = ","; // list separator + + // Time + invariant._sTimeSeparator = ":"; + invariant._sAM1159 = "AM"; // AM designator + invariant._sPM2359 = "PM"; // PM designator + invariant._saLongTimes = new String[] { "HH:mm:ss" }; // time format + invariant._saShortTimes = new String[] { "HH:mm", "hh:mm tt", "H:mm", "h:mm tt" }; // short time format + invariant._saDurationFormats = new String[] { "HH:mm:ss" }; // time duration format + + + // Calendar specific data + invariant._iFirstDayOfWeek = 0; // first day of week + invariant._iFirstWeekOfYear = 0; // first week of year + invariant._waCalendars = new CalendarId[] { CalendarId.GREGORIAN }; // all available calendar type(s). The first one is the default calendar + + // Store for specific data about each calendar + invariant._calendars = new CalendarData[CalendarData.MAX_CALENDARS]; + invariant._calendars[0] = CalendarData.Invariant; + + // Text information + invariant._iReadingLayout = 0; + + // These are desktop only, not coreclr + + invariant._iLanguage = CultureInfo.LOCALE_INVARIANT; // locale ID (0409) - NO sort information + invariant._iDefaultAnsiCodePage = 1252; // default ansi code page ID (ACP) + invariant._iDefaultOemCodePage = 437; // default oem code page ID (OCP or OEM) + invariant._iDefaultMacCodePage = 10000; // default macintosh code page + invariant._iDefaultEbcdicCodePage = 037; // default EBCDIC code page + + if (GlobalizationMode.Invariant) + { + invariant._sLocalizedDisplayName = invariant._sNativeDisplayName; + invariant._sLocalizedCountry = invariant._sNativeCountry; + } + + return invariant; + } + ///////////////////////////////////////////////////////////////////////// // Build our invariant information // @@ -444,104 +560,8 @@ namespace System.Globalization { if (s_Invariant == null) { - // Make a new culturedata - CultureData invariant = new CultureData(); - - // Basics - // Note that we 
override the resources since this IS NOT supposed to change (by definition) - invariant._bUseOverrides = false; - invariant._sRealName = ""; // Name you passed in (ie: en-US, en, or de-DE_phoneb) - invariant._sWindowsName = ""; // Name OS thinks the object is (ie: de-DE_phoneb, or en-US (even if en was passed in)) - - // Identity - invariant._sName = ""; // locale name (ie: en-us) - invariant._sParent = ""; // Parent name (which may be a custom locale/culture) - invariant._bNeutral = false; // Flags for the culture (ie: neutral or not right now) - invariant._sEnglishDisplayName = "Invariant Language (Invariant Country)"; // English pretty name for this locale - invariant._sNativeDisplayName = "Invariant Language (Invariant Country)"; // Native pretty name for this locale - invariant._sSpecificCulture = ""; // The culture name to be used in CultureInfo.CreateSpecificCulture() - - // Language - invariant._sISO639Language = "iv"; // ISO 639 Language Name - invariant._sISO639Language2 = "ivl"; // 3 char ISO 639 lang name 2 - invariant._sLocalizedLanguage = "Invariant Language"; // Display name for this Language - invariant._sEnglishLanguage = "Invariant Language"; // English name for this language - invariant._sNativeLanguage = "Invariant Language"; // Native name of this language - invariant._sAbbrevLang = "IVL"; // abbreviated language name (Windows Language Name) - invariant._sConsoleFallbackName = ""; // The culture name for the console fallback UI culture - invariant._iInputLanguageHandle = 0x07F; // input language handle - - // Region - invariant._sRegionName = "IV"; // (RegionInfo) - invariant._sEnglishCountry = "Invariant Country"; // english country name (RegionInfo) - invariant._sNativeCountry = "Invariant Country"; // native country name (Windows Only) - invariant._sISO3166CountryName = "IV"; // (RegionInfo), ie: US - invariant._sISO3166CountryName2 = "ivc"; // 3 char ISO 3166 country name 2 2(RegionInfo) - invariant._iGeoId = 244; // GeoId (Windows Only) - 
- // Numbers - invariant._sPositiveSign = "+"; // positive sign - invariant._sNegativeSign = "-"; // negative sign - invariant._iDigits = 2; // number of fractional digits - invariant._iNegativeNumber = 1; // negative number format - invariant._waGrouping = new int[] { 3 }; // grouping of digits - invariant._sDecimalSeparator = "."; // decimal separator - invariant._sThousandSeparator = ","; // thousands separator - invariant._sNaN = "NaN"; // Not a Number - invariant._sPositiveInfinity = "Infinity"; // + Infinity - invariant._sNegativeInfinity = "-Infinity"; // - Infinity - - // Percent - invariant._iNegativePercent = 0; // Negative Percent (0-3) - invariant._iPositivePercent = 0; // Positive Percent (0-11) - invariant._sPercent = "%"; // Percent (%) symbol - invariant._sPerMille = "\x2030"; // PerMille symbol - - // Currency - invariant._sCurrency = "\x00a4"; // local monetary symbol: for international monetary symbol - invariant._sIntlMonetarySymbol = "XDR"; // international monetary symbol (RegionInfo) - invariant._sEnglishCurrency = "International Monetary Fund"; // English name for this currency (Windows Only) - invariant._sNativeCurrency = "International Monetary Fund"; // Native name for this currency (Windows Only) - invariant._iCurrencyDigits = 2; // # local monetary fractional digits - invariant._iCurrency = 0; // positive currency format - invariant._iNegativeCurrency = 0; // negative currency format - invariant._waMonetaryGrouping = new int[] { 3 }; // monetary grouping of digits - invariant._sMonetaryDecimal = "."; // monetary decimal separator - invariant._sMonetaryThousand = ","; // monetary thousands separator - - // Misc - invariant._iMeasure = 0; // system of measurement 0=metric, 1=US (RegionInfo) - invariant._sListSeparator = ","; // list separator - - // Time - invariant._sAM1159 = "AM"; // AM designator - invariant._sPM2359 = "PM"; // PM designator - invariant._saLongTimes = new String[] { "HH:mm:ss" }; // time format - 
invariant._saShortTimes = new String[] { "HH:mm", "hh:mm tt", "H:mm", "h:mm tt" }; // short time format - invariant._saDurationFormats = new String[] { "HH:mm:ss" }; // time duration format - - - // Calendar specific data - invariant._iFirstDayOfWeek = 0; // first day of week - invariant._iFirstWeekOfYear = 0; // first week of year - invariant._waCalendars = new CalendarId[] { CalendarId.GREGORIAN }; // all available calendar type(s). The first one is the default calendar - - // Store for specific data about each calendar - invariant._calendars = new CalendarData[CalendarData.MAX_CALENDARS]; - invariant._calendars[0] = CalendarData.Invariant; - - // Text information - invariant._iReadingLayout = 0; - - // These are desktop only, not coreclr - - invariant._iLanguage = CultureInfo.LOCALE_INVARIANT; // locale ID (0409) - NO sort information - invariant._iDefaultAnsiCodePage = 1252; // default ansi code page ID (ACP) - invariant._iDefaultOemCodePage = 437; // default oem code page ID (OCP or OEM) - invariant._iDefaultMacCodePage = 10000; // default macintosh code page - invariant._iDefaultEbcdicCodePage = 037; // default EBCDIC code page // Remember it - s_Invariant = invariant; + s_Invariant = CreateCultureWithInvariantData(); } return s_Invariant; } @@ -606,8 +626,72 @@ namespace System.Globalization return culture; } + private static unsafe string NormalizeCultureName(string name, out bool isNeutralName) + { + isNeutralName = true; + int i = 0; + + Debug.Assert(name.Length <= LOCALE_NAME_MAX_LENGTH); + + char *pName = stackalloc char[LOCALE_NAME_MAX_LENGTH]; + bool changed = false; + + while (i < name.Length && name[i] != '-' && name[i] != '_') + { + if (name[i] >= 'A' && name[i] <= 'Z') + { + // lowercase characters before '-' + pName[i] = (char) (((int)name[i]) + 0x20); + changed = true; + } + else + { + pName[i] = name[i]; + } + i++; + } + + if (i < name.Length) + { + // this is not perfect to detect the non neutral cultures but it is good enough when we are 
running in invariant mode + isNeutralName = false; + } + + while (i < name.Length) + { + if (name[i] >= 'a' && name[i] <= 'z') + { + pName[i] = (char) (((int)name[i]) - 0x20); + changed = true; + } + else + { + pName[i] = name[i]; + } + i++; + } + + if (changed) + return new string(pName, 0, name.Length); + + return name; + } + private static CultureData CreateCultureData(string cultureName, bool useUserOverride) { + if (GlobalizationMode.Invariant) + { + CultureInfo.VerifyCultureName(cultureName, true); + CultureData cd = CreateCultureWithInvariantData(); + cd._bUseOverrides = useUserOverride; + cd._sName = NormalizeCultureName(cultureName, out cd._bNeutral); + cd._sRealName = cd._sName; + cd._sWindowsName = cd._sName; + cd._iLanguage = CultureInfo.LOCALE_CUSTOM_UNSPECIFIED; + + return cd; + } + CultureData culture = new CultureData(); culture._bUseOverrides = useUserOverride; culture._sRealName = cultureName; @@ -665,6 +749,12 @@ namespace System.Globalization if (culture == CultureInfo.LOCALE_INVARIANT) return Invariant; + + if (GlobalizationMode.Invariant) + { + // LCID is not supported in the InvariantMode + throw new CultureNotFoundException(nameof(culture), culture, SR.Argument_CultureNotSupported); + } // Convert the lcid to a name, then use that // Note that this will return neutral names (unlike Vista native API) @@ -1412,6 +1502,8 @@ namespace System.Globalization { if (_saLongTimes == null) { + Debug.Assert(!GlobalizationMode.Invariant); + String[] longTimes = GetTimeFormats(); if (longTimes == null || longTimes.Length == 0) { @@ -1435,6 +1527,8 @@ namespace System.Globalization { if (_saShortTimes == null) { + Debug.Assert(!GlobalizationMode.Invariant); + // Try to get the short times from the OS/culture.dll String[] shortTimes = null; shortTimes = GetShortTimeFormats(); @@ -1967,6 +2061,11 @@ namespace System.Globalization { get { + if (GlobalizationMode.Invariant) + { + return CultureInfo.GetCalendarInstance(CalendarIds[0]); + } + CalendarId 
defaultCalId = (CalendarId)GetLocaleInfo(LocaleNumberData.CalendarType); if (defaultCalId == 0) @@ -2198,7 +2297,7 @@ namespace System.Globalization internal void GetNFIValues(NumberFormatInfo nfi) { - if (this.IsInvariantCulture) + if (GlobalizationMode.Invariant || this.IsInvariantCulture) { // FUTURE: NumberFormatInfo already has default values for many of these fields. Can we not do this? nfi.positiveSign = _sPositiveSign; diff --git a/src/mscorlib/src/System/Globalization/CultureInfo.Unix.cs b/src/mscorlib/src/System/Globalization/CultureInfo.Unix.cs index d7950b0..4d98dd6 100644 --- a/src/mscorlib/src/System/Globalization/CultureInfo.Unix.cs +++ b/src/mscorlib/src/System/Globalization/CultureInfo.Unix.cs @@ -15,6 +15,9 @@ namespace System.Globalization internal static CultureInfo GetUserDefaultCulture() { + if (GlobalizationMode.Invariant) + return CultureInfo.InvariantCulture; + CultureInfo cultureInfo = null; string localeName; if (CultureData.GetDefaultLocaleName(out localeName)) diff --git a/src/mscorlib/src/System/Globalization/CultureInfo.Windows.cs b/src/mscorlib/src/System/Globalization/CultureInfo.Windows.cs index a7f43f2..827330b 100644 --- a/src/mscorlib/src/System/Globalization/CultureInfo.Windows.cs +++ b/src/mscorlib/src/System/Globalization/CultureInfo.Windows.cs @@ -44,6 +44,9 @@ namespace System.Globalization internal static CultureInfo GetUserDefaultCulture() { + if (GlobalizationMode.Invariant) + return CultureInfo.InvariantCulture; + const uint LOCALE_SNAME = 0x0000005c; const string LOCALE_NAME_USER_DEFAULT = null; const string LOCALE_NAME_SYSTEM_DEFAULT = "!x-sys-default-locale"; @@ -69,6 +72,9 @@ namespace System.Globalization private static CultureInfo GetUserDefaultUILanguage() { + if (GlobalizationMode.Invariant) + return CultureInfo.InvariantCulture; + const uint MUI_LANGUAGE_NAME = 0x8; // Use ISO language (culture) name convention uint langCount = 0; uint bufLen = 0; diff --git 
a/src/mscorlib/src/System/Globalization/GlobalizationMode.Unix.cs b/src/mscorlib/src/System/Globalization/GlobalizationMode.Unix.cs new file mode 100644 index 0000000..6896029 --- /dev/null +++ b/src/mscorlib/src/System/Globalization/GlobalizationMode.Unix.cs @@ -0,0 +1,24 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Globalization +{ + internal sealed partial class GlobalizationMode + { + private static bool GetGlobalizationInvariantMode() + { + bool invariantEnabled = CLRConfig.GetBoolValue(c_InvariantModeConfigSwitch); + if (!invariantEnabled) + { + if (Interop.GlobalizationInterop.LoadICU() == 0) + { + string message = "Couldn't find a valid ICU package installed on the system. " + + "Set the configuration flag System.Globalization.Invariant to true if you want to run with no globalization support."; + Environment.FailFast(message); + } + } + return invariantEnabled; + } + } +} diff --git a/src/mscorlib/src/System/Globalization/GlobalizationMode.Windows.cs b/src/mscorlib/src/System/Globalization/GlobalizationMode.Windows.cs new file mode 100644 index 0000000..1be79f3 --- /dev/null +++ b/src/mscorlib/src/System/Globalization/GlobalizationMode.Windows.cs @@ -0,0 +1,14 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +namespace System.Globalization +{ + internal sealed partial class GlobalizationMode + { + private static bool GetGlobalizationInvariantMode() + { + return CLRConfig.GetBoolValue(c_InvariantModeConfigSwitch); + } + } +} diff --git a/src/mscorlib/src/System/Globalization/GlobalizationMode.cs b/src/mscorlib/src/System/Globalization/GlobalizationMode.cs new file mode 100644 index 0000000..8f83098 --- /dev/null +++ b/src/mscorlib/src/System/Globalization/GlobalizationMode.cs @@ -0,0 +1,12 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Globalization +{ + internal sealed partial class GlobalizationMode + { + private const string c_InvariantModeConfigSwitch = "System.Globalization.Invariant"; + internal static bool Invariant { get; } = GetGlobalizationInvariantMode(); + } +} diff --git a/src/mscorlib/src/System/Globalization/IdnMapping.Unix.cs b/src/mscorlib/src/System/Globalization/IdnMapping.Unix.cs index 58f4cca..aab957a 100644 --- a/src/mscorlib/src/System/Globalization/IdnMapping.Unix.cs +++ b/src/mscorlib/src/System/Globalization/IdnMapping.Unix.cs @@ -8,6 +8,8 @@ namespace System.Globalization { private unsafe string GetAsciiCore(char* unicode, int count) { + Debug.Assert(!GlobalizationMode.Invariant); + uint flags = Flags; CheckInvalidIdnCharacters(unicode, count, flags, nameof(unicode)); @@ -48,6 +50,8 @@ namespace System.Globalization private unsafe string GetUnicodeCore(char* ascii, int count) { + Debug.Assert(!GlobalizationMode.Invariant); + uint flags = Flags; CheckInvalidIdnCharacters(ascii, count, flags, nameof(ascii)); @@ -69,6 +73,8 @@ namespace System.Globalization private unsafe string GetUnicodeCore(char* ascii, int count, uint flags, char* output, int outputLength, bool reattempt) { + Debug.Assert(!GlobalizationMode.Invariant); + int realLen = 
Interop.GlobalizationInterop.ToUnicode(flags, ascii, count, output, outputLength); if (realLen == 0) diff --git a/src/mscorlib/src/System/Globalization/IdnMapping.Windows.cs b/src/mscorlib/src/System/Globalization/IdnMapping.Windows.cs index 5950a54..d3f05bb 100644 --- a/src/mscorlib/src/System/Globalization/IdnMapping.Windows.cs +++ b/src/mscorlib/src/System/Globalization/IdnMapping.Windows.cs @@ -11,6 +11,8 @@ namespace System.Globalization { private unsafe string GetAsciiCore(char* unicode, int count) { + Debug.Assert(!GlobalizationMode.Invariant); + uint flags = Flags; // Determine the required length @@ -39,6 +41,8 @@ namespace System.Globalization private unsafe string GetAsciiCore(char* unicode, int count, uint flags, char* output, int outputLength) { + Debug.Assert(!GlobalizationMode.Invariant); + int length = Interop.Normaliz.IdnToAscii(flags, new IntPtr(unicode), count, new IntPtr(output), outputLength); if (length == 0) { @@ -50,6 +54,8 @@ namespace System.Globalization private unsafe string GetUnicodeCore(char* ascii, int count) { + Debug.Assert(!GlobalizationMode.Invariant); + uint flags = Flags; // Determine the required length @@ -78,6 +84,8 @@ namespace System.Globalization private unsafe string GetUnicodeCore(char* ascii, int count, uint flags, char* output, int outputLength) { + Debug.Assert(!GlobalizationMode.Invariant); + int length = Interop.Normaliz.IdnToUnicode(flags, new IntPtr(ascii), count, new IntPtr(output), outputLength); if (length == 0) { diff --git a/src/mscorlib/src/System/Globalization/IdnMapping.cs b/src/mscorlib/src/System/Globalization/IdnMapping.cs index 8424472..1f6bd9b 100644 --- a/src/mscorlib/src/System/Globalization/IdnMapping.cs +++ b/src/mscorlib/src/System/Globalization/IdnMapping.cs @@ -25,6 +25,7 @@ // RFC 3492 - Punycode: A Bootstring encoding of Unicode for Internationalized Domain Names in Applications (IDNA) using System.Diagnostics.Contracts; +using System.Text; namespace System.Globalization { @@ -85,6 +86,11 @@ 
namespace System.Globalization throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, index + count - 1), nameof(unicode)); } + if (GlobalizationMode.Invariant) + { + return GetAsciiInvariant(unicode, index, count); + } + unsafe { fixed (char* pUnicode = unicode) @@ -126,6 +132,11 @@ namespace System.Globalization throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii)); Contract.EndContractBlock(); + if (GlobalizationMode.Invariant) + { + return GetUnicodeInvariant(ascii, index, count); + } + unsafe { fixed (char* pAscii = ascii) @@ -148,5 +159,739 @@ namespace System.Globalization { return (_allowUnassigned ? 100 : 200) + (_useStd3AsciiRules ? 1000 : 2000); } + + // + // Invariant implementation + // + + private const char c_delimiter = '-'; + private const string c_strAcePrefix = "xn--"; + private const int c_labelLimit = 63; // Not including dots + private const int c_defaultNameLimit = 255; // Including dots + private const int c_initialN = 0x80; + private const int c_maxint = 0x7ffffff; + private const int c_initialBias = 72; + private const int c_punycodeBase = 36; + private const int c_tmin = 1; + private const int c_tmax = 26; + private const int c_skew = 38; + private const int c_damp = 700; + + + // Legal "dot" separators (i.e: . 
in www.microsoft.com)
+        private static char[] c_Dots = { '.', '\u3002', '\uFF0E', '\uFF61' };
+
+        // Converts the unicode[index .. index + count) slice to its ASCII (ACE / Punycode) form
+        // using only the managed helpers below.
+        //   unicode     - name to convert; index/count select the part that is used.
+        //   returns     - the slice itself when it is plain ASCII and passes validation,
+        //                 otherwise the result of PunycodeEncode.
+        //   throws      - ArgumentException for empty input, control characters, STD3 violations
+        //                 (when UseStd3AsciiRules is set) and label/name size overflows.
+        private string GetAsciiInvariant(string unicode, int index, int count)
+        {
+            if (index > 0 || count < unicode.Length)
+            {
+                unicode = unicode.Substring(index, count);
+            }
+
+            // Check for ASCII only string, which will be unchanged
+            if (ValidateStd3AndAscii(unicode, UseStd3AsciiRules, true))
+            {
+                return unicode;
+            }
+
+            // Cannot be null terminated. ValidateStd3AndAscii may have returned false at the
+            // first non-ASCII character, i.e. before it looked at the end of the string.
+            Debug.Assert(count >= 1, "[IdnMapping.GetAscii] Expected 0 length strings to fail before now.");
+            if (unicode[unicode.Length - 1] <= 0x1f)
+            {
+                throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, unicode.Length - 1), nameof(unicode));
+            }
+
+            // This path performs no normalization, so the string cannot change between
+            // validation and encoding; there is no trailing dot to re-check here.
+
+            // May need to check Std3 rules again for non-ascii
+            if (UseStd3AsciiRules)
+            {
+                ValidateStd3AndAscii(unicode, true, false);
+            }
+
+            // Go ahead and encode it
+            return PunycodeEncode(unicode);
+        }
+
+        // Validates a name and reports whether it is plain ASCII.
+        //   bUseStd3    - also run ValidateStd3 on every non-dot character.
+        //   bCheckAscii - return false at the first character >= 0x7f (nothing after that
+        //                 character is validated in that case).
+        //   returns     - true when the whole string was scanned.
+        //   throws      - ArgumentException for empty input, characters <= 0x1f, empty or
+        //                 over-long labels, over-long names and STD3 violations.
+        static bool ValidateStd3AndAscii(string unicode, bool bUseStd3, bool bCheckAscii)
+        {
+            // If its empty, then its too small
+            if (unicode.Length == 0)
+                throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));
+
+            int iLastDot = -1;
+
+            // Loop the whole string
+            for (int i = 0; i < unicode.Length; i++)
+            {
+                // Aren't allowing control chars (or 7f, but idn tables catch that, they don't catch \0 at end though)
+                if (unicode[i] <= 0x1f)
+                {
+                    throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, i ), nameof(unicode));
+                }
+
+                // If its Unicode or a control character, return false (non-ascii)
+                if (bCheckAscii && unicode[i] >= 0x7f)
+                    return false;
+
+                // Check for dots
+                if (IsDot(unicode[i]))
+                {
+                    // Can't have 2 dots in a row (this also rejects a leading dot, since iLastDot starts at -1)
+                    if (i == iLastDot + 1)
+                        throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));
+
+                    // If its too far between dots then fail
+                    if (i - iLastDot > c_labelLimit + 1)
+                        throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));
+
+                    // If validating Std3, then char before dot can't be - char
+                    if (bUseStd3 && i > 0)
+                        ValidateStd3(unicode[i - 1], true);
+
+                    // Remember where the last dot is
+                    iLastDot = i;
+                    continue;
+                }
+
+                // If necessary, make sure its a valid std3 character
+                if (bUseStd3)
+                {
+                    ValidateStd3(unicode[i], (i == iLastDot + 1));
+                }
+            }
+
+            // If we never had a dot, then we need to be shorter than the label limit
+            // NOTE(review): when the name does contain a dot, the label after the last dot is
+            // not length-checked here - confirm whether that is intended.
+            if (iLastDot == -1 && unicode.Length > c_labelLimit)
+                throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));
+
+            // Need to validate entire string length, 1 shorter if last char wasn't a dot
+            bool bEndsWithDot = IsDot(unicode[unicode.Length - 1]);
+            int iNameLimit = c_defaultNameLimit - (bEndsWithDot ? 0 : 1);
+            if (unicode.Length > iNameLimit)
+                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize, iNameLimit), nameof(unicode));
+
+            // If last char wasn't a dot we need to check for trailing -
+            if (bUseStd3 && !bEndsWithDot)
+                ValidateStd3(unicode[unicode.Length - 1], true);
+
+            return true;
+        }
+
+        /* PunycodeEncode() converts Unicode to Punycode. The input */
+        /* is represented as an array of Unicode code points (not code */
+        /* units; surrogate pairs are not allowed), and the output */
+        /* will be represented as an array of ASCII code points. The */
+        /* output string is *not* null-terminated; it will contain */
+        /* zeros if and only if the input contains zeros. 
(Of course */ + /* the caller can leave room for a terminator and add one if */ + /* needed.) The input_length is the number of code points in */ + /* the input. The output_length is an in/out argument: the */ + /* caller passes in the maximum number of code points that it */ + + /* can receive, and on successful return it will contain the */ + /* number of code points actually output. The case_flags array */ + /* holds input_length boolean values, where nonzero suggests that */ + /* the corresponding Unicode character be forced to uppercase */ + /* after being decoded (if possible), and zero suggests that */ + /* it be forced to lowercase (if possible). ASCII code points */ + /* are encoded literally, except that ASCII letters are forced */ + /* to uppercase or lowercase according to the corresponding */ + /* uppercase flags. If case_flags is a null pointer then ASCII */ + /* letters are left as they are, and other code points are */ + /* treated as if their uppercase flags were zero. The return */ + /* value can be any of the punycode_status values defined above */ + /* except punycode_bad_input; if not punycode_success, then */ + /* output_size and output might contain garbage. */ /* NOTE: the description above appears to be carried over from the RFC 3492 sample code. This port has no input_length, output_length or case_flags arguments: it takes the whole name as a string, encodes each dot-separated label (labels made only of basic code points are emitted without the xn-- prefix), and reports errors by throwing ArgumentException. */ + static string PunycodeEncode(string unicode) + { + // 0 length strings aren't allowed + if (unicode.Length == 0) + throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode)); + + StringBuilder output = new StringBuilder(unicode.Length); + int iNextDot = 0; + int iAfterLastDot = 0; + int iOutputAfterLastDot = 0; + + // Find the next dot + while (iNextDot < unicode.Length) + { + // Find end of this segment + iNextDot = unicode.IndexOfAny(c_Dots, iAfterLastDot); + Contract.Assert(iNextDot <= unicode.Length, "[IdnMapping.punycode_encode]IndexOfAny is broken"); + if (iNextDot < 0) + iNextDot = unicode.Length; + + // Only allowed to have empty . section at end (www.microsoft.com.) + if (iNextDot == iAfterLastDot) + { + // Only allowed to have empty sections as trailing . 
+ if (iNextDot != unicode.Length) + throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode)); + // Last dot, stop + break; + } + + // We'll need an Ace prefix + output.Append(c_strAcePrefix); + + // Everything resets every segment. + bool bRightToLeft = false; + + // Check for RTL. If right-to-left, then 1st & last chars must be RTL + BidiCategory eBidi = CharUnicodeInfo.GetBidiCategory(unicode, iAfterLastDot); + if (eBidi == BidiCategory.RightToLeft || eBidi == BidiCategory.RightToLeftArabic) + { + // It has to be right to left. + bRightToLeft = true; + + // Check last char + int iTest = iNextDot - 1; + if (Char.IsLowSurrogate(unicode, iTest)) + { + iTest--; + } + + eBidi = CharUnicodeInfo.GetBidiCategory(unicode, iTest); + if (eBidi != BidiCategory.RightToLeft && eBidi != BidiCategory.RightToLeftArabic) + { + // Oops, last wasn't RTL, last should be RTL if first is RTL + throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(unicode)); + } + } + + // Handle the basic code points + int basicCount; + int numProcessed = 0; // Num code points that have been processed so far (this segment) + for (basicCount = iAfterLastDot; basicCount < iNextDot; basicCount++) + { + // Can't be lonely surrogate because it would've thrown in normalization (NOTE(review): GetAsciiInvariant performs no normalization - confirm lone surrogates are rejected before this point) + Debug.Assert(Char.IsLowSurrogate(unicode, basicCount) == false, "[IdnMapping.punycode_encode]Unexpected low surrogate"); + + // Double check our bidi rules + BidiCategory testBidi = CharUnicodeInfo.GetBidiCategory(unicode, basicCount); + + // If we're RTL, we can't have LTR chars + if (bRightToLeft && testBidi == BidiCategory.LeftToRight) + { + // Oops, throw error + throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(unicode)); + } + + // If we're not RTL we can't have RTL chars + if (!bRightToLeft && (testBidi == BidiCategory.RightToLeft || testBidi == BidiCategory.RightToLeftArabic)) + { + // Oops, throw error + throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(unicode)); + } + + // If its 
basic then add it + if (Basic(unicode[basicCount])) + { + output.Append(EncodeBasic(unicode[basicCount])); + numProcessed++; + } + // If it's a surrogate pair, skip the low half: the next loop iteration must not start on it. + else if (Char.IsSurrogatePair(unicode, basicCount)) + basicCount++; + } + + int numBasicCodePoints = numProcessed; // number of basic code points + + // Stop if we ONLY had basic code points + if (numBasicCodePoints == iNextDot - iAfterLastDot) + { + // Get rid of xn-- and this segment's done + output.Remove(iOutputAfterLastDot, c_strAcePrefix.Length); + } + else + { + // If it has some non-basic code points the input cannot start with xn-- + if (unicode.Length - iAfterLastDot >= c_strAcePrefix.Length && + unicode.Substring(iAfterLastDot, c_strAcePrefix.Length).Equals( + c_strAcePrefix, StringComparison.OrdinalIgnoreCase)) + throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(unicode)); + + // Need to do ACE encoding + int numSurrogatePairs = 0; // number of surrogate pairs so far + + // Add a delimiter (-) if we had any basic code points (between basic and encoded pieces) + if (numBasicCodePoints > 0) + { + output.Append(c_delimiter); + } + + // Initialize the state + int n = c_initialN; + int delta = 0; + int bias = c_initialBias; + + // Main loop + while (numProcessed < (iNextDot - iAfterLastDot)) + { + /* All non-basic code points < n have been */ + /* handled already. Find the next larger one: */ + int j; + int m; + int test = 0; + for (m = c_maxint, j = iAfterLastDot; + j < iNextDot; + j += IsSupplementary(test) ? 
2 : 1) + { + test = Char.ConvertToUtf32(unicode, j); + if (test >= n && test < m) m = test; + } + + /* Increase delta enough to advance the decoder's */ + /* <n,i> state to <m,0>, but guard against overflow: */ + delta += (int)((m - n) * ((numProcessed - numSurrogatePairs) + 1)); + Debug.Assert(delta > 0, "[IdnMapping.cs]1 punycode_encode - delta overflowed int"); + n = m; + + for (j = iAfterLastDot; j < iNextDot; j+= IsSupplementary(test) ? 2 : 1) + { + // Make sure we're aware of surrogates + test = Char.ConvertToUtf32(unicode, j); + + // Adjust for character position (only the chars in our string already, some + // haven't been processed). + + if (test < n) + { + delta++; + Contract.Assert(delta > 0, "[IdnMapping.cs]2 punycode_encode - delta overflowed int"); + } + + if (test == n) + { + // Represent delta as a generalized variable-length integer: + int q, k; + for (q = delta, k = c_punycodeBase; ; k += c_punycodeBase) + { + int t = k <= bias ? c_tmin : k >= bias + c_tmax ? c_tmax : k - bias; + if (q < t) break; + Debug.Assert(c_punycodeBase != t, "[IdnMapping.punycode_encode]Expected c_punycodeBase (36) to be != t"); + output.Append(EncodeDigit(t + (q - t) % (c_punycodeBase - t))); + q = (q - t) / (c_punycodeBase - t); + } + + output.Append(EncodeDigit(q)); + bias = Adapt(delta, (numProcessed - numSurrogatePairs) + 1, numProcessed == numBasicCodePoints); + delta = 0; + numProcessed++; + + if (IsSupplementary(m)) + { + numProcessed++; + numSurrogatePairs++; + } + } + } + ++delta; + ++n; + Debug.Assert(delta > 0, "[IdnMapping.cs]3 punycode_encode - delta overflowed int"); + } + } + + // Make sure its not too big + if (output.Length - iOutputAfterLastDot > c_labelLimit) + throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode)); + + // Done with this segment, add dot if necessary + if (iNextDot != unicode.Length) + output.Append('.'); + + iAfterLastDot = iNextDot + 1; + iOutputAfterLastDot = output.Length; + } + + // Throw if we're too long + if 
(output.Length > c_defaultNameLimit - (IsDot(unicode[unicode.Length-1]) ? 0 : 1))
+                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize,
+                                                c_defaultNameLimit - (IsDot(unicode[unicode.Length-1]) ? 0 : 1)), nameof(unicode));
+            // Return our output string
+            return output.ToString();
+        }
+
+        // True for the four characters treated as label separators:
+        // U+002E (full stop), U+3002 (ideographic full stop), U+FF0E (fullwidth full stop)
+        // and U+FF61 (halfwidth ideographic full stop).
+        private static bool IsDot(char c)
+        {
+            switch (c)
+            {
+                case '.':
+                case '\u3002':
+                case '\uFF0E':
+                case '\uFF61':
+                    return true;
+
+                default:
+                    return false;
+            }
+        }
+
+        // True when the code point lies outside the Basic Multilingual Plane (>= U+10000).
+        private static bool IsSupplementary(int cTest) => cTest >= 0x10000;
+
+        // True when the code point is in the ASCII range (< 0x80).
+        private static bool Basic(uint cp) => cp < 0x80;
+
+        // Throws ArgumentException when c is not allowed under the STD3 rules.
+        //   bNextToDot - c sits directly beside a label boundary, where '-' is also rejected.
+        private static void ValidateStd3(char c, bool bNextToDot)
+        {
+            // Everything up to ',' plus '/', ':'..'@', '['..'`' and '{'..0x7F is illegal
+            bool bIllegalChar = c <= ',' ||
+                                c == '/' ||
+                                (c >= ':' && c <= '@') ||
+                                (c >= '[' && c <= '`') ||
+                                (c >= '{' && c <= (char)0x7F);
+
+            // A hyphen may not start or end a label
+            bool bHyphenAtEdge = c == '-' && bNextToDot;
+
+            if (bIllegalChar || bHyphenAtEdge)
+            {
+                throw new ArgumentException(SR.Format(SR.Argument_IdnBadStd3, c), nameof(c));
+            }
+        }
+
+        // Decodes the ascii[index .. index + count) slice from Punycode and verifies that the
+        // result converts back to the same ASCII text (ignoring case) via GetAscii.
+        // Throws ArgumentException when decoding fails or the round trip does not match.
+        private string GetUnicodeInvariant(string ascii, int index, int count)
+        {
+            // Work on just the requested part of the string
+            string strInput = (index > 0 || count < ascii.Length) ? ascii.Substring(index, count) : ascii;
+
+            // Convert Punycode to Unicode
+            string strDecoded = PunycodeDecode(strInput);
+
+            // Output name MUST obey IDNA rules & round trip (casing differences are allowed)
+            string strRoundTrip = GetAscii(strDecoded);
+            if (!strInput.Equals(strRoundTrip, StringComparison.OrdinalIgnoreCase))
+            {
+                throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii));
+            }
+
+            return strDecoded;
+        }
+
+        /* PunycodeDecode() converts Punycode to Unicode. 
The input is */ + /* represented as an array of ASCII code points, and the output */ + /* will be represented as an array of Unicode code points. The */ + /* input_length is the number of code points in the input. The */ + /* output_length is an in/out argument: the caller passes in */ + /* the maximum number of code points that it can receive, and */ + /* on successful return it will contain the actual number of */ + /* code points output. The case_flags array needs room for at */ + /* least output_length values, or it can be a null pointer if the */ + /* case information is not needed. A nonzero flag suggests that */ + /* the corresponding Unicode character be forced to uppercase */ + /* by the caller (if possible), while zero suggests that it be */ + /* forced to lowercase (if possible). ASCII code points are */ + /* output already in the proper case, but their flags will be set */ + /* appropriately so that applying the flags would be harmless. */ + /* The return value can be any of the punycode_status values */ + /* defined above; if not punycode_success, then output_length, */ + /* output, and case_flags might contain garbage. On success, the */ + /* decoder will never need to write an output_length greater than */ + /* input_length, because of how the encoding is defined. */ /* NOTE: the description above appears to be carried over from the RFC 3492 sample code. This port has no length or case_flags arguments: it walks the '.'-separated labels of ascii, copies labels without the xn-- prefix as they are, lower-cases the basic code points of ACE labels, and reports errors by throwing ArgumentException. */ + + private static string PunycodeDecode(string ascii) + { + // 0 length strings aren't allowed + if (ascii.Length == 0) + throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii)); + + // Throw if we're too long + if (ascii.Length > c_defaultNameLimit - (IsDot(ascii[ascii.Length-1]) ? 0 : 1)) + throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize, + c_defaultNameLimit - (IsDot(ascii[ascii.Length-1]) ? 
0 : 1)), nameof(ascii)); + + // output stringbuilder + StringBuilder output = new StringBuilder(ascii.Length); + + // Dot searching + int iNextDot = 0; + int iAfterLastDot = 0; + int iOutputAfterLastDot = 0; + + while (iNextDot < ascii.Length) + { + // Find end of this segment + iNextDot = ascii.IndexOf('.', iAfterLastDot); + if (iNextDot < 0 || iNextDot > ascii.Length) + iNextDot = ascii.Length; + + // Only allowed to have empty . section at end (www.microsoft.com.) + if (iNextDot == iAfterLastDot) + { + // Only allowed to have empty sections as trailing . + if (iNextDot != ascii.Length) + throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii)); + + // Last dot, stop + break; + } + + // In either case it can't be bigger than segment size + if (iNextDot - iAfterLastDot > c_labelLimit) + throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii)); + + // See if this section's ASCII or ACE + if (ascii.Length < c_strAcePrefix.Length + iAfterLastDot || + !ascii.Substring(iAfterLastDot,c_strAcePrefix.Length).Equals(c_strAcePrefix, StringComparison.OrdinalIgnoreCase)) + { + // Its ASCII, copy it + output.Append(ascii.Substring(iAfterLastDot, iNextDot - iAfterLastDot)); + } + else + { + // Not ASCII, bump up iAfterLastDot to be after ACE Prefix + iAfterLastDot += c_strAcePrefix.Length; + + // Get number of basic code points (where delimiter is) + // iTemp is the index of the last delimiter at or before iNextDot - 1; a value <= iAfterLastDot is treated below as "no basic code points" + int iTemp = ascii.LastIndexOf(c_delimiter, iNextDot - 1); + + // Trailing - not allowed + if (iTemp == iNextDot - 1) + throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii)); + + int numBasicCodePoints; + if (iTemp <= iAfterLastDot) + numBasicCodePoints = 0; + else + { + numBasicCodePoints = iTemp - iAfterLastDot; + + // Copy all the basic code points, making sure they're all in the allowed range, + // and losing the casing for all of them. 
+ for (int copyAscii = iAfterLastDot; copyAscii < iAfterLastDot + numBasicCodePoints; copyAscii++) + { + // Make sure we don't allow unicode in the ascii part + if (ascii[copyAscii] > 0x7f) + throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii)); + + // When appending make sure they get lower cased + output.Append((char)(ascii[copyAscii] >= 'A' && ascii[copyAscii] <='Z' ? ascii[copyAscii] - 'A' + 'a' : ascii[copyAscii])); + } + } + + // Get ready for main loop. Start at beginning if we didn't have any + // basic code points, otherwise start after the -. + // asciiIndex will be next character to read from ascii + int asciiIndex = iAfterLastDot + (numBasicCodePoints > 0 ? numBasicCodePoints + 1 : 0); + + // initialize our state + int n = c_initialN; + int bias = c_initialBias; + int i = 0; + + int w, k; + + // no Supplementary characters yet + int numSurrogatePairs = 0; + + // Main loop, read rest of ascii + while (asciiIndex < iNextDot) + { + /* Decode a generalized variable-length integer into delta, */ + /* which gets added to i. The overflow checking is easier */ + /* if we increase i as we go, then subtract off its starting */ + /* value at the end to obtain delta. */ + int oldi = i; + + for (w = 1, k = c_punycodeBase; ; k += c_punycodeBase) + { + // Check to make sure we aren't overrunning our ascii string + if (asciiIndex >= iNextDot) + throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii)); + + // decode the digit from the next char + int digit = DecodeDigit(ascii[asciiIndex++]); + + Debug.Assert(w > 0, "[IdnMapping.punycode_decode]Expected w > 0"); + if (digit > (c_maxint - i) / w) + throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii)); + + i += (int)(digit * w); + int t = k <= bias ? c_tmin : k >= bias + c_tmax ? 
c_tmax : k - bias; + if (digit < t) + break; + Debug.Assert(c_punycodeBase != t, "[IdnMapping.punycode_decode]Expected t != c_punycodeBase (36)"); + if (w > c_maxint / (c_punycodeBase - t)) + throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii)); + w *= (c_punycodeBase - t); + } + + bias = Adapt(i - oldi, (output.Length - iOutputAfterLastDot - numSurrogatePairs) + 1, oldi == 0); + + /* i was supposed to wrap around from output.Length to 0, */ + /* incrementing n each time, so we'll fix that now: */ + Debug.Assert((output.Length - iOutputAfterLastDot - numSurrogatePairs) + 1 > 0, + "[IdnMapping.punycode_decode]Expected to have added > 0 characters this segment"); + if (i / ((output.Length - iOutputAfterLastDot - numSurrogatePairs) + 1) > c_maxint - n) + throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii)); + n += (int)(i / (output.Length - iOutputAfterLastDot - numSurrogatePairs + 1)); + i %= (output.Length - iOutputAfterLastDot - numSurrogatePairs + 1); + + // Make sure n is legal + if ((n < 0 || n > 0x10ffff) || (n >= 0xD800 && n <= 0xDFFF)) + throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii)); + + // insert n at position i of the output: Really tricky if we have surrogates + int iUseInsertLocation; + String strTemp = Char.ConvertFromUtf32(n); + + // If we have supplementary characters + if (numSurrogatePairs > 0) + { + // Hard way, we have supplementary characters: i counts code points, so walk the output and step over both halves of each pair + int iCount; + for (iCount = i, iUseInsertLocation = iOutputAfterLastDot; iCount > 0; iCount--, iUseInsertLocation++) + { + // If its a surrogate, we have to go one more + if (iUseInsertLocation >= output.Length) + throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii)); + if (Char.IsSurrogate(output[iUseInsertLocation])) + iUseInsertLocation++; + } + } + else + { + // No Supplementary chars yet, just add i + iUseInsertLocation = iOutputAfterLastDot + i; + } + + // Insert it + output.Insert(iUseInsertLocation, strTemp); + + // If it 
was a surrogate increment our counter
+                        if (IsSupplementary(n))
+                            numSurrogatePairs++;
+
+                        // Index gets updated
+                        i++;
+                    }
+
+                    // Do BIDI testing
+                    bool bRightToLeft = false;
+
+                    // Take one snapshot of the decoded text for the checks below. output is not
+                    // modified again until this label has been validated, so converting it once
+                    // replaces a fresh string copy for every character tested.
+                    string strOutput = output.ToString();
+
+                    // Check for RTL.  If right-to-left, then 1st & last chars must be RTL
+                    BidiCategory eBidi = CharUnicodeInfo.GetBidiCategory(strOutput, iOutputAfterLastDot);
+                    if (eBidi == BidiCategory.RightToLeft || eBidi == BidiCategory.RightToLeftArabic)
+                    {
+                        // It has to be right to left.
+                        bRightToLeft = true;
+                    }
+
+                    // Check the rest of them to make sure RTL/LTR is consistent
+                    for (int iTest = iOutputAfterLastDot; iTest < strOutput.Length; iTest++)
+                    {
+                        // This might happen if we run into a pair
+                        if (Char.IsLowSurrogate(strOutput, iTest))
+                            continue;
+
+                        // Check to see if its LTR
+                        eBidi = CharUnicodeInfo.GetBidiCategory(strOutput, iTest);
+                        if ((bRightToLeft && eBidi == BidiCategory.LeftToRight) ||
+                            (!bRightToLeft && (eBidi == BidiCategory.RightToLeft || eBidi == BidiCategory.RightToLeftArabic)))
+                            throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(ascii));
+                    }
+
+                    // Its also a requirement that the last one be RTL if 1st is RTL
+                    // (eBidi still holds the category of the last character examined by the loop)
+                    if (bRightToLeft && eBidi != BidiCategory.RightToLeft && eBidi != BidiCategory.RightToLeftArabic)
+                    {
+                        // Oops, last wasn't RTL, last should be RTL if first is RTL
+                        throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(ascii));
+                    }
+                }
+
+                // See if this label was too long
+                if (iNextDot - iAfterLastDot > c_labelLimit)
+                    throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii));
+
+                // Done with this segment, add dot if necessary
+                if (iNextDot != ascii.Length)
+                    output.Append('.');
+
+                iAfterLastDot = iNextDot + 1;
+                iOutputAfterLastDot = output.Length;
+            }
+
+            // Throw if we're too long
+            if (output.Length > c_defaultNameLimit - (IsDot(output[output.Length-1]) ? 0 : 1))
+                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize, c_defaultNameLimit - (IsDot(output[output.Length-1]) ? 
0 : 1)), nameof(ascii));
+
+            // Return our output string
+            return output.ToString();
+        }
+
+        // DecodeDigit(cp) returns the numeric value of a basic code point (for use in
+        // representing integers): 'a'-'z' and 'A'-'Z' give 0-25, '0'-'9' give 26-35.
+        // Any other character throws ArgumentException.
+
+        private static int DecodeDigit(char cp)
+        {
+            // Letters come in two flavors that decode to the same value
+            if (cp >= 'a' && cp <= 'z')
+            {
+                return cp - 'a';
+            }
+
+            if (cp >= 'A' && cp <= 'Z')
+            {
+                return cp - 'A';
+            }
+
+            // Digits follow the 26 letters
+            if (cp >= '0' && cp <= '9')
+            {
+                return cp - '0' + 26;
+            }
+
+            // Expected 0-9, A-Z or a-z, everything else is illegal
+            throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(cp));
+        }
+
+        // Computes the next bias from the delta that was just encoded or decoded.
+        //   numpoints - number of code points handled so far; must be non-zero.
+        //   firsttime - true for the first delta of a label, which is damped by c_damp
+        //               instead of being halved.
+        private static int Adapt(int delta, int numpoints, bool firsttime)
+        {
+            if (firsttime)
+            {
+                delta /= c_damp;
+            }
+            else
+            {
+                delta /= 2;
+            }
+
+            Debug.Assert(numpoints != 0, "[IdnMapping.adapt]Expected non-zero numpoints.");
+            delta += delta / numpoints;
+
+            // Scale delta down until it is within the threshold, adding one base per step
+            uint k = 0;
+            while (delta > ((c_punycodeBase - c_tmin) * c_tmax) / 2)
+            {
+                delta /= c_punycodeBase - c_tmin;
+                k += c_punycodeBase;
+            }
+
+            Debug.Assert(delta + c_skew != 0, "[IdnMapping.adapt]Expected non-zero delta+skew.");
+            return (int)(k + (c_punycodeBase - c_tmin + 1) * delta / (delta + c_skew));
+        }
+
+        // EncodeBasic(bcp) returns bcp lower-cased when it is 'A'-'Z' and unchanged otherwise.
+
+        static char EncodeBasic(char bcp)
+        {
+            return HasUpperCaseFlag(bcp) ? (char)(bcp + ('a' - 'A')) : bcp;
+        }
+
+        // Return whether a punycode code point is flagged as being upper case, i.e. is 'A'-'Z'.
+        private static bool HasUpperCaseFlag(char punychar)
+        {
+            return !(punychar < 'A' || punychar > 'Z');
+        }
+
+        /* EncodeDigit(d,flag) returns the basic code point whose value */
+        /* (when used for representing integers) is d, which needs to be in */
+        /* the range 0 to punycodeBase-1. 
The lowercase form is used unless flag is */ + /* true, in which case the uppercase form is used. */ /* NOTE: EncodeDigit below takes no flag argument; values 0-25 are always returned as lowercase 'a'-'z' and 26-35 as '0'-'9'. */ + + private static char EncodeDigit(int d) + { + Debug.Assert(d >= 0 && d < c_punycodeBase, "[IdnMapping.encode_digit]Expected 0 <= d < punycodeBase"); + // 26-35 map to ASCII 0-9 + if (d > 25) return (char)(d - 26 + '0'); + + // 0-25 map to a-z (always lowercase here) + return (char)(d + 'a'); + } + } } diff --git a/src/mscorlib/src/System/Globalization/JapaneseCalendar.Unix.cs b/src/mscorlib/src/System/Globalization/JapaneseCalendar.Unix.cs index b9bd94a..6f8e0ba 100644 --- a/src/mscorlib/src/System/Globalization/JapaneseCalendar.Unix.cs +++ b/src/mscorlib/src/System/Globalization/JapaneseCalendar.Unix.cs @@ -10,6 +10,11 @@ namespace System.Globalization { private static EraInfo[] GetJapaneseEras() { + if (GlobalizationMode.Invariant) + { + return null; + } + string[] eraNames; if (!CalendarData.EnumCalendarInfo("ja-JP", CalendarId.JAPAN, CalendarDataType.EraNames, out eraNames)) { @@ -66,6 +71,8 @@ namespace System.Globalization private static bool GetJapaneseEraStartDate(int era, out DateTime dateTime) { + Debug.Assert(!GlobalizationMode.Invariant); + dateTime = default(DateTime); int startYear; diff --git a/src/mscorlib/src/System/Globalization/TextInfo.Unix.cs b/src/mscorlib/src/System/Globalization/TextInfo.Unix.cs index 67836d8..f7f64c6 100644 --- a/src/mscorlib/src/System/Globalization/TextInfo.Unix.cs +++ b/src/mscorlib/src/System/Globalization/TextInfo.Unix.cs @@ -14,27 +14,14 @@ namespace System.Globalization [NonSerialized] private Tristate _needsTurkishCasing = Tristate.NotInitialized; - ////////////////////////////////////////////////////////////////////////// - //// - //// TextInfo Constructors - //// - //// Implements CultureInfo.TextInfo. 
- //// - ////////////////////////////////////////////////////////////////////////// - internal unsafe TextInfo(CultureData cultureData) - { - _cultureData = cultureData; - _cultureName = _cultureData.CultureName; - _textInfoName = _cultureData.STEXTINFO; - FinishInitialization(_textInfoName); - } - private void FinishInitialization(string textInfoName) { } private unsafe string ChangeCase(string s, bool toUpper) { + Debug.Assert(!_invariantMode); + Debug.Assert(s != null); if (s.Length == 0) @@ -79,6 +66,8 @@ namespace System.Globalization private unsafe char ChangeCase(char c, bool toUpper) { + Debug.Assert(!_invariantMode); + char dst = default(char); ChangeCase(&c, 1, &dst, 1, toUpper); @@ -101,6 +90,8 @@ namespace System.Globalization internal unsafe void ChangeCase(char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, bool bToUpper) { + Debug.Assert(!_invariantMode); + if (IsInvariant) { Interop.GlobalizationInterop.ChangeCaseInvariant(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper); diff --git a/src/mscorlib/src/System/Globalization/TextInfo.Windows.cs b/src/mscorlib/src/System/Globalization/TextInfo.Windows.cs index 052e95a..cc7c4df 100644 --- a/src/mscorlib/src/System/Globalization/TextInfo.Windows.cs +++ b/src/mscorlib/src/System/Globalization/TextInfo.Windows.cs @@ -8,24 +8,14 @@ namespace System.Globalization { public partial class TextInfo { - ////////////////////////////////////////////////////////////////////////// - //// - //// TextInfo Constructors - //// - //// Implements CultureInfo.TextInfo. 
- //// - ////////////////////////////////////////////////////////////////////////// - internal unsafe TextInfo(CultureData cultureData) - { - // This is our primary data source, we don't need most of the rest of this - _cultureData = cultureData; - _cultureName = _cultureData.CultureName; - _textInfoName = _cultureData.STEXTINFO; - FinishInitialization(_textInfoName); - } - private unsafe void FinishInitialization(string textInfoName) { + if (_invariantMode) + { + _sortHandle = IntPtr.Zero; + return; + } + const uint LCMAP_SORTHANDLE = 0x20000000; long handle; @@ -35,6 +25,8 @@ namespace System.Globalization private unsafe string ChangeCase(string s, bool toUpper) { + Debug.Assert(!_invariantMode); + Debug.Assert(s != null); // @@ -49,44 +41,44 @@ namespace System.Globalization { return s; } - else + + int ret; + + // Check for Invariant to avoid A/V in LCMapStringEx + uint linguisticCasing = IsInvariantLocale(_textInfoName) ? 0 : LCMAP_LINGUISTIC_CASING; + + // + // Create the result string. + // + string result = string.FastAllocateString(nLengthInput); + + fixed (char* pSource = s) + fixed (char* pResult = result) + { + ret = Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _textInfoName, + linguisticCasing | (toUpper ? LCMAP_UPPERCASE : LCMAP_LOWERCASE), + pSource, + nLengthInput, + pResult, + nLengthInput, + null, + null, + _sortHandle); + } + + if (ret == 0) { - int ret; - - // Check for Invariant to avoid A/V in LCMapStringEx - uint linguisticCasing = IsInvariantLocale(_textInfoName) ? 0 : LCMAP_LINGUISTIC_CASING; - - // - // Create the result string. - // - string result = string.FastAllocateString(nLengthInput); - - fixed (char* pSource = s) - fixed (char* pResult = result) - { - ret = Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _textInfoName, - toUpper ? 
LCMAP_UPPERCASE | linguisticCasing : LCMAP_LOWERCASE | linguisticCasing, - pSource, - nLengthInput, - pResult, - nLengthInput, - null, - null, - _sortHandle); - } - - if (0 == ret) - { - throw new InvalidOperationException(SR.InvalidOperation_ReadOnly); - } - - Debug.Assert(ret == nLengthInput, "Expected getting the same length of the original string"); - return result; + throw new InvalidOperationException(SR.InvalidOperation_ReadOnly); } + + Debug.Assert(ret == nLengthInput, "Expected getting the same length of the original string"); + return result; } private unsafe char ChangeCase(char c, bool toUpper) { + Debug.Assert(!_invariantMode); + char retVal = '\0'; // Check for Invariant to avoid A/V in LCMapStringEx diff --git a/src/mscorlib/src/System/Globalization/TextInfo.cs b/src/mscorlib/src/System/Globalization/TextInfo.cs index b5e4e3d..fecd2b2 100644 --- a/src/mscorlib/src/System/Globalization/TextInfo.cs +++ b/src/mscorlib/src/System/Globalization/TextInfo.cs @@ -61,6 +61,10 @@ namespace System.Globalization [NonSerialized] private Tristate _isAsciiCasingSameAsInvariant = Tristate.NotInitialized; + // _invariantMode is defined for the perf reason as accessing the instance field is faster than access the static property GlobalizationMode.Invariant + [NonSerialized] + private readonly bool _invariantMode = GlobalizationMode.Invariant; + // Invariant text info internal static TextInfo Invariant { @@ -73,6 +77,22 @@ namespace System.Globalization } internal volatile static TextInfo s_Invariant; + ////////////////////////////////////////////////////////////////////////// + //// + //// TextInfo Constructors + //// + //// Implements CultureInfo.TextInfo. 
+ //// + ////////////////////////////////////////////////////////////////////////// + internal unsafe TextInfo(CultureData cultureData) + { + // This is our primary data source, we don't need most of the rest of this + _cultureData = cultureData; + _cultureName = _cultureData.CultureName; + _textInfoName = _cultureData.STEXTINFO; + FinishInitialization(_textInfoName); + } + [OnSerializing] private void OnSerializing(StreamingContext ctx) { } @@ -126,7 +146,7 @@ namespace System.Globalization return -1; } - return CompareInfo.IndexOfOrdinal(source, value, startIndex, count, ignoreCase: true); + return CultureInfo.InvariantCulture.CompareInfo.IndexOfOrdinal(source, value, startIndex, count, ignoreCase: true); } // Currently we don't have native functions to do this, so we do it the hard way @@ -137,7 +157,7 @@ namespace System.Globalization return -1; } - return CompareInfo.LastIndexOfOrdinal(source, value, startIndex, count, ignoreCase: true); + return CultureInfo.InvariantCulture.CompareInfo.LastIndexOfOrdinal(source, value, startIndex, count, ignoreCase: true); } //////////////////////////////////////////////////////////////////////// @@ -311,10 +331,11 @@ namespace System.Globalization //////////////////////////////////////////////////////////////////////// public unsafe virtual char ToLower(char c) { - if (IsAscii(c) && IsAsciiCasingSameAsInvariant) + if (_invariantMode || (IsAscii(c) && IsAsciiCasingSameAsInvariant)) { return ToLowerAsciiInvariant(c); } + return (ChangeCase(c, toUpper: false)); } @@ -322,9 +343,106 @@ namespace System.Globalization { if (str == null) { throw new ArgumentNullException(nameof(str)); } + if (_invariantMode) + { + return ToLowerAsciiInvariant(str); + } + return ChangeCase(str, toUpper: false); } + private unsafe string ToLowerAsciiInvariant(string s) + { + if (s.Length == 0) + { + return string.Empty; + } + + fixed (char* pSource = s) + { + int i = 0; + while (i < s.Length) + { + if ((uint)(pSource[i] - 'A') <= (uint)('Z' - 'A')) + 
{ + break; + } + i++; + } + + if (i >= s.Length) + { + return s; + } + + string result = string.FastAllocateString(s.Length); + fixed (char* pResult = result) + { + for (int j = 0; j < i; j++) + { + pResult[j] = pSource[j]; + } + + pResult[i] = (Char)(pSource[i] | 0x20); + i++; + + while (i < s.Length) + { + pResult[i] = ToLowerAsciiInvariant(pSource[i]); + i++; + } + } + + return result; + } + } + + private unsafe string ToUpperAsciiInvariant(string s) + { + if (s.Length == 0) + { + return string.Empty; + } + + fixed (char* pSource = s) + { + int i = 0; + while (i < s.Length) + { + if ((uint)(pSource[i] - 'a') <= (uint)('z' - 'a')) + { + break; + } + i++; + } + + if (i >= s.Length) + { + return s; + } + + string result = string.FastAllocateString(s.Length); + fixed (char* pResult = result) + { + for (int j = 0; j < i; j++) + { + pResult[j] = pSource[j]; + } + + pResult[i] = (char)(pSource[i] & ~0x20); + i++; + + while (i < s.Length) + { + pResult[i] = ToUpperAsciiInvariant(pSource[i]); + i++; + } + } + + return result; + } + } + private static Char ToLowerAsciiInvariant(Char c) { if ((uint)(c - 'A') <= (uint)('Z' - 'A')) @@ -344,10 +462,11 @@ namespace System.Globalization //////////////////////////////////////////////////////////////////////// public unsafe virtual char ToUpper(char c) { - if (IsAscii(c) && IsAsciiCasingSameAsInvariant) + if (_invariantMode || (IsAscii(c) && IsAsciiCasingSameAsInvariant)) { return ToUpperAsciiInvariant(c); } + return (ChangeCase(c, toUpper: true)); } @@ -355,6 +474,11 @@ namespace System.Globalization { if (str == null) { throw new ArgumentNullException(nameof(str)); } + if (_invariantMode) + { + return ToUpperAsciiInvariant(str); + } + return ChangeCase(str, toUpper: true); } diff --git a/src/mscorlib/src/System/Text/Normalization.Unix.cs b/src/mscorlib/src/System/Text/Normalization.Unix.cs index d49bdc6..8b1d790 100644 --- a/src/mscorlib/src/System/Text/Normalization.Unix.cs +++ 
b/src/mscorlib/src/System/Text/Normalization.Unix.cs @@ -4,6 +4,7 @@ using System.Security; using System.Text; +using System.Globalization; namespace System.Text { @@ -13,6 +14,13 @@ namespace System.Text { ValidateArguments(strInput, normalizationForm); + if (GlobalizationMode.Invariant) + { + // In Invariant mode we assume all characters are normalized. + // This is because we don't support any linguistic operation on the strings + return true; + } + int ret = Interop.GlobalizationInterop.IsNormalized(normalizationForm, strInput, strInput.Length); if (ret == -1) @@ -27,6 +35,13 @@ namespace System.Text { ValidateArguments(strInput, normalizationForm); + if (GlobalizationMode.Invariant) + { + // In Invariant mode we assume all characters are normalized. + // This is because we don't support any linguistic operation on the strings + return strInput; + } + char[] buf = new char[strInput.Length]; for (int attempts = 2; attempts > 0; attempts--) diff --git a/src/mscorlib/src/System/Text/Normalization.Windows.cs b/src/mscorlib/src/System/Text/Normalization.Windows.cs index 1e08817..e3890b1 100644 --- a/src/mscorlib/src/System/Text/Normalization.Windows.cs +++ b/src/mscorlib/src/System/Text/Normalization.Windows.cs @@ -20,6 +20,13 @@ namespace System.Text { internal static bool IsNormalized(String strInput, NormalizationForm normForm) { + if (GlobalizationMode.Invariant) + { + // In Invariant mode we assume all characters are normalized. + // This is because we don't support any linguistic operation on the strings + return true; + } + Debug.Assert(strInput != null); // The only way to know if IsNormalizedString failed is through checking the Win32 last error @@ -49,6 +56,13 @@ namespace System.Text internal static String Normalize(String strInput, NormalizationForm normForm) { + if (GlobalizationMode.Invariant) + { + // In Invariant mode we assume all characters are normalized. 
+ // This is because we don't support any linguistic operation on the strings + return strInput; + } + Debug.Assert(strInput != null); // we depend on Win32 last error when calling NormalizeString diff --git a/src/mscorlib/src/System/TimeZoneInfo.Unix.cs b/src/mscorlib/src/System/TimeZoneInfo.Unix.cs index 5ebd063..1ecf4d4 100644 --- a/src/mscorlib/src/System/TimeZoneInfo.Unix.cs +++ b/src/mscorlib/src/System/TimeZoneInfo.Unix.cs @@ -97,6 +97,12 @@ namespace System private void GetDisplayName(Interop.GlobalizationInterop.TimeZoneDisplayNameType nameType, ref string displayName) { + if (GlobalizationMode.Invariant) + { + displayName = _standardDisplayName; + return; + } + string timeZoneDisplayName; bool result = Interop.CallStringMethod( (locale, id, type, stringBuilder) => Interop.GlobalizationInterop.GetTimeZoneDisplayName( diff --git a/src/vm/CMakeLists.txt b/src/vm/CMakeLists.txt index 1dee95f..21f1659 100644 --- a/src/vm/CMakeLists.txt +++ b/src/vm/CMakeLists.txt @@ -139,6 +139,7 @@ set(VM_SOURCES_WKS cachelinealloc.cpp callhelpers.cpp ceemain.cpp + clrconfignative.cpp clrex.cpp clrprivbinderutil.cpp clrvarargs.cpp diff --git a/src/vm/clrconfignative.cpp b/src/vm/clrconfignative.cpp new file mode 100644 index 0000000..11b9eb5 --- /dev/null +++ b/src/vm/clrconfignative.cpp @@ -0,0 +1,21 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+// +// File: clrconfignative.cpp +// + +#include "common.h" +#include "clrconfignative.h" +#include <configuration.h> + +BOOL QCALLTYPE ClrConfigNative::GetConfigBoolValue(LPCWSTR name) +{ + QCALL_CONTRACT; + + BOOL retValue = FALSE; + BEGIN_QCALL; + retValue = Configuration::GetKnobBooleanValue(name, FALSE); + END_QCALL; + return(retValue); +} diff --git a/src/vm/clrconfignative.h b/src/vm/clrconfignative.h new file mode 100644 index 0000000..a60d119 --- /dev/null +++ b/src/vm/clrconfignative.h @@ -0,0 +1,14 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#ifndef _CLRCONFIGNATIVE_H_ +#define _CLRCONFIGNATIVE_H_ + +class ClrConfigNative +{ +public: + static BOOL QCALLTYPE GetConfigBoolValue(LPCWSTR name); +}; + +#endif // _CLRCONFIGNATIVE_H_ diff --git a/src/vm/ecalllist.h b/src/vm/ecalllist.h index 929ae2e..6adc3a1 100644 --- a/src/vm/ecalllist.h +++ b/src/vm/ecalllist.h @@ -854,6 +854,9 @@ FCFuncStart(gCurrencyFuncs) FCFuncElement("FCallToDecimal", COMCurrency::DoToDecimal) FCFuncEnd() +FCFuncStart(gClrConfig) + QCFuncElement("GetConfigBoolValue", ClrConfigNative::GetConfigBoolValue) +FCFuncEnd() #if !defined(FEATURE_COREFX_GLOBALIZATION) FCFuncStart(gCompareInfoFuncs) @@ -1356,6 +1359,7 @@ FCClassElement("AssemblyName", "System.Reflection", gAssemblyNameFuncs) FCClassElement("Assert", "System.Diagnostics", gDiagnosticsAssert) FCClassElement("BCLDebug", "System", gBCLDebugFuncs) FCClassElement("Buffer", "System", gBufferFuncs) +FCClassElement("CLRConfig", "System", gClrConfig) FCClassElement("CompareInfo", "System.Globalization", gCompareInfoFuncs) FCClassElement("CompatibilitySwitch", "System.Runtime.Versioning", gCompatibilitySwitchFuncs) FCClassElement("CriticalHandle", "System.Runtime.InteropServices", gCriticalHandleFuncs) diff --git a/src/vm/mscorlib.cpp b/src/vm/mscorlib.cpp index 963e890..5deaaef 100644 --- 
a/src/vm/mscorlib.cpp +++ b/src/vm/mscorlib.cpp @@ -34,6 +34,7 @@ #include "excep.h" #include "fcall.h" #include "nlsinfo.h" +#include "clrconfignative.h" #include "commodule.h" #include "marshalnative.h" #include "system.h" -- 2.7.4