From eec801596f5d48066babacbc9d7e4b6d5e9b6fde Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Mon, 26 Nov 2018 21:08:10 -0800 Subject: [PATCH] Fix Rune.ToUpper / ToLower under GlobalizationMode.Invariant (dotnet/coreclr#21203) Commit migrated from https://github.com/dotnet/coreclr/commit/6af3c5dd51a2e6411d32fdbf2645ec0ea68b36ff --- .../System.Private.CoreLib/src/System/Text/Rune.cs | 70 ++++++++++++++++++---- 1 file changed, 57 insertions(+), 13 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs index d405b69..2ed76d1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs @@ -161,14 +161,10 @@ namespace System.Text /// public int Value => (int)_value; - private static Rune ChangeCase(Rune rune, CultureInfo culture, bool toUpper) + private static Rune ChangeCaseCultureAware(Rune rune, TextInfo textInfo, bool toUpper) { - if (culture == null) - { - ThrowHelper.ThrowArgumentNullException(ExceptionArgument.culture); - } - - var textInfo = culture.TextInfo; + Debug.Assert(!GlobalizationMode.Invariant, "This should've been checked by the caller."); + Debug.Assert(textInfo != null, "This should've been checked by the caller."); Span original = stackalloc char[2]; // worst case scenario = 2 code units (for a surrogate pair) Span modified = stackalloc char[2]; // case change should preserve UTF-16 code unit count @@ -727,42 +723,90 @@ namespace System.Text return IsCategorySeparator(GetUnicodeCategoryNonAscii(value)); } - public static Rune ToLower(Rune value, CultureInfo culture) => ChangeCase(value, culture, toUpper: false); + public static Rune ToLower(Rune value, CultureInfo culture) + { + if (culture is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.culture); + } + + // We don't want to special-case ASCII here since the specified culture might handle + // ASCII characters differently than the invariant culture (e.g., Turkish I). Instead + // we'll just jump straight to the globalization tables if they're available. + + if (GlobalizationMode.Invariant) + { + return ToLowerInvariant(value); + } + + return ChangeCaseCultureAware(value, culture.TextInfo, toUpper: false); + } public static Rune ToLowerInvariant(Rune value) { // Handle the most common case (ASCII data) first. Within the common case, we expect // that there'll be a mix of lowercase & uppercase chars, so make the conversion branchless. - if (value.IsAscii || GlobalizationMode.Invariant) + if (value.IsAscii) { // It's ok for us to use the UTF-16 conversion utility for this since the high // 16 bits of the value will never be set so will be left unchanged. return UnsafeCreate(Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(value._value)); } + if (GlobalizationMode.Invariant) + { + // If the value isn't ASCII and if the globalization tables aren't available, + // case changing has no effect. + return value; + } + // Non-ASCII data requires going through the case folding tables. - return ToLower(value, CultureInfo.InvariantCulture); + return ChangeCaseCultureAware(value, TextInfo.Invariant, toUpper: false); } - public static Rune ToUpper(Rune value, CultureInfo culture) => ChangeCase(value, culture, toUpper: true); + public static Rune ToUpper(Rune value, CultureInfo culture) + { + if (culture is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.culture); + } + + // We don't want to special-case ASCII here since the specified culture might handle + // ASCII characters differently than the invariant culture (e.g., Turkish I). Instead + // we'll just jump straight to the globalization tables if they're available. + + if (GlobalizationMode.Invariant) + { + return ToUpperInvariant(value); + } + + return ChangeCaseCultureAware(value, culture.TextInfo, toUpper: true); + } public static Rune ToUpperInvariant(Rune value) { // Handle the most common case (ASCII data) first. Within the common case, we expect // that there'll be a mix of lowercase & uppercase chars, so make the conversion branchless. - if (value.IsAscii || GlobalizationMode.Invariant) + if (value.IsAscii) { // It's ok for us to use the UTF-16 conversion utility for this since the high // 16 bits of the value will never be set so will be left unchanged. return UnsafeCreate(Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(value._value)); } + if (GlobalizationMode.Invariant) + { + // If the value isn't ASCII and if the globalization tables aren't available, + // case changing has no effect. + return value; + } + // Non-ASCII data requires going through the case folding tables. - return ToUpper(value, CultureInfo.InvariantCulture); + return ChangeCaseCultureAware(value, TextInfo.Invariant, toUpper: true); } } } -- 2.7.4