From 43bc8e85d16db7a5a152bea86a55b492027bf420 Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Mon, 17 Aug 2020 16:53:47 -0700 Subject: [PATCH] Ordinal Ignore Case Optimization (#40910) --- src/libraries/Common/src/Interop/Interop.Casing.cs | 3 + .../Common/src/Interop/Interop.Collation.cs | 9 - .../Unix/System.Globalization.Native/entrypoints.c | 3 +- .../Unix/System.Globalization.Native/pal_casing.c | 18 + .../Unix/System.Globalization.Native/pal_casing.h | 2 + .../System.Globalization.Native/pal_collation.c | 103 +--- .../System.Globalization.Native/pal_collation.h | 11 - .../src/System.Private.CoreLib.Shared.projitems | 2 + .../src/System/Globalization/CompareInfo.Icu.cs | 129 ----- .../System/Globalization/CompareInfo.Invariant.cs | 2 +- .../src/System/Globalization/CompareInfo.Nls.cs | 6 +- .../src/System/Globalization/CompareInfo.cs | 553 ++++----------------- .../src/System/Globalization/IdnMapping.cs | 2 +- .../src/System/Globalization/Ordinal.cs | 413 +++++++++++++++ .../src/System/Globalization/OrdinalCasing.Icu.cs | 444 +++++++++++++++++ .../src/System/Marvin.OrdinalIgnoreCase.cs | 2 +- .../src/System/MemoryExtensions.Globalization.cs | 18 +- .../src/System/String.Comparison.cs | 13 +- .../src/System/String.Searching.cs | 2 +- .../src/System/StringComparer.cs | 4 +- .../System.Runtime/tests/System/StringTests.cs | 126 ++++- 21 files changed, 1113 insertions(+), 752 deletions(-) create mode 100644 src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Globalization/OrdinalCasing.Icu.cs diff --git a/src/libraries/Common/src/Interop/Interop.Casing.cs b/src/libraries/Common/src/Interop/Interop.Casing.cs index 5e9ca3a..f02aea3 100644 --- a/src/libraries/Common/src/Interop/Interop.Casing.cs +++ b/src/libraries/Common/src/Interop/Interop.Casing.cs @@ -15,5 +15,8 @@ internal static partial class Interop [DllImport(Libraries.GlobalizationNative, CharSet = 
CharSet.Unicode, EntryPoint = "GlobalizationNative_ChangeCaseTurkish")] internal static extern unsafe void ChangeCaseTurkish(char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, bool bToUpper); + + [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_InitOrdinalCasingPage")] + internal static extern unsafe void InitOrdinalCasingPage(int pageNumber, char* pTarget); } } diff --git a/src/libraries/Common/src/Interop/Interop.Collation.cs b/src/libraries/Common/src/Interop/Interop.Collation.cs index 40e1821..beb8e2c 100644 --- a/src/libraries/Common/src/Interop/Interop.Collation.cs +++ b/src/libraries/Common/src/Interop/Interop.Collation.cs @@ -24,12 +24,6 @@ internal static partial class Interop [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_LastIndexOf")] internal static extern unsafe int LastIndexOf(IntPtr sortHandle, char* target, int cwTargetLength, char* pSource, int cwSourceLength, CompareOptions options, int* matchLengthPtr); - [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_IndexOfOrdinalIgnoreCase")] - internal static extern unsafe int IndexOfOrdinalIgnoreCase(string target, int cwTargetLength, char* pSource, int cwSourceLength, bool findLast); - - [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_IndexOfOrdinalIgnoreCase")] - internal static extern unsafe int IndexOfOrdinalIgnoreCase(char* target, int cwTargetLength, char* pSource, int cwSourceLength, bool findLast); - [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_StartsWith")] [return: MarshalAs(UnmanagedType.Bool)] internal static extern unsafe bool StartsWith(IntPtr sortHandle, char* target, int cwTargetLength, char* source, int cwSourceLength, CompareOptions options, int* matchedLength); @@ -49,9 +43,6 @@ internal static partial 
class Interop [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_GetSortKey")] internal static extern unsafe int GetSortKey(IntPtr sortHandle, char* str, int strLength, byte* sortKey, int sortKeyLength, CompareOptions options); - [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_CompareStringOrdinalIgnoreCase")] - internal static extern unsafe int CompareStringOrdinalIgnoreCase(char* lpStr1, int cwStr1Len, char* lpStr2, int cwStr2Len); - [DllImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_GetSortVersion")] internal static extern int GetSortVersion(IntPtr sortHandle); } diff --git a/src/libraries/Native/Unix/System.Globalization.Native/entrypoints.c b/src/libraries/Native/Unix/System.Globalization.Native/entrypoints.c index ee028f2..7a7f5ea 100644 --- a/src/libraries/Native/Unix/System.Globalization.Native/entrypoints.c +++ b/src/libraries/Native/Unix/System.Globalization.Native/entrypoints.c @@ -29,7 +29,6 @@ FCFuncStart(gPalGlobalizationNative) QCFuncElement("ChangeCaseTurkish", GlobalizationNative_ChangeCaseTurkish) QCFuncElement("CloseSortHandle", GlobalizationNative_CloseSortHandle) QCFuncElement("CompareString", GlobalizationNative_CompareString) - QCFuncElement("CompareStringOrdinalIgnoreCase", GlobalizationNative_CompareStringOrdinalIgnoreCase) QCFuncElement("EndsWith", GlobalizationNative_EndsWith) QCFuncElement("EnumCalendarInfo", GlobalizationNative_EnumCalendarInfo) QCFuncElement("GetCalendarInfo", GlobalizationNative_GetCalendarInfo) @@ -49,8 +48,8 @@ FCFuncStart(gPalGlobalizationNative) QCFuncElement("GetSortVersion", GlobalizationNative_GetSortVersion) QCFuncElement("GetTimeZoneDisplayName", GlobalizationNative_GetTimeZoneDisplayName) QCFuncElement("IndexOf", GlobalizationNative_IndexOf) - QCFuncElement("IndexOfOrdinalIgnoreCase", GlobalizationNative_IndexOfOrdinalIgnoreCase) QCFuncElement("InitICUFunctions", 
GlobalizationNative_InitICUFunctions) + QCFuncElement("InitOrdinalCasingPage", GlobalizationNative_InitOrdinalCasingPage) QCFuncElement("IsNormalized", GlobalizationNative_IsNormalized) QCFuncElement("IsPredefinedLocale", GlobalizationNative_IsPredefinedLocale) QCFuncElement("LastIndexOf", GlobalizationNative_LastIndexOf) diff --git a/src/libraries/Native/Unix/System.Globalization.Native/pal_casing.c b/src/libraries/Native/Unix/System.Globalization.Native/pal_casing.c index d99ba79..ceca03b 100644 --- a/src/libraries/Native/Unix/System.Globalization.Native/pal_casing.c +++ b/src/libraries/Native/Unix/System.Globalization.Native/pal_casing.c @@ -150,6 +150,24 @@ void GlobalizationNative_ChangeCaseTurkish( } } +void GlobalizationNative_InitOrdinalCasingPage(int32_t pageNumber, UChar* pTarget) +{ + pageNumber <<= 8; + for (int i = 0; i < 256; i++) + { + // Unfortunately, to ensure one-to-one simple mapping we have to call u_toupper on every character. + // Using string casing ICU APIs cannot give such results even when using NULL locale to force root behavior. 
+ pTarget[i] = (UChar) u_toupper((UChar32)(pageNumber + i)); + } + + if (pageNumber == 0x0100) + { + // Disable Turkish I behavior on Ordinal operations + pTarget[0x31] = (UChar)0x0131; // U+0131 LATIN SMALL LETTER DOTLESS I (Turkish dotless i) maps to itself + pTarget[0x7F] = (UChar)0x017F; // U+017F LATIN SMALL LETTER LONG S maps to itself + } +} + #ifdef __clang__ #pragma clang diagnostic pop #endif diff --git a/src/libraries/Native/Unix/System.Globalization.Native/pal_casing.h b/src/libraries/Native/Unix/System.Globalization.Native/pal_casing.h index 104065e..b49a775 100644 --- a/src/libraries/Native/Unix/System.Globalization.Native/pal_casing.h +++ b/src/libraries/Native/Unix/System.Globalization.Native/pal_casing.h @@ -21,3 +21,5 @@ PALEXPORT void GlobalizationNative_ChangeCaseTurkish(const UChar* lpSrc, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper); + +PALEXPORT void GlobalizationNative_InitOrdinalCasingPage(int32_t pageNumber, UChar* pTarget); diff --git a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c index c75d68a..72077cf 100644 --- a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c +++ b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c @@ -454,7 +454,7 @@ int32_t GlobalizationNative_CompareString( } if (lpStr2 == NULL) { - lpStr2 = &dummyChar; + lpStr2 = &dummyChar; } result = ucol_strcoll(pColl, lpStr1, cwStr1Length, lpStr2, cwStr2Length); @@ -497,7 +497,7 @@ int32_t GlobalizationNative_IndexOf( return (result == UCOL_EQUAL) ? 
0 : -1; } - + UErrorCode err = U_ZERO_ERROR; const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err); @@ -606,61 +606,6 @@ static int AreEqualOrdinalIgnoreCase(UChar32 one, UChar32 two) } /* -Function: -IndexOfOrdinalIgnoreCase -*/ -int32_t GlobalizationNative_IndexOfOrdinalIgnoreCase( - const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t findLast) -{ - int32_t result = -1; - - int32_t endIndex = cwSourceLength - cwTargetLength; - assert(endIndex >= 0); - - int32_t i = 0; - while (i <= endIndex) - { - int32_t srcIdx = i, trgIdx = 0; - const UChar *src = lpSource, *trg = lpTarget; - - int32_t match = TRUE; - while (trgIdx < cwTargetLength) - { - UChar32 srcCodepoint, trgCodepoint; - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wsign-conversion" -#endif - U16_NEXT(src, srcIdx, cwSourceLength, srcCodepoint); - U16_NEXT(trg, trgIdx, cwTargetLength, trgCodepoint); -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - - if (!AreEqualOrdinalIgnoreCase(srcCodepoint, trgCodepoint)) - { - match = FALSE; - break; - } - } - - if (match) - { - result = i; - if (!findLast) - { - break; - } - } - - U16_FWD_1(lpSource, i, cwSourceLength); - } - - return result; -} - -/* collation element is an int used for sorting. 
It consists of 3 components: * primary - first 16 bits, representing the base letter * secondary - next 8 bits, typically an accent @@ -934,47 +879,3 @@ int32_t GlobalizationNative_GetSortKey( return result; } - -int32_t GlobalizationNative_CompareStringOrdinalIgnoreCase( - const UChar* lpStr1, int32_t cwStr1Length, const UChar* lpStr2, int32_t cwStr2Length) -{ - assert(lpStr1 != NULL); - assert(cwStr1Length >= 0); - assert(lpStr2 != NULL); - assert(cwStr2Length >= 0); - - int32_t str1Idx = 0; - int32_t str2Idx = 0; - - while (str1Idx < cwStr1Length && str2Idx < cwStr2Length) - { - UChar32 str1Codepoint, str2Codepoint; - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wsign-conversion" -#endif - U16_NEXT(lpStr1, str1Idx, cwStr1Length, str1Codepoint); - U16_NEXT(lpStr2, str2Idx, cwStr2Length, str2Codepoint); -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - - if (str1Codepoint != str2Codepoint && u_toupper(str1Codepoint) != u_toupper(str2Codepoint)) - { - return str1Codepoint < str2Codepoint ? 
-1 : 1; - } - } - - if (cwStr1Length < cwStr2Length) - { - return -1; - } - - if (cwStr2Length < cwStr1Length) - { - return 1; - } - - return 0; -} diff --git a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.h b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.h index 64aa8f1..76bc3bb 100644 --- a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.h +++ b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.h @@ -39,12 +39,6 @@ PALEXPORT int32_t GlobalizationNative_LastIndexOf(SortHandle* pSortHandle, int32_t options, int32_t* pMatchedLength); -PALEXPORT int32_t GlobalizationNative_IndexOfOrdinalIgnoreCase(const UChar* lpTarget, - int32_t cwTargetLength, - const UChar* lpSource, - int32_t cwSourceLength, - int32_t findLast); - PALEXPORT int32_t GlobalizationNative_StartsWith(SortHandle* pSortHandle, const UChar* lpTarget, int32_t cwTargetLength, @@ -67,8 +61,3 @@ PALEXPORT int32_t GlobalizationNative_GetSortKey(SortHandle* pSortHandle, uint8_t* sortKey, int32_t cbSortKeyLength, int32_t options); - -PALEXPORT int32_t GlobalizationNative_CompareStringOrdinalIgnoreCase(const UChar* lpStr1, - int32_t cwStr1Length, - const UChar* lpStr2, - int32_t cwStr2Length); diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 27bccfc..ed4ccc2 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -339,6 +339,8 @@ + + diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs index 3b959d4..4d27d9e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs +++ 
b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs @@ -35,135 +35,6 @@ namespace System.Globalization } } - private static unsafe int IcuIndexOfOrdinalCore(ReadOnlySpan source, ReadOnlySpan value, bool ignoreCase, bool fromBeginning) - { - Debug.Assert(!GlobalizationMode.Invariant); - Debug.Assert(!GlobalizationMode.UseNls); - Debug.Assert(!value.IsEmpty); - - // Ordinal (non-linguistic) comparisons require the length of the target string to be no greater - // than the length of the search space. Since our caller already checked for empty target strings, - // the below check also handles the case of empty search space strings. - - if (source.Length < value.Length) - { - return -1; - } - - Debug.Assert(!source.IsEmpty); - - if (ignoreCase) - { - fixed (char* pSource = &MemoryMarshal.GetReference(source)) - fixed (char* pValue = &MemoryMarshal.GetReference(value)) - { - return Interop.Globalization.IndexOfOrdinalIgnoreCase(pValue, value.Length, pSource, source.Length, findLast: !fromBeginning); - } - } - - int startIndex, endIndex, jump; - if (fromBeginning) - { - // Left to right, from zero to last possible index in the source string. - // Incrementing by one after each iteration. Stop condition is last possible index plus 1. - startIndex = 0; - endIndex = source.Length - value.Length + 1; - jump = 1; - } - else - { - // Right to left, from first possible index in the source string to zero. - // Decrementing by one after each iteration. Stop condition is last possible index minus 1. 
- startIndex = source.Length - value.Length; - endIndex = -1; - jump = -1; - } - - for (int i = startIndex; i != endIndex; i += jump) - { - int valueIndex, sourceIndex; - - for (valueIndex = 0, sourceIndex = i; - valueIndex < value.Length && source[sourceIndex] == value[valueIndex]; - valueIndex++, sourceIndex++) - ; - - if (valueIndex == value.Length) - { - return i; - } - } - - return -1; - } - - private static unsafe int IcuLastIndexOfOrdinalCore(string source, string value, int startIndex, int count, bool ignoreCase) - { - Debug.Assert(!GlobalizationMode.Invariant); - Debug.Assert(!GlobalizationMode.UseNls); - - Debug.Assert(source != null); - Debug.Assert(value != null); - - if (value.Length == 0) - { - return startIndex; - } - - if (count < value.Length) - { - return -1; - } - - // startIndex is the index into source where we start search backwards from. - // leftStartIndex is the index into source of the start of the string that is - // count characters away from startIndex. - int leftStartIndex = startIndex - count + 1; - - if (ignoreCase) - { - fixed (char* pSource = source) - { - int lastIndex = Interop.Globalization.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + leftStartIndex, count, findLast: true); - return lastIndex != -1 ? 
- leftStartIndex + lastIndex : - -1; - } - } - - for (int i = startIndex - value.Length + 1; i >= leftStartIndex; i--) - { - int valueIndex, sourceIndex; - - for (valueIndex = 0, sourceIndex = i; - valueIndex < value.Length && source[sourceIndex] == value[valueIndex]; - valueIndex++, sourceIndex++) ; - - if (valueIndex == value.Length) { - return i; - } - } - - return -1; - } - - private static unsafe int IcuCompareStringOrdinalIgnoreCase(ref char string1, int count1, ref char string2, int count2) - { - Debug.Assert(!GlobalizationMode.Invariant); - Debug.Assert(!GlobalizationMode.UseNls); - - Debug.Assert(count1 > 0); - Debug.Assert(count2 > 0); - - fixed (char* char1 = &string1) - fixed (char* char2 = &string2) - { - Debug.Assert(char1 != null); - Debug.Assert(char2 != null); - return Interop.Globalization.CompareStringOrdinalIgnoreCase(char1, count1, char2, count2); - } - } - private unsafe int IcuCompareString(ReadOnlySpan string1, ReadOnlySpan string2, CompareOptions options) { Debug.Assert(!GlobalizationMode.Invariant); diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Invariant.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Invariant.cs index 1819ab2..3279678 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Invariant.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Invariant.cs @@ -39,7 +39,7 @@ namespace System.Globalization } } - private static unsafe int InvariantLastIndexOf(string source, string value, int startIndex, int count, bool ignoreCase) + internal static unsafe int InvariantLastIndexOf(string source, string value, int startIndex, int count, bool ignoreCase) { Debug.Assert(!string.IsNullOrEmpty(source)); Debug.Assert(value != null); diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Nls.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Nls.cs 
index 0e0454b..396b3b0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Nls.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Nls.cs @@ -74,7 +74,7 @@ namespace System.Globalization } } - private static int NlsIndexOfOrdinalCore(ReadOnlySpan source, ReadOnlySpan value, bool ignoreCase, bool fromBeginning) + internal static int NlsIndexOfOrdinalCore(ReadOnlySpan source, ReadOnlySpan value, bool ignoreCase, bool fromBeginning) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); @@ -86,7 +86,7 @@ namespace System.Globalization return FindStringOrdinal(positionFlag, source, value, ignoreCase); } - private static int NlsLastIndexOfOrdinalCore(string source, string value, int startIndex, int count, bool ignoreCase) + internal static int NlsLastIndexOfOrdinalCore(string source, string value, int startIndex, int count, bool ignoreCase) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); @@ -181,7 +181,7 @@ namespace System.Globalization } } - private static unsafe int NlsCompareStringOrdinalIgnoreCase(ref char string1, int count1, ref char string2, int count2) + internal static unsafe int NlsCompareStringOrdinalIgnoreCase(ref char string1, int count1, ref char string2, int count2) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.cs index 71e6502..148daeb 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.cs @@ -303,7 +303,7 @@ namespace System.Globalization internal int CompareOptionIgnoreCase(ReadOnlySpan string1, ReadOnlySpan string2) { return GlobalizationMode.Invariant ? 
- CompareOrdinalIgnoreCase(string1, string2) : + Ordinal.CompareIgnoreCaseInvariantMode(ref MemoryMarshal.GetReference(string1), string1.Length, ref MemoryMarshal.GetReference(string2), string2.Length) : CompareStringCore(string1, string2, CompareOptions.IgnoreCase); } @@ -443,20 +443,19 @@ namespace System.Globalization { // Common case: caller is attempting to perform linguistic comparison. // Pass the flags down to NLS or ICU unless we're running in invariant - // mode, at which point we normalize the flags to Orginal[IgnoreCase]. + // mode, at which point we normalize the flags to Ordinal[IgnoreCase]. if (!GlobalizationMode.Invariant) { return CompareStringCore(string1, string2, options); } - else if ((options & CompareOptions.IgnoreCase) == 0) - { - goto ReturnOrdinal; - } - else + + if ((options & CompareOptions.IgnoreCase) == 0) { - goto ReturnOrdinalIgnoreCase; + return string1.SequenceCompareTo(string2); } + + return Ordinal.CompareStringIgnoreCase(ref MemoryMarshal.GetReference(string1), string1.Length, ref MemoryMarshal.GetReference(string2), string2.Length); } else { @@ -465,23 +464,18 @@ namespace System.Globalization if (options == CompareOptions.Ordinal) { - goto ReturnOrdinal; - } - else if (options == CompareOptions.OrdinalIgnoreCase) - { - goto ReturnOrdinalIgnoreCase; + return string1.SequenceCompareTo(string2); } - else + + if (options == CompareOptions.OrdinalIgnoreCase) { - ThrowCompareOptionsCheckFailed(options); + return Ordinal.CompareStringIgnoreCase(ref MemoryMarshal.GetReference(string1), string1.Length, ref MemoryMarshal.GetReference(string2), string2.Length); } - } - ReturnOrdinal: - return string1.SequenceCompareTo(string2); + ThrowCompareOptionsCheckFailed(options); - ReturnOrdinalIgnoreCase: - return CompareOrdinalIgnoreCase(string1, string2); + return -1; // make the compiler happy; + } } // Checks that 'CompareOptions' is valid for a call to Compare, throwing the appropriate @@ -517,228 +511,6 @@ namespace System.Globalization 
IcuCompareString(string1, string2, options); /// - /// CompareOrdinalIgnoreCase compare two string ordinally with ignoring the case. - /// it assumes the strings are Ascii string till we hit non Ascii character in strA or strB and then we continue the comparison by - /// calling the OS. - /// - internal static int CompareOrdinalIgnoreCase(string strA, int indexA, int lengthA, string strB, int indexB, int lengthB) - { - Debug.Assert(indexA + lengthA <= strA.Length); - Debug.Assert(indexB + lengthB <= strB.Length); - return CompareOrdinalIgnoreCase( - ref Unsafe.Add(ref strA.GetRawStringData(), indexA), - lengthA, - ref Unsafe.Add(ref strB.GetRawStringData(), indexB), - lengthB); - } - - internal static int CompareOrdinalIgnoreCase(ReadOnlySpan strA, ReadOnlySpan strB) - { - return CompareOrdinalIgnoreCase(ref MemoryMarshal.GetReference(strA), strA.Length, ref MemoryMarshal.GetReference(strB), strB.Length); - } - - internal static int CompareOrdinalIgnoreCase(string strA, string strB) - { - return CompareOrdinalIgnoreCase(ref strA.GetRawStringData(), strA.Length, ref strB.GetRawStringData(), strB.Length); - } - - internal static int CompareOrdinalIgnoreCase(ref char strA, int lengthA, ref char strB, int lengthB) - { - int length = Math.Min(lengthA, lengthB); - int range = length; - - ref char charA = ref strA; - ref char charB = ref strB; - - // in InvariantMode we support all range and not only the ascii characters. - char maxChar = (GlobalizationMode.Invariant ? 
(char)0xFFFF : (char)0x7F); - - while (length != 0 && charA <= maxChar && charB <= maxChar) - { - // Ordinal equals or lowercase equals if the result ends up in the a-z range - if (charA == charB || - ((charA | 0x20) == (charB | 0x20) && - (uint)((charA | 0x20) - 'a') <= (uint)('z' - 'a'))) - { - length--; - charA = ref Unsafe.Add(ref charA, 1); - charB = ref Unsafe.Add(ref charB, 1); - } - else - { - int currentA = charA; - int currentB = charB; - - // Uppercase both chars if needed - if ((uint)(charA - 'a') <= 'z' - 'a') - { - currentA -= 0x20; - } - if ((uint)(charB - 'a') <= 'z' - 'a') - { - currentB -= 0x20; - } - - // Return the (case-insensitive) difference between them. - return currentA - currentB; - } - } - - if (length == 0 || GlobalizationMode.Invariant) - { - return lengthA - lengthB; - } - - range -= length; - - return CompareStringOrdinalIgnoreCaseCore(ref charA, lengthA - range, ref charB, lengthB - range); - } - - internal static bool EqualsOrdinalIgnoreCase(ref char charA, ref char charB, int length) - { - IntPtr byteOffset = IntPtr.Zero; - -#if TARGET_64BIT - // Read 4 chars (64 bits) at a time from each string - while ((uint)length >= 4) - { - ulong valueA = Unsafe.ReadUnaligned(ref Unsafe.As(ref Unsafe.AddByteOffset(ref charA, byteOffset))); - ulong valueB = Unsafe.ReadUnaligned(ref Unsafe.As(ref Unsafe.AddByteOffset(ref charB, byteOffset))); - - // A 32-bit test - even with the bit-twiddling here - is more efficient than a 64-bit test. - ulong temp = valueA | valueB; - if (!Utf16Utility.AllCharsInUInt32AreAscii((uint)temp | (uint)(temp >> 32))) - { - goto NonAscii; // one of the inputs contains non-ASCII data - } - - // Generally, the caller has likely performed a first-pass check that the input strings - // are likely equal. Consider a dictionary which computes the hash code of its key before - // performing a proper deep equality check of the string contents. 
We want to optimize for - // the case where the equality check is likely to succeed, which means that we want to avoid - // branching within this loop unless we're about to exit the loop, either due to failure or - // due to us running out of input data. - - if (!Utf16Utility.UInt64OrdinalIgnoreCaseAscii(valueA, valueB)) - { - return false; - } - - byteOffset += 8; - length -= 4; - } -#endif - - // Read 2 chars (32 bits) at a time from each string -#if TARGET_64BIT - if ((uint)length >= 2) -#else - while ((uint)length >= 2) -#endif - { - uint valueA = Unsafe.ReadUnaligned(ref Unsafe.As(ref Unsafe.AddByteOffset(ref charA, byteOffset))); - uint valueB = Unsafe.ReadUnaligned(ref Unsafe.As(ref Unsafe.AddByteOffset(ref charB, byteOffset))); - - if (!Utf16Utility.AllCharsInUInt32AreAscii(valueA | valueB)) - { - goto NonAscii; // one of the inputs contains non-ASCII data - } - - // Generally, the caller has likely performed a first-pass check that the input strings - // are likely equal. Consider a dictionary which computes the hash code of its key before - // performing a proper deep equality check of the string contents. We want to optimize for - // the case where the equality check is likely to succeed, which means that we want to avoid - // branching within this loop unless we're about to exit the loop, either due to failure or - // due to us running out of input data. 
- - if (!Utf16Utility.UInt32OrdinalIgnoreCaseAscii(valueA, valueB)) - { - return false; - } - - byteOffset += 4; - length -= 2; - } - - if (length != 0) - { - Debug.Assert(length == 1); - - uint valueA = Unsafe.AddByteOffset(ref charA, byteOffset); - uint valueB = Unsafe.AddByteOffset(ref charB, byteOffset); - - if ((valueA | valueB) > 0x7Fu) - { - goto NonAscii; // one of the inputs contains non-ASCII data - } - - if (valueA == valueB) - { - return true; // exact match - } - - valueA |= 0x20u; - if ((uint)(valueA - 'a') > (uint)('z' - 'a')) - { - return false; // not exact match, and first input isn't in [A-Za-z] - } - - // The ternary operator below seems redundant but helps RyuJIT generate more optimal code. - // See https://github.com/dotnet/runtime/issues/4207. - return (valueA == (valueB | 0x20u)) ? true : false; - } - - Debug.Assert(length == 0); - return true; - - NonAscii: - // The non-ASCII case is factored out into its own helper method so that the JIT - // doesn't need to emit a complex prolog for its caller (this method). - return EqualsOrdinalIgnoreCaseNonAscii(ref Unsafe.AddByteOffset(ref charA, byteOffset), ref Unsafe.AddByteOffset(ref charB, byteOffset), length); - } - - private static bool EqualsOrdinalIgnoreCaseNonAscii(ref char charA, ref char charB, int length) - { - if (!GlobalizationMode.Invariant) - { - return CompareStringOrdinalIgnoreCaseCore(ref charA, length, ref charB, length) == 0; - } - else - { - // If we don't have localization tables to consult, we'll still perform a case-insensitive - // check for ASCII characters, but if we see anything outside the ASCII range we'll immediately - // fail if it doesn't have true bitwise equality. 
- - IntPtr byteOffset = IntPtr.Zero; - while (length != 0) - { - // Ordinal equals or lowercase equals if the result ends up in the a-z range - uint valueA = Unsafe.AddByteOffset(ref charA, byteOffset); - uint valueB = Unsafe.AddByteOffset(ref charB, byteOffset); - - if (valueA == valueB || - ((valueA | 0x20) == (valueB | 0x20) && - (uint)((valueA | 0x20) - 'a') <= (uint)('z' - 'a'))) - { - byteOffset += 2; - length--; - } - else - { - return false; - } - } - - return true; - } - } - - private static unsafe int CompareStringOrdinalIgnoreCaseCore(ref char string1, int count1, ref char string2, int count2) => - GlobalizationMode.UseNls ? - NlsCompareStringOrdinalIgnoreCase(ref string1, count1, ref string2, count2) : - IcuCompareStringOrdinalIgnoreCase(ref string1, count1, ref string2, count2); - - /// /// Determines whether prefix is a prefix of string. If prefix equals /// string.Empty, true is returned. /// @@ -784,20 +556,19 @@ namespace System.Globalization { // Common case: caller is attempting to perform a linguistic search. // Pass the flags down to NLS or ICU unless we're running in invariant - // mode, at which point we normalize the flags to Orginal[IgnoreCase]. + // mode, at which point we normalize the flags to Ordinal[IgnoreCase]. 
if (!GlobalizationMode.Invariant) { return StartsWithCore(source, prefix, options, matchLengthPtr: null); } - else if ((options & CompareOptions.IgnoreCase) == 0) - { - goto ReturnOrdinal; - } - else + + if ((options & CompareOptions.IgnoreCase) == 0) { - goto ReturnOrdinalIgnoreCase; + return source.StartsWith(prefix); } + + return source.StartsWithOrdinalIgnoreCase(prefix); } else { @@ -806,23 +577,18 @@ namespace System.Globalization if (options == CompareOptions.Ordinal) { - goto ReturnOrdinal; + return source.StartsWith(prefix); } - else if (options == CompareOptions.OrdinalIgnoreCase) + + if (options == CompareOptions.OrdinalIgnoreCase) { - goto ReturnOrdinalIgnoreCase; + return source.StartsWithOrdinalIgnoreCase(prefix); } - else - { - ThrowCompareOptionsCheckFailed(options); - } - } - ReturnOrdinal: - return source.StartsWith(prefix); + ThrowCompareOptionsCheckFailed(options); - ReturnOrdinalIgnoreCase: - return source.StartsWithOrdinalIgnoreCase(prefix); + return false; // make the compiler happy; + } } /// @@ -928,20 +694,19 @@ namespace System.Globalization { // Common case: caller is attempting to perform a linguistic search. // Pass the flags down to NLS or ICU unless we're running in invariant - // mode, at which point we normalize the flags to Orginal[IgnoreCase]. + // mode, at which point we normalize the flags to Ordinal[IgnoreCase]. 
if (!GlobalizationMode.Invariant) { return EndsWithCore(source, suffix, options, matchLengthPtr: null); } - else if ((options & CompareOptions.IgnoreCase) == 0) - { - goto ReturnOrdinal; - } - else + + if ((options & CompareOptions.IgnoreCase) == 0) { - goto ReturnOrdinalIgnoreCase; + return source.EndsWith(suffix); } + + return source.EndsWithOrdinalIgnoreCase(suffix); } else { @@ -950,23 +715,18 @@ namespace System.Globalization if (options == CompareOptions.Ordinal) { - goto ReturnOrdinal; + return source.EndsWith(suffix); } - else if (options == CompareOptions.OrdinalIgnoreCase) - { - goto ReturnOrdinalIgnoreCase; - } - else + + if (options == CompareOptions.OrdinalIgnoreCase) { - ThrowCompareOptionsCheckFailed(options); + return source.EndsWithOrdinalIgnoreCase(suffix); } - } - ReturnOrdinal: - return source.EndsWith(suffix); + ThrowCompareOptionsCheckFailed(options); - ReturnOrdinalIgnoreCase: - return source.EndsWithOrdinalIgnoreCase(suffix); + return false; // make the compiler happy; + } } /// @@ -1192,7 +952,7 @@ namespace System.Globalization { // Common case: caller is attempting to perform a linguistic search. // Pass the flags down to NLS or ICU unless we're running in invariant - // mode, at which point we normalize the flags to Orginal[IgnoreCase]. + // mode, at which point we normalize the flags to Ordinal[IgnoreCase]. 
if (!GlobalizationMode.Invariant) { @@ -1205,14 +965,13 @@ namespace System.Globalization return IndexOfCore(source, value, options, matchLengthPtr: null, fromBeginning: true); } } - else if ((options & CompareOptions.IgnoreCase) == 0) - { - goto ReturnOrdinal; - } - else + + if ((options & CompareOptions.IgnoreCase) == 0) { - goto ReturnOrdinalIgnoreCase; + return source.IndexOf(value); } + + return Ordinal.IndexOfOrdinalIgnoreCase(source, value); } else { @@ -1221,23 +980,18 @@ namespace System.Globalization if (options == CompareOptions.Ordinal) { - goto ReturnOrdinal; + return source.IndexOf(value); } - else if (options == CompareOptions.OrdinalIgnoreCase) - { - goto ReturnOrdinalIgnoreCase; - } - else + + if (options == CompareOptions.OrdinalIgnoreCase) { - ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_InvalidFlag, ExceptionArgument.options); + return Ordinal.IndexOfOrdinalIgnoreCase(source, value); } - } - ReturnOrdinal: - return source.IndexOf(value); + ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_InvalidFlag, ExceptionArgument.options); - ReturnOrdinalIgnoreCase: - return IndexOfOrdinalIgnoreCase(source, value, fromBeginning: true); + return -1; // make the compiler happy; + } } /// @@ -1290,39 +1044,6 @@ namespace System.Globalization return IndexOf(source, valueAsUtf16.Slice(0, charCount), options); } - private static int IndexOfOrdinalCore(ReadOnlySpan source, ReadOnlySpan value, bool ignoreCase, bool fromBeginning) => - GlobalizationMode.UseNls ? - NlsIndexOfOrdinalCore(source, value, ignoreCase, fromBeginning) : - IcuIndexOfOrdinalCore(source, value, ignoreCase, fromBeginning); - - internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan source, ReadOnlySpan value, bool fromBeginning) - { - if (value.IsEmpty) - { - // Empty target string trivially appears at all indexes of all search spaces. - - return (fromBeginning) ? 
0 : source.Length; - } - - if (value.Length > source.Length) - { - // A non-linguistic search compares chars directly against one another, so large - // target strings can never be found inside small search spaces. This check also - // handles empty 'source' spans. - - return -1; - } - - if (GlobalizationMode.Invariant) - { - return InvariantIndexOf(source, value, ignoreCase: true, fromBeginning); - } - else - { - return IndexOfOrdinalCore(source, value, ignoreCase: true, fromBeginning); - } - } - /// /// IndexOf overload used when the caller needs the length of the matching substring. /// Caller needs to ensure is non-null and points @@ -1333,11 +1054,13 @@ namespace System.Globalization Debug.Assert(matchLengthPtr != null); *matchLengthPtr = 0; + int retVal = 0; + if ((options & ValidIndexMaskOffFlags) == 0) { // Common case: caller is attempting to perform a linguistic search. // Pass the flags down to NLS or ICU unless we're running in invariant - // mode, at which point we normalize the flags to Orginal[IgnoreCase]. + // mode, at which point we normalize the flags to Ordinal[IgnoreCase]. if (!GlobalizationMode.Invariant) { @@ -1351,13 +1074,14 @@ namespace System.Globalization return IndexOfCore(source, value, options, matchLengthPtr, fromBeginning); } } - else if ((options & CompareOptions.IgnoreCase) == 0) + + if ((options & CompareOptions.IgnoreCase) == 0) { - goto ReturnOrdinal; + retVal = (fromBeginning) ? source.IndexOf(value) : source.LastIndexOf(value); } else { - goto ReturnOrdinalIgnoreCase; + retVal = fromBeginning ? Ordinal.IndexOfOrdinalIgnoreCase(source, value) : Ordinal.LastIndexOfOrdinalIgnoreCase(source, value); } } else @@ -1367,11 +1091,11 @@ namespace System.Globalization if (options == CompareOptions.Ordinal) { - goto ReturnOrdinal; + retVal = (fromBeginning) ? source.IndexOf(value) : source.LastIndexOf(value); } else if (options == CompareOptions.OrdinalIgnoreCase) { - goto ReturnOrdinalIgnoreCase; + retVal = fromBeginning ? 
Ordinal.IndexOfOrdinalIgnoreCase(source, value) : Ordinal.LastIndexOfOrdinalIgnoreCase(source, value); } else { @@ -1379,15 +1103,6 @@ namespace System.Globalization } } - ReturnOrdinal: - int retVal = (fromBeginning) ? source.IndexOf(value) : source.LastIndexOf(value); - goto OrdinalReturn; - - ReturnOrdinalIgnoreCase: - retVal = IndexOfOrdinalIgnoreCase(source, value, fromBeginning); - goto OrdinalReturn; - - OrdinalReturn: // Both Ordinal and OrdinalIgnoreCase match by individual code points in a non-linguistic manner. // Non-BMP code points will never match BMP code points, so given UTF-16 inputs the match length // will always be equivalent to the target string length. @@ -1404,54 +1119,6 @@ namespace System.Globalization NlsIndexOfCore(source, target, options, matchLengthPtr, fromBeginning) : IcuIndexOfCore(source, target, options, matchLengthPtr, fromBeginning); - internal static int IndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase) - { - Debug.Assert(source != null); - Debug.Assert(value != null); - Debug.Assert((uint)startIndex <= (uint)source.Length); - Debug.Assert((uint)count <= (uint)(source.Length - startIndex)); - - // For ordinal (non-linguistic) comparisons, an empty target string is always - // found at the beginning of the search space, and a non-empty target string - // can never be found within an empty search space. This assumption is not - // valid for linguistic comparisons, including InvariantCulture comparisons. 
- - if (value.Length == 0) - { - return startIndex; - } - - if (count == 0) - { - return -1; - } - - int result; - - if (!ignoreCase) - { - result = SpanHelpers.IndexOf( - ref Unsafe.Add(ref source.GetRawStringData(), startIndex), - count, - ref value.GetRawStringData(), - value.Length); - } - else if (GlobalizationMode.Invariant) - { - result = InvariantIndexOf(source.AsSpan(startIndex, count), value, ignoreCase, fromBeginning: true); - } - else - { - result = IndexOfOrdinalCore(source.AsSpan(startIndex, count), value, ignoreCase, fromBeginning: true); - } - - if (result >= 0) - { - result += startIndex; - } - return result; - } - /// /// Returns the last index where value is found in string. The /// search starts from startIndex and ends at endIndex. Returns -1 if @@ -1659,7 +1326,7 @@ namespace System.Globalization { // Common case: caller is attempting to perform a linguistic search. // Pass the flags down to NLS or ICU unless we're running in invariant - // mode, at which point we normalize the flags to Orginal[IgnoreCase]. + // mode, at which point we normalize the flags to Ordinal[IgnoreCase]. 
if (!GlobalizationMode.Invariant) { @@ -1672,14 +1339,13 @@ namespace System.Globalization return IndexOfCore(source, value, options, matchLengthPtr: null, fromBeginning: false); } } - else if ((options & CompareOptions.IgnoreCase) == 0) - { - goto ReturnOrdinal; - } - else + + if ((options & CompareOptions.IgnoreCase) == 0) { - goto ReturnOrdinalIgnoreCase; + return source.LastIndexOf(value); } + + return Ordinal.LastIndexOfOrdinalIgnoreCase(source, value); } else { @@ -1688,25 +1354,16 @@ namespace System.Globalization if (options == CompareOptions.Ordinal) { - goto ReturnOrdinal; + return source.LastIndexOf(value); } - else if (options == CompareOptions.OrdinalIgnoreCase) - { - goto ReturnOrdinalIgnoreCase; - } - else + + if (options == CompareOptions.OrdinalIgnoreCase) { - throw new ArgumentException( - paramName: nameof(options), - message: SR.Argument_InvalidFlag); + return Ordinal.LastIndexOfOrdinalIgnoreCase(source, value); } - } - - ReturnOrdinal: - return source.LastIndexOf(value); - ReturnOrdinalIgnoreCase: - return IndexOfOrdinalIgnoreCase(source, value, fromBeginning: false); + throw new ArgumentException(paramName: nameof(options), message: SR.Argument_InvalidFlag); + } } /// @@ -1759,36 +1416,6 @@ namespace System.Globalization return LastIndexOf(source, valueAsUtf16.Slice(0, charCount), options); } - internal static int LastIndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase) - { - Debug.Assert(!string.IsNullOrEmpty(source)); - Debug.Assert(value != null); - - if (GlobalizationMode.Invariant) - { - return InvariantLastIndexOf(source, value, startIndex, count, ignoreCase); - } - - // For ordinal (non-linguistic) comparisons, an empty target string is always - // found at the end of the search space, and a non-empty target string - // can never be found within an empty search space. This assumption is not - // valid for linguistic comparisons, including InvariantCulture comparisons. 
- - if (value.Length == 0) - { - return startIndex + 1; // startIndex is the index of the last char to include in the search space - } - - if (count == 0) - { - return -1; - } - - return GlobalizationMode.UseNls ? - NlsLastIndexOfOrdinalCore(source, value, startIndex, count, ignoreCase) : - IcuLastIndexOfOrdinalCore(source, value, startIndex, count, ignoreCase); - } - /// /// Gets the SortKey for the given string with the given options. /// @@ -1922,20 +1549,19 @@ namespace System.Globalization { // Common case: caller is attempting to get a linguistic sort key. // Pass the flags down to NLS or ICU unless we're running in invariant - // mode, at which point we normalize the flags to Orginal[IgnoreCase]. + // mode, at which point we normalize the flags to Ordinal[IgnoreCase]. if (!GlobalizationMode.Invariant) { return GetHashCodeOfStringCore(source, options); } - else if ((options & CompareOptions.IgnoreCase) == 0) - { - goto ReturnOrdinal; - } - else + + if ((options & CompareOptions.IgnoreCase) == 0) { - goto ReturnOrdinalIgnoreCase; + return string.GetHashCode(source); } + + return string.GetHashCodeOrdinalIgnoreCase(source); } else { @@ -1944,23 +1570,18 @@ namespace System.Globalization if (options == CompareOptions.Ordinal) { - goto ReturnOrdinal; - } - else if (options == CompareOptions.OrdinalIgnoreCase) - { - goto ReturnOrdinalIgnoreCase; + return string.GetHashCode(source); } - else + + if (options == CompareOptions.OrdinalIgnoreCase) { - ThrowCompareOptionsCheckFailed(options); + return string.GetHashCodeOrdinalIgnoreCase(source); } - } - ReturnOrdinal: - return string.GetHashCode(source); + ThrowCompareOptionsCheckFailed(options); - ReturnOrdinalIgnoreCase: - return string.GetHashCodeOrdinalIgnoreCase(source); + return -1; // make the compiler happy; + } } private unsafe int GetHashCodeOfStringCore(ReadOnlySpan source, CompareOptions options) => diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs 
b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs index a291452..e0b9e82 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs @@ -157,7 +157,7 @@ namespace System.Globalization if (originalString.Length == inputLength && inputLength == outputLength && - CompareInfo.EqualsOrdinalIgnoreCase(ref *input, ref *output, inputLength)) + Ordinal.EqualsIgnoreCase(ref *input, ref *output, inputLength)) { return originalString; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs new file mode 100644 index 0000000..50bff43 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs @@ -0,0 +1,413 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics; +using System.Text.Unicode; +using System.Runtime.CompilerServices; +using Internal.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace System.Globalization +{ + internal static partial class Ordinal + { + internal static int CompareStringIgnoreCase(ref char strA, int lengthA, ref char strB, int lengthB) + { + int length = Math.Min(lengthA, lengthB); + int range = length; + + ref char charA = ref strA; + ref char charB = ref strB; + + // in InvariantMode we support all range and not only the ascii characters. + char maxChar = (GlobalizationMode.Invariant ? 
(char)0xFFFF : (char)0x7F); + + while (length != 0 && charA <= maxChar && charB <= maxChar) + { + // Ordinal equals or lowercase equals if the result ends up in the a-z range + if (charA == charB || + ((charA | 0x20) == (charB | 0x20) && + (uint)((charA | 0x20) - 'a') <= (uint)('z' - 'a'))) + { + length--; + charA = ref Unsafe.Add(ref charA, 1); + charB = ref Unsafe.Add(ref charB, 1); + } + else + { + int currentA = charA; + int currentB = charB; + + // Uppercase both chars if needed + if ((uint)(charA - 'a') <= 'z' - 'a') + { + currentA -= 0x20; + } + if ((uint)(charB - 'a') <= 'z' - 'a') + { + currentB -= 0x20; + } + + // Return the (case-insensitive) difference between them. + return currentA - currentB; + } + } + + if (length == 0 || GlobalizationMode.Invariant) + { + return lengthA - lengthB; + } + + range -= length; + + return CompareStringIgnoreCaseNonAscii(ref charA, lengthA - range, ref charB, lengthB - range); + } + + internal static int CompareStringIgnoreCaseNonAscii(ref char strA, int lengthA, ref char strB, int lengthB) + { + if (GlobalizationMode.Invariant) + { + return CompareIgnoreCaseInvariantMode(ref strA, lengthA, ref strB, lengthB); + } + + if (GlobalizationMode.UseNls) + { + return CompareInfo.NlsCompareStringOrdinalIgnoreCase(ref strA, lengthA, ref strB, lengthB); + } + + return OrdinalCasing.CompareStringIgnoreCase(ref strA, lengthA, ref strB, lengthB); + } + + internal static bool EqualsIgnoreCase(ref char charA, ref char charB, int length) + { + IntPtr byteOffset = IntPtr.Zero; + +#if TARGET_64BIT + // Read 4 chars (64 bits) at a time from each string + while ((uint)length >= 4) + { + ulong valueA = Unsafe.ReadUnaligned(ref Unsafe.As(ref Unsafe.AddByteOffset(ref charA, byteOffset))); + ulong valueB = Unsafe.ReadUnaligned(ref Unsafe.As(ref Unsafe.AddByteOffset(ref charB, byteOffset))); + + // A 32-bit test - even with the bit-twiddling here - is more efficient than a 64-bit test. 
+ ulong temp = valueA | valueB; + if (!Utf16Utility.AllCharsInUInt32AreAscii((uint)temp | (uint)(temp >> 32))) + { + goto NonAscii; // one of the inputs contains non-ASCII data + } + + // Generally, the caller has likely performed a first-pass check that the input strings + // are likely equal. Consider a dictionary which computes the hash code of its key before + // performing a proper deep equality check of the string contents. We want to optimize for + // the case where the equality check is likely to succeed, which means that we want to avoid + // branching within this loop unless we're about to exit the loop, either due to failure or + // due to us running out of input data. + + if (!Utf16Utility.UInt64OrdinalIgnoreCaseAscii(valueA, valueB)) + { + return false; + } + + byteOffset += 8; + length -= 4; + } +#endif + + // Read 2 chars (32 bits) at a time from each string +#if TARGET_64BIT + if ((uint)length >= 2) +#else + while ((uint)length >= 2) +#endif + { + uint valueA = Unsafe.ReadUnaligned(ref Unsafe.As(ref Unsafe.AddByteOffset(ref charA, byteOffset))); + uint valueB = Unsafe.ReadUnaligned(ref Unsafe.As(ref Unsafe.AddByteOffset(ref charB, byteOffset))); + + if (!Utf16Utility.AllCharsInUInt32AreAscii(valueA | valueB)) + { + goto NonAscii; // one of the inputs contains non-ASCII data + } + + // Generally, the caller has likely performed a first-pass check that the input strings + // are likely equal. Consider a dictionary which computes the hash code of its key before + // performing a proper deep equality check of the string contents. We want to optimize for + // the case where the equality check is likely to succeed, which means that we want to avoid + // branching within this loop unless we're about to exit the loop, either due to failure or + // due to us running out of input data. 
+ + if (!Utf16Utility.UInt32OrdinalIgnoreCaseAscii(valueA, valueB)) + { + return false; + } + + byteOffset += 4; + length -= 2; + } + + if (length != 0) + { + Debug.Assert(length == 1); + + uint valueA = Unsafe.AddByteOffset(ref charA, byteOffset); + uint valueB = Unsafe.AddByteOffset(ref charB, byteOffset); + + if ((valueA | valueB) > 0x7Fu) + { + goto NonAscii; // one of the inputs contains non-ASCII data + } + + if (valueA == valueB) + { + return true; // exact match + } + + valueA |= 0x20u; + if ((uint)(valueA - 'a') > (uint)('z' - 'a')) + { + return false; // not exact match, and first input isn't in [A-Za-z] + } + + // The ternary operator below seems redundant but helps RyuJIT generate more optimal code. + // See https://github.com/dotnet/runtime/issues/4207. + return (valueA == (valueB | 0x20u)) ? true : false; + } + + Debug.Assert(length == 0); + return true; + + NonAscii: + // The non-ASCII case is factored out into its own helper method so that the JIT + // doesn't need to emit a complex prolog for its caller (this method). 
+ return CompareStringIgnoreCase(ref Unsafe.AddByteOffset(ref charA, byteOffset), length, ref Unsafe.AddByteOffset(ref charB, byteOffset), length) == 0; + } + + internal static int CompareIgnoreCaseInvariantMode(ref char strA, int lengthA, ref char strB, int lengthB) + { + Debug.Assert(GlobalizationMode.Invariant); + int length = Math.Min(lengthA, lengthB); + + ref char charA = ref strA; + ref char charB = ref strB; + + while (length != 0) + { + if (charA == charB) + { + length--; + charA = ref Unsafe.Add(ref charA, 1); + charB = ref Unsafe.Add(ref charB, 1); + continue; + } + + char aUpper = OrdinalCasing.ToUpperInvariantMode(charA); + char bUpper = OrdinalCasing.ToUpperInvariantMode(charB); + + if (aUpper == bUpper) + { + length--; + charA = ref Unsafe.Add(ref charA, 1); + charB = ref Unsafe.Add(ref charB, 1); + continue; + } + + return aUpper - bUpper; + } + + return lengthA - lengthB; + } + + internal static unsafe int IndexOf(string source, string value, int startIndex, int count, bool ignoreCase) + { + if (source == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source); + } + + if (value == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.value); + } + + if (!source.TryGetSpan(startIndex, count, out ReadOnlySpan sourceSpan)) + { + // Bounds check failed - figure out exactly what went wrong so that we can + // surface the correct argument exception. + + if ((uint)startIndex > (uint)source.Length) + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.startIndex, ExceptionResource.ArgumentOutOfRange_Index); + } + else + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_Count); + } + } + + int result = ignoreCase ? IndexOfOrdinalIgnoreCase(sourceSpan, value) : sourceSpan.IndexOf(value); + + return result >= 0 ? 
result + startIndex : result; + } + + internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan source, ReadOnlySpan value) + { + if (value.Length == 0) + { + return 0; + } + + if (value.Length > source.Length) + { + // A non-linguistic search compares chars directly against one another, so large + // target strings can never be found inside small search spaces. This check also + // handles empty 'source' spans. + + return -1; + } + + if (GlobalizationMode.Invariant) + { + return CompareInfo.InvariantIndexOf(source, value, ignoreCase: true, fromBeginning: true); + } + + if (GlobalizationMode.UseNls) + { + return CompareInfo.NlsIndexOfOrdinalCore(source, value, ignoreCase: true, fromBeginning: true); + } + + return OrdinalCasing.IndexOf(source, value); + } + + internal static unsafe int LastIndexOf(string source, string value, int startIndex, int count, bool ignoreCase) + { + if (source == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source); + } + + if (value == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.value); + } + + if (value.Length == 0) + { + return startIndex + 1; // startIndex is the index of the last char to include in the search space + } + + if (count == 0) + { + return -1; + } + + if (GlobalizationMode.Invariant) + { + return CompareInfo.InvariantLastIndexOf(source, value, startIndex, count, ignoreCase); + } + + if (GlobalizationMode.UseNls) + { + return CompareInfo.NlsLastIndexOfOrdinalCore(source, value, startIndex, count, ignoreCase); + } + + if (!ignoreCase) + { + // startIndex is the index into source where we start search backwards from. + // leftStartIndex is the index into source of the start of the string that is + // count characters away from startIndex. 
+ int leftStartIndex = startIndex - count + 1; + + for (int i = startIndex - value.Length + 1; i >= leftStartIndex; i--) + { + int valueIndex, sourceIndex; + + for (valueIndex = 0, sourceIndex = i; + valueIndex < value.Length && source[sourceIndex] == value[valueIndex]; + valueIndex++, sourceIndex++) ; + + if (valueIndex == value.Length) { + return i; + } + } + + return -1; + } + + if (!source.TryGetSpan(startIndex, count, out ReadOnlySpan sourceSpan)) + { + // Bounds check failed - figure out exactly what went wrong so that we can + // surface the correct argument exception. + + if ((uint)startIndex > (uint)source.Length) + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.startIndex, ExceptionResource.ArgumentOutOfRange_Index); + } + else + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_Count); + } + } + + int result = OrdinalCasing.LastIndexOf(sourceSpan, value); + + if (result >= 0) + { + result += startIndex; + } + return result; + } + + internal static int LastIndexOfOrdinalIgnoreCase(ReadOnlySpan source, ReadOnlySpan value) + { + if (value.Length == 0) + { + return source.Length; + } + + if (value.Length > source.Length) + { + // A non-linguistic search compares chars directly against one another, so large + // target strings can never be found inside small search spaces. This check also + // handles empty 'source' spans. 
+ + return -1; + } + + if (GlobalizationMode.Invariant) + { + return CompareInfo.InvariantIndexOf(source, value, ignoreCase: true, fromBeginning: false); + } + + if (GlobalizationMode.UseNls) + { + return CompareInfo.NlsIndexOfOrdinalCore(source, value, ignoreCase: true, fromBeginning: false); + } + + return OrdinalCasing.LastIndexOf(source, value); + } + + internal static int ToUpperOrdinal(ReadOnlySpan source, Span destination) + { + if (source.Overlaps(destination)) + throw new InvalidOperationException(SR.InvalidOperation_SpanOverlappedOperation); + + // Assuming that changing case does not affect length + if (destination.Length < source.Length) + return -1; + + if (GlobalizationMode.Invariant) + { + OrdinalCasing.ToUpperInvariantMode(source, destination); + return source.Length; + } + + if (GlobalizationMode.UseNls) + { + TextInfo.Invariant.ChangeCaseToUpper(source, destination); // this is the best so far for NLS. + return source.Length; + } + + OrdinalCasing.ToUpperOrdinal(source, destination); + return source.Length; + } + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/OrdinalCasing.Icu.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/OrdinalCasing.Icu.cs new file mode 100644 index 0000000..a10c39d --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/OrdinalCasing.Icu.cs @@ -0,0 +1,444 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics; +using System.Runtime.InteropServices; +using System.Runtime.CompilerServices; +using Internal.Runtime.CompilerServices; + +namespace System.Globalization +{ + internal static partial class OrdinalCasing + { + // s_noCasingPage means the Unicode page doesn't support any casing and no case translation is needed. 
+ private static ushort [] s_noCasingPage = Array.Empty(); + + // s_basicLatin is covering the casing for the Basic Latin & C0 Controls range. + // we are not lazy initializing this range because it is the most common used range and we'll cache it anyway very early. + private static ushort [] s_basicLatin = + { + // Upper Casing + + /* 0000-000f */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, + /* 0010-001f */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, + /* 0020-002f */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, + /* 0030-003f */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, + /* 0040-004f */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, + /* 0050-005f */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, + /* 0060-006f */ 0x0060, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, + /* 0070-007f */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, + /* 0080-008f */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + /* 0090-009f */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + /* 00a0-00af */ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, + /* 00b0-00bf */ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 
0x039c, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, + /* 00c0-00cf */ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, + /* 00d0-00df */ 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, + /* 00e0-00ef */ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, + /* 00f0-00ff */ 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00f7, 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x0178, + }; + + // s_casingTable is covering the Unicode BMP plane only. Surrogate casing is handled separately. + // Every cell in the table is covering the casing of 256 characters in the BMP. + // Every cell is array of 512 character for uppercasing mapping. + private static ushort []?[] s_casingTable = + { + /* 0000-07FF */ s_basicLatin, null, null, null, null, null, null, null, + /* 0800-0FFF */ null, null, null, null, null, null, null, null, + /* 1000-17FF */ null, s_noCasingPage, null, null, s_noCasingPage, s_noCasingPage, null, null, + /* 1800-1FFF */ null, null, null, null, null, null, null, null, + /* 2000-27FF */ null, null, s_noCasingPage, s_noCasingPage, null, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* 2800-2FFF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, null, null, null, null, null, + /* 3000-37FF */ null, null, null, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* 3800-3FFF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* 4000-47FF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* 4800-4FFF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, 
s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* 5000-57FF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* 5800-5FFF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* 6000-67FF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* 6800-6FFF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* 7000-77FF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* 7800-7FFF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* 8000-87FF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* 8800-8FFF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* 9000-97FF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* 9800-9FFF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, null, + /* A000-A7FF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, null, s_noCasingPage, null, null, + /* A800-AFFF */ null, null, null, null, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* B000-B7FF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* B800-BFFF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, 
s_noCasingPage, + /* C000-C7FF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* C800-CFFF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* D000-D7FF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, null, + /* D800-DFFF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* E000-E7FF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* E800-EFFF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* F000-F7FF */ s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, s_noCasingPage, + /* F800-FFFF */ s_noCasingPage, s_noCasingPage, null, null, s_noCasingPage, null, null, null, + }; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static char ToUpper(char c) + { + int pageNumber = ((int)c) >> 8; + if (pageNumber == 0) // optimize for ASCII range + { + return (char) s_basicLatin[(int)c]; + } + + ushort[]? casingTable = s_casingTable[pageNumber]; + + if (casingTable == s_noCasingPage) + { + return c; + } + + if (casingTable == null) + { + casingTable = InitOrdinalCasingPage(pageNumber); + } + + return (char) casingTable[((int)c) & 0xFF]; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static char ToUpperInvariantMode(char c) => c <= '\u00FF' ? 
(char) s_basicLatin[(int)c] : c; + + public static void ToUpperInvariantMode(this ReadOnlySpan source, Span destination) + { + for (int i = 0; i < source.Length; i++) + { + destination[i] = ToUpperInvariantMode(source[i]); + } + } + + internal static void ToUpperOrdinal(ReadOnlySpan source, Span destination) + { + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(!GlobalizationMode.UseNls); + + for (int i = 0; i < source.Length; i++) + { + char c = source[i]; + if (c <= '\u00FF') // optimize ASCII/Latin + { + destination[i] = (char)s_basicLatin[c]; + continue; + } + + if (char.IsHighSurrogate(c) && i < source.Length - 1 && char.IsLowSurrogate(source[i + 1])) + { + // well formed surrogates + ToUpperSurrogate(c, source[i + 1], out ushort h, out ushort l); + destination[i] = (char)h; + destination[i+1] = (char)l; + i++; // skip the low surrogate + continue; + } + + destination[i] = ToUpper(c); + } + } + + // For simplicity ToUpper doesn't expect the Surrogate be formed with + // S = ((H - 0xD800) * 0x400) + (L - 0xDC00) + 0x10000 + // Instead it expect to have it in the form (H << 16) | L + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void ToUpperSurrogate(ushort h, ushort l, out ushort hr, out ushort lr) + { + switch (h) + { + case 0xD801: + // DESERET SMALL LETTERS 10428 ~ 1044F + if ((uint) (l - 0xdc28) <= (uint) (0xdc4f - 0xdc28)) + { + hr = h; + lr = (ushort) ((l - 0xdc28) + 0xdc00); + return; + } + + // OSAGE SMALL LETTERS 104D8 ~ 104FB + if ((uint) (l - 0xdcd8) <= (uint) (0xdcfb - 0xdcd8)) + { + hr = h; + lr = (ushort) ((l - 0xdcd8) + 0xdcb0); + return; + } + break; + + case 0xd803: + // OLD HUNGARIAN SMALL LETTERS 10CC0 ~ 10CF2 + if ((uint) (l - 0xdcc0) <= (uint) (0xdcf2 - 0xdcc0)) + { + hr = h; + lr = (ushort) ((l - 0xdcc0) + 0xdc80); + return; + } + break; + + case 0xd806: + // WARANG CITI SMALL LETTERS 118C0 ~ 118DF + if ((uint) (l - 0xdcc0) <= (uint) (0xdcdf - 0xdcc0)) + { + hr = h; + lr = (ushort) ((l - 0xdcc0) + 0xdca0); 
+ return; + } + break; + + case 0xd81b: + // MEDEFAIDRIN SMALL LETTERS 16E60 ~ 16E7F + if ((uint) (l - 0xde60) <= (uint) (0xde7f - 0xde60)) + { + hr = h; + lr = (ushort) ((l - 0xde60) + 0xde40); + return; + } + break; + + case 0xd83a: + // ADLAM SMALL LETTERS 1E922 ~ 1E943 + if ((uint) (l - 0xdd22) <= (uint) (0xdd43 - 0xdd22)) + { + hr = h; + lr = (ushort) ((l - 0xdd22) + 0xdd00); + return; + } + break; + } + + hr = h; + lr = l; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool EqualSurrogate(char h1, char l1, char h2, char l2) + { + ToUpperSurrogate(h1, l1, out ushort hr1, out ushort lr1); + ToUpperSurrogate(h2, l2, out ushort hr2, out ushort lr2); + + return hr1 == hr2 && lr1 == lr2; + } + + internal static int CompareStringIgnoreCase(ref char strA, int lengthA, ref char strB, int lengthB) + { + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(!GlobalizationMode.UseNls); + + int length = Math.Min(lengthA, lengthB); + + ref char charA = ref strA; + ref char charB = ref strB; + + while (length != 0) + { + // optimize for ASCII cases + if (charA <= '\u00FF' || length == 1 || !char.IsHighSurrogate(charA) || !char.IsHighSurrogate(charB)) + { + if (charA == charB) + { + length--; + charA = ref Unsafe.Add(ref charA, 1); + charB = ref Unsafe.Add(ref charB, 1); + continue; + } + + char aUpper = OrdinalCasing.ToUpper(charA); + char bUpper = OrdinalCasing.ToUpper(charB); + + if (aUpper == bUpper) + { + length--; + charA = ref Unsafe.Add(ref charA, 1); + charB = ref Unsafe.Add(ref charB, 1); + continue; + } + + return aUpper - bUpper; + } + + // We come here only if we have valid high surrogates and length > 1 + + char a = charA; + char b = charB; + + length--; + charA = ref Unsafe.Add(ref charA, 1); + charB = ref Unsafe.Add(ref charB, 1); + + if (!char.IsLowSurrogate(charA) || !char.IsLowSurrogate(charB)) + { + // malformed Surrogates - should be rare cases + if (a != b) + { + return a - b; + } + + // Should be pointing to the right
characters in the string to resume at. + // Just in case we could be pointing at high surrogate now. + continue; + } + + // we come here only if we have valid full surrogates + ToUpperSurrogate(a, charA, out ushort h1, out ushort l1); + ToUpperSurrogate(b, charB, out ushort h2, out ushort l2); + + if (h1 != h2) + { + return (int)h1 - (int)h2; + } + + if (l1 != l2) + { + return (int)l1 - (int)l2; + } + + length--; + charA = ref Unsafe.Add(ref charA, 1); + charB = ref Unsafe.Add(ref charB, 1); + } + + return lengthA - lengthB; + } + + internal static unsafe int IndexOf(ReadOnlySpan source, ReadOnlySpan value) + { + Debug.Assert(value.Length > 0); + Debug.Assert(value.Length <= source.Length); + + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(!GlobalizationMode.UseNls); + + fixed (char* pSource = &MemoryMarshal.GetReference(source)) + fixed (char* pValue = &MemoryMarshal.GetReference(value)) + { + char* pSourceLimit = pSource + (source.Length - value.Length); + char* pValueLimit = pValue + value.Length - 1; + char* pCurrentSource = pSource; + + while (pCurrentSource <= pSourceLimit) + { + char *pVal = pValue; + char *pSrc = pCurrentSource; + + while (pVal <= pValueLimit) + { + if (!char.IsHighSurrogate(*pVal) || pVal == pValueLimit) + { + if (*pVal != *pSrc && ToUpper(*pVal) != ToUpper(*pSrc)) + break; // no match + + pVal++; + pSrc++; + continue; + } + + if (char.IsHighSurrogate(*pSrc) && char.IsLowSurrogate(*(pSrc + 1)) && char.IsLowSurrogate(*(pVal + 1))) + { + // Well formed surrogates + // both the source and the Value have well-formed surrogates. + if (!EqualSurrogate(*pSrc, *(pSrc + 1), *pVal, *(pVal + 1))) + break; // no match + + pSrc += 2; + pVal += 2; + continue; + } + + if (*pVal != *pSrc) + break; // no match + + pSrc++; + pVal++; + } + + if (pVal > pValueLimit) + { + // Found match. 
+ return (int) (pCurrentSource - pSource); + } + + pCurrentSource++; + } + + return -1; + } + } + + internal static unsafe int LastIndexOf(ReadOnlySpan source, ReadOnlySpan value) + { + Debug.Assert(value.Length > 0); + Debug.Assert(value.Length <= source.Length); + + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(!GlobalizationMode.UseNls); + + fixed (char* pSource = &MemoryMarshal.GetReference(source)) + fixed (char* pValue = &MemoryMarshal.GetReference(value)) + { + char* pValueLimit = pValue + value.Length - 1; + char* pCurrentSource = pSource + (source.Length - value.Length); + + while (pCurrentSource >= pSource) + { + char *pVal = pValue; + char *pSrc = pCurrentSource; + + while (pVal <= pValueLimit) + { + if (!char.IsHighSurrogate(*pVal) || pVal == pValueLimit) + { + if (*pVal != *pSrc && ToUpper(*pVal) != ToUpper(*pSrc)) + break; // no match + + pVal++; + pSrc++; + continue; + } + + if (char.IsHighSurrogate(*pSrc) && char.IsLowSurrogate(*(pSrc + 1)) && char.IsLowSurrogate(*(pVal + 1))) + { + // Well formed surrogates + // both the source and the Value have well-formed surrogates. + if (!EqualSurrogate(*pSrc, *(pSrc + 1), *pVal, *(pVal + 1))) + break; // no match + + pSrc += 2; + pVal += 2; + continue; + } + + if (*pVal != *pSrc) + break; // no match + + pSrc++; + pVal++; + } + + if (pVal > pValueLimit) + { + // Found match. 
+ return (int) (pCurrentSource - pSource); + } + + pCurrentSource--; + } + + return -1; + } + } + + private static unsafe ushort [] InitOrdinalCasingPage(int pageNumber) + { + Debug.Assert(pageNumber >= 0 && pageNumber < 256); + + ushort [] casingTable = new ushort[256]; + fixed (ushort* table = casingTable) + { + char* pTable = (char*)table; + Interop.Globalization.InitOrdinalCasingPage(pageNumber, pTable); + } + s_casingTable[pageNumber] = casingTable; + return casingTable; + } + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Marvin.OrdinalIgnoreCase.cs b/src/libraries/System.Private.CoreLib/src/System/Marvin.OrdinalIgnoreCase.cs index 98bda07..c2e47dd 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Marvin.OrdinalIgnoreCase.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Marvin.OrdinalIgnoreCase.cs @@ -70,7 +70,7 @@ namespace System char[]? borrowedArr = null; Span scratch = (uint)count <= 64 ? stackalloc char[64] : (borrowedArr = ArrayPool.Shared.Rent(count)); - int charsWritten = new ReadOnlySpan(ref data, count).ToUpperInvariant(scratch); + int charsWritten = System.Globalization.Ordinal.ToUpperOrdinal(new ReadOnlySpan(ref data, count), scratch); Debug.Assert(charsWritten == count); // invariant case conversion should involve simple folding; preserve code unit count // Slice the array to the size returned by ToUpperInvariant. 
diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Globalization.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Globalization.cs index 09f4dce..ce13637 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Globalization.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Globalization.cs @@ -83,7 +83,7 @@ namespace System return false; if (value.Length == 0) // span.Length == value.Length == 0 return true; - return CompareInfo.EqualsOrdinalIgnoreCase(ref MemoryMarshal.GetReference(span), ref MemoryMarshal.GetReference(value), span.Length); + return Ordinal.EqualsIgnoreCase(ref MemoryMarshal.GetReference(span), ref MemoryMarshal.GetReference(value), span.Length); } /// @@ -114,7 +114,7 @@ namespace System default: Debug.Assert(comparisonType == StringComparison.OrdinalIgnoreCase); - return CompareInfo.CompareOrdinalIgnoreCase(span, other); + return Ordinal.CompareStringIgnoreCase(ref MemoryMarshal.GetReference(span), span.Length, ref MemoryMarshal.GetReference(other), other.Length); } } @@ -130,11 +130,7 @@ namespace System if (comparisonType == StringComparison.Ordinal) { - return SpanHelpers.IndexOf( - ref MemoryMarshal.GetReference(span), - span.Length, - ref MemoryMarshal.GetReference(value), - value.Length); + return SpanHelpers.IndexOf(ref MemoryMarshal.GetReference(span), span.Length, ref MemoryMarshal.GetReference(value), value.Length); } switch (comparisonType) @@ -149,7 +145,7 @@ namespace System default: Debug.Assert(comparisonType == StringComparison.OrdinalIgnoreCase); - return CompareInfo.IndexOfOrdinalIgnoreCase(span, value, fromBeginning: true); + return Ordinal.IndexOfOrdinalIgnoreCase(span, value); } } @@ -184,7 +180,7 @@ namespace System default: Debug.Assert(comparisonType == StringComparison.OrdinalIgnoreCase); - return CompareInfo.IndexOfOrdinalIgnoreCase(span, value, fromBeginning: false); + return Ordinal.LastIndexOfOrdinalIgnoreCase(span, value); 
} } @@ -324,7 +320,7 @@ namespace System [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static bool EndsWithOrdinalIgnoreCase(this ReadOnlySpan span, ReadOnlySpan value) => value.Length <= span.Length - && CompareInfo.EqualsOrdinalIgnoreCase( + && Ordinal.EqualsIgnoreCase( ref Unsafe.Add(ref MemoryMarshal.GetReference(span), span.Length - value.Length), ref MemoryMarshal.GetReference(value), value.Length); @@ -361,7 +357,7 @@ namespace System [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static bool StartsWithOrdinalIgnoreCase(this ReadOnlySpan span, ReadOnlySpan value) => value.Length <= span.Length - && CompareInfo.EqualsOrdinalIgnoreCase(ref MemoryMarshal.GetReference(span), ref MemoryMarshal.GetReference(value), value.Length); + && Ordinal.EqualsIgnoreCase(ref MemoryMarshal.GetReference(span), ref MemoryMarshal.GetReference(value), value.Length); /// /// Returns an enumeration of from the provided span. diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index 40f6317..c616ae9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -66,8 +66,9 @@ namespace System { Debug.Assert(strA.Length == strB.Length); - return CompareInfo.EqualsOrdinalIgnoreCase(ref strA.GetRawStringData(), ref strB.GetRawStringData(), strB.Length); + return Ordinal.EqualsIgnoreCase(ref strA.GetRawStringData(), ref strB.GetRawStringData(), strB.Length); } + private static unsafe int CompareOrdinalHelper(string strA, string strB) { Debug.Assert(strA != null); @@ -250,7 +251,7 @@ namespace System return CompareOrdinalHelper(strA, strB); case StringComparison.OrdinalIgnoreCase: - return CompareInfo.CompareOrdinalIgnoreCase(strA, strB); + return Ordinal.CompareStringIgnoreCase(ref strA.GetRawStringData(), strA.Length, ref strB.GetRawStringData(), strB.Length); 
default: throw new ArgumentException(SR.NotSupported_StringComparison, nameof(comparisonType)); @@ -416,7 +417,7 @@ namespace System default: Debug.Assert(comparisonType == StringComparison.OrdinalIgnoreCase); // CheckStringComparison validated these earlier - return CompareInfo.CompareOrdinalIgnoreCase(strA, indexA, lengthA, strB, indexB, lengthB); + return Ordinal.CompareStringIgnoreCase(ref Unsafe.Add(ref strA.GetRawStringData(), indexA), lengthA, ref Unsafe.Add(ref strB.GetRawStringData(), indexB), lengthB); } } @@ -569,7 +570,9 @@ namespace System return (uint)offset <= (uint)this.Length && this.AsSpan(offset).SequenceEqual(value); case StringComparison.OrdinalIgnoreCase: - return this.Length < value.Length ? false : (CompareInfo.CompareOrdinalIgnoreCase(this, this.Length - value.Length, value.Length, value, 0, value.Length) == 0); + return this.Length < value.Length ? + false : + (Ordinal.CompareStringIgnoreCase(ref Unsafe.Add(ref this.GetRawStringData(), this.Length - value.Length), value.Length, ref value.GetRawStringData(), value.Length) == 0); default: throw new ArgumentException(SR.NotSupported_StringComparison, nameof(comparisonType)); @@ -928,7 +931,7 @@ namespace System { return false; } - return CompareInfo.EqualsOrdinalIgnoreCase(ref this.GetRawStringData(), ref value.GetRawStringData(), value.Length); + return Ordinal.EqualsIgnoreCase(ref this.GetRawStringData(), ref value.GetRawStringData(), value.Length); default: throw new ArgumentException(SR.NotSupported_StringComparison, nameof(comparisonType)); diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/String.Searching.cs index 0435fb3..86f772c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Searching.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Searching.cs @@ -340,7 +340,7 @@ namespace System case StringComparison.Ordinal: case StringComparison.OrdinalIgnoreCase: - return 
CompareInfo.Invariant.IndexOf(this, value, startIndex, count, GetCompareOptionsFromOrdinalStringComparison(comparisonType)); + return Ordinal.IndexOf(this, value, startIndex, count, comparisonType == StringComparison.OrdinalIgnoreCase); default: throw (value is null) diff --git a/src/libraries/System.Private.CoreLib/src/System/StringComparer.cs b/src/libraries/System.Private.CoreLib/src/System/StringComparer.cs index 39791cf..c0f685b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/StringComparer.cs +++ b/src/libraries/System.Private.CoreLib/src/System/StringComparer.cs @@ -244,7 +244,7 @@ namespace System { return false; } - return CompareInfo.EqualsOrdinalIgnoreCase(ref x.GetRawStringData(), ref y.GetRawStringData(), x.Length); + return System.Globalization.Ordinal.EqualsIgnoreCase(ref x.GetRawStringData(), ref y.GetRawStringData(), x.Length); } return x.Equals(y); } @@ -338,7 +338,7 @@ namespace System return false; } - return CompareInfo.EqualsOrdinalIgnoreCase(ref x.GetRawStringData(), ref y.GetRawStringData(), x.Length); + return System.Globalization.Ordinal.EqualsIgnoreCase(ref x.GetRawStringData(), ref y.GetRawStringData(), x.Length); } public override int GetHashCode(string obj) diff --git a/src/libraries/System.Runtime/tests/System/StringTests.cs b/src/libraries/System.Runtime/tests/System/StringTests.cs index 12a0e4c..88f77c6 100644 --- a/src/libraries/System.Runtime/tests/System/StringTests.cs +++ b/src/libraries/System.Runtime/tests/System/StringTests.cs @@ -218,7 +218,7 @@ namespace System.Tests if (PlatformDetection.IsNotInvariantGlobalization) yield return new object[] { "Hello", "ell" + SoftHyphen, StringComparison.CurrentCulture, true }; - + // CurrentCultureIgnoreCase yield return new object[] { "Hello", "ello", StringComparison.CurrentCultureIgnoreCase, true }; yield return new object[] { "Hello", "ELL", StringComparison.CurrentCultureIgnoreCase, true }; @@ -234,7 +234,7 @@ namespace System.Tests yield return new object[] { "Hello", 
"ell" + SoftHyphen, StringComparison.CurrentCultureIgnoreCase, true }; yield return new object[] { "Hello", "Ell" + SoftHyphen, StringComparison.CurrentCultureIgnoreCase, true }; } - + // InvariantCulture yield return new object[] { "Hello", "ello", StringComparison.InvariantCulture, true }; yield return new object[] { "Hello", "ELL", StringComparison.InvariantCulture, false }; @@ -248,7 +248,7 @@ namespace System.Tests if (PlatformDetection.IsNotInvariantGlobalization) yield return new object[] { "Hello", "ell" + SoftHyphen, StringComparison.InvariantCulture, true }; - + // InvariantCultureIgnoreCase yield return new object[] { "Hello", "ello", StringComparison.InvariantCultureIgnoreCase, true }; yield return new object[] { "Hello", "ELL", StringComparison.InvariantCultureIgnoreCase, true }; @@ -276,7 +276,7 @@ namespace System.Tests yield return new object[] { "Hello", "", StringComparison.Ordinal, true }; yield return new object[] { "Hello", "ell" + SoftHyphen, StringComparison.Ordinal, false }; yield return new object[] { "Hello", "Ell" + SoftHyphen, StringComparison.Ordinal, false }; - + // OrdinalIgnoreCase yield return new object[] { "Hello", "ello", StringComparison.OrdinalIgnoreCase, true }; yield return new object[] { "Hello", "ELL", StringComparison.OrdinalIgnoreCase, true }; @@ -624,7 +624,7 @@ namespace System.Tests yield return new object[] { "abc", "b", "LONG", StringComparison.CurrentCultureIgnoreCase, "aLONGc" }; yield return new object[] { "abc", "b", "d", StringComparison.CurrentCultureIgnoreCase, "adc" }; yield return new object[] { "abc", "b", null, StringComparison.CurrentCultureIgnoreCase, "ac" }; - + if (PlatformDetection.IsNotInvariantGlobalization) yield return new object[] { "abc", "abc" + SoftHyphen, "def", StringComparison.CurrentCultureIgnoreCase, "def" }; @@ -634,7 +634,7 @@ namespace System.Tests yield return new object[] { "abc", "b", "LONG", StringComparison.Ordinal, "aLONGc" }; yield return new object[] { "abc", "b", "d", 
StringComparison.Ordinal, "adc" }; yield return new object[] { "abc", "b", null, StringComparison.Ordinal, "ac" }; - + if (PlatformDetection.IsNotInvariantGlobalization) yield return new object[] { "abc", "abc" + SoftHyphen, "def", StringComparison.Ordinal, "abc" }; @@ -645,7 +645,7 @@ namespace System.Tests yield return new object[] { "abc", "b", "d", StringComparison.OrdinalIgnoreCase, "adc" }; yield return new object[] { "abc", "b", null, StringComparison.OrdinalIgnoreCase, "ac" }; - + if (PlatformDetection.IsNotInvariantGlobalization) yield return new object[] { "abc", "abc" + SoftHyphen, "def", StringComparison.OrdinalIgnoreCase, "abc" }; @@ -656,7 +656,7 @@ namespace System.Tests yield return new object[] { "abc", "b", "d", StringComparison.InvariantCulture, "adc" }; yield return new object[] { "abc", "b", null, StringComparison.InvariantCulture, "ac" }; - + if (PlatformDetection.IsNotInvariantGlobalization) yield return new object[] { "abc", "abc" + SoftHyphen, "def", StringComparison.InvariantCulture, "def" }; @@ -667,7 +667,7 @@ namespace System.Tests yield return new object[] { "abc", "b", "d", StringComparison.InvariantCultureIgnoreCase, "adc" }; yield return new object[] { "abc", "b", null, StringComparison.InvariantCultureIgnoreCase, "ac" }; - + if (PlatformDetection.IsNotInvariantGlobalization) { yield return new object[] { "abc", "abc" + SoftHyphen, "def", StringComparison.InvariantCultureIgnoreCase, "def" }; @@ -1153,6 +1153,114 @@ namespace System.Tests AssertExtensions.Throws("comparisonType", () => "foo".IndexOf('o', StringComparison.OrdinalIgnoreCase + 1)); } + public static IEnumerable IndexOf_String_StringComparison_TestData() + { + yield return new object[] { "Hello\uD801\uDC28", "\uD801\uDC4f", StringComparison.Ordinal, -1}; + yield return new object[] { "Hello\uD801\uDC28", "\uD801\uDC00", StringComparison.OrdinalIgnoreCase, 5}; + yield return new object[] { "Hello\u0200\u0202", "\u0201\u0203", StringComparison.OrdinalIgnoreCase, 5}; + 
yield return new object[] { "Hello\u0200\u0202", "\u0201\u0203", StringComparison.Ordinal, -1}; + yield return new object[] { "Hello\uD801\uDC00", "\uDC00", StringComparison.Ordinal, 6}; + yield return new object[] { "Hello\uD801\uDC00", "\uDC00", StringComparison.OrdinalIgnoreCase, 6}; + yield return new object[] { "Hello\uD801\uDC00", "\uD801", StringComparison.OrdinalIgnoreCase, 5}; + yield return new object[] { "Hello\uD801\uDC00", "\uD801\uDC00", StringComparison.Ordinal, 5}; + } + + + [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))] + [MemberData(nameof(IndexOf_String_StringComparison_TestData))] + public static void IndexOf_Ordinal_Misc(string source, string target, StringComparison stringComparison, int expected) + { + Assert.Equal(expected, source.IndexOf(target, stringComparison)); + } + + public static IEnumerable LastIndexOf_String_StringComparison_TestData() + { + yield return new object[] { "\uD801\uDC28Hello", "\uD801\uDC4f", 6, StringComparison.Ordinal, -1}; + yield return new object[] { "\uD801\uDC28Hello", "\uD801\uDC00", 6, StringComparison.OrdinalIgnoreCase, 0}; + yield return new object[] { "\uD801\uDC28Hello\uD801\uDC28", "\uD801\uDC00", 1, StringComparison.OrdinalIgnoreCase, 0}; + yield return new object[] { "\u0200\u0202Hello", "\u0201\u0203", 6, StringComparison.OrdinalIgnoreCase, 0}; + yield return new object[] { "\u0200\u0202Hello\u0200\u0202", "\u0201\u0203", 1, StringComparison.OrdinalIgnoreCase, 0}; + yield return new object[] { "\u0200\u0202Hello", "\u0201\u0203", 6, StringComparison.Ordinal, -1}; + yield return new object[] { "\uD801\uDC00Hello", "\uDC00", 6, StringComparison.Ordinal, 1}; + yield return new object[] { "\uD801\uDC00Hello\uDC00", "\uDC00", 3, StringComparison.Ordinal, 1}; + yield return new object[] { "\uD801\uDC00Hello", "\uDC00", 6, StringComparison.OrdinalIgnoreCase, 1}; + yield return new object[] { "\uD801\uDC00Hello\uDC00", "\uDC00", 4, 
StringComparison.OrdinalIgnoreCase, 1}; + yield return new object[] { "\uD801\uDC00Hello", "\uD801", 6, StringComparison.OrdinalIgnoreCase, 0}; + yield return new object[] { "\uD801\uD801Hello", "\uD801", 0, StringComparison.OrdinalIgnoreCase, 0}; + yield return new object[] { "\uD801\uDC00Hello", "\uD801\uDC00", 6, StringComparison.Ordinal, 0}; + } + + [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))] + [MemberData(nameof(LastIndexOf_String_StringComparison_TestData))] + public static void LastIndexOf_Ordinal_Misc(string source, string target, int startIndex, StringComparison stringComparison, int expected) + { + Assert.Equal(expected, source.LastIndexOf(target, startIndex, stringComparison)); + } + + public static IEnumerableOrdinal_String_StringComparison_TestData() + { + yield return new object[] { "\u0200\u0202", "\u0201\u0203", StringComparison.OrdinalIgnoreCase, true}; + yield return new object[] { "\uD801\uDC28", "\uD801\uDC00", StringComparison.OrdinalIgnoreCase, true}; + yield return new object[] { "\u0200\u0202", "\u0201\u0203", StringComparison.Ordinal, false}; + yield return new object[] { "\uD801\uDC28", "\uD801\uDC00", StringComparison.Ordinal, false}; + yield return new object[] { "\uD801\uD801\uDC28", "\uD801\uD801\uDC00", StringComparison.OrdinalIgnoreCase, true}; + yield return new object[] { "\uD801\uD801\uDC28", "\uD801\uD801\uDC00", StringComparison.Ordinal, false}; + yield return new object[] { "\u0200\u0202", "\u0200\u0202", StringComparison.Ordinal, true}; + yield return new object[] { "\u0200\u0202", "\u0200\u0202", StringComparison.OrdinalIgnoreCase, true}; + yield return new object[] { "\u0200\u0202", "\u0200\u0202A", StringComparison.Ordinal, false}; + yield return new object[] { "\u0200\u0202", "\u0200\u0202A", StringComparison.OrdinalIgnoreCase, false}; + } + + [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))] + 
[MemberData(nameof(Ordinal_String_StringComparison_TestData))] + public static void Compare_Ordinal_Misc(string source, string target, StringComparison stringComparison, bool expected) + { + Assert.Equal(expected, string.Compare(source, target, stringComparison) == 0); + Assert.Equal(expected, string.GetHashCode(source, stringComparison) == string.GetHashCode(target, stringComparison)); + } + + public static IEnumerableStartsWith_String_StringComparison_TestData() + { + yield return new object[] { "\u0200\u0202ABC", "\u0201\u0203", StringComparison.OrdinalIgnoreCase, true}; + yield return new object[] { "\uD801\uDC28ABC", "\uD801\uDC00", StringComparison.OrdinalIgnoreCase, true}; + yield return new object[] { "\u0200\u0202AB", "\u0201\u0203", StringComparison.Ordinal, false}; + yield return new object[] { "\uD801\uDC28AB", "\uD801\uDC00", StringComparison.Ordinal, false}; + yield return new object[] { "\uD801\uD801\uDC28AAA", "\uD801\uD801\uDC00", StringComparison.OrdinalIgnoreCase, true}; + yield return new object[] { "\uD801\uD801\uDC28AAA", "\uD801\uD801\uDC00", StringComparison.Ordinal, false}; + yield return new object[] { "\u0200\u0202AAA", "\u0200\u0202", StringComparison.Ordinal, true}; + yield return new object[] { "\u0200\u0202AAA", "\u0200\u0202", StringComparison.OrdinalIgnoreCase, true}; + yield return new object[] { "\u0200\u0202AAA", "\u0200\u0202A", StringComparison.Ordinal, true}; + yield return new object[] { "\u0200\u0202AAA", "\u0200\u0202A", StringComparison.OrdinalIgnoreCase, true}; + } + + [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))] + [MemberData(nameof(StartsWith_String_StringComparison_TestData))] + public static void StartsWith_Ordinal_Misc(string source, string target, StringComparison stringComparison, bool expected) + { + Assert.Equal(expected, source.StartsWith(target, stringComparison)); + } + + public static IEnumerableEndsWith_String_StringComparison_TestData() + { + yield return new 
object[] { "ABC\u0200\u0202", "\u0201\u0203", StringComparison.OrdinalIgnoreCase, true}; + yield return new object[] { "ABC\uD801\uDC28", "\uD801\uDC00", StringComparison.OrdinalIgnoreCase, true}; + yield return new object[] { "AB\u0200\u0202", "\u0201\u0203", StringComparison.Ordinal, false}; + yield return new object[] { "AB\uD801\uDC28", "\uD801\uDC00", StringComparison.Ordinal, false}; + yield return new object[] { "AAA\uD801\uD801\uDC28", "\uD801\uD801\uDC00", StringComparison.OrdinalIgnoreCase, true}; + yield return new object[] { "AAA\uD801\uD801\uDC28", "\uD801\uD801\uDC00", StringComparison.Ordinal, false}; + yield return new object[] { "AAA\u0200\u0202", "\u0200\u0202", StringComparison.Ordinal, true}; + yield return new object[] { "AAA\u0200\u0202", "\u0200\u0202", StringComparison.OrdinalIgnoreCase, true}; + yield return new object[] { "AAA\u0200\u0202A", "\u0200\u0202A", StringComparison.Ordinal, true}; + yield return new object[] { "AAA\u0200\u0202A", "\u0200\u0202A", StringComparison.OrdinalIgnoreCase, true}; + } + + [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))] + [MemberData(nameof(EndsWith_String_StringComparison_TestData))] + public static void EndsWith_Ordinal_Misc(string source, string target, StringComparison stringComparison, bool expected) + { + Assert.Equal(expected, source.EndsWith(target, stringComparison)); + } + [Theory] [MemberData(nameof(Concat_Strings_2_3_4_TestData))] public static void Concat_Spans(string[] values, string expected) -- 2.7.4