From 734676f59e33e1806899e4a8d64291c7994cbeb7 Mon Sep 17 00:00:00 2001 From: stephentoub Date: Tue, 20 Oct 2015 22:32:14 -0400 Subject: [PATCH] Update IndexOfOrdinalIgnoreCase to use full code units Commit migrated from https://github.com/dotnet/coreclr/commit/91f0f8bb71ee74855bcd2e2bff0f088c3346cde7 --- .../System.Globalization.Native/collation.cpp | 119 ++++++++++----------- .../Interop.Collation.cs | 5 +- .../System/Globalization/CompareInfo.Unix.cs | 4 +- 3 files changed, 60 insertions(+), 68 deletions(-) diff --git a/src/coreclr/src/corefx/System.Globalization.Native/collation.cpp b/src/coreclr/src/corefx/System.Globalization.Native/collation.cpp index d82d8d7..ca82a1b 100644 --- a/src/coreclr/src/corefx/System.Globalization.Native/collation.cpp +++ b/src/coreclr/src/corefx/System.Globalization.Native/collation.cpp @@ -129,24 +129,24 @@ extern "C" int32_t LastIndexOf( Static Function: AreEqualOrdinalIgnoreCase */ -static bool AreEqualOrdinalIgnoreCase(UChar one, UChar two) +static bool AreEqualOrdinalIgnoreCase(UChar32 one, UChar32 two) { - // Return whether the two characters are identical or would be identical if they were upper-cased. - - if (one == two) - { - return true; - } - - if (one == 0x0131 || two == 0x0131) - { - // On Windows with InvariantCulture, the LATIN SMALL LETTER DOTLESS I (U+0131) - // capitalizes to itself, whereas with ICU it capitalizes to LATIN CAPITAL LETTER I (U+0049). - // We special case it to match the Windows invariant behavior. - return false; - } - - return u_toupper(one) == u_toupper(two); + // Return whether the two characters are identical or would be identical if they were upper-cased. + + if (one == two) + { + return true; + } + + if (one == 0x0131 || two == 0x0131) + { + // On Windows with InvariantCulture, the LATIN SMALL LETTER DOTLESS I (U+0131) + // capitalizes to itself, whereas with ICU it capitalizes to LATIN CAPITAL LETTER I (U+0049). + // We special case it to match the Windows invariant behavior. + return false; + } + + return u_toupper(one) == u_toupper(two); } /* @@ -154,53 +154,48 @@ Function: IndexOfOrdinalIgnoreCase */ extern "C" int32_t -IndexOfOrdinalIgnoreCase(const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength) +IndexOfOrdinalIgnoreCase( + const UChar* lpTarget, int32_t cwTargetLength, + const UChar* lpSource, int32_t cwSourceLength, + int32_t findLast) { - int32_t endIndex = cwSourceLength - cwTargetLength; - assert(endIndex >= 0); - - for (int32_t i = 0; i <= endIndex; i++) - { - int32_t targetIdx = 0; - for (int32_t srcIdx = i; targetIdx < cwTargetLength; srcIdx++, targetIdx++) { - if (!AreEqualOrdinalIgnoreCase(lpSource[srcIdx], lpTarget[targetIdx])) { - break; - } - } - - if (targetIdx == cwTargetLength) { - return i; - } - } - - return -1; -} + int32_t result = -1; -/* -Function: -LastIndexOfOrdinalIgnoreCase -*/ -extern "C" int32_t -LastIndexOfOrdinalIgnoreCase(const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength) -{ - int32_t endIndex = cwSourceLength - cwTargetLength; - assert(endIndex >= 0); - - for (int32_t i = endIndex; i >= 0; i--) - { - int32_t targetIdx = 0; - for (int32_t srcIdx = i; targetIdx < cwTargetLength; srcIdx++, targetIdx++) { - if (!AreEqualOrdinalIgnoreCase(lpSource[srcIdx], lpTarget[targetIdx])) { - break; - } - } - - if (targetIdx == cwTargetLength) { - return i; - } - } - - return -1; + int32_t endIndex = cwSourceLength - cwTargetLength; + assert(endIndex >= 0); + + int32_t i = 0; + while (i <= endIndex) + { + int32_t srcIdx = i, trgIdx = 0; + const UChar *src = lpSource, *trg = lpTarget; + UChar32 srcCodepoint, trgCodepoint; + + bool match = true; + while (trgIdx < cwTargetLength) + { + U16_NEXT(src, srcIdx, cwSourceLength, srcCodepoint); + U16_NEXT(trg, trgIdx, cwTargetLength, trgCodepoint); + if (!AreEqualOrdinalIgnoreCase(srcCodepoint, trgCodepoint)) + { + match = false; + break; + } + } + + if (match) + { + result = i; + if (!findLast) + { + break; + } + } + + U16_FWD_1(lpSource, i, cwSourceLength); + } + + return result; } /* diff --git a/src/coreclr/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs b/src/coreclr/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs index 1a3654e..c236c03 100644 --- a/src/coreclr/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs +++ b/src/coreclr/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs @@ -19,10 +19,7 @@ internal static partial class Interop internal unsafe static extern int LastIndexOf(byte[] localeName, string target, char* pSource, int cwSourceLength, CompareOptions options); [DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)] - internal unsafe static extern int IndexOfOrdinalIgnoreCase(string target, int cwTargetLength, char* pSource, int cwSourceLength); - - [DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)] - internal unsafe static extern int LastIndexOfOrdinalIgnoreCase(string target, int cwTargetLength, char* pSource, int cwSourceLength); + internal unsafe static extern int IndexOfOrdinalIgnoreCase(string target, int cwTargetLength, char* pSource, int cwSourceLength, bool findLast); [DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)] [return: MarshalAs(UnmanagedType.Bool)] diff --git a/src/coreclr/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs b/src/coreclr/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs index 8df1d79..9a2a35c 100644 --- a/src/coreclr/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs +++ b/src/coreclr/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs @@ -39,7 +39,7 @@ namespace System.Globalization { fixed (char* pSource = source) { - int index = Interop.GlobalizationInterop.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + startIndex, count); + int index = Interop.GlobalizationInterop.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + startIndex, count, findLast: false); return index != -1 ? startIndex + index : -1; @@ -88,7 +88,7 @@ namespace System.Globalization { fixed (char* pSource = source) { - int lastIndex = Interop.GlobalizationInterop.LastIndexOfOrdinalIgnoreCase(value, value.Length, pSource + leftStartIndex, count); + int lastIndex = Interop.GlobalizationInterop.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + leftStartIndex, count, findLast: true); return lastIndex != -1 ? leftStartIndex + lastIndex : -1; -- 2.7.4