Update IndexOfOrdinalIgnoreCase to use full code points
author stephentoub <stoub@microsoft.com>
Wed, 21 Oct 2015 02:32:14 +0000 (22:32 -0400)
committer stephentoub <stoub@microsoft.com>
Thu, 22 Oct 2015 18:31:59 +0000 (14:31 -0400)
Commit migrated from https://github.com/dotnet/coreclr/commit/91f0f8bb71ee74855bcd2e2bff0f088c3346cde7

src/coreclr/src/corefx/System.Globalization.Native/collation.cpp
src/coreclr/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs
src/coreclr/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs

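diff --git a/src/coreclr/src/corefx/System.Globalization.Native/collation.cpp b/src/coreclr/src/corefx/System.Globalization.Native/collation.cpp
index d82d8d7..ca82a1b 100644
--- a/src/coreclr/src/corefx/System.Globalization.Native/collation.cpp
+++ b/src/coreclr/src/corefx/System.Globalization.Native/collation.cpp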
@@ -129,24 +129,24 @@ extern "C" int32_t LastIndexOf(
 Static Function:
 AreEqualOrdinalIgnoreCase
 */
-static bool AreEqualOrdinalIgnoreCase(UChar one, UChar two)
+static bool AreEqualOrdinalIgnoreCase(UChar32 one, UChar32 two)
 {
-       // Return whether the two characters are identical or would be identical if they were upper-cased.
-
-       if (one == two)
-       {
-               return true;
-       }
-
-       if (one == 0x0131 || two == 0x0131)
-       {
-               // On Windows with InvariantCulture, the LATIN SMALL LETTER DOTLESS I (U+0131)
-               // capitalizes to itself, whereas with ICU it capitalizes to LATIN CAPITAL LETTER I (U+0049).
-               // We special case it to match the Windows invariant behavior.
-               return false;
-       }
-
-       return u_toupper(one) == u_toupper(two);
+    // Return whether the two characters are identical or would be identical if they were upper-cased.
+
+    if (one == two)
+    {
+        return true;
+    }
+
+    if (one == 0x0131 || two == 0x0131)
+    {
+        // On Windows with InvariantCulture, the LATIN SMALL LETTER DOTLESS I (U+0131)
+        // capitalizes to itself, whereas with ICU it capitalizes to LATIN CAPITAL LETTER I (U+0049).
+        // We special case it to match the Windows invariant behavior.
+        return false;
+    }
+
+    return u_toupper(one) == u_toupper(two);
 }
 
 /*
@@ -154,53 +154,48 @@ Function:
 IndexOfOrdinalIgnoreCase
 */
 extern "C" int32_t
-IndexOfOrdinalIgnoreCase(const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength)
+IndexOfOrdinalIgnoreCase(
+    const UChar* lpTarget, int32_t cwTargetLength, 
+    const UChar* lpSource, int32_t cwSourceLength, 
+    int32_t findLast)
 {
-       int32_t endIndex = cwSourceLength - cwTargetLength;
-       assert(endIndex >= 0);
-
-       for (int32_t i = 0; i <= endIndex; i++)
-       {
-               int32_t targetIdx = 0;
-               for (int32_t srcIdx = i; targetIdx < cwTargetLength; srcIdx++, targetIdx++) {
-                       if (!AreEqualOrdinalIgnoreCase(lpSource[srcIdx], lpTarget[targetIdx])) {
-                               break;
-                       }
-               }
-
-               if (targetIdx == cwTargetLength) {
-                       return i;
-               }
-       }
-
-       return -1;
-}
+    int32_t result = -1;
 
-/*
-Function:
-LastIndexOfOrdinalIgnoreCase
-*/
-extern "C" int32_t
-LastIndexOfOrdinalIgnoreCase(const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength)
-{
-       int32_t endIndex = cwSourceLength - cwTargetLength;
-       assert(endIndex >= 0);
-
-       for (int32_t i = endIndex; i >= 0; i--)
-       {
-               int32_t targetIdx = 0;
-               for (int32_t srcIdx = i; targetIdx < cwTargetLength; srcIdx++, targetIdx++) {
-                       if (!AreEqualOrdinalIgnoreCase(lpSource[srcIdx], lpTarget[targetIdx])) {
-                               break;
-                       }
-               }
-
-               if (targetIdx == cwTargetLength) {
-                       return i;
-               }
-       }
-
-       return -1;
+    int32_t endIndex = cwSourceLength - cwTargetLength;
+    assert(endIndex >= 0);
+
+    int32_t i = 0;
+    while (i <= endIndex)
+    {
+        int32_t srcIdx = i, trgIdx = 0;
+        const UChar *src = lpSource, *trg = lpTarget;
+        UChar32 srcCodepoint, trgCodepoint;
+
+        bool match = true;
+        while (trgIdx < cwTargetLength)
+        {
+            U16_NEXT(src, srcIdx, cwSourceLength, srcCodepoint);
+            U16_NEXT(trg, trgIdx, cwTargetLength, trgCodepoint);
+            if (!AreEqualOrdinalIgnoreCase(srcCodepoint, trgCodepoint))
+            {
+                match = false; 
+                break;
+            }
+        }
+
+        if (match) 
+        {
+            result = i;
+            if (!findLast)
+            {
+                break;
+            }
+        }
+
+        U16_FWD_1(lpSource, i, cwSourceLength);
+    }
+
+    return result;
 }
 
 /*
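diff --git a/src/coreclr/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs b/src/coreclr/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs
index 1a3654e..c236c03 100644
--- a/src/coreclr/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs
+++ b/src/coreclr/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs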
@@ -19,10 +19,7 @@ internal static partial class Interop
         internal unsafe static extern int LastIndexOf(byte[] localeName, string target, char* pSource, int cwSourceLength, CompareOptions options);
 
         [DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)]
-        internal unsafe static extern int IndexOfOrdinalIgnoreCase(string target, int cwTargetLength, char* pSource, int cwSourceLength);
-
-        [DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)]
-        internal unsafe static extern int LastIndexOfOrdinalIgnoreCase(string target, int cwTargetLength, char* pSource, int cwSourceLength);
+        internal unsafe static extern int IndexOfOrdinalIgnoreCase(string target, int cwTargetLength, char* pSource, int cwSourceLength, bool findLast);
 
         [DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)]
         [return: MarshalAs(UnmanagedType.Bool)]
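diff --git a/src/coreclr/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs b/src/coreclr/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs
index 8df1d79..9a2a35c 100644
--- a/src/coreclr/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs
+++ b/src/coreclr/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs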
@@ -39,7 +39,7 @@ namespace System.Globalization
             {
                 fixed (char* pSource = source)
                 {
-                    int index = Interop.GlobalizationInterop.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + startIndex, count);
+                    int index = Interop.GlobalizationInterop.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + startIndex, count, findLast: false);
                     return index != -1 ?
                         startIndex + index :
                         -1;
@@ -88,7 +88,7 @@ namespace System.Globalization
             {
                 fixed (char* pSource = source)
                 {
-                    int lastIndex = Interop.GlobalizationInterop.LastIndexOfOrdinalIgnoreCase(value, value.Length, pSource + leftStartIndex, count);
+                    int lastIndex = Interop.GlobalizationInterop.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + leftStartIndex, count, findLast: true);
                     return lastIndex != -1 ?
                         leftStartIndex + lastIndex :
                         -1;