Fix String.LastIndexOf with string has zero sort weights characters (#56504)
authorTarek Mahmoud Sayed <tarekms@microsoft.com>
Sat, 31 Jul 2021 01:06:08 +0000 (18:06 -0700)
committerGitHub <noreply@github.com>
Sat, 31 Jul 2021 01:06:08 +0000 (18:06 -0700)
* Fix String.LastIndexOf with string has zero sort weights characters

src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.LastIndexOf.cs

index 4a8d5bc..0e0a46c 100644 (file)
@@ -839,11 +839,35 @@ int32_t GlobalizationNative_LastIndexOf(
 
     result = usearch_last(pSearch, &err);
 
-    // if the search was successful,
-    // we'll try to get the matched string length.
-    if (result != USEARCH_DONE && pMatchedLength != NULL)
+    // if the search was successful, we'll try to get the matched string length.
+    if (result != USEARCH_DONE)
     {
-        *pMatchedLength = usearch_getMatchedLength(pSearch);
+        int32_t matchLength = -1;
+
+        if (pMatchedLength != NULL)
+        {
+            matchLength = usearch_getMatchedLength(pSearch);
+            *pMatchedLength = matchLength;
+        }
+
+        // In case the search result is pointing at the last character (including Surrogate case) of the source string, we need to check if the target string
+        // was constructed with characters which have no sort weights. The way we do that is to check that the matched length is 0.
+        // We need to update the returned index to have consistent behavior with Ordinal and NLS operations, and satisfy the condition:
+        //      index = source.LastIndexOf(value, comparisonType);
+        //      originalString.Substring(index).StartsWith(value, comparisonType) == true.
+        // https://github.com/dotnet/runtime/issues/13383
+        if (result >= cwSourceLength - 2)
+        {
+            if (pMatchedLength == NULL)
+            {
+                matchLength = usearch_getMatchedLength(pSearch);
+            }
+
+            if (matchLength == 0)
+            {
+                result = cwSourceLength;
+            }
+        }
     }
 
     RestoreSearchHandle(pSortHandle, pSearch, searchCacheSlot);
index 060648e..a400ded 100644 (file)
@@ -92,7 +92,13 @@ namespace System.Globalization.Tests
             // ICU matches weightless characters at 1 index prior to the end of the string
             yield return new object[] { s_invariantCompare, "", "\u200d", 0, 0, CompareOptions.None, 0, 0 };
             yield return new object[] { s_invariantCompare, "", "\u200d", -1, 0, CompareOptions.None, 0, 0 };
-            yield return new object[] { s_invariantCompare, "hello", "\u200d", 4, 5, CompareOptions.IgnoreCase, useNls ? 5 : 4 , 0};
+            yield return new object[] { s_invariantCompare, "hello", "\u200d", 4, 5, CompareOptions.IgnoreCase, 5, 0};
+            yield return new object[] { s_invariantCompare, "hello", "\0", 4, 5, CompareOptions.None, useNls ? -1 : 5, 0};
+
+            yield return new object[] { s_invariantCompare, "A\u0303", "\u200d", 1, 2, CompareOptions.None, 2, 0}; // A +  ̃ = Ã
+            yield return new object[] { s_invariantCompare, "A\u0303\u200D", "\u200d", 2, 3, CompareOptions.None, 3, 0}; // A +  ̃ = Ã
+            yield return new object[] { s_invariantCompare, "\u0001F601", "\u200d", 1, 2, CompareOptions.None, 2, 0}; // \u0001F601 is GRINNING FACE WITH SMILING EYES surrogate character
+            yield return new object[] { s_invariantCompare, "AA\u200DA", "\u200d", 3, 4, CompareOptions.None, 4, 0};
 
             // Ignore symbols
             yield return new object[] { s_invariantCompare, "More Test's", "Tests", 10, 11, CompareOptions.IgnoreSymbols, 5, 6 };