[browser][non-icu] `HybridGlobalization` indexing (#85254)
authorIlona Tomkowicz <32700855+ilonatommy@users.noreply.github.com>
Thu, 18 May 2023 12:39:34 +0000 (14:39 +0200)
committerGitHub <noreply@github.com>
Thu, 18 May 2023 12:39:34 +0000 (14:39 +0200)
* A bit faster version of indexing. WIP

* Tiny speedup.

* Fixed IndexOf, ToDo: LastIndexOf.

* All tests pass.

* Updated docs.

* Update docs.

* Slicing + saving previous absolute index instead of pushing the iterator to the start position.

* Refactored.

* Fixed tests on browser.

* Str1 and str2 was confusing.

* Fix CI- correctly trimming Hybrid properties.

* Previous commit should target only Browser.

* Applied @mkhamoyan's suggestion to avoid code duplication.

* Applied @pavelsavara's review.

* Get rid of build errors.

* Revert.

19 files changed:
docs/design/features/hybrid-globalization.md
docs/workflow/trimming/feature-switches.md
src/libraries/Common/src/Interop/Browser/Interop.CompareInfo.cs
src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IndexOf.cs
src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsPrefix.cs
src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.LastIndexOf.cs
src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.cs
src/libraries/System.Globalization/tests/Hybrid/Hybrid.WASM.Tests.csproj
src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Browser.xml [new file with mode: 0644]
src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs
src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.WebAssembly.cs
src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.cs
src/libraries/System.Runtime/tests/TrimmingTests/System.Runtime.TrimmingTests.proj
src/mono/sample/wasm/browser-bench/String.cs
src/mono/wasm/runtime/corebindings.c
src/mono/wasm/runtime/es6/dotnet.es6.lib.js
src/mono/wasm/runtime/exports-linker.ts
src/mono/wasm/runtime/hybrid-globalization.ts

index 629d68e..d9f7e30 100644 (file)
@@ -198,3 +198,50 @@ Web API does not expose locale-sensitive endsWith/startsWith function. As a work
 
 - `IgnoreSymbols`
 Only comparisons that do not skip character types are allowed. E.g. `IgnoreSymbols` skips symbol-chars in comparison/indexing. All `CompareOptions` combinations that include `IgnoreSymbols` throw `PlatformNotSupportedException`.
+
+
+**String indexing**
+
+Affected public APIs:
+- CompareInfo.IndexOf
+- CompareInfo.LastIndexOf
+- String.IndexOf
+- String.LastIndexOf
+
+Web API does not expose locale-sensitive indexing function. There is a discussion on adding it: https://github.com/tc39/ecma402/issues/506. In the current state, as a workaround, locale-sensitive string segmenter combined with locale-sensitive comparison is used. This approach, beyond having the same compare option limitations as described under **String comparison**, has additional limitations connected with the workaround used. Information about additional limitations:
+
+- Support depends on [`Intl.segmenter's support`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Segmenter#browser_compatibility).
+
+- `IgnoreSymbols`
+
+Only comparisons that ignore types of characters but do not skip them are allowed. E.g. `IgnoreCase` ignores type (case) of characters but `IgnoreSymbols` skips symbol-chars in comparison/indexing. All `CompareOptions` combinations that include `IgnoreSymbols` throw `PlatformNotSupportedException`.
+
+- Some letters consist of more than one grapheme.
+
+Using locale-sensitive segmenter `Intl.Segmenter(locale, { granularity: "grapheme" })` does not guarantee that string will be segmented by letters but by graphemes. E.g. in `cs-CZ` and `sk-SK` "ch" is 1 letter, 2 graphemes. The following code with `HybridGlobalization` switched off returns -1 (not found) while with `HybridGlobalization` switched on, it returns 1.
+
+``` C#
+new CultureInfo("sk-SK").CompareInfo.IndexOf("ch", "h"); // -1 or 1
+```
+
+- Some graphemes consist of more than one character.
+E.g. `\r\n` that represents two characters in C#, is treated as one grapheme by the segmenter:
+
+``` JS
+const segmenter = new Intl.Segmenter(undefined, { granularity: "grapheme" });
+Array.from(segmenter.segment("\r\n")) // {segment: '\r\n', index: 0, input: '\r\n'}
+```
+
+Because we are comparing grapheme-by-grapheme, character `\r` or character `\n` will not be found in `\r\n` string when `HybridGlobalization` is switched on.
+
+- Some graphemes have multi-grapheme equivalents.
+E.g. in `de-DE` ß (%u00DF) is one letter and one grapheme and "ss" is one letter and is recognized as two graphemes. Web API's equivalent of `IgnoreNonSpace` treats them as the same letter when comparing. Similar case: dz (%u01F3) and dz.
+``` JS
+"ß".localeCompare("ss", "de-DE", { sensitivity: "case" }); // 0
+```
+
+Using `IgnoreNonSpace` for these two with `HybridGlobalization` off, also returns 0 (they are equal). However, the workaround used in `HybridGlobalization` will compare them grapheme-by-grapheme and will return -1.
+
+``` C#
+new CultureInfo("de-DE").CompareInfo.IndexOf("strasse", "stra\u00DFe", 0, CompareOptions.IgnoreNonSpace); // 0 or -1
+```
index 0aa4429..2fda4b2 100644 (file)
@@ -14,6 +14,7 @@ configurations but their defaults might vary as any SDK can set the defaults dif
 | EventSourceSupport | System.Diagnostics.Tracing.EventSource.IsSupported | Any EventSource related code or logic is trimmed when set to false |
 | InvariantGlobalization | System.Globalization.Invariant | All globalization specific code and data is trimmed when set to true |
 | PredefinedCulturesOnly | System.Globalization.PredefinedCulturesOnly |  Don't allow creating a culture for which the platform does not have data |
+| HybridGlobalization | System.Globalization.Hybrid |  Properties connected with the mixed: platform-specific + icu-based globalization will be trimmed  |
 | UseSystemResourceKeys | System.Resources.UseSystemResourceKeys |  Any localizable resources for system assemblies is trimmed when set to true |
 | HttpActivityPropagationSupport | System.Net.Http.EnableActivityPropagation | Any dependency related to diagnostics support for System.Net.Http is trimmed when set to false |
 | UseNativeHttpHandler | System.Net.Http.UseNativeHttpHandler | HttpClient uses by default platform native implementation of HttpMessageHandler if set to true. |
index 5294d84..a536d9d 100644 (file)
@@ -15,5 +15,8 @@ internal static partial class Interop
 
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         internal static extern unsafe bool EndsWith(out string exceptionMessage, in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options);
+
+        [MethodImplAttribute(MethodImplOptions.InternalCall)]
+        internal static extern unsafe int IndexOf(out string exceptionMessage, in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options, bool fromBeginning);
     }
 }
index 256e394..55e3da3 100644 (file)
@@ -33,13 +33,16 @@ namespace System.Globalization.Tests
             yield return new object[] { s_invariantCompare, "foobardzsdzs", "rddzs", 0, 12, CompareOptions.Ordinal, -1, 0 };
 
             // Slovak
-            yield return new object[] { s_slovakCompare, "ch", "h", 0, 2, CompareOptions.None, -1, 0 };
-            // Android has its own ICU, which doesn't work well with slovak
-            if (!PlatformDetection.IsAndroid && !PlatformDetection.IsLinuxBionic)
+            if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
             {
-                yield return new object[] { s_slovakCompare, "chodit hore", "HO", 0, 11, CompareOptions.IgnoreCase, 7, 2 };
+                yield return new object[] { s_slovakCompare, "ch", "h", 0, 2, CompareOptions.None, -1, 0 };
+                 // Android has its own ICU, which doesn't work well with slovak
+                if (!PlatformDetection.IsAndroid && !PlatformDetection.IsLinuxBionic)
+                {
+                    yield return new object[] { s_slovakCompare, "chodit hore", "HO", 0, 11, CompareOptions.IgnoreCase, 7, 2 };
+                }
+                yield return new object[] { s_slovakCompare, "chh", "h", 0, 3, CompareOptions.None, 2, 1 };
             }
-            yield return new object[] { s_slovakCompare, "chh", "h", 0, 3, CompareOptions.None, 2, 1 };
 
             // Turkish
             // Android has its own ICU, which doesn't work well with tr
@@ -63,16 +66,24 @@ namespace System.Globalization.Tests
             yield return new object[] { s_invariantCompare, "Exhibit \u00C0", "a\u0300", 0, 9, CompareOptions.IgnoreCase, 8, 1 };
             yield return new object[] { s_invariantCompare, "Exhibit \u00C0", "a\u0300", 0, 9, CompareOptions.OrdinalIgnoreCase, -1, 0 };
             yield return new object[] { s_invariantCompare, "FooBar", "Foo\u0400Bar", 0, 6, CompareOptions.Ordinal, -1, 0 };
-            yield return new object[] { s_invariantCompare, "TestFooBA\u0300R", "FooB\u00C0R", 0, 11, CompareOptions.IgnoreNonSpace, 4, 7 };
+            yield return new object[] { s_invariantCompare, "TestFooBA\u0300R", "FooB\u00C0R", 0, 11, supportedIgnoreNonSpaceOption, 4, 7 };
             yield return new object[] { s_invariantCompare, "o\u0308", "o", 0, 2, CompareOptions.None, -1, 0 };
-            yield return new object[] { s_invariantCompare, "\r\n", "\n", 0, 2, CompareOptions.None, 1, 1 };
+            if (PlatformDetection.IsHybridGlobalizationOnBrowser)
+            {
+                yield return new object[] { s_invariantCompare, "\r\n", "\n", 0, 2, CompareOptions.None, -1, 0 };
+            }
+            else
+            {
+                yield return new object[] { s_invariantCompare, "\r\n", "\n", 0, 2, CompareOptions.None, 1, 1 };
+            }
 
             // Weightless characters
             yield return new object[] { s_invariantCompare, "", "\u200d", 0, 0, CompareOptions.None, 0, 0 };
             yield return new object[] { s_invariantCompare, "hello", "\u200d", 1, 3, CompareOptions.IgnoreCase, 1, 0 };
 
             // Ignore symbols
-            yield return new object[] { s_invariantCompare, "More Test's", "Tests", 0, 11, CompareOptions.IgnoreSymbols, 5, 6 };
+            if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
+                yield return new object[] { s_invariantCompare, "More Test's", "Tests", 0, 11, CompareOptions.IgnoreSymbols, 5, 6 };
             yield return new object[] { s_invariantCompare, "More Test's", "Tests", 0, 11, CompareOptions.None, -1, 0 };
             yield return new object[] { s_invariantCompare, "cbabababdbaba", "ab", 0, 13, CompareOptions.None, 2, 2 };
 
@@ -127,19 +138,22 @@ namespace System.Globalization.Tests
             }
 
             // Inputs where matched length does not equal value string length
-            yield return new object[] { s_invariantCompare, "abcdzxyz", "\u01F3", 0, 8, CompareOptions.IgnoreNonSpace, 3, 2 };
-            yield return new object[] { s_invariantCompare, "abc\u01F3xyz", "dz", 0, 7, CompareOptions.IgnoreNonSpace, 3, 1 };
-            yield return new object[] { s_germanCompare, "abc Strasse Strasse xyz", "stra\u00DFe", 0, 23, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, 4, 7 };
-            yield return new object[] { s_germanCompare, "abc Strasse Strasse xyz", "xtra\u00DFe", 0, 23, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, -1, 0 };
-            yield return new object[] { s_germanCompare, "abc stra\u00DFe stra\u00DFe xyz", "Strasse", 0, 21, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, 4, 6 };
-            yield return new object[] { s_germanCompare, "abc stra\u00DFe stra\u00DFe xyz", "Xtrasse", 0, 21, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, -1, 0 };
+            if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
+            {
+                yield return new object[] { s_germanCompare, "abc Strasse Strasse xyz", "stra\u00DFe", 0, 23, supportedIgnoreCaseIgnoreNonSpaceOptions, 4, 7 };
+                yield return new object[] { s_germanCompare, "abc stra\u00DFe stra\u00DFe xyz", "Strasse", 0, 21, supportedIgnoreCaseIgnoreNonSpaceOptions, 4, 6 };
+                yield return new object[] { s_invariantCompare, "abcdzxyz", "\u01F3", 0, 8, supportedIgnoreNonSpaceOption, 3, 2 };
+                yield return new object[] { s_invariantCompare, "abc\u01F3xyz", "dz", 0, 7, supportedIgnoreNonSpaceOption, 3, 1 };
+            }
+            yield return new object[] { s_germanCompare, "abc Strasse Strasse xyz", "xtra\u00DFe", 0, 23, supportedIgnoreCaseIgnoreNonSpaceOptions, -1, 0 };
+            yield return new object[] { s_germanCompare, "abc stra\u00DFe stra\u00DFe xyz", "Xtrasse", 0, 21, supportedIgnoreCaseIgnoreNonSpaceOptions, -1, 0 };
         }
 
         public static IEnumerable<object[]> IndexOf_Aesc_Ligature_TestData()
         {
             bool useNls = PlatformDetection.IsNlsGlobalization;
             // Searches for the ligature \u00C6
-            string source1 = "Is AE or ae the same as \u00C6 or \u00E6?";
+            string source1 = "Is AE or ae the same as \u00C6 or \u00E6?"; // 3 failures here
             yield return new object[] { s_invariantCompare, source1, "AE", 8, 18, CompareOptions.None, useNls ? 24 : -1, useNls ? 1 : 0};
             yield return new object[] { s_invariantCompare, source1, "ae", 8, 18, CompareOptions.None, 9 , 2};
             yield return new object[] { s_invariantCompare, source1, "\u00C6", 8, 18, CompareOptions.None, 24, 1 };
@@ -157,7 +171,7 @@ namespace System.Globalization.Tests
         public static IEnumerable<object[]> IndexOf_U_WithDiaeresis_TestData()
         {
             // Searches for the combining character sequence Latin capital letter U with diaeresis or Latin small letter u with diaeresis.
-            string source = "Is \u0055\u0308 or \u0075\u0308 the same as \u00DC or \u00FC?";
+            string source = "Is \u0055\u0308 or \u0075\u0308 the same as \u00DC or \u00FC?"; // 7 failures here
             yield return new object[] { s_invariantCompare, source, "U\u0308", 8, 18, CompareOptions.None, 24, 1 };
             yield return new object[] { s_invariantCompare, source, "u\u0308", 8, 18, CompareOptions.None, 9, 2 };
             yield return new object[] { s_invariantCompare, source, "\u00DC", 8, 18, CompareOptions.None, 24, 1 };
@@ -229,8 +243,11 @@ namespace System.Globalization.Tests
                 valueBoundedMemory.MakeReadonly();
 
                 Assert.Equal(expected, compareInfo.IndexOf(sourceBoundedMemory.Span, valueBoundedMemory.Span, options));
-                Assert.Equal(expected, compareInfo.IndexOf(sourceBoundedMemory.Span, valueBoundedMemory.Span, options, out int actualMatchLength));
-                Assert.Equal(expectedMatchLength, actualMatchLength);
+                if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
+                {
+                    Assert.Equal(expected, compareInfo.IndexOf(sourceBoundedMemory.Span, valueBoundedMemory.Span, options, out int actualMatchLength));
+                    Assert.Equal(expectedMatchLength, actualMatchLength);
+                }
 
                 if (TryCreateRuneFrom(value, out Rune rune))
                 {
@@ -273,7 +290,7 @@ namespace System.Globalization.Tests
             bool useNls = PlatformDetection.IsNlsGlobalization;
             int expectedMatchLength = (useNls) ? 6 : 0;
             IndexOf_String(s_invariantCompare, "FooBar", "Foo\uFFFFBar", 0, 6, CompareOptions.None, useNls ? 0 : -1, expectedMatchLength);
-            IndexOf_String(s_invariantCompare, "~FooBar", "Foo\uFFFFBar", 0, 7, CompareOptions.IgnoreNonSpace, useNls ? 1 : -1, expectedMatchLength);
+            IndexOf_String(s_invariantCompare, "~FooBar", "Foo\uFFFFBar", 0, 7, supportedIgnoreNonSpaceOption, useNls ? 1 : -1, expectedMatchLength);
         }
 
         [Fact]
index 02b959e..1c8a242 100644 (file)
@@ -79,7 +79,7 @@ namespace System.Globalization.Tests
 
             // Platform differences
             // in HybridGlobalization on Browser we use TextEncoder that is not supported for v8 and the manual decoding works like NLS
-            bool behavesLikeNls = PlatformDetection.IsNlsGlobalization || 
+            bool behavesLikeNls = PlatformDetection.IsNlsGlobalization ||
                 (PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsBrowserDomSupportedOrNodeJS);
             if (behavesLikeNls)
             {
@@ -114,9 +114,6 @@ namespace System.Globalization.Tests
             {
                 yield return new object[] { s_invariantCompare, "dzxyz", "\u01F3", supportedIgnoreNonSpaceOption, true, 2 };
                 yield return new object[] { s_invariantCompare, "\u01F3xyz", "dz", supportedIgnoreNonSpaceOption, true, 1 };
-            }
-            if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
-            {
                 yield return new object[] { s_germanCompare, "Strasse xyz", "stra\u00DFe", supportedIgnoreCaseIgnoreNonSpaceOptions, true, 7 };
                 yield return new object[] { s_germanCompare, "stra\u00DFe xyz", "Strasse", supportedIgnoreCaseIgnoreNonSpaceOptions, true, 6 };
             }
index 3ef9b91..76646ed 100644 (file)
@@ -49,11 +49,12 @@ namespace System.Globalization.Tests
             // Slovak
             yield return new object[] { s_slovakCompare, "ch", "h", 0, 1, CompareOptions.None, -1, 0 };
             // Android has its own ICU, which doesn't work well with slovak
-            if (!PlatformDetection.IsAndroid && !PlatformDetection.IsLinuxBionic)
+            if (!PlatformDetection.IsAndroid && !PlatformDetection.IsLinuxBionic && !PlatformDetection.IsHybridGlobalizationOnBrowser)
             {
                 yield return new object[] { s_slovakCompare, "hore chodit", "HO", 11, 12, CompareOptions.IgnoreCase, 0, 2 };
             }
-            yield return new object[] { s_slovakCompare, "chh", "h", 2, 2, CompareOptions.None, 2, 1 };
+            if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
+                yield return new object[] { s_slovakCompare, "chh", "h", 2, 2, CompareOptions.None, 2, 1 };
 
             // Turkish
             // Android has its own ICU, which doesn't work well with tr
@@ -78,9 +79,16 @@ namespace System.Globalization.Tests
             yield return new object[] { s_invariantCompare, "Exhibit \u00C0", "a\u0300", 8, 9, CompareOptions.OrdinalIgnoreCase, -1, 0 };
             yield return new object[] { s_invariantCompare, "Exhibit \u00C0", "a\u0300", 8, 9, CompareOptions.Ordinal, -1, 0 };
             yield return new object[] { s_invariantCompare, "FooBar", "Foo\u0400Bar", 5, 6, CompareOptions.Ordinal, -1, 0 };
-            yield return new object[] { s_invariantCompare, "TestFooBA\u0300R", "FooB\u00C0R", 10, 11, CompareOptions.IgnoreNonSpace, 4, 7 };
+            yield return new object[] { s_invariantCompare, "TestFooBA\u0300R", "FooB\u00C0R", 10, 11, supportedIgnoreNonSpaceOption, 4, 7 };
             yield return new object[] { s_invariantCompare, "o\u0308", "o", 1, 2, CompareOptions.None, -1, 0 };
-            yield return new object[] { s_invariantCompare, "\r\n", "\n", 1, 2, CompareOptions.None, 1, 1 };
+            if (PlatformDetection.IsHybridGlobalizationOnBrowser)
+            {
+                yield return new object[] { s_invariantCompare, "\r\n", "\n", 1, 2, CompareOptions.None, -1, 0 };
+            }
+            else
+            {
+                yield return new object[] { s_invariantCompare, "\r\n", "\n", 1, 1, CompareOptions.None, 1, 1 };
+            }
 
             // Weightless characters
             // NLS matches weightless characters at the end of the string
@@ -96,7 +104,8 @@ namespace System.Globalization.Tests
             yield return new object[] { s_invariantCompare, "AA\u200DA", "\u200d", 3, 4, CompareOptions.None, 4, 0};
 
             // Ignore symbols
-            yield return new object[] { s_invariantCompare, "More Test's", "Tests", 10, 11, CompareOptions.IgnoreSymbols, 5, 6 };
+            if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
+                yield return new object[] { s_invariantCompare, "More Test's", "Tests", 10, 11, CompareOptions.IgnoreSymbols, 5, 6 };
             yield return new object[] { s_invariantCompare, "More Test's", "Tests", 10, 11, CompareOptions.None, -1, 0 };
             yield return new object[] { s_invariantCompare, "cbabababdbaba", "ab", 12, 13, CompareOptions.None, 10, 2 };
 
@@ -111,12 +120,15 @@ namespace System.Globalization.Tests
             }
 
             // Inputs where matched length does not equal value string length
-            yield return new object[] { s_invariantCompare, "abcdzxyz", "\u01F3", 7, 8, CompareOptions.IgnoreNonSpace, 3, 2 };
-            yield return new object[] { s_invariantCompare, "abc\u01F3xyz", "dz", 6, 7, CompareOptions.IgnoreNonSpace, 3, 1 };
-            yield return new object[] { s_germanCompare, "abc Strasse Strasse xyz", "stra\u00DFe", 22, 23, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, 12, 7 };
-            yield return new object[] { s_germanCompare, "abc Strasse Strasse xyz", "xtra\u00DFe", 22, 23, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, -1, 0 };
-            yield return new object[] { s_germanCompare, "abc stra\u00DFe stra\u00DFe xyz", "Strasse", 20, 21, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, 11, 6 };
-            yield return new object[] { s_germanCompare, "abc stra\u00DFe stra\u00DFe xyz", "Xtrasse", 20, 21, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, -1, 0 };
+            if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
+            {
+                yield return new object[] { s_germanCompare, "abc Strasse Strasse xyz", "stra\u00DFe", 22, 23, supportedIgnoreCaseIgnoreNonSpaceOptions, 12, 7 };
+                yield return new object[] { s_germanCompare, "abc stra\u00DFe stra\u00DFe xyz", "Strasse", 20, 21, supportedIgnoreCaseIgnoreNonSpaceOptions, 11, 6 };
+                yield return new object[] { s_invariantCompare, "abcdzxyz", "\u01F3", 7, 8, supportedIgnoreNonSpaceOption, 3, 2 };
+                yield return new object[] { s_invariantCompare, "abc\u01F3xyz", "dz", 6, 7, supportedIgnoreNonSpaceOption, 3, 1 };
+            }
+            yield return new object[] { s_germanCompare, "abc Strasse Strasse xyz", "xtra\u00DFe", 22, 23, supportedIgnoreCaseIgnoreNonSpaceOptions, -1, 0 };
+            yield return new object[] { s_germanCompare, "abc stra\u00DFe stra\u00DFe xyz", "Xtrasse", 20, 21, supportedIgnoreCaseIgnoreNonSpaceOptions, -1, 0 };
         }
 
         public static IEnumerable<object[]> LastIndexOf_Aesc_Ligature_TestData()
@@ -241,8 +253,11 @@ namespace System.Globalization.Tests
                 valueBoundedMemory.MakeReadonly();
 
                 Assert.Equal(expected, compareInfo.LastIndexOf(sourceBoundedMemory.Span, valueBoundedMemory.Span, options));
-                Assert.Equal(expected, compareInfo.LastIndexOf(sourceBoundedMemory.Span, valueBoundedMemory.Span, options, out int actualMatchLength));
-                Assert.Equal(expectedMatchLength, actualMatchLength);
+                if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
+                {
+                    Assert.Equal(expected, compareInfo.LastIndexOf(sourceBoundedMemory.Span, valueBoundedMemory.Span, options, out int actualMatchLength));
+                    Assert.Equal(expectedMatchLength, actualMatchLength);
+                }
 
                 if (TryCreateRuneFrom(value, out Rune rune))
                 {
@@ -292,7 +307,7 @@ namespace System.Globalization.Tests
             bool useNls = PlatformDetection.IsNlsGlobalization;
             int expectedMatchLength = (useNls) ? 6 : 0;
             LastIndexOf_String(s_invariantCompare, "FooBar", "Foo\uFFFFBar", 5, 6, CompareOptions.None, useNls ? 0 : -1, expectedMatchLength);
-            LastIndexOf_String(s_invariantCompare, "~FooBar", "Foo\uFFFFBar", 6, 7, CompareOptions.IgnoreNonSpace, useNls ? 1 : -1, expectedMatchLength);
+            LastIndexOf_String(s_invariantCompare, "~FooBar", "Foo\uFFFFBar", 6, 7, supportedIgnoreNonSpaceOption, useNls ? 1 : -1, expectedMatchLength);
         }
 
         [Fact]
index 9c8dc72..8ca3113 100644 (file)
@@ -518,7 +518,7 @@ namespace System.Globalization.Tests
                 Assert.Equal(expected && !char.IsSurrogate(c), CompareInfo.IsSortable(c));
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotHybridGlobalizationOnBrowser))]
         public void VersionTest()
         {
             SortVersion sv1 = CultureInfo.GetCultureInfo("en-US").CompareInfo.Version;
index b8a1948..0d842f4 100644 (file)
@@ -6,11 +6,13 @@
     <HybridGlobalization>true</HybridGlobalization>
   </PropertyGroup>
   <ItemGroup>
+    <Compile Include="..\CompareInfo\CompareInfoTestsBase.cs" />
     <Compile Include="..\System\Globalization\TextInfoTests.cs" />
     <Compile Include="..\CompareInfo\CompareInfoTests.Compare.cs" />
     <Compile Include="..\CompareInfo\CompareInfoTests.cs" />
-    <Compile Include="..\CompareInfo\CompareInfoTestsBase.cs" />
     <Compile Include="..\CompareInfo\CompareInfoTests.IsPrefix.cs" />
     <Compile Include="..\CompareInfo\CompareInfoTests.IsSuffix.cs" />
+    <Compile Include="..\CompareInfo\CompareInfoTests.IndexOf.cs" />
+    <Compile Include="..\CompareInfo\CompareInfoTests.LastIndexOf.cs" />
   </ItemGroup>
 </Project>
diff --git a/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Browser.xml b/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Browser.xml
new file mode 100644 (file)
index 0000000..d40b570
--- /dev/null
@@ -0,0 +1,7 @@
+<linker>
+  <assembly fullname="System.Private.CoreLib">
+    <type fullname="System.Globalization.GlobalizationMode">
+      <method signature="System.Boolean get_Hybrid()" body="stub" value="true" feature="System.Globalization.Hybrid" featurevalue="false" />
+    </type>
+  </assembly>
+</linker>
index e98d8b0..10b27e3 100644 (file)
@@ -57,6 +57,7 @@
     <ILLinkSubstitutionsXmls Include="$(ILLinkSharedDirectory)ILLink.Substitutions.NoArmIntrinsics.xml" Condition="'$(SupportsArmIntrinsics)' != 'true'" />
     <ILLinkSubstitutionsXmls Include="$(ILLinkSharedDirectory)ILLink.Substitutions.NoX86Intrinsics.xml" Condition="'$(SupportsX86Intrinsics)' != 'true'" />
     <ILLinkSubstitutionsXmls Include="$(ILLinkSharedDirectory)ILLink.Substitutions.OSX.xml" Condition="'$(IsOSXLike)' == 'true'" />
+    <ILLinkSubstitutionsXmls Include="$(ILLinkSharedDirectory)ILLink.Substitutions.Browser.xml" Condition="'$(TargetsBrowser)' == 'true'" />
     <ILLinkLinkAttributesXmls Include="$(ILLinkSharedDirectory)ILLink.LinkAttributes.Shared.xml" />
   </ItemGroup>
   <PropertyGroup>
index 0496d93..a31c6a7 100644 (file)
@@ -21,24 +21,24 @@ namespace System.Globalization
 
         private void IcuInitSortHandle(string interopCultureName)
         {
-            if (GlobalizationMode.Invariant)
-            {
-                _isAsciiEqualityOrdinal = true;
-            }
-            else
-            {
-                Debug.Assert(!GlobalizationMode.UseNls);
-                Debug.Assert(interopCultureName != null);
-
-                // Inline the following condition to avoid potential implementation cycles within globalization
-                //
-                // _isAsciiEqualityOrdinal = _sortName == "" || _sortName == "en" || _sortName.StartsWith("en-", StringComparison.Ordinal);
-                //
-                _isAsciiEqualityOrdinal = _sortName.Length == 0 ||
-                    (_sortName.Length >= 2 && _sortName[0] == 'e' && _sortName[1] == 'n' && (_sortName.Length == 2 || _sortName[2] == '-'));
+            _isAsciiEqualityOrdinal = GetIsAsciiEqualityOrdinal(interopCultureName);
+            if (!GlobalizationMode.Invariant)
+                 _sortHandle = SortHandleCache.GetCachedSortHandle(interopCultureName);
+        }
 
-                _sortHandle = SortHandleCache.GetCachedSortHandle(interopCultureName);
-            }
+        private bool GetIsAsciiEqualityOrdinal(string interopCultureName)
+        {
+            if (GlobalizationMode.Invariant)
+                return true;
+            Debug.Assert(!GlobalizationMode.UseNls);
+            Debug.Assert(interopCultureName != null);
+
+            // Inline the following condition to avoid potential implementation cycles within globalization
+            //
+            // _isAsciiEqualityOrdinal = _sortName == "" || _sortName == "en" || _sortName.StartsWith("en-", StringComparison.Ordinal);
+            //
+            return _sortName.Length == 0 ||
+                (_sortName.Length >= 2 && _sortName[0] == 'e' && _sortName[1] == 'n' && (_sortName.Length == 2 || _sortName[2] == '-'));
         }
 
         private unsafe int IcuCompareString(ReadOnlySpan<char> string1, ReadOnlySpan<char> string2, CompareOptions options)
@@ -185,6 +185,17 @@ namespace System.Globalization
                 return -1;
 
             InteropCall:
+#if TARGET_BROWSER
+                if (GlobalizationMode.Hybrid)
+                {
+                    int result = Interop.JsGlobalization.IndexOf(out string exceptionMessage, m_name, b, target.Length, a, source.Length, options, fromBeginning);
+                    if (!string.IsNullOrEmpty(exceptionMessage))
+                    {
+                        throw new Exception(exceptionMessage);
+                    }
+                    return result;
+                }
+#endif
                 if (fromBeginning)
                     return Interop.Globalization.IndexOf(_sortHandle, b, target.Length, a, source.Length, options, matchLengthPtr);
                 else
@@ -275,6 +286,10 @@ namespace System.Globalization
                 return -1;
 
             InteropCall:
+#if TARGET_BROWSER
+                if (GlobalizationMode.Hybrid)
+                    return Interop.JsGlobalization.IndexOf(out string exceptionMessage, m_name, b, target.Length, a, source.Length, options, fromBeginning);
+#endif
                 if (fromBeginning)
                     return Interop.Globalization.IndexOf(_sortHandle, b, target.Length, a, source.Length, options, matchLengthPtr);
                 else
index 474935f..d0258a2 100644 (file)
@@ -8,6 +8,11 @@ namespace System.Globalization
 {
     public partial class CompareInfo
     {
+        private void JsInit(string interopCultureName)
+        {
+            _isAsciiEqualityOrdinal = GetIsAsciiEqualityOrdinal(interopCultureName);
+        }
+
         private static void AssertHybridOnWasm(CompareOptions options)
         {
             Debug.Assert(!GlobalizationMode.Invariant);
@@ -40,16 +45,15 @@ namespace System.Globalization
             string cultureName = m_name;
             AssertComparisonSupported(options, cultureName);
 
-            string exceptionMessage;
             int cmpResult;
             fixed (char* pString1 = &MemoryMarshal.GetReference(string1))
             fixed (char* pString2 = &MemoryMarshal.GetReference(string2))
             {
-                cmpResult = Interop.JsGlobalization.CompareString(out exceptionMessage, cultureName, pString1, string1.Length, pString2, string2.Length, options);
-            }
+                cmpResult = Interop.JsGlobalization.CompareString(out string exceptionMessage, cultureName, pString1, string1.Length, pString2, string2.Length, options);
 
-            if (!string.IsNullOrEmpty(exceptionMessage))
-                throw new Exception(exceptionMessage);
+                if (!string.IsNullOrEmpty(exceptionMessage))
+                    throw new Exception(exceptionMessage);
+            }
 
             return cmpResult;
         }
@@ -61,16 +65,16 @@ namespace System.Globalization
             string cultureName = m_name;
             AssertIndexingSupported(options, cultureName);
 
-            string exceptionMessage;
             bool result;
             fixed (char* pSource = &MemoryMarshal.GetReference(source))
             fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix))
             {
-                result = Interop.JsGlobalization.StartsWith(out exceptionMessage, cultureName, pSource, source.Length, pPrefix, prefix.Length, options);
+                result = Interop.JsGlobalization.StartsWith(out string exceptionMessage, cultureName, pSource, source.Length, pPrefix, prefix.Length, options);
+
+                if (!string.IsNullOrEmpty(exceptionMessage))
+                    throw new Exception(exceptionMessage);
             }
 
-            if (!string.IsNullOrEmpty(exceptionMessage))
-                throw new Exception(exceptionMessage);
 
             return result;
         }
@@ -82,20 +86,48 @@ namespace System.Globalization
             string cultureName = m_name;
             AssertIndexingSupported(options, cultureName);
 
-            string exceptionMessage;
             bool result;
             fixed (char* pSource = &MemoryMarshal.GetReference(source))
             fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix))
             {
-                result = Interop.JsGlobalization.EndsWith(out exceptionMessage, cultureName, pSource, source.Length, pPrefix, prefix.Length, options);
-            }
+                result = Interop.JsGlobalization.EndsWith(out string exceptionMessage, cultureName, pSource, source.Length, pPrefix, prefix.Length, options);
 
-            if (!string.IsNullOrEmpty(exceptionMessage))
-                throw new Exception(exceptionMessage);
+                if (!string.IsNullOrEmpty(exceptionMessage))
+                    throw new Exception(exceptionMessage);
+            }
 
             return result;
         }
 
+        private unsafe int JsIndexOfCore(ReadOnlySpan<char> source, ReadOnlySpan<char> target, CompareOptions options, int* matchLengthPtr, bool fromBeginning)
+        {
+            AssertHybridOnWasm(options);
+            Debug.Assert(!target.IsEmpty);
+            string cultureName = m_name;
+            AssertIndexingSupported(options, cultureName);
+
+            int idx;
+            if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options))
+            {
+                idx = (options & CompareOptions.IgnoreCase) != 0 ?
+                    IndexOfOrdinalIgnoreCaseHelper(source, target, options, matchLengthPtr, fromBeginning) :
+                    IndexOfOrdinalHelper(source, target, options, matchLengthPtr, fromBeginning);
+            }
+            else
+            {
+                fixed (char* pSource = &MemoryMarshal.GetReference(source))
+                fixed (char* pTarget = &MemoryMarshal.GetReference(target))
+                {
+                    idx = Interop.JsGlobalization.IndexOf(out string exceptionMessage, m_name, pTarget, target.Length, pSource, source.Length, options, fromBeginning);
+
+                    if (!string.IsNullOrEmpty(exceptionMessage))
+                        throw new Exception(exceptionMessage);
+                }
+            }
+
+            return idx;
+        }
+
         private static bool IndexingOptionsNotSupported(CompareOptions options) =>
             (options & CompareOptions.IgnoreSymbols) == CompareOptions.IgnoreSymbols;
 
index f205901..2c52633 100644 (file)
@@ -170,6 +170,13 @@ namespace System.Globalization
         {
             _sortName = culture.SortName;
 
+#if TARGET_BROWSER
+            if (GlobalizationMode.Hybrid)
+            {
+                JsInit(culture.InteropName!);
+                return;
+            }
+#endif
             if (GlobalizationMode.UseNls)
             {
                 NlsInitSortHandle();
@@ -1118,6 +1125,10 @@ namespace System.Globalization
         private unsafe int IndexOfCore(ReadOnlySpan<char> source, ReadOnlySpan<char> target, CompareOptions options, int* matchLengthPtr, bool fromBeginning) =>
             GlobalizationMode.UseNls ?
                 NlsIndexOfCore(source, target, options, matchLengthPtr, fromBeginning) :
+#if TARGET_BROWSER
+            GlobalizationMode.Hybrid ?
+                JsIndexOfCore(source, target, options, matchLengthPtr, fromBeginning) :
+#endif
                 IcuIndexOfCore(source, target, options, matchLengthPtr, fromBeginning);
 
         /// <summary>
@@ -1624,6 +1635,12 @@ namespace System.Globalization
                     }
                     else
                     {
+#if TARGET_BROWSER
+                if (GlobalizationMode.Hybrid)
+                {
+                    throw new PlatformNotSupportedException(GetPNSEText("SortVersion"));
+                }
+#endif
                         m_SortVersion = GlobalizationMode.UseNls ? NlsGetSortVersion() : IcuGetSortVersion();
                     }
                 }
index c29e8c3..3ec2681 100644 (file)
     <TestConsoleAppSourceFiles Include="InterfacesOnArrays.cs" />
     <TestConsoleAppSourceFiles Include="InvariantGlobalizationFalse.cs">
       <DisabledFeatureSwitches>System.Globalization.Invariant</DisabledFeatureSwitches>
+      <EnabledFeatureSwitches>System.Globalization.Hybrid</EnabledFeatureSwitches>
+    </TestConsoleAppSourceFiles>
+    <TestConsoleAppSourceFiles Include="InvariantGlobalizationFalse.cs">
+      <DisabledFeatureSwitches>System.Globalization.Invariant</DisabledFeatureSwitches>
     </TestConsoleAppSourceFiles>
     <TestConsoleAppSourceFiles Include="InvariantGlobalizationTrue.cs">
+      <DisabledFeatureSwitches>System.Globalization.Hybrid</DisabledFeatureSwitches>
       <EnabledFeatureSwitches>System.Globalization.Invariant;System.Globalization.PredefinedCulturesOnly</EnabledFeatureSwitches>
     </TestConsoleAppSourceFiles>
     <TestConsoleAppSourceFiles Include="StackFrameHelperTest.cs">
index 16cc2c6..d57ec20 100644 (file)
@@ -1,4 +1,4 @@
-// Licensed to the .NET Foundation under one or more agreements.
+// Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
@@ -28,6 +28,8 @@ namespace Sample
                 new CompareInfoEndsWithMeasurement(),
                 new StringStartsWithMeasurement(),
                 new StringEndsWithMeasurement(),
+                new StringIndexOfMeasurement(),
+                new StringLastIndexOfMeasurement(),
             };
         }
 
@@ -130,19 +132,24 @@ namespace Sample
 
         public abstract class StringsCompare : StringMeasurement
         {
-            protected string strDifferentSuffix;
-            protected string strDifferentPrefix;
+            protected string strAsciiSuffix;
+            protected string strAsciiPrefix;
+            protected string needleSameAsStrEnd;
+            protected string needleSameAsStrStart;
 
             public void InitializeStringsForComparison()
             {
                 InitializeString();
+                needleSameAsStrEnd = new string(new ArraySegment<char>(data, len - 10, 10));
+                needleSameAsStrStart = new string(new ArraySegment<char>(data, 0, 10));
                 // worst case: strings may differ only with the last/first char
                 char originalLastChar = data[len-1];
                 data[len-1] = (char)random.Next(0x80);
-                strDifferentSuffix = new string(data);
+                strAsciiSuffix = new string(data);
+                int middleIdx = (int)(len/2);
                 data[len-1] = originalLastChar;
                 data[0] = (char)random.Next(0x80);
-                strDifferentPrefix = new string(data);
+                strAsciiPrefix = new string(data);
             }
             public override string Name => "Strings Compare Base";
         }
@@ -158,7 +165,7 @@ namespace Sample
                 return Task.CompletedTask;
             }
             public override string Name => "String Compare";
-            public override void RunStep() => string.Compare(str, strDifferentSuffix, cultureInfo, CompareOptions.None);
+            public override void RunStep() => string.Compare(str, strAsciiSuffix, cultureInfo, CompareOptions.None);
         }
 
         public class StringEqualsMeasurement : StringsCompare
@@ -169,7 +176,7 @@ namespace Sample
                 return Task.CompletedTask;
             }
             public override string Name => "String Equals";
-            public override void RunStep() => string.Equals(str, strDifferentSuffix, StringComparison.InvariantCulture);
+            public override void RunStep() => string.Equals(str, strAsciiSuffix, StringComparison.InvariantCulture);
         }
 
         public class CompareInfoCompareMeasurement : StringsCompare
@@ -183,7 +190,7 @@ namespace Sample
                 return Task.CompletedTask;
             }
             public override string Name => "CompareInfo Compare";
-            public override void RunStep() => compareInfo.Compare(str, strDifferentSuffix);
+            public override void RunStep() => compareInfo.Compare(str, strAsciiSuffix);
         }
 
         public class CompareInfoStartsWithMeasurement : StringsCompare
@@ -197,7 +204,7 @@ namespace Sample
                 return Task.CompletedTask;
             }
             public override string Name => "CompareInfo IsPrefix";
-            public override void RunStep() => compareInfo.IsPrefix(str, strDifferentSuffix);
+            public override void RunStep() => compareInfo.IsPrefix(str, strAsciiSuffix);
         }
 
         public class CompareInfoEndsWithMeasurement : StringsCompare
@@ -211,7 +218,7 @@ namespace Sample
                 return Task.CompletedTask;
             }
             public override string Name => "CompareInfo IsSuffix";
-            public override void RunStep() => compareInfo.IsSuffix(str, strDifferentPrefix);
+            public override void RunStep() => compareInfo.IsSuffix(str, strAsciiPrefix);
         }
 
         public class StringStartsWithMeasurement : StringsCompare
@@ -225,7 +232,7 @@ namespace Sample
                 return Task.CompletedTask;
             }
             public override string Name => "String StartsWith";
-            public override void RunStep() => str.StartsWith(strDifferentSuffix, false, cultureInfo);
+            public override void RunStep() => str.StartsWith(strAsciiSuffix, false, cultureInfo);
         }
 
         public class StringEndsWithMeasurement : StringsCompare
@@ -239,7 +246,35 @@ namespace Sample
                 return Task.CompletedTask;
             }
             public override string Name => "String EndsWith";
-            public override void RunStep() => str.EndsWith(strDifferentPrefix, false, cultureInfo);
+            public override void RunStep() => str.EndsWith(strAsciiPrefix, false, cultureInfo);
+        }
+
+        public class StringIndexOfMeasurement : StringsCompare
+        {
+            protected CompareInfo compareInfo;
+
+            public override Task BeforeBatch()
+            {
+                compareInfo = new CultureInfo("nb-NO").CompareInfo;
+                InitializeStringsForComparison();
+                return Task.CompletedTask;
+            }
+            public override string Name => "String IndexOf";
+            public override void RunStep() => compareInfo.IndexOf(str, needleSameAsStrEnd, CompareOptions.None);
+        }
+
+        public class StringLastIndexOfMeasurement : StringsCompare
+        {
+            protected CompareInfo compareInfo;
+
+            public override Task BeforeBatch()
+            {
+                compareInfo = new CultureInfo("nb-NO").CompareInfo;
+                InitializeStringsForComparison();
+                return Task.CompletedTask;
+            }
+            public override string Name => "String LastIndexOf";
+            public override void RunStep() => compareInfo.LastIndexOf(str, needleSameAsStrStart, CompareOptions.None);
         }
     }
 }
index 0b196d0..8d92fdc 100644 (file)
@@ -48,6 +48,7 @@ extern void mono_wasm_change_case(MonoString **exceptionMessage, MonoString **cu
 extern int mono_wasm_compare_string(MonoString **exceptionMessage, MonoString **culture, const uint16_t* str1, int32_t str1Length, const uint16_t* str2, int32_t str2Length, int32_t options);
 extern mono_bool mono_wasm_starts_with(MonoString **exceptionMessage, MonoString **culture, const uint16_t* str1, int32_t str1Length, const uint16_t* str2, int32_t str2Length, int32_t options);
 extern mono_bool mono_wasm_ends_with(MonoString **exceptionMessage, MonoString **culture, const uint16_t* str1, int32_t str1Length, const uint16_t* str2, int32_t str2Length, int32_t options);
+extern int mono_wasm_index_of(MonoString **exceptionMessage, MonoString **culture, const uint16_t* str1, int32_t str1Length, const uint16_t* str2, int32_t str2Length, int32_t options, mono_bool fromBeginning);
 
 void bindings_initialize_internals (void)
 {
@@ -81,4 +82,5 @@ void bindings_initialize_internals (void)
        mono_add_internal_call ("Interop/JsGlobalization::CompareString", mono_wasm_compare_string);
        mono_add_internal_call ("Interop/JsGlobalization::StartsWith", mono_wasm_starts_with);
        mono_add_internal_call ("Interop/JsGlobalization::EndsWith", mono_wasm_ends_with);
+       mono_add_internal_call ("Interop/JsGlobalization::IndexOf", mono_wasm_index_of);
 }
index 68a8899..ab293f6 100644 (file)
@@ -111,6 +111,7 @@ let linked_functions = [
     "mono_wasm_compare_string",
     "mono_wasm_starts_with",
     "mono_wasm_ends_with",
+    "mono_wasm_index_of",
 
     "icudt68_dat",
 ];
index 417cb51..f4be31c 100644 (file)
@@ -27,7 +27,7 @@ import {
     mono_wasm_invoke_js_blazor, mono_wasm_invoke_js_with_args_ref, mono_wasm_get_object_property_ref, mono_wasm_set_object_property_ref,
     mono_wasm_get_by_index_ref, mono_wasm_set_by_index_ref, mono_wasm_get_global_object_ref
 } from "./net6-legacy/method-calls";
-import { mono_wasm_change_case, mono_wasm_change_case_invariant, mono_wasm_compare_string, mono_wasm_ends_with, mono_wasm_starts_with } from "./hybrid-globalization";
+import { mono_wasm_change_case, mono_wasm_change_case_invariant, mono_wasm_compare_string, mono_wasm_ends_with, mono_wasm_index_of, mono_wasm_starts_with } from "./hybrid-globalization";
 
 // the methods would be visible to EMCC linker
 // --- keep in sync with dotnet.cjs.lib.js ---
@@ -102,6 +102,7 @@ export function export_linker(): any {
         mono_wasm_compare_string,
         mono_wasm_starts_with,
         mono_wasm_ends_with,
+        mono_wasm_index_of,
 
         // threading exports, if threading is enabled
         ...mono_wasm_threads_exports,
index e0f3fdb..b94a2d1 100644 (file)
@@ -54,10 +54,7 @@ export function mono_wasm_compare_string(exceptionMessage: Int32Ptr, culture: Mo
         const string2 = string_decoder.decode(<any>str2, <any>(str2 + 2 * str2Length));
         const casePicker = (options & 0x1f);
         const locale = cultureName ? cultureName : undefined;
-        const result = compare_strings(string1, string2, locale, casePicker);
-        if (result == -2)
-            throw new Error("$Invalid comparison option.");
-        return result;
+        return compare_strings(string1, string2, locale, casePicker);
     }
     catch (ex: any) {
         pass_exception_details(ex, exceptionMessage);
@@ -75,16 +72,16 @@ function pass_exception_details(ex: any, exceptionMessage: Int32Ptr) {
     exceptionRoot.release();
 }
 
-export function mono_wasm_starts_with(exceptionMessage: Int32Ptr, culture: MonoStringRef, str1: number, str1Length: number, str2: number, str2Length: number, options: number): number {
+export function mono_wasm_starts_with(exceptionMessage: Int32Ptr, culture: MonoStringRef, srcPtr: number, srcLength: number, prefixPtr: number, prefixLength: number, options: number): number{
     const cultureRoot = mono_wasm_new_external_root<MonoString>(culture);
     try {
         const cultureName = conv_string_root(cultureRoot);
-        const prefix = get_clean_string(str2, str2Length);
+        const prefix = decode_to_clean_string(prefixPtr, prefixLength);
         // no need to look for an empty string
         if (prefix.length == 0)
             return 1; // true
 
-        const source = get_clean_string(str1, str1Length);
+        const source = decode_to_clean_string(srcPtr, srcLength);
         if (source.length < prefix.length)
             return 0; //false
         const sourceOfPrefixLength = source.slice(0, prefix.length);
@@ -92,8 +89,6 @@ export function mono_wasm_starts_with(exceptionMessage: Int32Ptr, culture: MonoS
         const casePicker = (options & 0x1f);
         const locale = cultureName ? cultureName : undefined;
         const result = compare_strings(sourceOfPrefixLength, prefix, locale, casePicker);
-        if (result == -2)
-            throw new Error("$Invalid comparison option.");
         return result === 0 ? 1 : 0; // equals ? true : false
     }
     catch (ex: any) {
@@ -105,15 +100,15 @@ export function mono_wasm_starts_with(exceptionMessage: Int32Ptr, culture: MonoS
     }
 }
 
-export function mono_wasm_ends_with(exceptionMessage: Int32Ptr, culture: MonoStringRef, str1: number, str1Length: number, str2: number, str2Length: number, options: number): number {
+export function mono_wasm_ends_with(exceptionMessage: Int32Ptr, culture: MonoStringRef, srcPtr: number, srcLength: number, suffixPtr: number, suffixLength: number, options: number): number{
     const cultureRoot = mono_wasm_new_external_root<MonoString>(culture);
     try {
         const cultureName = conv_string_root(cultureRoot);
-        const suffix = get_clean_string(str2, str2Length);
+        const suffix = decode_to_clean_string(suffixPtr, suffixLength);
         if (suffix.length == 0)
             return 1; // true
 
-        const source = get_clean_string(str1, str1Length);
+        const source = decode_to_clean_string(srcPtr, srcLength);
         const diff = source.length - suffix.length;
         if (diff < 0)
             return 0; //false
@@ -122,8 +117,6 @@ export function mono_wasm_ends_with(exceptionMessage: Int32Ptr, culture: MonoStr
         const casePicker = (options & 0x1f);
         const locale = cultureName ? cultureName : undefined;
         const result = compare_strings(sourceOfSuffixLength, suffix, locale, casePicker);
-        if (result == -2)
-            throw new Error("$Invalid comparison option.");
         return result === 0 ? 1 : 0; // equals ? true : false
     }
     catch (ex: any) {
@@ -135,24 +128,116 @@ export function mono_wasm_ends_with(exceptionMessage: Int32Ptr, culture: MonoStr
     }
 }
 
-function get_clean_string(strPtr: number, strLen: number) {
-    const str = string_decoder.decode(<any>strPtr, <any>(strPtr + 2 * strLen));
+function decode_to_clean_string(strPtr: number, strLen: number)
+{
+    const str = string_decoder.decode(<any>strPtr, <any>(strPtr + 2*strLen));
+    return clean_string(str);
+}
+
+function clean_string(str: string)
+{
     const nStr = str.normalize();
     return nStr.replace(/[\u200B-\u200D\uFEFF\0]/g, "");
 }
 
+export function mono_wasm_index_of(exceptionMessage: Int32Ptr, culture: MonoStringRef, needlePtr: number, needleLength: number, srcPtr: number, srcLength: number, options: number, fromBeginning: number): number{
+    const cultureRoot = mono_wasm_new_external_root<MonoString>(culture);
+    try {
+        const needle = string_decoder.decode(<any>needlePtr, <any>(needlePtr + 2*needleLength));
+        // no need to look for an empty string
+        if (clean_string(needle).length == 0)
+            return fromBeginning ? 0 : srcLength;
+
+        const source = string_decoder.decode(<any>srcPtr, <any>(srcPtr + 2*srcLength));
+        // no need to look in an empty string
+        if (clean_string(source).length == 0)
+            return fromBeginning ? 0 : srcLength;
+        const cultureName = conv_string_root(cultureRoot);
+        const locale = cultureName ? cultureName : undefined;
+        const casePicker = (options & 0x1f);
+
+        const segmenter = new Intl.Segmenter(locale, { granularity: "grapheme" });
+        const needleSegments = Array.from(segmenter.segment(needle)).map(s => s.segment);
+        let i = 0;
+        let stop = false;
+        let result = -1;
+        let segmentWidth = 0;
+        let index = 0;
+        let nextIndex = 0;
+        while (!stop)
+        {
+            // we need to restart the iterator in this outer loop because we have shifted it in the inner loop
+            const iteratorSrc = segmenter.segment(source.slice(i, source.length))[Symbol.iterator]();
+            let srcNext = iteratorSrc.next();
+
+            if (srcNext.done)
+                break;
+
+            let matchFound = check_match_found(srcNext.value.segment, needleSegments[0], locale, casePicker);
+            index = nextIndex;
+            srcNext = iteratorSrc.next();
+            if (srcNext.done)
+            {
+                result = matchFound ? index : result;
+                break;
+            }
+            segmentWidth = srcNext.value.index;
+            nextIndex = index + segmentWidth;
+            if (matchFound)
+            {
+                for(let j=1; j<needleSegments.length; j++)
+                {
+                    if (srcNext.done)
+                    {
+                        stop = true;
+                        break;
+                    }
+                    matchFound = check_match_found(srcNext.value.segment, needleSegments[j], locale, casePicker);
+                    if (!matchFound)
+                        break;
+
+                    srcNext = iteratorSrc.next();
+                }
+                if (stop)
+                    break;
+            }
+
+            if (matchFound)
+            {
+                result = index;
+                if (fromBeginning)
+                    break;
+            }
+            i = nextIndex;
+        }
+        return result;
+    }
+    catch (ex: any) {
+        pass_exception_details(ex, exceptionMessage);
+        return -1;
+    }
+    finally {
+        cultureRoot.release();
+    }
+
+    function check_match_found(str1: string, str2: string, locale: string | undefined, casePicker: number) : boolean
+    {
+        return compare_strings(str1, str2, locale, casePicker) === 0;
+    }
+}
+
 export function compare_strings(string1: string, string2: string, locale: string | undefined, casePicker: number): number {
     switch (casePicker) {
         case 0:
             // 0: None - default algorithm for the platform OR
             //    StringSort - since .Net 5 StringSort gives the same result as None, even for hyphen etc.
             //    does not work for "ja"
-            if (locale && locale.split("-")[0] === "ja")
+            if (locale && locale.startsWith("ja"))
                 return -2;
             return string1.localeCompare(string2, locale); // a ≠ b, a ≠ á, a ≠ A
         case 8:
             // 8: IgnoreKanaType works only for "ja"
-            if (locale && locale.split("-")[0] !== "ja")
+            if (locale && !locale.startsWith("ja"))
                 return -2;
             return string1.localeCompare(string2, locale); // a ≠ b, a ≠ á, a ≠ A
         case 1:
@@ -229,6 +314,6 @@ export function compare_strings(string1: string, string2: string, locale: string
             // 29: IgnoreKanaType | IgnoreWidth | IgnoreSymbols | IgnoreCase
             // 30: IgnoreKanaType | IgnoreWidth | IgnoreSymbols | IgnoreNonSpace
             // 31: IgnoreKanaType | IgnoreWidth | IgnoreSymbols | IgnoreNonSpace | IgnoreCase
-            return -2;
+            throw new Error(`Invalid comparison option. Option=${casePicker}`);
     }
 }