From: Levi Broderick Date: Fri, 24 Apr 2020 23:45:21 +0000 (-0700) Subject: Fix CompareInfo weightless code point handling, plus other improvements (#1514) X-Git-Tag: submit/tizen/20210909.063632~8383 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=90cc6dff11e4eea07d8b2ed438b25679f0caf7a8;p=platform%2Fupstream%2Fdotnet%2Fruntime.git Fix CompareInfo weightless code point handling, plus other improvements (#1514) * Create spanified and Rune-accepting overloads of CompareInfo APIs * Remove much of the duplicated code throughout CompareInfo * Remove "empty string" optimizations that were causing incorrect comparisons against weightless code points * Improve error detection around some edge cases --- diff --git a/src/libraries/Common/src/Interop/Interop.Collation.cs b/src/libraries/Common/src/Interop/Interop.Collation.cs index a59292e..ece19be 100644 --- a/src/libraries/Common/src/Interop/Interop.Collation.cs +++ b/src/libraries/Common/src/Interop/Interop.Collation.cs @@ -23,7 +23,7 @@ internal static partial class Interop internal static extern unsafe int IndexOf(IntPtr sortHandle, char* target, int cwTargetLength, char* pSource, int cwSourceLength, CompareOptions options, int* matchLengthPtr); [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_LastIndexOf")] - internal static extern unsafe int LastIndexOf(IntPtr sortHandle, char* target, int cwTargetLength, char* pSource, int cwSourceLength, CompareOptions options); + internal static extern unsafe int LastIndexOf(IntPtr sortHandle, char* target, int cwTargetLength, char* pSource, int cwSourceLength, CompareOptions options, int* matchLengthPtr); [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_IndexOfOrdinalIgnoreCase")] internal static extern unsafe int IndexOfOrdinalIgnoreCase(string target, int cwTargetLength, char* pSource, int cwSourceLength, bool findLast); diff --git 
a/src/libraries/Common/src/Interop/Windows/Kernel32/Interop.Globalization.cs b/src/libraries/Common/src/Interop/Windows/Kernel32/Interop.Globalization.cs index 608073d..2aa2d6a 100644 --- a/src/libraries/Common/src/Interop/Windows/Kernel32/Interop.Globalization.cs +++ b/src/libraries/Common/src/Interop/Windows/Kernel32/Interop.Globalization.cs @@ -53,7 +53,7 @@ internal static partial class Interop [DllImport("kernel32.dll", CharSet = CharSet.Unicode)] internal static extern int LocaleNameToLCID(string lpName, uint dwFlags); - [DllImport("kernel32.dll", CharSet = CharSet.Unicode)] + [DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)] internal static extern int LCMapStringEx( string? lpLocaleName, uint dwMapFlags, diff --git a/src/libraries/Common/tests/Tests/System/StringTests.cs b/src/libraries/Common/tests/Tests/System/StringTests.cs index 55ff465..0787656 100644 --- a/src/libraries/Common/tests/Tests/System/StringTests.cs +++ b/src/libraries/Common/tests/Tests/System/StringTests.cs @@ -2516,7 +2516,15 @@ namespace System.Tests Assert.Equal(s1.GetHashCode(), s1.GetHashCode()); } - Assert.Equal(expected, s1.AsSpan().Equals(s2.AsSpan(), comparisonType)); + if (string.IsNullOrEmpty(s1) && string.IsNullOrEmpty(s2)) + { + // null strings are normalized to empty spans + Assert.True(s1.AsSpan().Equals(s2.AsSpan(), comparisonType)); + } + else + { + Assert.Equal(expected, s1.AsSpan().Equals(s2.AsSpan(), comparisonType)); + } } public static IEnumerable Equals_EncyclopaediaData() @@ -6779,6 +6787,19 @@ namespace System.Tests Assert.Equal(expected, source.EndsWith(end, ignoreCase, ci)); } + [Theory] + [InlineData("", StringComparison.InvariantCulture, true)] + [InlineData("", StringComparison.Ordinal, true)] + [InlineData(ZeroWidthJoiner, StringComparison.InvariantCulture, true)] + [InlineData(ZeroWidthJoiner, StringComparison.Ordinal, false)] + public static void StartEndWith_ZeroWeightValue(string value, StringComparison comparison, bool 
expectedStartsAndEndsWithResult) + { + Assert.Equal(expectedStartsAndEndsWithResult, string.Empty.StartsWith(value, comparison)); + Assert.Equal(expectedStartsAndEndsWithResult, string.Empty.EndsWith(value, comparison)); + Assert.Equal(expectedStartsAndEndsWithResult ? 0 : -1, string.Empty.IndexOf(value, comparison)); + Assert.Equal(expectedStartsAndEndsWithResult ? 0 : -1, string.Empty.LastIndexOf(value, comparison)); + } + [Fact] public static void StartEndNegativeTest() { diff --git a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c index 4410c3e..508d3a4 100644 --- a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c +++ b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c @@ -445,6 +445,20 @@ int32_t GlobalizationNative_CompareString( if (U_SUCCESS(err)) { + // Workaround for https://unicode-org.atlassian.net/projects/ICU/issues/ICU-9396 + // The ucol_strcoll routine on some older versions of ICU doesn't correctly + // handle nullptr inputs. We'll play defensively and always flow a non-nullptr. + + UChar dummyChar = 0; + if (lpStr1 == NULL) + { + lpStr1 = &dummyChar; + } + if (lpStr2 == NULL) + { + lpStr2 = &dummyChar; + } + result = ucol_strcoll(pColl, lpStr1, cwStr1Length, lpStr2, cwStr2Length); } @@ -464,7 +478,28 @@ int32_t GlobalizationNative_IndexOf( int32_t options, int32_t* pMatchedLength) { + assert(cwTargetLength > 0); + int32_t result = USEARCH_DONE; + + // It's possible somebody passed us (source = <empty>, target = <non-empty>). + // ICU's usearch_* APIs don't handle empty source inputs properly. However, + // if this occurs the user really just wanted us to perform an equality check. + // We can't short-circuit the operation because depending on the collation in + // use, certain code points may have zero weight, which means that empty + // strings may compare as equal to non-empty strings.
+ + if (cwSourceLength == 0) + { + result = GlobalizationNative_CompareString(pSortHandle, lpTarget, cwTargetLength, lpSource, cwSourceLength, options); + if (result == UCOL_EQUAL && pMatchedLength != NULL) + { + *pMatchedLength = cwTargetLength; + } + + return (result == UCOL_EQUAL) ? 0 : -1; + } + UErrorCode err = U_ZERO_ERROR; const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err); @@ -499,9 +534,31 @@ int32_t GlobalizationNative_LastIndexOf( int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, - int32_t options) + int32_t options, + int32_t* pMatchedLength) { + assert(cwTargetLength > 0); + int32_t result = USEARCH_DONE; + + // It's possible somebody passed us (source = <empty>, target = <non-empty>). + // ICU's usearch_* APIs don't handle empty source inputs properly. However, + // if this occurs the user really just wanted us to perform an equality check. + // We can't short-circuit the operation because depending on the collation in + // use, certain code points may have zero weight, which means that empty + // strings may compare as equal to non-empty strings. + + if (cwSourceLength == 0) + { + result = GlobalizationNative_CompareString(pSortHandle, lpTarget, cwTargetLength, lpSource, cwSourceLength, options); + if (result == UCOL_EQUAL && pMatchedLength != NULL) + { + *pMatchedLength = cwTargetLength; + } + + return (result == UCOL_EQUAL) ? 0 : -1; + } + UErrorCode err = U_ZERO_ERROR; const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err); @@ -512,6 +569,13 @@ int32_t GlobalizationNative_LastIndexOf( if (U_SUCCESS(err)) { result = usearch_last(pSearch, &err); + + // if the search was successful, + // we'll try to get the matched string length.
+ if (result != USEARCH_DONE && pMatchedLength != NULL) + { + *pMatchedLength = usearch_getMatchedLength(pSearch); + } usearch_close(pSearch); } } @@ -771,14 +835,16 @@ static int32_t ComplexEndsWith(const UCollator* pCollator, UErrorCode* pErrorCod int32_t idx = usearch_last(pSearch, pErrorCode); if (idx != USEARCH_DONE) { - if ((idx + usearch_getMatchedLength(pSearch)) == patternLength) + int32_t matchEnd = idx + usearch_getMatchedLength(pSearch); + assert(matchEnd <= textLength); + + if (matchEnd == textLength) { result = TRUE; } else { - int32_t matchEnd = idx + usearch_getMatchedLength(pSearch); - int32_t remainingStringLength = patternLength - matchEnd; + int32_t remainingStringLength = textLength - matchEnd; result = CanIgnoreAllCollationElements(pCollator, pText + matchEnd, remainingStringLength); } diff --git a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.h b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.h index 3d04ba7..79f2fd7 100644 --- a/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.h +++ b/src/libraries/Native/Unix/System.Globalization.Native/pal_collation.h @@ -37,7 +37,8 @@ PALEXPORT int32_t GlobalizationNative_LastIndexOf(SortHandle* pSortHandle, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, - int32_t options); + int32_t options, + int32_t* pMatchedLength); PALEXPORT int32_t GlobalizationNative_IndexOfOrdinalIgnoreCase(const UChar* lpTarget, int32_t cwTargetLength, diff --git a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.Compare.cs b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.Compare.cs index 963d7ad..38f884c 100644 --- a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.Compare.cs +++ b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.Compare.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. 
// See the LICENSE file in the project root for more information. +using System.Buffers; using System.Collections.Generic; using Xunit; @@ -207,6 +208,13 @@ namespace System.Globalization.Tests yield return new object[] { s_invariantCompare, "Test's", null, CompareOptions.None, 1 }; yield return new object[] { s_invariantCompare, null, null, CompareOptions.None, 0 }; + yield return new object[] { s_invariantCompare, "", "Tests", CompareOptions.None, -1 }; + yield return new object[] { s_invariantCompare, "Tests", "", CompareOptions.None, 1 }; + + yield return new object[] { s_invariantCompare, null, "", CompareOptions.None, -1 }; + yield return new object[] { s_invariantCompare, "", null, CompareOptions.None, 1 }; + yield return new object[] { s_invariantCompare, "", "", CompareOptions.None, 0 }; + yield return new object[] { s_invariantCompare, new string('a', 5555), new string('a', 5555), CompareOptions.None, 0 }; yield return new object[] { s_invariantCompare, "foobar", "FooB\u00C0R", CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase, 0 }; yield return new object[] { s_invariantCompare, "foobar", "FooB\u00C0R", CompareOptions.IgnoreNonSpace, -1 }; @@ -362,6 +370,26 @@ namespace System.Globalization.Tests // Use Compare(string, int, int, string, int, int, CompareOptions) Assert.Equal(expected, Math.Sign(compareInfo.Compare(string1, offset1, length1, string2, offset2, length2, options))); Assert.Equal(-expected, Math.Sign(compareInfo.Compare(string2, offset2, length2, string1, offset1, length1, options))); + + // Now test the span-based versions - use BoundedMemory to detect buffer overruns + // We can't run this test for null inputs since they implicitly convert to empty span + + if (string1 != null && string2 != null) + { + RunSpanCompareTest(compareInfo, string1.AsSpan(offset1, length1), string2.AsSpan(offset2, length2), options, expected); + } + + static void RunSpanCompareTest(CompareInfo compareInfo, ReadOnlySpan string1, ReadOnlySpan string2, 
CompareOptions options, int expected) + { + using BoundedMemory string1BoundedMemory = BoundedMemory.AllocateFromExistingData(string1); + string1BoundedMemory.MakeReadonly(); + + using BoundedMemory string2BoundedMemory = BoundedMemory.AllocateFromExistingData(string2); + string2BoundedMemory.MakeReadonly(); + + Assert.Equal(expected, Math.Sign(compareInfo.Compare(string1, string2, options))); + Assert.Equal(-expected, Math.Sign(compareInfo.Compare(string2, string1, options))); + } } [Fact] diff --git a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IndexOf.cs b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IndexOf.cs index 6c41d6c..311ae44 100644 --- a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IndexOf.cs +++ b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IndexOf.cs @@ -2,7 +2,9 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using System.Buffers; using System.Collections.Generic; +using System.Text; using Xunit; namespace System.Globalization.Tests @@ -63,6 +65,10 @@ namespace System.Globalization.Tests yield return new object[] { s_invariantCompare, "TestFooBA\u0300R", "FooB\u00C0R", 0, 11, CompareOptions.IgnoreNonSpace, 4 }; yield return new object[] { s_invariantCompare, "o\u0308", "o", 0, 2, CompareOptions.None, -1 }; + // Weightless characters + yield return new object[] { s_invariantCompare, "", "\u200d", 0, 0, CompareOptions.None, 0 }; + yield return new object[] { s_invariantCompare, "hello", "\u200d", 1, 3, CompareOptions.IgnoreCase, 1 }; + // Ignore symbols yield return new object[] { s_invariantCompare, "More Test's", "Tests", 0, 11, CompareOptions.IgnoreSymbols, 5 }; yield return new object[] { s_invariantCompare, "More Test's", "Tests", 0, 11, CompareOptions.None, -1 }; @@ -192,7 +198,27 @@ namespace System.Globalization.Tests // Use int MemoryExtensions.IndexOf(this ReadOnlySpan, ReadOnlySpan, StringComparison) Assert.Equal((expected == -1) ? -1 : (expected - startIndex), source.AsSpan(startIndex, count).IndexOf(value.AsSpan(), stringComparison)); } - } + + // Now test the span-based versions - use BoundedMemory to detect buffer overruns + + RunSpanIndexOfTest(compareInfo, source.AsSpan(startIndex, count), value, options, (expected < 0) ? 
expected : expected - startIndex); + + static void RunSpanIndexOfTest(CompareInfo compareInfo, ReadOnlySpan source, ReadOnlySpan value, CompareOptions options, int expected) + { + using BoundedMemory sourceBoundedMemory = BoundedMemory.AllocateFromExistingData(source); + sourceBoundedMemory.MakeReadonly(); + + using BoundedMemory valueBoundedMemory = BoundedMemory.AllocateFromExistingData(value); + valueBoundedMemory.MakeReadonly(); + + Assert.Equal(expected, compareInfo.IndexOf(sourceBoundedMemory.Span, valueBoundedMemory.Span, options)); + + if (TryCreateRuneFrom(value, out Rune rune)) + { + Assert.Equal(expected, compareInfo.IndexOf(sourceBoundedMemory.Span, rune, options)); // try the Rune-based version + } + } + } private static void IndexOf_Char(CompareInfo compareInfo, string source, char value, int startIndex, int count, CompareOptions options, int expected) { @@ -331,14 +357,11 @@ namespace System.Globalization.Tests AssertExtensions.Throws("count", () => s_invariantCompare.IndexOf("Test", 'a', 2, 4, CompareOptions.None)); } - [Fact] - public static void IndexOf_MinusOneCompatability() + // Attempts to create a Rune from the entirety of a given text buffer. + private static bool TryCreateRuneFrom(ReadOnlySpan text, out Rune value) { - // This behavior was for .NET Framework 1.1 compatability. - // Allowing empty source strings with invalid offsets was quickly outed. - // with invalid offsets. 
- Assert.Equal(0, s_invariantCompare.IndexOf("", "", -1, CompareOptions.None)); - Assert.Equal(-1, s_invariantCompare.IndexOf("", "a", -1, CompareOptions.None)); + return Rune.DecodeFromUtf16(text, out value, out int charsConsumed) == OperationStatus.Done + && charsConsumed == text.Length; } } } diff --git a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsPrefix.cs b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsPrefix.cs index 32f45d4..0d32af1 100644 --- a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsPrefix.cs +++ b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsPrefix.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Buffers; using System.Collections.Generic; using Xunit; @@ -56,6 +57,9 @@ namespace System.Globalization.Tests yield return new object[] { s_invariantCompare, "o\u0308", "o", CompareOptions.Ordinal, true }; yield return new object[] { s_invariantCompare, "o\u0000\u0308", "o", CompareOptions.None, true }; + // Weightless comparisons + yield return new object[] { s_invariantCompare, "", "\u200d", CompareOptions.None, true }; + // Surrogates yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800\uDC00", CompareOptions.None, true }; yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800\uDC00", CompareOptions.IgnoreCase, true }; @@ -102,6 +106,16 @@ namespace System.Globalization.Tests Assert.Equal(expected, source.StartsWith(value, stringComparison)); Assert.Equal(expected, source.AsSpan().StartsWith(value.AsSpan(), stringComparison)); } + + // Now test the span version - use BoundedMemory to detect buffer overruns + + using BoundedMemory sourceBoundedMemory = BoundedMemory.AllocateFromExistingData(source); + sourceBoundedMemory.MakeReadonly(); + + using BoundedMemory valueBoundedMemory = 
BoundedMemory.AllocateFromExistingData(value); + valueBoundedMemory.MakeReadonly(); + + Assert.Equal(expected, compareInfo.IsPrefix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options)); } [Fact] diff --git a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsSuffix.cs b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsSuffix.cs index 909ceb9..05eb588 100644 --- a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsSuffix.cs +++ b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsSuffix.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Buffers; using System.Collections.Generic; using Xunit; @@ -63,6 +64,9 @@ namespace System.Globalization.Tests yield return new object[] { s_invariantCompare, "o\u0308o", "o", CompareOptions.None, true }; yield return new object[] { s_invariantCompare, "o\u0308o", "o", CompareOptions.Ordinal, true }; + // Weightless comparisons + yield return new object[] { s_invariantCompare, "", "\u200d", CompareOptions.None, true }; + // Surrogates yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800\uDC00", CompareOptions.None, true }; yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800\uDC00", CompareOptions.IgnoreCase, true }; @@ -104,6 +108,16 @@ namespace System.Globalization.Tests Assert.Equal(expected, source.EndsWith(value, stringComparison)); Assert.Equal(expected, source.AsSpan().EndsWith(value.AsSpan(), stringComparison)); } + + // Now test the span version - use BoundedMemory to detect buffer overruns + + using BoundedMemory sourceBoundedMemory = BoundedMemory.AllocateFromExistingData(source); + sourceBoundedMemory.MakeReadonly(); + + using BoundedMemory valueBoundedMemory = BoundedMemory.AllocateFromExistingData(value); + valueBoundedMemory.MakeReadonly(); + + Assert.Equal(expected, 
compareInfo.IsSuffix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options)); } [Fact] diff --git a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.LastIndexOf.cs b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.LastIndexOf.cs index 2702d72..38bb08b 100644 --- a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.LastIndexOf.cs +++ b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.LastIndexOf.cs @@ -2,7 +2,9 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Buffers; using System.Collections.Generic; +using System.Text; using Xunit; namespace System.Globalization.Tests @@ -16,6 +18,8 @@ namespace System.Globalization.Tests public static IEnumerable LastIndexOf_TestData() { + bool useNls = PlatformDetection.IsNlsGlobalization; + // Empty strings yield return new object[] { s_invariantCompare, "foo", "", 2, 3, CompareOptions.None, 3 }; yield return new object[] { s_invariantCompare, "", "", 0, 0, CompareOptions.None, 0 }; @@ -75,6 +79,13 @@ namespace System.Globalization.Tests yield return new object[] { s_invariantCompare, "TestFooBA\u0300R", "FooB\u00C0R", 10, 11, CompareOptions.IgnoreNonSpace, 4 }; yield return new object[] { s_invariantCompare, "o\u0308", "o", 1, 2, CompareOptions.None, -1 }; + // Weightless characters + // NLS matches weightless characters at the end of the string + // ICU matches weightless characters at 1 index prior to the end of the string + yield return new object[] { s_invariantCompare, "", "\u200d", 0, 0, CompareOptions.None, 0 }; + yield return new object[] { s_invariantCompare, "", "\u200d", -1, 0, CompareOptions.None, 0 }; + yield return new object[] { s_invariantCompare, "hello", "\u200d", 4, 5, CompareOptions.IgnoreCase, useNls ? 
5 : 4 }; + // Ignore symbols yield return new object[] { s_invariantCompare, "More Test's", "Tests", 10, 11, CompareOptions.IgnoreSymbols, 5 }; yield return new object[] { s_invariantCompare, "More Test's", "Tests", 10, 11, CompareOptions.None, -1 }; @@ -193,6 +204,26 @@ namespace System.Globalization.Tests // Use int MemoryExtensions.LastIndexOf(this ReadOnlySpan, ReadOnlySpan, StringComparison) Assert.Equal(expected - adjustmentFactor, sourceSpan.LastIndexOf(value.AsSpan(), stringComparison)); } + + // Now test the span-based versions - use BoundedMemory to detect buffer overruns + + RunSpanLastIndexOfTest(compareInfo, sourceSpan, value, options, expected - adjustmentFactor); + + static void RunSpanLastIndexOfTest(CompareInfo compareInfo, ReadOnlySpan source, ReadOnlySpan value, CompareOptions options, int expected) + { + using BoundedMemory sourceBoundedMemory = BoundedMemory.AllocateFromExistingData(source); + sourceBoundedMemory.MakeReadonly(); + + using BoundedMemory valueBoundedMemory = BoundedMemory.AllocateFromExistingData(value); + valueBoundedMemory.MakeReadonly(); + + Assert.Equal(expected, compareInfo.LastIndexOf(sourceBoundedMemory.Span, valueBoundedMemory.Span, options)); + + if (TryCreateRuneFrom(value, out Rune rune)) + { + Assert.Equal(expected, compareInfo.LastIndexOf(sourceBoundedMemory.Span, rune, options)); // try the Rune-based version + } + } } private static void LastIndexOf_Char(CompareInfo compareInfo, string source, char value, int startIndex, int count, CompareOptions options, int expected) @@ -271,38 +302,38 @@ namespace System.Globalization.Tests // Options are invalid AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", CompareOptions.StringSort)); AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", 0, CompareOptions.StringSort)); - AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", 0, 2, CompareOptions.StringSort)); + 
AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", 0, 1, CompareOptions.StringSort)); AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', CompareOptions.StringSort)); AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', 0, CompareOptions.StringSort)); - AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', 0, 2, CompareOptions.StringSort)); + AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', 0, 1, CompareOptions.StringSort)); AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", CompareOptions.Ordinal | CompareOptions.IgnoreWidth)); AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", 0, CompareOptions.Ordinal | CompareOptions.IgnoreWidth)); - AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", 0, 2, CompareOptions.Ordinal | CompareOptions.IgnoreWidth)); + AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", 0, 1, CompareOptions.Ordinal | CompareOptions.IgnoreWidth)); AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', CompareOptions.Ordinal | CompareOptions.IgnoreWidth)); AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', 0, CompareOptions.Ordinal | CompareOptions.IgnoreWidth)); - AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', 0, 2, CompareOptions.Ordinal | CompareOptions.IgnoreWidth)); + AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', 0, 1, CompareOptions.Ordinal | CompareOptions.IgnoreWidth)); AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", CompareOptions.OrdinalIgnoreCase | CompareOptions.IgnoreWidth)); AssertExtensions.Throws("options", () => 
s_invariantCompare.LastIndexOf("Test's", "Tests", 0, CompareOptions.OrdinalIgnoreCase | CompareOptions.IgnoreWidth)); - AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", 0, 2, CompareOptions.OrdinalIgnoreCase | CompareOptions.IgnoreWidth)); + AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", 0, 1, CompareOptions.OrdinalIgnoreCase | CompareOptions.IgnoreWidth)); AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', CompareOptions.OrdinalIgnoreCase | CompareOptions.IgnoreWidth)); AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', 0, CompareOptions.OrdinalIgnoreCase | CompareOptions.IgnoreWidth)); - AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', 0, 2, CompareOptions.OrdinalIgnoreCase | CompareOptions.IgnoreWidth)); + AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', 0, 1, CompareOptions.OrdinalIgnoreCase | CompareOptions.IgnoreWidth)); AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", (CompareOptions)(-1))); AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", 0, (CompareOptions)(-1))); - AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", 0, 2, (CompareOptions)(-1))); + AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", 0, 1, (CompareOptions)(-1))); AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", (CompareOptions)(-1))); AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', 0, (CompareOptions)(-1))); - AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', 0, 2, (CompareOptions)(-1))); + AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', 0, 1, 
(CompareOptions)(-1))); AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", (CompareOptions)0x11111111)); AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", 0, (CompareOptions)0x11111111)); - AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", 0, 2, (CompareOptions)0x11111111)); + AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", "Tests", 0, 1, (CompareOptions)0x11111111)); AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', (CompareOptions)0x11111111)); AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', 0, (CompareOptions)0x11111111)); - AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', 0, 2, (CompareOptions)0x11111111)); + AssertExtensions.Throws("options", () => s_invariantCompare.LastIndexOf("Test's", 'a', 0, 1, (CompareOptions)0x11111111)); // StartIndex < 0 AssertExtensions.Throws("startIndex", () => s_invariantCompare.LastIndexOf("Test", "Test", -1, CompareOptions.None)); @@ -350,6 +381,19 @@ namespace System.Globalization.Tests AssertExtensions.Throws("count", () => s_invariantCompare.LastIndexOf("Test", "s", 4, 7, CompareOptions.None)); AssertExtensions.Throws("count", () => s_invariantCompare.LastIndexOf("Test", 's', 4, 6)); AssertExtensions.Throws("count", () => s_invariantCompare.LastIndexOf("Test", 's', 4, 7, CompareOptions.None)); + + // Count > StartIndex + 1 + AssertExtensions.Throws("count", () => s_invariantCompare.LastIndexOf("Test", "e", 1, 3)); + AssertExtensions.Throws("count", () => s_invariantCompare.LastIndexOf("Test", "e", 1, 3, CompareOptions.None)); + AssertExtensions.Throws("count", () => s_invariantCompare.LastIndexOf("Test", 'e', 1, 3)); + AssertExtensions.Throws("count", () => s_invariantCompare.LastIndexOf("Test", 'e', 1, 3, CompareOptions.None)); + } + + // Attempts to create a 
Rune from the entirety of a given text buffer. + private static bool TryCreateRuneFrom(ReadOnlySpan text, out Rune value) + { + return Rune.DecodeFromUtf16(text, out value, out int charsConsumed) == OperationStatus.Done + && charsConsumed == text.Length; } } } diff --git a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.cs b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.cs index d4a7620..f2f3a0b 100644 --- a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.cs +++ b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.cs @@ -2,8 +2,10 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Buffers; using System.Collections.Generic; using System.Reflection; +using System.Text; using Xunit; namespace System.Globalization.Tests @@ -104,6 +106,7 @@ namespace System.Globalization.Tests // sort before the corresponding characters that are in the block U+FF00-U+FFEF private static int s_expectedHalfToFullFormsComparison = PlatformDetection.IsNlsGlobalization ? 
-1 : 1; + private static CompareInfo s_hungarianCompare = new CultureInfo("hu-HU").CompareInfo; private static CompareInfo s_invariantCompare = CultureInfo.InvariantCulture.CompareInfo; private static CompareInfo s_turkishCompare = new CultureInfo("tr-TR").CompareInfo; @@ -373,6 +376,33 @@ namespace System.Globalization.Tests Assert.Equal(string1, sk1.OriginalString); Assert.Equal(string2, sk2.OriginalString); + + // Now try the span-based versions - use BoundedMemory to detect buffer overruns + + RunSpanSortKeyTest(compareInfo, string1, options, sk1.KeyData); + RunSpanSortKeyTest(compareInfo, string2, options, sk2.KeyData); + + unsafe static void RunSpanSortKeyTest(CompareInfo compareInfo, ReadOnlySpan source, CompareOptions options, byte[] expectedSortKey) + { + using BoundedMemory sourceBoundedMemory = BoundedMemory.AllocateFromExistingData(source); + sourceBoundedMemory.MakeReadonly(); + + Assert.Equal(expectedSortKey.Length, compareInfo.GetSortKeyLength(sourceBoundedMemory.Span, options)); + + using BoundedMemory sortKeyBoundedMemory = BoundedMemory.Allocate(expectedSortKey.Length); + + // First try with a destination which is too small - should result in an error + + Assert.Throws("destination", () => compareInfo.GetSortKey(sourceBoundedMemory.Span, sortKeyBoundedMemory.Span.Slice(1), options)); + + // Next, try with a destination which is perfectly sized - should succeed + + Span sortKeyBoundedSpan = sortKeyBoundedMemory.Span; + sortKeyBoundedSpan.Clear(); + + Assert.Equal(expectedSortKey.Length, compareInfo.GetSortKey(sourceBoundedMemory.Span, sortKeyBoundedSpan, options)); + Assert.Equal(expectedSortKey, sortKeyBoundedSpan[0..expectedSortKey.Length].ToArray()); + } } [Fact] @@ -436,6 +466,12 @@ namespace System.Globalization.Tests string source = sourceObj as string ?? 
new string((char[])sourceObj); Assert.Equal(expected, CompareInfo.IsSortable(source)); + // Now test the span version - use BoundedMemory to detect buffer overruns + + using BoundedMemory sourceBoundedMemory = BoundedMemory.AllocateFromExistingData(source); + sourceBoundedMemory.MakeReadonly(); + Assert.Equal(expected, CompareInfo.IsSortable(sourceBoundedMemory.Span)); + // If the string as a whole is sortable, then all chars which aren't standalone // surrogate halves must also be sortable. diff --git a/src/libraries/System.Globalization/tests/Invariant/Invariant.Tests.csproj b/src/libraries/System.Globalization/tests/Invariant/Invariant.Tests.csproj index 78f6d5d..1de762d 100644 --- a/src/libraries/System.Globalization/tests/Invariant/Invariant.Tests.csproj +++ b/src/libraries/System.Globalization/tests/Invariant/Invariant.Tests.csproj @@ -2,6 +2,7 @@ $(NetCoreAppCurrent) true + true diff --git a/src/libraries/System.Globalization/tests/Invariant/InvariantMode.cs b/src/libraries/System.Globalization/tests/Invariant/InvariantMode.cs index 67c0b09..8012d34 100644 --- a/src/libraries/System.Globalization/tests/Invariant/InvariantMode.cs +++ b/src/libraries/System.Globalization/tests/Invariant/InvariantMode.cs @@ -3,8 +3,10 @@ // See the LICENSE file in the project root for more information. 
using System.Buffers; +using System.Buffers.Binary; using System.Collections.Generic; -using System.Collections; +using System.IO; +using System.Runtime.InteropServices; using System.Text; using Xunit; @@ -63,6 +65,10 @@ namespace System.Globalization.Tests yield return new object[] { "FooBar", "Foo\u0400Bar", 0, 6, CompareOptions.Ordinal, -1 }; yield return new object[] { "TestFooBA\u0300R", "FooB\u00C0R", 0, 11, CompareOptions.IgnoreNonSpace, -1 }; + // Weightless characters + yield return new object[] { "", "\u200d", 0, 0, CompareOptions.None, -1 }; + yield return new object[] { "hello", "\u200d", 0, 5, CompareOptions.IgnoreCase, -1 }; + // Ignore symbols yield return new object[] { "More Test's", "Tests", 0, 11, CompareOptions.IgnoreSymbols, -1 }; yield return new object[] { "More Test's", "Tests", 0, 11, CompareOptions.None, -1 }; @@ -167,6 +173,11 @@ namespace System.Globalization.Tests yield return new object[] { "FooBar", "Foo\u0400Bar", 5, 6, CompareOptions.Ordinal, -1 }; yield return new object[] { "TestFooBA\u0300R", "FooB\u00C0R", 10, 11, CompareOptions.IgnoreNonSpace, -1 }; + // Weightless characters + yield return new object[] { "", "\u200d", 0, 0, CompareOptions.None, -1 }; + yield return new object[] { "", "\u200d", -1, 0, CompareOptions.None, -1 }; + yield return new object[] { "hello", "\u200d", 4, 5, CompareOptions.IgnoreCase, -1 }; + // Ignore symbols yield return new object[] { "More Test's", "Tests", 10, 11, CompareOptions.IgnoreSymbols, -1 }; yield return new object[] { "More Test's", "Tests", 10, 11, CompareOptions.None, -1 }; @@ -249,6 +260,10 @@ namespace System.Globalization.Tests yield return new object[] { "FooBar", "Foo\u0400Bar", CompareOptions.Ordinal, false }; yield return new object[] { "FooBA\u0300R", "FooB\u00C0R", CompareOptions.IgnoreNonSpace, false }; + // Weightless characters + yield return new object[] { "", "\u200d", CompareOptions.None, false }; + yield return new object[] { "", "\u200d", CompareOptions.IgnoreCase, false 
}; + // Ignore symbols yield return new object[] { "More Test's", "Tests", CompareOptions.IgnoreSymbols, false }; yield return new object[] { "More Test's", "Tests", CompareOptions.None, false }; @@ -640,6 +655,77 @@ namespace System.Globalization.Tests Assert.Equal(version, new CultureInfo(cultureName).CompareInfo.Version); } + [Theory] + [InlineData(0, 0)] + [InlineData(1, 2)] + [InlineData(100_000, 200_000)] + [InlineData(0x3FFF_FFFF, 0x7FFF_FFFE)] + public void TestGetSortKeyLength_Valid(int inputLength, int expectedSortKeyLength) + { + using BoundedMemory boundedMemory = BoundedMemory.Allocate(0); // AV if dereferenced + boundedMemory.MakeReadonly(); + ReadOnlySpan dummySpan = MemoryMarshal.CreateReadOnlySpan(ref MemoryMarshal.GetReference(boundedMemory.Span), inputLength); + Assert.Equal(expectedSortKeyLength, CultureInfo.InvariantCulture.CompareInfo.GetSortKeyLength(dummySpan)); + } + + [Theory] + [InlineData(0x4000_0000)] + [InlineData(int.MaxValue)] + public unsafe void TestGetSortKeyLength_OverlongArgument(int inputLength) + { + using BoundedMemory boundedMemory = BoundedMemory.Allocate(0); // AV if dereferenced + boundedMemory.MakeReadonly(); + + Assert.Throws("source", () => + { + ReadOnlySpan dummySpan = MemoryMarshal.CreateReadOnlySpan(ref MemoryMarshal.GetReference(boundedMemory.Span), inputLength); + CultureInfo.InvariantCulture.CompareInfo.GetSortKeyLength(dummySpan); + }); + } + + [Theory] + [InlineData("Hello", CompareOptions.None, "Hello")] + [InlineData("Hello", CompareOptions.IgnoreWidth, "Hello")] + [InlineData("Hello", CompareOptions.IgnoreCase, "HELLO")] + [InlineData("Hello", CompareOptions.IgnoreCase | CompareOptions.IgnoreWidth, "HELLO")] + [InlineData("Hell\u00F6", CompareOptions.None, "Hell\u00F6")] // U+00F6 = LATIN SMALL LETTER O WITH DIAERESIS + [InlineData("Hell\u00F6", CompareOptions.IgnoreCase, "HELL\u00F6")] // note the final "o with diaeresis" isn't capitalized + public unsafe void TestSortKey_FromSpan(string input, 
CompareOptions options, string expected) + { + byte[] expectedOutputBytes = GetExpectedInvariantOrdinalSortKey(expected); + + CompareInfo compareInfo = CultureInfo.InvariantCulture.CompareInfo; + + // First, validate that too short a buffer throws + + Assert.Throws("destination", () => compareInfo.GetSortKey(input, new byte[expectedOutputBytes.Length - 1], options)); + + // Next, validate that using a properly-sized buffer succeeds + // We'll use BoundedMemory to check for buffer overruns + + using BoundedMemory boundedInputMemory = BoundedMemory.AllocateFromExistingData(input); + boundedInputMemory.MakeReadonly(); + ReadOnlySpan boundedInputSpan = boundedInputMemory.Span; + + using BoundedMemory boundedOutputMemory = BoundedMemory.Allocate(expectedOutputBytes.Length); + Span boundedOutputSpan = boundedOutputMemory.Span; + + Assert.Equal(expectedOutputBytes.Length, compareInfo.GetSortKey(boundedInputSpan, boundedOutputSpan, options)); + Assert.Equal(expectedOutputBytes, boundedOutputSpan[0..expectedOutputBytes.Length].ToArray()); + + // Now try it once more, passing a larger span where the last byte points to unallocated memory. + // If GetSortKey attempts to write beyond the number of bytes we expect, the unit test will AV. 
+ + boundedOutputSpan.Clear(); + + fixed (byte* pBoundedOutputSpan = boundedOutputSpan) + { + boundedOutputSpan = new Span(pBoundedOutputSpan, boundedOutputSpan.Length + 1); // last byte is unallocated memory + Assert.Equal(expectedOutputBytes.Length, compareInfo.GetSortKey(boundedInputSpan, boundedOutputSpan, options)); + Assert.Equal(expectedOutputBytes, boundedOutputSpan[0..expectedOutputBytes.Length].ToArray()); + } + } + [Fact] public void TestSortKey_ZeroWeightCodePoints() { @@ -773,6 +859,7 @@ namespace System.Globalization.Tests valueBoundedMemory.MakeReadonly(); ReadOnlySpan valueBoundedSpan = valueBoundedMemory.Span; + Assert.Equal(result, CultureInfo.GetCultureInfo(cul).CompareInfo.IsSuffix(sourceBoundedSpan, valueBoundedSpan, options)); Assert.Equal(result, sourceBoundedSpan.EndsWith(valueBoundedSpan, GetStringComparison(options))); } } @@ -816,6 +903,9 @@ namespace System.Globalization.Tests valueBoundedMemory.MakeReadonly(); ReadOnlySpan valueBoundedSpan = valueBoundedMemory.Span; + res = CultureInfo.GetCultureInfo(cul).CompareInfo.Compare(sourceBoundedSpan, valueBoundedSpan, options); + Assert.Equal(result, Math.Sign(res)); + res = sourceBoundedSpan.CompareTo(valueBoundedSpan, GetStringComparison(options)); Assert.Equal(result, Math.Sign(res)); } @@ -923,5 +1013,19 @@ namespace System.Globalization.Tests Assert.Equal(expectedToLower, Rune.ToLowerInvariant(originalRune).Value); Assert.Equal(expectedToLower, Rune.ToLower(originalRune, CultureInfo.GetCultureInfo("tr-TR")).Value); } + + private static byte[] GetExpectedInvariantOrdinalSortKey(ReadOnlySpan input) + { + MemoryStream memoryStream = new MemoryStream(); + Span tempBuffer = stackalloc byte[sizeof(char)]; + + foreach (char ch in input) + { + BinaryPrimitives.WriteUInt16BigEndian(tempBuffer, (ushort)ch); + memoryStream.Write(tempBuffer); + } + + return memoryStream.ToArray(); + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs 
b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs index 0c27625..3cc636b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs @@ -6,6 +6,7 @@ using System.Buffers; using System.Collections.Generic; using System.Diagnostics; using System.Runtime.InteropServices; +using System.Text; namespace System.Globalization { @@ -164,34 +165,14 @@ namespace System.Globalization } } - // TODO https://github.com/dotnet/runtime/issues/8890: - // This method shouldn't be necessary, as we should be able to just use the overload - // that takes two spans. But due to this issue, that's adding significant overhead. - private unsafe int IcuCompareString(ReadOnlySpan string1, string string2, CompareOptions options) - { - Debug.Assert(!GlobalizationMode.Invariant); - Debug.Assert(!GlobalizationMode.UseNls); - Debug.Assert(string2 != null); - Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0); - - // Unlike NLS, ICU (ucol_getSortKey) allows passing nullptr for either of the source arguments - // as long as the corresponding length parameter is 0. - - fixed (char* pString1 = &MemoryMarshal.GetReference(string1)) - fixed (char* pString2 = &string2.GetRawStringData()) - { - return Interop.Globalization.CompareString(_sortHandle, pString1, string1.Length, pString2, string2.Length, options); - } - } - private unsafe int IcuCompareString(ReadOnlySpan string1, ReadOnlySpan string2, CompareOptions options) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(!GlobalizationMode.UseNls); Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0); - // Unlike NLS, ICU (ucol_getSortKey) allows passing nullptr for either of the source arguments - // as long as the corresponding length parameter is 0. + // GetReference may return nullptr if the input span is defaulted. 
The native layer handles + // this appropriately; no workaround is needed on the managed side. fixed (char* pString1 = &MemoryMarshal.GetReference(string1)) fixed (char* pString2 = &MemoryMarshal.GetReference(string2)) @@ -200,42 +181,10 @@ namespace System.Globalization } } - private unsafe int IcuIndexOfCore(string source, string target, int startIndex, int count, CompareOptions options, int* matchLengthPtr) - { - Debug.Assert(!GlobalizationMode.Invariant); - Debug.Assert(!GlobalizationMode.UseNls); - - Debug.Assert(!string.IsNullOrEmpty(source)); - Debug.Assert(target != null); - Debug.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0); - Debug.Assert((options & CompareOptions.Ordinal) == 0); - - int index; - - if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options)) - { - if ((options & CompareOptions.IgnoreCase) != 0) - index = IndexOfOrdinalIgnoreCaseHelper(source.AsSpan(startIndex, count), target.AsSpan(), options, matchLengthPtr, fromBeginning: true); - else - index = IndexOfOrdinalHelper(source.AsSpan(startIndex, count), target.AsSpan(), options, matchLengthPtr, fromBeginning: true); - } - else - { - fixed (char* pSource = source) - fixed (char* pTarget = target) - { - index = Interop.Globalization.IndexOf(_sortHandle, pTarget, target.Length, pSource + startIndex, count, options, matchLengthPtr); - } - } - - return index != -1 ? index + startIndex : -1; - } - private unsafe int IcuIndexOfCore(ReadOnlySpan source, ReadOnlySpan target, CompareOptions options, int* matchLengthPtr, bool fromBeginning) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(!GlobalizationMode.UseNls); - Debug.Assert(source.Length != 0); Debug.Assert(target.Length != 0); if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options)) @@ -247,13 +196,16 @@ namespace System.Globalization } else { + // GetReference may return nullptr if the input span is defaulted. 
The native layer handles + // this appropriately; no workaround is needed on the managed side. + fixed (char* pSource = &MemoryMarshal.GetReference(source)) fixed (char* pTarget = &MemoryMarshal.GetReference(target)) { if (fromBeginning) return Interop.Globalization.IndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options, matchLengthPtr); else - return Interop.Globalization.LastIndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options); + return Interop.Globalization.LastIndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options, matchLengthPtr); } } } @@ -363,7 +315,7 @@ namespace System.Globalization if (fromBeginning) return Interop.Globalization.IndexOf(_sortHandle, b, target.Length, a, source.Length, options, matchLengthPtr); else - return Interop.Globalization.LastIndexOf(_sortHandle, b, target.Length, a, source.Length, options); + return Interop.Globalization.LastIndexOf(_sortHandle, b, target.Length, a, source.Length, options, matchLengthPtr); } } @@ -456,60 +408,14 @@ namespace System.Globalization if (fromBeginning) return Interop.Globalization.IndexOf(_sortHandle, b, target.Length, a, source.Length, options, matchLengthPtr); else - return Interop.Globalization.LastIndexOf(_sortHandle, b, target.Length, a, source.Length, options); + return Interop.Globalization.LastIndexOf(_sortHandle, b, target.Length, a, source.Length, options, matchLengthPtr); } } - private unsafe int IcuLastIndexOfCore(string source, string target, int startIndex, int count, CompareOptions options) - { - Debug.Assert(!GlobalizationMode.Invariant); - Debug.Assert(!GlobalizationMode.UseNls); - - Debug.Assert(!string.IsNullOrEmpty(source)); - Debug.Assert(target != null); - Debug.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0); - - // startIndex points to the final char to include in the search space. - // empty target strings trivially occur at the end of the search space. 
- - if (target.Length == 0) - { - return startIndex + 1; - } - - if (options == CompareOptions.Ordinal) - { - return IcuLastIndexOfOrdinalCore(source, target, startIndex, count, ignoreCase: false); - } - - // startIndex is the index into source where we start search backwards from. leftStartIndex is the index into source - // of the start of the string that is count characters away from startIndex. - int leftStartIndex = (startIndex - count + 1); - - int lastIndex; - - if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options)) - { - if ((options & CompareOptions.IgnoreCase) != 0) - lastIndex = IndexOfOrdinalIgnoreCaseHelper(source.AsSpan(leftStartIndex, count), target.AsSpan(), options, matchLengthPtr: null, fromBeginning: false); - else - lastIndex = IndexOfOrdinalHelper(source.AsSpan(leftStartIndex, count), target.AsSpan(), options, matchLengthPtr: null, fromBeginning: false); - } - else - { - fixed (char* pSource = source) - fixed (char* pTarget = target) - { - lastIndex = Interop.Globalization.LastIndexOf(_sortHandle, pTarget, target.Length, pSource + (startIndex - count + 1), count, options); - } - } - - return lastIndex != -1 ? lastIndex + leftStartIndex : -1; - } - private unsafe bool IcuStartsWith(ReadOnlySpan source, ReadOnlySpan prefix, CompareOptions options) { Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(!GlobalizationMode.UseNls); Debug.Assert(!prefix.IsEmpty); Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0); @@ -726,10 +632,29 @@ namespace System.Globalization continue; } + // The match may be affected by special character. Verify that the preceding character is regular ASCII. + if (a > ap && *(a - 1) >= 0x80) + goto InteropCall; + if (b > bp && *(b - 1) >= 0x80) + goto InteropCall; return false; } - return (source.Length >= suffix.Length); + // The match may be affected by special character. Verify that the preceding character is regular ASCII. 
+ + if (source.Length < suffix.Length) + { + if (*b >= 0x80) + goto InteropCall; + return false; + } + + if (source.Length > suffix.Length) + { + if (*a >= 0x80) + goto InteropCall; + } + return true; InteropCall: return Interop.Globalization.EndsWith(_sortHandle, bp, suffix.Length, ap, source.Length, options); @@ -766,10 +691,29 @@ namespace System.Globalization continue; } + // The match may be affected by special character. Verify that the preceding character is regular ASCII. + if (a > ap && *(a - 1) >= 0x80) + goto InteropCall; + if (b > bp && *(b - 1) >= 0x80) + goto InteropCall; + return false; + } + + // The match may be affected by special character. Verify that the preceding character is regular ASCII. + + if (source.Length < suffix.Length) + { + if (*b >= 0x80) + goto InteropCall; return false; } - return (source.Length >= suffix.Length); + if (source.Length > suffix.Length) + { + if (*a >= 0x80) + goto InteropCall; + } + return true; InteropCall: return Interop.Globalization.EndsWith(_sortHandle, bp, suffix.Length, ap, source.Length, options); @@ -806,44 +750,76 @@ namespace System.Globalization return new SortKey(this, source, options, keyData); } - private static unsafe bool IcuIsSortable(char *text, int length) + private unsafe int IcuGetSortKey(ReadOnlySpan source, Span destination, CompareOptions options) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(!GlobalizationMode.UseNls); + Debug.Assert((options & ValidCompareMaskOffFlags) == 0); - int index = 0; - UnicodeCategory uc; + // It's ok to pass nullptr (for empty buffers) to ICU's sort key routines. 
- while (index < length) + int actualSortKeyLength; + + fixed (char* pSource = &MemoryMarshal.GetReference(source)) + fixed (byte* pDest = &MemoryMarshal.GetReference(destination)) { - if (char.IsHighSurrogate(text[index])) - { - if (index == length - 1 || !char.IsLowSurrogate(text[index+1])) - return false; // unpaired surrogate + actualSortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pDest, destination.Length, options); + } - uc = CharUnicodeInfo.GetUnicodeCategory(char.ConvertToUtf32(text[index], text[index+1])); - if (uc == UnicodeCategory.PrivateUse || uc == UnicodeCategory.OtherNotAssigned) - return false; + // The check below also handles errors due to negative values / overflow being returned. - index += 2; - continue; + if ((uint)actualSortKeyLength > (uint)destination.Length) + { + if (actualSortKeyLength > destination.Length) + { + ThrowHelper.ThrowArgumentException_DestinationTooShort(); } + else + { + throw new ArgumentException(SR.Arg_ExternalException); + } + } - if (char.IsLowSurrogate(text[index])) + return actualSortKeyLength; + } + + private unsafe int IcuGetSortKeyLength(ReadOnlySpan source, CompareOptions options) + { + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(!GlobalizationMode.UseNls); + Debug.Assert((options & ValidCompareMaskOffFlags) == 0); + + // It's ok to pass nullptr (for empty buffers) to ICU's sort key routines. 
+ + fixed (char* pSource = &MemoryMarshal.GetReference(source)) + { + return Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, null, 0, options); + } + } + + private static bool IcuIsSortable(ReadOnlySpan text) + { + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(!GlobalizationMode.UseNls); + Debug.Assert(!text.IsEmpty); + + do + { + if (Rune.DecodeFromUtf16(text, out Rune result, out int charsConsumed) != OperationStatus.Done) { - return false; // unpaired surrogate + return false; // found an unpaired surrogate somewhere in the text } - uc = CharUnicodeInfo.GetUnicodeCategory(text[index]); - if (uc == UnicodeCategory.PrivateUse || uc == UnicodeCategory.OtherNotAssigned) + UnicodeCategory category = Rune.GetUnicodeCategory(result); + if (category == UnicodeCategory.PrivateUse || category == UnicodeCategory.OtherNotAssigned) { - return false; + return false; // can't sort private use or unassigned code points } - index++; - } + text = text.Slice(charsConsumed); + } while (!text.IsEmpty); - return true; + return true; // saw no unsortable data in the buffer } // ----------------------------- diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Invariant.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Invariant.cs index 922a471..9904655 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Invariant.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Invariant.cs @@ -269,5 +269,52 @@ namespace System.Globalization sortKey = sortKey.Slice(sizeof(ushort)); } } + + private int InvariantGetSortKey(ReadOnlySpan source, Span destination, CompareOptions options) + { + Debug.Assert(GlobalizationMode.Invariant); + Debug.Assert((options & ValidCompareMaskOffFlags) == 0); + + // Make sure the destination buffer is large enough to hold the source projection. 
+ // Using unsigned arithmetic below also checks for buffer overflow since the incoming + // length is always a non-negative signed integer. + + if ((uint)destination.Length < (uint)source.Length * sizeof(char)) + { + ThrowHelper.ThrowArgumentException_DestinationTooShort(); + } + + if ((options & CompareOptions.IgnoreCase) == 0) + { + InvariantCreateSortKeyOrdinal(source, destination); + } + else + { + InvariantCreateSortKeyOrdinalIgnoreCase(source, destination); + } + + return source.Length * sizeof(char); + } + + private int InvariantGetSortKeyLength(ReadOnlySpan source, CompareOptions options) + { + Debug.Assert(GlobalizationMode.Invariant); + Debug.Assert((options & ValidCompareMaskOffFlags) == 0); + + // In invariant mode, sort keys are simply a byte projection of the source input, + // optionally with casing modifications. We need to make sure we don't overflow + // while computing the length. + + int byteLength = source.Length * sizeof(char); + + if (byteLength < 0) + { + throw new ArgumentException( + paramName: nameof(source), + message: SR.ArgumentOutOfRange_GetByteCountOverflow); + } + + return byteLength; + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Nls.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Nls.cs index cf0b5fd..2d96f96 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Nls.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Nls.cs @@ -110,6 +110,19 @@ namespace System.Globalization Debug.Assert(GlobalizationMode.UseNls); Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0); +#if TARGET_WINDOWS + if (!Environment.IsWindows8OrAbove) + { + // On Windows 7 / Server 2008, LCMapStringEx exhibits strange behaviors if the destination + // buffer is both non-null and too small for the required output. 
To prevent this from + // causing issues for us, we need to make an immutable copy of the input buffer so that + // its contents can't change between when we calculate the required sort key length and + // when we populate the sort key buffer. + + source = source.ToString(); + } +#endif + // LCMapStringEx doesn't support passing cchSrc = 0, so if given a null or empty input // we'll normalize it to an empty null-terminated string and pass -1 to indicate that // the underlying OS function should read until it encounters the null terminator. @@ -193,55 +206,6 @@ namespace System.Globalization } } - // TODO https://github.com/dotnet/runtime/issues/8890: - // This method shouldn't be necessary, as we should be able to just use the overload - // that takes two spans. But due to this issue, that's adding significant overhead. - private unsafe int NlsCompareString(ReadOnlySpan string1, string string2, CompareOptions options) - { - Debug.Assert(string2 != null); - Debug.Assert(!GlobalizationMode.Invariant); - Debug.Assert(GlobalizationMode.UseNls); - Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0); - - string? localeName = _sortHandle != IntPtr.Zero ? null : _sortName; - - // CompareStringEx may try to dereference the first character of its input, even if an explicit - // length of 0 is specified. To work around potential AVs we'll always ensure zero-length inputs - // are normalized to a null-terminated empty string. 
- - if (string1.IsEmpty) - { - string1 = string.Empty; - } - - fixed (char* pLocaleName = localeName) - fixed (char* pString1 = &MemoryMarshal.GetReference(string1)) - fixed (char* pString2 = &string2.GetPinnableReference()) - { - Debug.Assert(*pString1 >= 0); // assert that we can always dereference this - Debug.Assert(*pString2 >= 0); // assert that we can always dereference this - - int result = Interop.Kernel32.CompareStringEx( - pLocaleName, - (uint)GetNativeCompareFlags(options), - pString1, - string1.Length, - pString2, - string2.Length, - null, - null, - _sortHandle); - - if (result == 0) - { - throw new ArgumentException(SR.Arg_ExternalException); - } - - // Map CompareStringEx return value to -1, 0, 1. - return result - 2; - } - } - private unsafe int NlsCompareString(ReadOnlySpan string1, ReadOnlySpan string2, CompareOptions options) { Debug.Assert(!GlobalizationMode.Invariant); @@ -343,72 +307,22 @@ namespace System.Globalization } } - private unsafe int NlsIndexOfCore(string source, string target, int startIndex, int count, CompareOptions options, int* matchLengthPtr) - { - Debug.Assert(!GlobalizationMode.Invariant); - - Debug.Assert(target != null); - Debug.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0); - Debug.Assert((options & CompareOptions.Ordinal) == 0); - - int retValue = FindString(FIND_FROMSTART | (uint)GetNativeCompareFlags(options), source.AsSpan(startIndex, count), target, matchLengthPtr); - if (retValue >= 0) - { - return retValue + startIndex; - } - - return -1; - } - private unsafe int NlsIndexOfCore(ReadOnlySpan source, ReadOnlySpan target, CompareOptions options, int* matchLengthPtr, bool fromBeginning) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); Debug.Assert(target.Length != 0); - Debug.Assert(options == CompareOptions.None || options == CompareOptions.IgnoreCase); uint positionFlag = fromBeginning ? 
(uint)FIND_FROMSTART : FIND_FROMEND; return FindString(positionFlag | (uint)GetNativeCompareFlags(options), source, target, matchLengthPtr); } - private unsafe int NlsLastIndexOfCore(string source, string target, int startIndex, int count, CompareOptions options) - { - Debug.Assert(!GlobalizationMode.Invariant); - Debug.Assert(GlobalizationMode.UseNls); - - Debug.Assert(!string.IsNullOrEmpty(source)); - Debug.Assert(target != null); - Debug.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0); - - // startIndex points to the final char to include in the search space. - // empty target strings trivially occur at the end of the search space. - - if (target.Length == 0) - return startIndex + 1; - - if ((options & CompareOptions.Ordinal) != 0) - { - return FastLastIndexOfString(source, target, startIndex, count, target.Length); - } - else - { - int retValue = FindString(FIND_FROMEND | (uint)GetNativeCompareFlags(options), source.AsSpan(startIndex - count + 1, count), target, null); - - if (retValue >= 0) - { - return retValue + startIndex - (count - 1); - } - } - - return -1; - } - private unsafe bool NlsStartsWith(ReadOnlySpan source, ReadOnlySpan prefix, CompareOptions options) { Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(GlobalizationMode.UseNls); - Debug.Assert(!source.IsEmpty); Debug.Assert(!prefix.IsEmpty); Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0); @@ -433,50 +347,6 @@ namespace System.Globalization private const int FIND_FROMSTART = 0x00400000; private const int FIND_FROMEND = 0x00800000; - // TODO: Instead of this method could we just have upstack code call LastIndexOfOrdinal with ignoreCase = false? 
- private static unsafe int FastLastIndexOfString(string source, string target, int startIndex, int sourceCount, int targetCount) - { - int retValue = -1; - - int sourceStartIndex = startIndex - sourceCount + 1; - - fixed (char* pSource = source, spTarget = target) - { - char* spSubSource = pSource + sourceStartIndex; - - int endPattern = sourceCount - targetCount; - if (endPattern < 0) - return -1; - - Debug.Assert(target.Length >= 1); - char patternChar0 = spTarget[0]; - for (int ctrSrc = endPattern; ctrSrc >= 0; ctrSrc--) - { - if (spSubSource[ctrSrc] != patternChar0) - continue; - - int ctrPat; - for (ctrPat = 1; ctrPat < targetCount; ctrPat++) - { - if (spSubSource[ctrSrc + ctrPat] != spTarget[ctrPat]) - break; - } - if (ctrPat == targetCount) - { - retValue = ctrSrc; - break; - } - } - - if (retValue >= 0) - { - retValue += startIndex - sourceCount + 1; - } - } - - return retValue; - } - private unsafe SortKey NlsCreateSortKey(string source, CompareOptions options) { Debug.Assert(!GlobalizationMode.Invariant); @@ -531,13 +401,159 @@ namespace System.Globalization return new SortKey(this, source, options, keyData); } - private static unsafe bool NlsIsSortable(char* text, int length) + private unsafe int NlsGetSortKey(ReadOnlySpan source, Span destination, CompareOptions options) + { + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert((options & ValidCompareMaskOffFlags) == 0); + + // LCMapStringEx doesn't allow cchDest = 0 unless we're trying to query + // the total number of bytes necessary. + + if (destination.IsEmpty) + { + ThrowHelper.ThrowArgumentException_DestinationTooShort(); + } + +#if TARGET_WINDOWS + if (!Environment.IsWindows8OrAbove) + { + // On Windows 7 / Server 2008, LCMapStringEx exhibits strange behaviors if the destination + // buffer is both non-null and too small for the required output. 
To prevent this from + // causing issues for us, we need to make an immutable copy of the input buffer so that + // its contents can't change between when we calculate the required sort key length and + // when we populate the sort key buffer. + + source = source.ToString(); + } +#endif + + uint flags = LCMAP_SORTKEY | (uint)GetNativeCompareFlags(options); + + // LCMapStringEx doesn't support passing cchSrc = 0, so if given an empty span + // we'll instead normalize to a null-terminated empty string and pass -1 as + // the length to indicate that the implicit null terminator should be used. + + int sourceLength = source.Length; + if (sourceLength == 0) + { + source = string.Empty; + sourceLength = -1; + } + + int actualSortKeyLength; + + fixed (char* pSource = &MemoryMarshal.GetReference(source)) + fixed (byte* pSortKey = &MemoryMarshal.GetReference(destination)) + { + Debug.Assert(pSource != null); + Debug.Assert(pSortKey != null); + +#if TARGET_WINDOWS + if (!Environment.IsWindows8OrAbove) + { + // Manually check that the destination buffer is large enough to hold the full output. + // See earlier comment for reasoning. + + int requiredSortKeyLength = Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName, + flags, + pSource, sourceLength, + null, 0, + null, null, _sortHandle); + + if (requiredSortKeyLength > destination.Length) + { + ThrowHelper.ThrowArgumentException_DestinationTooShort(); + } + + if (requiredSortKeyLength <= 0) + { + throw new ArgumentException(SR.Arg_ExternalException); + } + } +#endif + + actualSortKeyLength = Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? 
null : _sortName, + flags, + pSource, sourceLength, + pSortKey, destination.Length, + null, null, _sortHandle); + } + + if (actualSortKeyLength <= 0) + { + Debug.Assert(actualSortKeyLength == 0, "LCMapStringEx should never return a negative value."); + + // This could fail for a variety of reasons, including NLS being unable + // to allocate a temporary buffer large enough to hold intermediate state, + // or the destination buffer being too small. + + if (Marshal.GetLastWin32Error() == Interop.Errors.ERROR_INSUFFICIENT_BUFFER) + { + ThrowHelper.ThrowArgumentException_DestinationTooShort(); + } + else + { + throw new ArgumentException(SR.Arg_ExternalException); + } + } + + Debug.Assert(actualSortKeyLength <= destination.Length); + return actualSortKeyLength; + } + + private unsafe int NlsGetSortKeyLength(ReadOnlySpan source, CompareOptions options) + { + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert((options & ValidCompareMaskOffFlags) == 0); + + uint flags = LCMAP_SORTKEY | (uint)GetNativeCompareFlags(options); + + // LCMapStringEx doesn't support passing cchSrc = 0, so if given an empty span + // we'll instead normalize to a null-terminated empty string and pass -1 as + // the length to indicate that the implicit null terminator should be used. + + int sourceLength = source.Length; + if (sourceLength == 0) + { + source = string.Empty; + sourceLength = -1; + } + + int sortKeyLength; + + fixed (char* pSource = &MemoryMarshal.GetReference(source)) + { + Debug.Assert(pSource != null); + sortKeyLength = Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName, + flags, + pSource, sourceLength, + null, 0, + null, null, _sortHandle); + } + + if (sortKeyLength <= 0) + { + Debug.Assert(sortKeyLength == 0, "LCMapStringEx should never return a negative value."); + + // This could fail for a variety of reasons, including NLS being unable + // to allocate a temporary buffer large enough to hold intermediate state. 
+ + throw new ArgumentException(SR.Arg_ExternalException); + } + + return sortKeyLength; + } + + private static unsafe bool NlsIsSortable(ReadOnlySpan text) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); - Debug.Assert(text != null); + Debug.Assert(!text.IsEmpty); - return Interop.Kernel32.IsNLSDefinedString(Interop.Kernel32.COMPARE_STRING, 0, IntPtr.Zero, text, length); + fixed (char* pText = &MemoryMarshal.GetReference(text)) + { + return Interop.Kernel32.IsNLSDefinedString(Interop.Kernel32.COMPARE_STRING, 0, IntPtr.Zero, pText, text.Length); + } } private const int COMPARE_OPTIONS_ORDINAL = 0x40000000; // Ordinal diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.cs index 83b475c..7fb13f2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.cs @@ -3,10 +3,12 @@ // See the LICENSE file in the project root for more information. 
using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; using System.Reflection; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Serialization; +using System.Text; using System.Text.Unicode; using Internal.Runtime.CompilerServices; @@ -126,24 +128,31 @@ namespace System.Globalization return CultureInfo.GetCultureInfo(name).CompareInfo; } - public static unsafe bool IsSortable(char ch) + public static bool IsSortable(char ch) { - if (GlobalizationMode.Invariant) - { - return true; - } - - char* pChar = &ch; - return IsSortableCore(pChar, 1); + return IsSortable(MemoryMarshal.CreateReadOnlySpan(ref ch, 1)); } - public static unsafe bool IsSortable(string text) + public static bool IsSortable(string text) { if (text == null) { throw new ArgumentNullException(nameof(text)); } + return IsSortable(text.AsSpan()); + } + + /// + /// Indicates whether a specified Unicode string is sortable. + /// + /// A string of zero or more Unicode characters. + /// + /// if is non-empty and contains + /// only sortable Unicode characters; otherwise, . + /// + public static bool IsSortable(ReadOnlySpan text) + { if (text.Length == 0) { return false; @@ -151,19 +160,26 @@ namespace System.Globalization if (GlobalizationMode.Invariant) { - return true; + return true; // all chars are sortable in invariant mode } - fixed (char* pChar = text) - { - return IsSortableCore(pChar, text.Length); - } + return (GlobalizationMode.UseNls) ? NlsIsSortable(text) : IcuIsSortable(text); } - private static unsafe bool IsSortableCore(char* pChar, int length) => - GlobalizationMode.UseNls ? - NlsIsSortable(pChar, length) : - IcuIsSortable(pChar, length); + /// + /// Indicates whether a specified is sortable. + /// + /// A Unicode scalar value. + /// + /// if is a sortable Unicode scalar + /// value; otherwise, . 
+ /// + public static bool IsSortable(Rune value) + { + Span valueAsUtf16 = stackalloc char[Rune.MaxUtf16CharsPerRune]; + int charCount = value.EncodeToUtf16(valueAsUtf16); + return IsSortable(valueAsUtf16.Slice(0, charCount)); + } private void InitSort(CultureInfo culture) { @@ -257,118 +273,35 @@ namespace System.Globalization public int Compare(string? string1, string? string2, CompareOptions options) { - if (options == CompareOptions.OrdinalIgnoreCase) - { - return string.Compare(string1, string2, StringComparison.OrdinalIgnoreCase); - } - - // Verify the options before we do any real comparison. - if ((options & CompareOptions.Ordinal) != 0) - { - if (options != CompareOptions.Ordinal) - { - throw new ArgumentException(SR.Argument_CompareOptionOrdinal, nameof(options)); - } - - return string.CompareOrdinal(string1, string2); - } - - if ((options & ValidCompareMaskOffFlags) != 0) - { - throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options)); - } + int retVal; // Our paradigm is that null sorts less than any other string and // that two nulls sort as equal. - if (string1 == null) - { - if (string2 == null) - { - return 0; - } - return -1; // null < non-null - } - if (string2 == null) - { - return 1; // non-null > null - } - - if (GlobalizationMode.Invariant) - { - if ((options & CompareOptions.IgnoreCase) != 0) - { - return CompareOrdinalIgnoreCase(string1, string2); - } - - return string.CompareOrdinal(string1, string2); - } - - return CompareStringCore(string1.AsSpan(), string2.AsSpan(), options); - } - - // TODO https://github.com/dotnet/runtime/issues/8890: - // This method shouldn't be necessary, as we should be able to just use the overload - // that takes two spans. But due to this issue, that's adding significant overhead. - internal int Compare(ReadOnlySpan string1, string? 
string2, CompareOptions options) - { - if (options == CompareOptions.OrdinalIgnoreCase) - { - return CompareOrdinalIgnoreCase(string1, string2.AsSpan()); - } - // Verify the options before we do any real comparison. - if ((options & CompareOptions.Ordinal) != 0) - { - if (options != CompareOptions.Ordinal) - { - throw new ArgumentException(SR.Argument_CompareOptionOrdinal, nameof(options)); - } - - return string.CompareOrdinal(string1, string2.AsSpan()); - } - - if ((options & ValidCompareMaskOffFlags) != 0) + if (string1 == null) { - throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options)); + retVal = (string2 == null) ? 0 : -1; + goto CheckOptionsAndReturn; } - - // null sorts less than any other string. if (string2 == null) { - return 1; + retVal = 1; + goto CheckOptionsAndReturn; } - if (GlobalizationMode.Invariant) - { - return (options & CompareOptions.IgnoreCase) != 0 ? - CompareOrdinalIgnoreCase(string1, string2.AsSpan()) : - string.CompareOrdinal(string1, string2.AsSpan()); - } + return Compare(string1.AsSpan(), string2.AsSpan(), options); - return CompareStringCore(string1, string2, options); - } + CheckOptionsAndReturn: - internal int CompareOptionNone(ReadOnlySpan string1, ReadOnlySpan string2) - { - // Check for empty span or span from a null string - if (string1.Length == 0 || string2.Length == 0) - { - return string1.Length - string2.Length; - } + // If we're short-circuiting the globalization logic, we still need to check that + // the provided options were valid. - return GlobalizationMode.Invariant ? - string.CompareOrdinal(string1, string2) : - CompareStringCore(string1, string2, CompareOptions.None); + CheckCompareOptionsForCompare(options); + return retVal; } internal int CompareOptionIgnoreCase(ReadOnlySpan string1, ReadOnlySpan string2) { - // Check for empty span or span from a null string - if (string1.Length == 0 || string2.Length == 0) - { - return string1.Length - string2.Length; - } - return GlobalizationMode.Invariant ? 
CompareOrdinalIgnoreCase(string1, string2) : CompareStringCore(string1, string2, CompareOptions.IgnoreCase); @@ -383,7 +316,7 @@ namespace System.Globalization /// public int Compare(string? string1, int offset1, int length1, string? string2, int offset2, int length2) { - return Compare(string1, offset1, length1, string2, offset2, length2, 0); + return Compare(string1, offset1, length1, string2, offset2, length2, CompareOptions.None); } public int Compare(string? string1, int offset1, string? string2, int offset2, CompareOptions options) @@ -394,92 +327,189 @@ namespace System.Globalization public int Compare(string? string1, int offset1, string? string2, int offset2) { - return Compare(string1, offset1, string2, offset2, 0); + return Compare(string1, offset1, string2, offset2, CompareOptions.None); } public int Compare(string? string1, int offset1, int length1, string? string2, int offset2, int length2, CompareOptions options) { - if (options == CompareOptions.OrdinalIgnoreCase) + ReadOnlySpan span1 = default; + ReadOnlySpan span2 = default; + + if (string1 == null) { - int result = string.Compare(string1, offset1, string2, offset2, length1 < length2 ? length1 : length2, StringComparison.OrdinalIgnoreCase); - if ((length1 != length2) && result == 0) + if (offset1 != 0 || length1 != 0) { - return length1 > length2 ? 1 : -1; + goto BoundsCheckError; } + } + else if (!string1.TryGetSpan(offset1, length1, out span1)) + { + goto BoundsCheckError; + } - return result; + if (string2 == null) + { + if (offset2 != 0 || length2 != 0) + { + goto BoundsCheckError; + } + } + else if (!string2.TryGetSpan(offset2, length2, out span2)) + { + goto BoundsCheckError; } + // At this point both string1 and string2 have been bounds-checked. + + int retVal; + + // Our paradigm is that null sorts less than any other string and + // that two nulls sort as equal. + + if (string1 == null) + { + retVal = (string2 == null) ? 
0 : -1; + goto CheckOptionsAndReturn; + } + if (string2 == null) + { + retVal = 1; + goto CheckOptionsAndReturn; + } + + // At this point we know both string1 and string2 weren't null, + // though they may have been empty. + + Debug.Assert(!Unsafe.IsNullRef(ref MemoryMarshal.GetReference(span1))); + Debug.Assert(!Unsafe.IsNullRef(ref MemoryMarshal.GetReference(span2))); + + return Compare(span1, span2, options); + + CheckOptionsAndReturn: + + // If we're short-circuiting the globalization logic, we still need to check that + // the provided options were valid. + + CheckCompareOptionsForCompare(options); + return retVal; + + BoundsCheckError: + + // We know a bounds check error occurred. Now we just need to figure + // out the correct error message to surface. + if (length1 < 0 || length2 < 0) { throw new ArgumentOutOfRangeException((length1 < 0) ? nameof(length1) : nameof(length2), SR.ArgumentOutOfRange_NeedPosNum); } + if (offset1 < 0 || offset2 < 0) { throw new ArgumentOutOfRangeException((offset1 < 0) ? nameof(offset1) : nameof(offset2), SR.ArgumentOutOfRange_NeedPosNum); } + if (offset1 > (string1 == null ? 0 : string1.Length) - length1) { throw new ArgumentOutOfRangeException(nameof(string1), SR.ArgumentOutOfRange_OffsetLength); } - if (offset2 > (string2 == null ? 0 : string2.Length) - length2) + + Debug.Assert(offset2 > (string2 == null ? 0 : string2.Length) - length2); + throw new ArgumentOutOfRangeException(nameof(string2), SR.ArgumentOutOfRange_OffsetLength); + } + + /// + /// Compares two strings. + /// + /// The first string to compare. + /// The second string to compare. + /// The to use during the comparison. + /// + /// Zero if and are equal; + /// or a negative value if sorts before ; + /// or a positive value if sorts after . + /// + /// + /// contains an unsupported combination of flags. 
+ /// + public int Compare(ReadOnlySpan string1, ReadOnlySpan string2, CompareOptions options = CompareOptions.None) + { + if (string1 == string2) // referential equality + length { - throw new ArgumentOutOfRangeException(nameof(string2), SR.ArgumentOutOfRange_OffsetLength); + CheckCompareOptionsForCompare(options); + return 0; } - if ((options & CompareOptions.Ordinal) != 0) + + if ((options & ValidCompareMaskOffFlags) == 0) { - if (options != CompareOptions.Ordinal) + // Common case: caller is attempting to perform linguistic comparison. + // Pass the flags down to NLS or ICU unless we're running in invariant + // mode, at which point we normalize the flags to Ordinal[IgnoreCase]. + + if (!GlobalizationMode.Invariant) + { + return CompareStringCore(string1, string2, options); + } + else if ((options & CompareOptions.IgnoreCase) == 0) { - throw new ArgumentException(SR.Argument_CompareOptionOrdinal, - nameof(options)); + goto ReturnOrdinal; + } + else + { + goto ReturnOrdinalIgnoreCase; } } - else if ((options & ValidCompareMaskOffFlags) != 0) + else { - throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options)); - } + // Less common case: caller is attempting to perform non-linguistic comparison, + // or an invalid combination of flags was supplied.
- if (string1 == null) - { - if (string2 == null) + if (options == CompareOptions.Ordinal) { - return 0; + goto ReturnOrdinal; + } + else if (options == CompareOptions.OrdinalIgnoreCase) + { + goto ReturnOrdinalIgnoreCase; + } + else + { + ThrowCompareOptionsCheckFailed(options); } - return -1; - } - if (string2 == null) - { - return 1; } - ReadOnlySpan span1 = string1.AsSpan(offset1, length1); - ReadOnlySpan span2 = string2.AsSpan(offset2, length2); + ReturnOrdinal: + return string1.SequenceCompareTo(string2); - if (options == CompareOptions.Ordinal) - { - return string.CompareOrdinal(span1, span2); - } + ReturnOrdinalIgnoreCase: + return CompareOrdinalIgnoreCase(string1, string2); + } - if (GlobalizationMode.Invariant) + // Checks that 'CompareOptions' is valid for a call to Compare, throwing the appropriate + // exception if the check fails. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [StackTraceHidden] + private static void CheckCompareOptionsForCompare(CompareOptions options) + { + // Any combination of defined CompareOptions flags is valid, except for + // Ordinal and OrdinalIgnoreCase, which may only be used in isolation. + + if ((options & ValidCompareMaskOffFlags) != 0) { - if ((options & CompareOptions.IgnoreCase) != 0) + if (options != CompareOptions.Ordinal && options != CompareOptions.OrdinalIgnoreCase) { - return CompareOrdinalIgnoreCase(span1, span2); + ThrowCompareOptionsCheckFailed(options); } - - return string.CompareOrdinal(span1, span2); } - - return CompareStringCore(span1, span2, options); } - // TODO https://github.com/dotnet/runtime/issues/8890: - // This method shouldn't be necessary, as we should be able to just use the overload - // that takes two spans. But due to this issue, that's adding significant overhead. - private unsafe int CompareStringCore(ReadOnlySpan string1, string string2, CompareOptions options) => - GlobalizationMode.UseNls ? 
- NlsCompareString(string1, string2, options) : - IcuCompareString(string1, string2, options); + [DoesNotReturn] + [StackTraceHidden] + private static void ThrowCompareOptionsCheckFailed(CompareOptions options) + { + throw new ArgumentException( + paramName: nameof(options), + message: ((options & CompareOptions.Ordinal) != 0) ? SR.Argument_CompareOptionOrdinal : SR.Argument_InvalidFlag); + } private unsafe int CompareStringCore(ReadOnlySpan string1, ReadOnlySpan string2, CompareOptions options) => GlobalizationMode.UseNls ? @@ -718,54 +748,83 @@ namespace System.Globalization { if (source == null) { - throw new ArgumentNullException(nameof(source)); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source); } if (prefix == null) { - throw new ArgumentNullException(nameof(prefix)); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.prefix); } - if (prefix.Length == 0) + return IsPrefix(source.AsSpan(), prefix.AsSpan(), options); + } + + /// + /// Determines whether a string starts with a specific prefix. + /// + /// The string to search within. + /// The prefix to attempt to match at the start of . + /// The to use during the match. + /// + /// if occurs at the start of ; + /// otherwise, . + /// + /// + /// contains an unsupported combination of flags. + /// + public bool IsPrefix(ReadOnlySpan source, ReadOnlySpan prefix, CompareOptions options = CompareOptions.None) + { + // The empty string is trivially a prefix of every other string. For compat with + // earlier versions of the Framework we'll early-exit here before validating the + // 'options' argument. + + if (prefix.IsEmpty) { return true; } - if (source.Length == 0) - { - return false; - } - if (options == CompareOptions.OrdinalIgnoreCase) + if ((options & ValidIndexMaskOffFlags) == 0) { - return source.StartsWith(prefix, StringComparison.OrdinalIgnoreCase); - } + // Common case: caller is attempting to perform a linguistic search. 
+ // Pass the flags down to NLS or ICU unless we're running in invariant + // mode, at which point we normalize the flags to Ordinal[IgnoreCase]. - if (options == CompareOptions.Ordinal) - { - return source.StartsWith(prefix, StringComparison.Ordinal); + if (!GlobalizationMode.Invariant) + { + return StartsWithCore(source, prefix, options); + } + else if ((options & CompareOptions.IgnoreCase) == 0) + { + goto ReturnOrdinal; + } + else + { + goto ReturnOrdinalIgnoreCase; + } } - - if ((options & ValidIndexMaskOffFlags) != 0) + else { - throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options)); - } + // Less common case: caller is attempting to perform non-linguistic comparison, + // or an invalid combination of flags was supplied. - if (GlobalizationMode.Invariant) - { - return source.StartsWith(prefix, (options & CompareOptions.IgnoreCase) != 0 ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal); + if (options == CompareOptions.Ordinal) + { + goto ReturnOrdinal; + } + else if (options == CompareOptions.OrdinalIgnoreCase) + { + goto ReturnOrdinalIgnoreCase; + } + else + { + ThrowCompareOptionsCheckFailed(options); + } } - return StartsWithCore(source, prefix, options); - } + ReturnOrdinal: + return source.StartsWith(prefix); - internal bool IsPrefix(ReadOnlySpan source, ReadOnlySpan prefix, CompareOptions options) - { - Debug.Assert(prefix.Length != 0); - Debug.Assert(source.Length != 0); - Debug.Assert((options & ValidIndexMaskOffFlags) == 0); - Debug.Assert(!GlobalizationMode.Invariant); - Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0); - - return StartsWithCore(source, prefix, options); + ReturnOrdinalIgnoreCase: + return source.StartsWithOrdinalIgnoreCase(prefix); } private unsafe bool StartsWithCore(ReadOnlySpan source, ReadOnlySpan prefix, CompareOptions options) => @@ -775,7 +834,7 @@ namespace System.Globalization public bool IsPrefix(string source, string prefix) { - return
IsPrefix(source, prefix, 0); + return IsPrefix(source, prefix, CompareOptions.None); } /// @@ -786,59 +845,88 @@ namespace System.Globalization { if (source == null) { - throw new ArgumentNullException(nameof(source)); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source); } if (suffix == null) { - throw new ArgumentNullException(nameof(suffix)); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.suffix); } - if (suffix.Length == 0) + return IsSuffix(source.AsSpan(), suffix.AsSpan(), options); + } + + /// + /// Determines whether a string ends with a specific suffix. + /// + /// The string to search within. + /// The suffix to attempt to match at the end of . + /// The to use during the match. + /// + /// if occurs at the end of ; + /// otherwise, . + /// + /// + /// contains an unsupported combination of flags. + /// + public bool IsSuffix(ReadOnlySpan source, ReadOnlySpan suffix, CompareOptions options = CompareOptions.None) + { + // The empty string is trivially a suffix of every other string. For compat with + // earlier versions of the Framework we'll early-exit here before validating the + // 'options' argument. + + if (suffix.IsEmpty) { return true; } - if (source.Length == 0) - { - return false; - } - if (options == CompareOptions.OrdinalIgnoreCase) + if ((options & ValidIndexMaskOffFlags) == 0) { - return source.EndsWith(suffix, StringComparison.OrdinalIgnoreCase); - } + // Common case: caller is attempting to perform a linguistic search. + // Pass the flags down to NLS or ICU unless we're running in invariant + // mode, at which point we normalize the flags to Ordinal[IgnoreCase].
- if (options == CompareOptions.Ordinal) - { - return source.EndsWith(suffix, StringComparison.Ordinal); + if (!GlobalizationMode.Invariant) + { + return EndsWithCore(source, suffix, options); + } + else if ((options & CompareOptions.IgnoreCase) == 0) + { + goto ReturnOrdinal; + } + else + { + goto ReturnOrdinalIgnoreCase; + } } - - if ((options & ValidIndexMaskOffFlags) != 0) + else { - throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options)); - } + // Less common case: caller is attempting to perform non-linguistic comparison, + // or an invalid combination of flags was supplied. - if (GlobalizationMode.Invariant) - { - return source.EndsWith(suffix, (options & CompareOptions.IgnoreCase) != 0 ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal); + if (options == CompareOptions.Ordinal) + { + goto ReturnOrdinal; + } + else if (options == CompareOptions.OrdinalIgnoreCase) + { + goto ReturnOrdinalIgnoreCase; + } + else + { + ThrowCompareOptionsCheckFailed(options); + } } - return EndsWithCore(source, suffix, options); - } + ReturnOrdinal: + return source.EndsWith(suffix); - internal bool IsSuffix(ReadOnlySpan source, ReadOnlySpan suffix, CompareOptions options) - { - Debug.Assert(suffix.Length != 0); - Debug.Assert(source.Length != 0); - Debug.Assert((options & ValidIndexMaskOffFlags) == 0); - Debug.Assert(!GlobalizationMode.Invariant); - Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0); - - return EndsWithCore(source, suffix, options); + ReturnOrdinalIgnoreCase: + return source.EndsWithOrdinalIgnoreCase(suffix); } public bool IsSuffix(string source, string suffix) { - return IsSuffix(source, suffix, 0); + return IsSuffix(source, suffix, CompareOptions.None); } private unsafe bool EndsWithCore(ReadOnlySpan source, ReadOnlySpan suffix, CompareOptions options) => @@ -852,79 +940,68 @@ namespace System.Globalization /// the specified value is not found. 
If value equals string.Empty, /// startIndex is returned. Throws IndexOutOfRange if startIndex or /// endIndex is less than zero or greater than the length of string. - /// Throws ArgumentException if value is null. + /// Throws ArgumentException if value (as a string) is null. /// public int IndexOf(string source, char value) { - if (source == null) - { - throw new ArgumentNullException(nameof(source)); - } - - return IndexOf(source, value, 0, source.Length, CompareOptions.None); + return IndexOf(source, value, CompareOptions.None); } public int IndexOf(string source, string value) { - if (source == null) - throw new ArgumentNullException(nameof(source)); - - return IndexOf(source, value, 0, source.Length, CompareOptions.None); + return IndexOf(source, value, CompareOptions.None); } public int IndexOf(string source, char value, CompareOptions options) { if (source == null) { - throw new ArgumentNullException(nameof(source)); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source); } - return IndexOf(source, value, 0, source.Length, options); + return IndexOf(source, MemoryMarshal.CreateReadOnlySpan(ref value, 1), options); } public int IndexOf(string source, string value, CompareOptions options) { if (source == null) { - throw new ArgumentNullException(nameof(source)); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source); + } + if (value == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.value); } - return IndexOf(source, value, 0, source.Length, options); + return IndexOf(source.AsSpan(), value.AsSpan(), options); } public int IndexOf(string source, char value, int startIndex) { - if (source == null) - { - throw new ArgumentNullException(nameof(source)); - } - - return IndexOf(source, value, startIndex, source.Length - startIndex, CompareOptions.None); + return IndexOf(source, value, startIndex, CompareOptions.None); } public int IndexOf(string source, string value, int startIndex) { - if (source == null) - throw new 
ArgumentNullException(nameof(source)); - - return IndexOf(source, value, startIndex, source.Length - startIndex, CompareOptions.None); + return IndexOf(source, value, startIndex, CompareOptions.None); } public int IndexOf(string source, char value, int startIndex, CompareOptions options) { if (source == null) { - throw new ArgumentNullException(nameof(source)); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source); } return IndexOf(source, value, startIndex, source.Length - startIndex, options); + } public int IndexOf(string source, string value, int startIndex, CompareOptions options) { if (source == null) { - throw new ArgumentNullException(nameof(source)); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source); } return IndexOf(source, value, startIndex, source.Length - startIndex, options); @@ -944,93 +1021,151 @@ namespace System.Globalization { if (source == null) { - throw new ArgumentNullException(nameof(source)); - } - if (startIndex < 0 || startIndex > source.Length) - { - throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index); - } - if (count < 0 || startIndex > source.Length - count) - { - throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source); } - if (source.Length == 0) + if (!source.TryGetSpan(startIndex, count, out ReadOnlySpan sourceSpan)) { - return -1; + // Bounds check failed - figure out exactly what went wrong so that we can + // surface the correct argument exception. 
+ + if ((uint)startIndex > (uint)source.Length) + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.startIndex, ExceptionResource.ArgumentOutOfRange_Index); + } + else + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_Count); + } } - // Validate CompareOptions - // Ordinal can't be selected with other flags - if ((options & ValidIndexMaskOffFlags) != 0 && (options != CompareOptions.Ordinal && options != CompareOptions.OrdinalIgnoreCase)) + int result = IndexOf(sourceSpan, MemoryMarshal.CreateReadOnlySpan(ref value, 1), options); + if (result >= 0) { - throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options)); + result += startIndex; } - - return IndexOf(source, char.ToString(value), startIndex, count, options, null); + return result; } public unsafe int IndexOf(string source, string value, int startIndex, int count, CompareOptions options) { if (source == null) { - throw new ArgumentNullException(nameof(source)); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source); } if (value == null) { - throw new ArgumentNullException(nameof(value)); - } - if (startIndex > source.Length) - { - throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.value); } - // In Everett we used to return -1 for empty string even if startIndex is negative number so we keeping same behavior here. - // We return 0 if both source and value are empty strings for Everett compatibility too. - if (source.Length == 0) + if (!source.TryGetSpan(startIndex, count, out ReadOnlySpan sourceSpan)) { - if (value.Length == 0) + // Bounds check failed - figure out exactly what went wrong so that we can + // surface the correct argument exception. 
+ + if ((uint)startIndex > (uint)source.Length) { - return 0; + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.startIndex, ExceptionResource.ArgumentOutOfRange_Index); + } + else + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_Count); } - return -1; } - if (startIndex < 0) + int result = IndexOf(sourceSpan, value, options); + if (result >= 0) { - throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index); + result += startIndex; } + return result; + } - if (count < 0 || startIndex > source.Length - count) - { - throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count); + /// + /// Searches for the first occurrence of a substring within a source string. + /// + /// The string to search within. + /// The substring to locate within . + /// The to use during the search. + /// + /// The zero-based index into where the substring + /// first appears; or -1 if cannot be found within . + /// + /// + /// contains an unsupported combination of flags. + /// + public unsafe int IndexOf(ReadOnlySpan source, ReadOnlySpan value, CompareOptions options = CompareOptions.None) + { + if ((options & ValidIndexMaskOffFlags) == 0) + { + // Common case: caller is attempting to perform a linguistic search. + // Pass the flags down to NLS or ICU unless we're running in invariant + // mode, at which point we normalize the flags to Ordinal[IgnoreCase]. + + if (!GlobalizationMode.Invariant) + { + if (value.IsEmpty) + { + return 0; // Empty target string trivially occurs at index 0 of every search space.
+ } + else + { + return IndexOfCore(source, value, options, null /* matchLengthPtr */, fromBeginning: true); + } + } + else if ((options & CompareOptions.IgnoreCase) == 0) + { + goto ReturnOrdinal; + } + else + { + goto ReturnOrdinalIgnoreCase; + } } - - // Validate CompareOptions - // Ordinal can't be selected with other flags - if ((options & ValidIndexMaskOffFlags) != 0 && (options != CompareOptions.Ordinal && options != CompareOptions.OrdinalIgnoreCase)) + else { - throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options)); - } + // Less common case: caller is attempting to perform non-linguistic comparison, + // or an invalid combination of flags was supplied. - return IndexOf(source, value, startIndex, count, options, null); - } + if (options == CompareOptions.Ordinal) + { + goto ReturnOrdinal; + } + else if (options == CompareOptions.OrdinalIgnoreCase) + { + goto ReturnOrdinalIgnoreCase; + } + else + { + ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_InvalidFlag, ExceptionArgument.options); + } + } - internal int IndexOfOrdinalIgnoreCase(ReadOnlySpan source, ReadOnlySpan value) - { - Debug.Assert(!GlobalizationMode.Invariant); - Debug.Assert(!source.IsEmpty); - Debug.Assert(!value.IsEmpty); + ReturnOrdinal: + return source.IndexOf(value); - return IndexOfOrdinalCore(source, value, ignoreCase: true, fromBeginning: true); + ReturnOrdinalIgnoreCase: + return IndexOfOrdinalIgnoreCase(source, value, fromBeginning: true); } - internal int LastIndexOfOrdinal(ReadOnlySpan source, ReadOnlySpan value, bool ignoreCase) - { - Debug.Assert(!GlobalizationMode.Invariant); - Debug.Assert(!source.IsEmpty); - Debug.Assert(!value.IsEmpty); - return IndexOfOrdinalCore(source, value, ignoreCase, fromBeginning: false); + /// + /// Searches for the first occurrence of a within a source string. + /// + /// The string to search within. + /// The to locate within . + /// The to use during the search. 
+ /// + /// The zero-based index into where + /// first appears; or -1 if cannot be found within . + /// + /// + /// contains an unsupported combination of flags. + /// + public int IndexOf(ReadOnlySpan source, Rune value, CompareOptions options = CompareOptions.None) + { + Span valueAsUtf16 = stackalloc char[Rune.MaxUtf16CharsPerRune]; + int charCount = value.EncodeToUtf16(valueAsUtf16); + return IndexOf(source, valueAsUtf16.Slice(0, charCount), options); } private static int IndexOfOrdinalCore(ReadOnlySpan source, ReadOnlySpan value, bool ignoreCase, bool fromBeginning) => @@ -1038,138 +1173,109 @@ namespace System.Globalization NlsIndexOfOrdinalCore(source, value, ignoreCase, fromBeginning) : IcuIndexOfOrdinalCore(source, value, ignoreCase, fromBeginning); - internal unsafe int IndexOf(ReadOnlySpan source, ReadOnlySpan value, CompareOptions options) + internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan source, ReadOnlySpan value, bool fromBeginning) { - Debug.Assert(!GlobalizationMode.Invariant); - Debug.Assert(!source.IsEmpty); - Debug.Assert(!value.IsEmpty); - return IndexOfCore(source, value, options, null, fromBeginning: true); - } - - internal unsafe int LastIndexOf(ReadOnlySpan source, ReadOnlySpan value, CompareOptions options) - { - Debug.Assert(!GlobalizationMode.Invariant); - Debug.Assert(!source.IsEmpty); - Debug.Assert(!value.IsEmpty); - return IndexOfCore(source, value, options, null, fromBeginning: false); - } + if (value.IsEmpty) + { + // Empty target string trivially appears at all indexes of all search spaces. - /// - /// The following IndexOf overload is mainly used by String.Replace. This overload assumes the parameters are already validated - /// and the caller is passing a valid matchLengthPtr pointer. 
- /// - internal unsafe int IndexOf(string source, string value, int startIndex, int count, CompareOptions options, int* matchLengthPtr, bool fromBeginning = true) - { - Debug.Assert(source != null); - Debug.Assert(value != null); - Debug.Assert(startIndex >= 0); + return (fromBeginning) ? 0 : source.Length; + } - if (matchLengthPtr != null) + if (value.Length > source.Length) { - *matchLengthPtr = 0; + // A non-linguistic search compares chars directly against one another, so large + // target strings can never be found inside small search spaces. This check also + // handles empty 'source' spans. + + return -1; } - if (value.Length == 0) + if (GlobalizationMode.Invariant) { - return startIndex; + return InvariantIndexOf(source, value, ignoreCase: true, fromBeginning); } - - if (startIndex >= source.Length) + else { - return -1; + return IndexOfOrdinalCore(source, value, ignoreCase: true, fromBeginning); } + } + + /// + /// The following IndexOf overload is mainly used by String.Replace. This overload assumes the parameters are already validated + /// and the caller is passing a valid matchLengthPtr pointer. + /// + internal unsafe int IndexOf(ReadOnlySpan source, ReadOnlySpan value, int* matchLengthPtr, CompareOptions options, bool fromBeginning) + { + Debug.Assert(matchLengthPtr != null); + *matchLengthPtr = 0; - if (options == CompareOptions.OrdinalIgnoreCase) + if ((options & ValidIndexMaskOffFlags) == 0) { - int res; - if (fromBeginning) + // Common case: caller is attempting to perform a linguistic search. + // Pass the flags down to NLS or ICU unless we're running in invariant + // mode, at which point we normalize the flags to Ordinal[IgnoreCase]. + + if (!GlobalizationMode.Invariant) { - res = IndexOfOrdinal(source, value, startIndex, count, ignoreCase: true); + if (value.IsEmpty) + { + // empty target substring trivially occurs at beginning / end of search space + return (fromBeginning) ?
0 : source.Length; + } + else + { + return IndexOfCore(source, value, options, matchLengthPtr, fromBeginning); + } } - else + else if ((options & CompareOptions.IgnoreCase) == 0) { - res = LastIndexOfOrdinal(source, value, startIndex, count, ignoreCase: true); + goto ReturnOrdinal; } - - if (res >= 0 && matchLengthPtr != null) + else { - *matchLengthPtr = value.Length; + goto ReturnOrdinalIgnoreCase; } - return res; } - - if (GlobalizationMode.Invariant) + else { - bool ignoreCase = (options & (CompareOptions.IgnoreCase | CompareOptions.OrdinalIgnoreCase)) != 0; - int res; + // Less common case: caller is attempting to perform non-linguistic comparison, + // or an invalid combination of flags was supplied. - if (fromBeginning) + if (options == CompareOptions.Ordinal) { - res = IndexOfOrdinal(source, value, startIndex, count, ignoreCase); + goto ReturnOrdinal; } - else + else if (options == CompareOptions.OrdinalIgnoreCase) { - res = LastIndexOfOrdinal(source, value, startIndex, count, ignoreCase); + goto ReturnOrdinalIgnoreCase; } - - if (res >= 0 && matchLengthPtr != null) + else { - *matchLengthPtr = value.Length; + ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_InvalidFlag, ExceptionArgument.options); } - return res; } - if (options == CompareOptions.Ordinal) - { - int retValue; + ReturnOrdinal: + int retVal = (fromBeginning) ? 
source.IndexOf(value) : source.LastIndexOf(value); + goto OrdinalReturn; - if (fromBeginning) - { - retValue = SpanHelpers.IndexOf( - ref Unsafe.Add(ref source.GetRawStringData(), startIndex), - count, - ref value.GetRawStringData(), - value.Length); - } - else - { - retValue = SpanHelpers.LastIndexOf( - ref Unsafe.Add(ref source.GetRawStringData(), startIndex), - count, - ref value.GetRawStringData(), - value.Length); - } + ReturnOrdinalIgnoreCase: + retVal = IndexOfOrdinalIgnoreCase(source, value, fromBeginning); + goto OrdinalReturn; - if (retValue >= 0) - { - retValue += startIndex; - if (matchLengthPtr != null) - { - *matchLengthPtr = value.Length; - } - } + OrdinalReturn: + // Both Ordinal and OrdinalIgnoreCase match by individual code points in a non-linguistic manner. + // Non-BMP code points will never match BMP code points, so given UTF-16 inputs the match length + // will always be equivalent to the target string length. - return retValue; - } - else + if (retVal >= 0) { - if (fromBeginning) - { - // Call the string-based overload, as it special-cases IsFastSort as a perf optimization. - return IndexOfCore(source, value, startIndex, count, options, matchLengthPtr); - } - else - { - return IndexOfCore(source.AsSpan(startIndex, count), value, options, matchLengthPtr, fromBeginning: false); - } + *matchLengthPtr = value.Length; } + return retVal; } - private unsafe int IndexOfCore(string source, string target, int startIndex, int count, CompareOptions options, int* matchLengthPtr) => - GlobalizationMode.UseNls ? - NlsIndexOfCore(source, target, startIndex, count, options, matchLengthPtr) : - IcuIndexOfCore(source, target, startIndex, count, options, matchLengthPtr); - private unsafe int IndexOfCore(ReadOnlySpan source, ReadOnlySpan target, CompareOptions options, int* matchLengthPtr, bool fromBeginning) => GlobalizationMode.UseNls ? 
NlsIndexOfCore(source, target, options, matchLengthPtr, fromBeginning) : @@ -1229,51 +1335,40 @@ namespace System.Globalization /// the specified value is not found. If value equals string.Empty, /// endIndex is returned. Throws IndexOutOfRange if startIndex or /// endIndex is less than zero or greater than the length of string. - /// Throws ArgumentException if value is null. + /// Throws ArgumentException if value (as a string) is null. /// public int LastIndexOf(string source, char value) { - if (source == null) - { - throw new ArgumentNullException(nameof(source)); - } - - // Can't start at negative index, so make sure we check for the length == 0 case. - return LastIndexOf(source, value, source.Length - 1, source.Length, CompareOptions.None); + return LastIndexOf(source, value, CompareOptions.None); } public int LastIndexOf(string source, string value) { - if (source == null) - { - throw new ArgumentNullException(nameof(source)); - } - - // Can't start at negative index, so make sure we check for the length == 0 case. - return LastIndexOf(source, value, source.Length - 1, - source.Length, CompareOptions.None); + return LastIndexOf(source, value, CompareOptions.None); } public int LastIndexOf(string source, char value, CompareOptions options) { if (source == null) { - throw new ArgumentNullException(nameof(source)); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source); } - // Can't start at negative index, so make sure we check for the length == 0 case. 
- return LastIndexOf(source, value, source.Length - 1, source.Length, options); + return LastIndexOf(source, MemoryMarshal.CreateReadOnlySpan(ref value, 1), options); } public int LastIndexOf(string source, string value, CompareOptions options) { if (source == null) { - throw new ArgumentNullException(nameof(source)); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source); + } + if (value == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.value); } - // Can't start at negative index, so make sure we check for the length == 0 case. - return LastIndexOf(source, value, source.Length - 1, source.Length, options); + return LastIndexOf(source.AsSpan(), value.AsSpan(), options); } public int LastIndexOf(string source, char value, int startIndex) @@ -1310,125 +1405,206 @@ namespace System.Globalization { if (source == null) { - throw new ArgumentNullException(nameof(source)); - } - // Validate CompareOptions - // Ordinal can't be selected with other flags - if ((options & ValidIndexMaskOffFlags) != 0 && - (options != CompareOptions.Ordinal) && - (options != CompareOptions.OrdinalIgnoreCase)) - { - throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options)); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source); } - // Special case for 0 length input strings - if (source.Length == 0 && (startIndex == -1 || startIndex == 0)) - { - return -1; - } + TryAgain: - // Make sure we're not out of range - if (startIndex < 0 || startIndex > source.Length) - { - throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index); - } + // Previous versions of the Framework special-cased empty 'source' to allow startIndex = -1 or startIndex = 0, + // ignoring 'count' and short-circuiting the entire operation. We'll silently fix up the 'count' parameter + // if this occurs. + // + // See the comments just before string.IndexOf(string) for more information on how these computations are + // performed. 
- // Make sure that we allow startIndex == source.Length - if (startIndex == source.Length) + if ((uint)startIndex >= (uint)source.Length) { - startIndex--; - if (count > 0) + if (startIndex == -1 && source.Length == 0) { - count--; + count = 0; // normalize } - } + else if (startIndex == source.Length) + { + // The caller likely had an off-by-one error when invoking the API. The Framework has historically + // allowed for this and tried to fix up the parameters, so we'll continue to do so for compat. - // 2nd have of this also catches when startIndex == MAXINT, so MAXINT - 0 + 1 == -1, which is < 0. - if (count < 0 || startIndex - count + 1 < 0) - { - throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count); + startIndex--; + if (count > 0) + { + count--; + } + + goto TryAgain; // guaranteed never to loop more than once + } + else + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.startIndex, ExceptionResource.ArgumentOutOfRange_Index); + } } - if (options == CompareOptions.OrdinalIgnoreCase) + startIndex = startIndex - count + 1; // this will be the actual index where we begin our search + + if (!source.TryGetSpan(startIndex, count, out ReadOnlySpan sourceSpan)) { - return source.LastIndexOf(value.ToString(), startIndex, count, StringComparison.OrdinalIgnoreCase); + ThrowHelper.ThrowCountArgumentOutOfRange_ArgumentOutOfRange_Count(); } - if (GlobalizationMode.Invariant) + int retVal = LastIndexOf(sourceSpan, MemoryMarshal.CreateReadOnlySpan(ref value, 1), options); + if (retVal >= 0) { - return InvariantLastIndexOf(source, char.ToString(value), startIndex, count, (options & (CompareOptions.IgnoreCase | CompareOptions.OrdinalIgnoreCase)) != 0); + retVal += startIndex; } - - return LastIndexOfCore(source, value.ToString(), startIndex, count, options); + return retVal; } public int LastIndexOf(string source, string value, int startIndex, int count, CompareOptions options) { if (source == null) { - throw new 
ArgumentNullException(nameof(source)); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source); } if (value == null) { - throw new ArgumentNullException(nameof(value)); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.value); } - // Validate CompareOptions - // Ordinal can't be selected with other flags - if ((options & ValidIndexMaskOffFlags) != 0 && - (options != CompareOptions.Ordinal) && - (options != CompareOptions.OrdinalIgnoreCase)) - throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options)); + TryAgain: + + // Previous versions of the Framework special-cased empty 'source' to allow startIndex = -1 or startIndex = 0, + // ignoring 'count' and short-circuiting the entire operation. We'll silently fix up the 'count' parameter + // if this occurs. + // + // See the comments just before string.IndexOf(string) for more information on how these computations are + // performed. - // Special case for 0 length input strings - if (source.Length == 0 && (startIndex == -1 || startIndex == 0)) + if ((uint)startIndex >= (uint)source.Length) { - return (value.Length == 0) ? 0 : -1; + if (startIndex == -1 && source.Length == 0) + { + count = 0; // normalize + } + else if (startIndex == source.Length) + { + // The caller likely had an off-by-one error when invoking the API. The Framework has historically + // allowed for this and tried to fix up the parameters, so we'll continue to do so for compat. 
+ + startIndex--; + if (count > 0) + { + count--; + } + + goto TryAgain; // guaranteed never to loop more than once + } + else + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.startIndex, ExceptionResource.ArgumentOutOfRange_Index); + } } - // Make sure we're not out of range - if (startIndex < 0 || startIndex > source.Length) + startIndex = startIndex - count + 1; // this will be the actual index where we begin our search + + if (!source.TryGetSpan(startIndex, count, out ReadOnlySpan sourceSpan)) { - throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index); + ThrowHelper.ThrowCountArgumentOutOfRange_ArgumentOutOfRange_Count(); } - // Make sure that we allow startIndex == source.Length - if (startIndex == source.Length) + int retVal = LastIndexOf(sourceSpan, value, options); + if (retVal >= 0) { - startIndex--; - if (count > 0) + retVal += startIndex; + } + return retVal; + } + + /// + /// Searches for the last occurrence of a substring within a source string. + /// + /// The string to search within. + /// The substring to locate within . + /// The to use during the search. + /// + /// The zero-based index into where the substring + /// last appears; or -1 if cannot be found within . + /// + /// + /// contains an unsupported combination of flags. + /// + public unsafe int LastIndexOf(ReadOnlySpan source, ReadOnlySpan value, CompareOptions options = CompareOptions.None) + { + if ((options & ValidIndexMaskOffFlags) == 0) + { + // Common case: caller is attempting to perform a linguistic search. + // Pass the flags down to NLS or ICU unless we're running in invariant + // mode, at which point we normalize the flags to Ordinal[IgnoreCase]. + + if (!GlobalizationMode.Invariant) { - count--; + if (value.IsEmpty) + { + return source.Length; // Empty target string trivially occurs at the last index of every search space.
+ } + else + { + return IndexOfCore(source, value, options, null /* matchLengthPtr */, fromBeginning: false); + } } - - // empty substrings trivially occur at the end of the search space - if (value.Length == 0 && count >= 0 && startIndex - count + 1 >= 0) + else if ((options & CompareOptions.IgnoreCase) == 0) { - return startIndex + 1; + goto ReturnOrdinal; + } + else + { + goto ReturnOrdinalIgnoreCase; } } - - // 2nd half of this also catches when startIndex == MAXINT, so MAXINT - 0 + 1 == -1, which is < 0. - if (count < 0 || startIndex - count + 1 < 0) + else { - throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count); - } + // Less common case: caller is attempting to perform non-linguistic comparison, + // or an invalid combination of flags was supplied. - if (options == CompareOptions.OrdinalIgnoreCase) - { - return LastIndexOfOrdinal(source, value, startIndex, count, ignoreCase: true); + if (options == CompareOptions.Ordinal) + { + goto ReturnOrdinal; + } + else if (options == CompareOptions.OrdinalIgnoreCase) + { + goto ReturnOrdinalIgnoreCase; + } + else + { + throw new ArgumentException( + paramName: nameof(options), + message: SR.Argument_InvalidFlag); + } } - if (GlobalizationMode.Invariant) - return InvariantLastIndexOf(source, value, startIndex, count, (options & (CompareOptions.IgnoreCase | CompareOptions.OrdinalIgnoreCase)) != 0); + ReturnOrdinal: + return source.LastIndexOf(value); - return LastIndexOfCore(source, value, startIndex, count, options); + ReturnOrdinalIgnoreCase: + return IndexOfOrdinalIgnoreCase(source, value, fromBeginning: false); } - private int LastIndexOfCore(string source, string target, int startIndex, int count, CompareOptions options) => - GlobalizationMode.UseNls ? - NlsLastIndexOfCore(source, target, startIndex, count, options) : - IcuLastIndexOfCore(source, target, startIndex, count, options); + /// + /// Searches for the last occurrence of a within a source string. 
+ /// + /// The string to search within. + /// The to locate within . + /// The to use during the search. + /// + /// The zero-based index into where + /// last appears; or -1 if cannot be found within . + /// + /// + /// contains an unsupported combination of flags. + /// + public unsafe int LastIndexOf(ReadOnlySpan source, Rune value, CompareOptions options = CompareOptions.None) + { + Span valueAsUtf16 = stackalloc char[Rune.MaxUtf16CharsPerRune]; + int charCount = value.EncodeToUtf16(valueAsUtf16); + return LastIndexOf(source, valueAsUtf16.Slice(0, charCount), options); + } internal static int LastIndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase) { @@ -1488,6 +1664,78 @@ namespace System.Globalization NlsCreateSortKey(source, options) : IcuCreateSortKey(source, options); + /// + /// Computes a sort key over the specified input. + /// + /// The text over which to compute the sort key. + /// The buffer into which to write the resulting sort key bytes. + /// The used for computing the sort key. + /// The number of bytes written to . + /// + /// Use to query the required size of . + /// It is acceptable to provide a larger-than-necessary output buffer to this method. + /// + /// + /// is too small to contain the resulting sort key; + /// or contains an unsupported flag; + /// or cannot be processed using the desired + /// under the current . + /// + public int GetSortKey(ReadOnlySpan source, Span destination, CompareOptions options = CompareOptions.None) + { + if ((options & ValidCompareMaskOffFlags) != 0) + { + ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_InvalidFlag, ExceptionArgument.options); + } + + if (GlobalizationMode.Invariant) + { + return InvariantGetSortKey(source, destination, options); + } + else + { + return GetSortKeyCore(source, destination, options); + } + } + + private int GetSortKeyCore(ReadOnlySpan source, Span destination, CompareOptions options) => + GlobalizationMode.UseNls ? 
+ NlsGetSortKey(source, destination, options) : + IcuGetSortKey(source, destination, options); + + /// + /// Returns the length (in bytes) of the sort key that would be produced from the specified input. + /// + /// The text over which to compute the sort key. + /// The used for computing the sort key. + /// The length (in bytes) of the sort key. + /// + /// contains an unsupported flag; + /// or cannot be processed using the desired + /// under the current . + /// + public int GetSortKeyLength(ReadOnlySpan source, CompareOptions options = CompareOptions.None) + { + if ((options & ValidCompareMaskOffFlags) != 0) + { + ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_InvalidFlag, ExceptionArgument.options); + } + + if (GlobalizationMode.Invariant) + { + return InvariantGetSortKeyLength(source, options); + } + else + { + return GetSortKeyLengthCore(source, options); + } + } + + private int GetSortKeyLengthCore(ReadOnlySpan source, CompareOptions options) => + GlobalizationMode.UseNls ? + NlsGetSortKeyLength(source, options) : + IcuGetSortKeyLength(source, options); + public override bool Equals(object? value) { return value is CompareInfo otherCompareInfo @@ -1509,62 +1757,57 @@ namespace System.Globalization { if (source == null) { - throw new ArgumentNullException(nameof(source)); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source); } - if ((options & ValidCompareMaskOffFlags) == 0) - { - // No unsupported flags are set - continue on with the regular logic - if (GlobalizationMode.Invariant) - { - return ((options & CompareOptions.IgnoreCase) != 0) ? 
source.GetHashCodeOrdinalIgnoreCase() : source.GetHashCode(); - } - return GetHashCodeOfStringCore(source, options); - } - else if (options == CompareOptions.Ordinal) - { - // We allow Ordinal in isolation - return source.GetHashCode(); - } - else if (options == CompareOptions.OrdinalIgnoreCase) - { - // We allow OrdinalIgnoreCase in isolation - return source.GetHashCodeOrdinalIgnoreCase(); - } - else - { - // Unsupported combination of flags specified - throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options)); - } + return GetHashCode(source.AsSpan(), options); } public int GetHashCode(ReadOnlySpan source, CompareOptions options) { if ((options & ValidCompareMaskOffFlags) == 0) { - // No unsupported flags are set - continue on with the regular logic - if (GlobalizationMode.Invariant) + // Common case: caller is attempting to get a linguistic sort key. + // Pass the flags down to NLS or ICU unless we're running in invariant + // mode, at which point we normalize the flags to Ordinal[IgnoreCase]. + + if (!GlobalizationMode.Invariant) { - return ((options & CompareOptions.IgnoreCase) != 0) ? string.GetHashCodeOrdinalIgnoreCase(source) : string.GetHashCode(source); + return GetHashCodeOfStringCore(source, options); + } + else if ((options & CompareOptions.IgnoreCase) == 0) + { + goto ReturnOrdinal; + } + else + { + goto ReturnOrdinalIgnoreCase; } - - return GetHashCodeOfStringCore(source, options); - } - else if (options == CompareOptions.Ordinal) - { - // We allow Ordinal in isolation - return string.GetHashCode(source); - } - else if (options == CompareOptions.OrdinalIgnoreCase) - { - // We allow OrdinalIgnoreCase in isolation - return string.GetHashCodeOrdinalIgnoreCase(source); } else { - // Unsupported combination of flags specified - throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options)); + // Less common case: caller is attempting to get a non-linguistic sort key, + // or an invalid combination of flags was supplied.
+ + if (options == CompareOptions.Ordinal) + { + goto ReturnOrdinal; + } + else if (options == CompareOptions.OrdinalIgnoreCase) + { + goto ReturnOrdinalIgnoreCase; + } + else + { + ThrowCompareOptionsCheckFailed(options); + } } + + ReturnOrdinal: + return string.GetHashCode(source); + + ReturnOrdinalIgnoreCase: + return string.GetHashCodeOrdinalIgnoreCase(source); } private unsafe int GetHashCodeOfStringCore(ReadOnlySpan source, CompareOptions options) => diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Globalization.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Globalization.cs index fb339f7..4974928 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Globalization.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Globalization.cs @@ -7,6 +7,7 @@ using System.Globalization; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Text; +using Internal.Runtime.CompilerServices; namespace System { @@ -50,26 +51,20 @@ namespace System switch (comparisonType) { case StringComparison.CurrentCulture: - return CultureInfo.CurrentCulture.CompareInfo.CompareOptionNone(span, other) == 0; - case StringComparison.CurrentCultureIgnoreCase: - return CultureInfo.CurrentCulture.CompareInfo.CompareOptionIgnoreCase(span, other) == 0; + return CultureInfo.CurrentCulture.CompareInfo.Compare(span, other, string.GetCaseCompareOfComparisonCulture(comparisonType)) == 0; case StringComparison.InvariantCulture: - return CompareInfo.Invariant.CompareOptionNone(span, other) == 0; - case StringComparison.InvariantCultureIgnoreCase: - return CompareInfo.Invariant.CompareOptionIgnoreCase(span, other) == 0; + return CompareInfo.Invariant.Compare(span, other, string.GetCaseCompareOfComparisonCulture(comparisonType)) == 0; case StringComparison.Ordinal: return EqualsOrdinal(span, other); - case StringComparison.OrdinalIgnoreCase: + default: + 
Debug.Assert(comparisonType == StringComparison.OrdinalIgnoreCase); return EqualsOrdinalIgnoreCase(span, other); } - - Debug.Fail("StringComparison outside range"); - return false; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -106,28 +101,22 @@ namespace System switch (comparisonType) { case StringComparison.CurrentCulture: - return CultureInfo.CurrentCulture.CompareInfo.CompareOptionNone(span, other); - case StringComparison.CurrentCultureIgnoreCase: - return CultureInfo.CurrentCulture.CompareInfo.CompareOptionIgnoreCase(span, other); + return CultureInfo.CurrentCulture.CompareInfo.Compare(span, other, string.GetCaseCompareOfComparisonCulture(comparisonType)); case StringComparison.InvariantCulture: - return CompareInfo.Invariant.CompareOptionNone(span, other); - case StringComparison.InvariantCultureIgnoreCase: - return CompareInfo.Invariant.CompareOptionIgnoreCase(span, other); + return CompareInfo.Invariant.Compare(span, other, string.GetCaseCompareOfComparisonCulture(comparisonType)); case StringComparison.Ordinal: if (span.Length == 0 || other.Length == 0) return span.Length - other.Length; return string.CompareOrdinal(span, other); - case StringComparison.OrdinalIgnoreCase: + default: + Debug.Assert(comparisonType == StringComparison.OrdinalIgnoreCase); return CompareInfo.CompareOrdinalIgnoreCase(span, other); } - - Debug.Fail("StringComparison outside range"); - return 0; } /// @@ -140,16 +129,6 @@ namespace System { string.CheckStringComparison(comparisonType); - if (value.Length == 0) - { - return 0; // empty substring trivially occurs at every index (including start) of search space - } - - if (span.Length == 0) - { - return -1; - } - if (comparisonType == StringComparison.Ordinal) { return SpanHelpers.IndexOf( @@ -159,11 +138,6 @@ namespace System value.Length); } - if (GlobalizationMode.Invariant) - { - return CompareInfo.InvariantIndexOf(span, value, string.GetCaseCompareOfComparisonCulture(comparisonType) != CompareOptions.None); - } - 
switch (comparisonType) { case StringComparison.CurrentCulture: @@ -176,7 +150,7 @@ namespace System default: Debug.Assert(comparisonType == StringComparison.OrdinalIgnoreCase); - return CompareInfo.Invariant.IndexOfOrdinalIgnoreCase(span, value); + return CompareInfo.IndexOfOrdinalIgnoreCase(span, value, fromBeginning: true); } } @@ -190,19 +164,13 @@ namespace System { string.CheckStringComparison(comparisonType); - if (value.Length == 0) - { - return span.Length; // empty substring trivially occurs at every index (including end) of search space - } - - if (span.Length == 0) - { - return -1; - } - - if (GlobalizationMode.Invariant) + if (comparisonType == StringComparison.Ordinal) { - return CompareInfo.InvariantIndexOf(span, value, string.GetCaseCompareOfComparisonCulture(comparisonType) != CompareOptions.None, fromBeginning: false); + return SpanHelpers.LastIndexOf( + ref MemoryMarshal.GetReference(span), + span.Length, + ref MemoryMarshal.GetReference(value), + value.Length); } switch (comparisonType) @@ -216,8 +184,8 @@ namespace System return CompareInfo.Invariant.LastIndexOf(span, value, string.GetCaseCompareOfComparisonCulture(comparisonType)); default: - Debug.Assert(comparisonType == StringComparison.Ordinal || comparisonType == StringComparison.OrdinalIgnoreCase); - return CompareInfo.Invariant.LastIndexOfOrdinal(span, value, string.GetCaseCompareOfComparisonCulture(comparisonType) != CompareOptions.None); + Debug.Assert(comparisonType == StringComparison.OrdinalIgnoreCase); + return CompareInfo.IndexOfOrdinalIgnoreCase(span, value, fromBeginning: false); } } @@ -335,29 +303,33 @@ namespace System { string.CheckStringComparison(comparisonType); - if (value.Length == 0) + switch (comparisonType) { - return true; // the empty string is trivially a suffix of every other string - } + case StringComparison.CurrentCulture: + case StringComparison.CurrentCultureIgnoreCase: + return CultureInfo.CurrentCulture.CompareInfo.IsSuffix(span, value, 
string.GetCaseCompareOfComparisonCulture(comparisonType)); - if (comparisonType >= StringComparison.Ordinal || GlobalizationMode.Invariant) - { - if (string.GetCaseCompareOfComparisonCulture(comparisonType) == CompareOptions.None) - return span.EndsWith(value); + case StringComparison.InvariantCulture: + case StringComparison.InvariantCultureIgnoreCase: + return CompareInfo.Invariant.IsSuffix(span, value, string.GetCaseCompareOfComparisonCulture(comparisonType)); - return (span.Length >= value.Length) ? (CompareInfo.CompareOrdinalIgnoreCase(span.Slice(span.Length - value.Length), value) == 0) : false; - } + case StringComparison.Ordinal: + return span.EndsWith(value); - if (span.Length == 0) - { - return false; + default: + Debug.Assert(comparisonType == StringComparison.OrdinalIgnoreCase); + return span.EndsWithOrdinalIgnoreCase(value); } - - return (comparisonType >= StringComparison.InvariantCulture) ? - CompareInfo.Invariant.IsSuffix(span, value, string.GetCaseCompareOfComparisonCulture(comparisonType)) : - CultureInfo.CurrentCulture.CompareInfo.IsSuffix(span, value, string.GetCaseCompareOfComparisonCulture(comparisonType)); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static bool EndsWithOrdinalIgnoreCase(this ReadOnlySpan span, ReadOnlySpan value) + => value.Length <= span.Length + && CompareInfo.EqualsOrdinalIgnoreCase( + ref Unsafe.Add(ref MemoryMarshal.GetReference(span), span.Length - value.Length), + ref MemoryMarshal.GetReference(value), + value.Length); + /// /// Determines whether the beginning of the matches the specified when compared using the specified option. 
/// @@ -368,29 +340,30 @@ namespace System { string.CheckStringComparison(comparisonType); - if (value.Length == 0) + switch (comparisonType) { - return true; // the empty string is trivially a prefix of every other string - } + case StringComparison.CurrentCulture: + case StringComparison.CurrentCultureIgnoreCase: + return CultureInfo.CurrentCulture.CompareInfo.IsPrefix(span, value, string.GetCaseCompareOfComparisonCulture(comparisonType)); - if (comparisonType >= StringComparison.Ordinal || GlobalizationMode.Invariant) - { - if (string.GetCaseCompareOfComparisonCulture(comparisonType) == CompareOptions.None) - return span.StartsWith(value); + case StringComparison.InvariantCulture: + case StringComparison.InvariantCultureIgnoreCase: + return CompareInfo.Invariant.IsPrefix(span, value, string.GetCaseCompareOfComparisonCulture(comparisonType)); - return (span.Length >= value.Length) ? (CompareInfo.CompareOrdinalIgnoreCase(span.Slice(0, value.Length), value) == 0) : false; - } + case StringComparison.Ordinal: + return span.StartsWith(value); - if (span.Length == 0) - { - return false; + default: + Debug.Assert(comparisonType == StringComparison.OrdinalIgnoreCase); + return span.StartsWithOrdinalIgnoreCase(value); } - - return (comparisonType >= StringComparison.InvariantCulture) ? - CompareInfo.Invariant.IsPrefix(span, value, string.GetCaseCompareOfComparisonCulture(comparisonType)) : - CultureInfo.CurrentCulture.CompareInfo.IsPrefix(span, value, string.GetCaseCompareOfComparisonCulture(comparisonType)); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static bool StartsWithOrdinalIgnoreCase(this ReadOnlySpan span, ReadOnlySpan value) + => value.Length <= span.Length + && CompareInfo.EqualsOrdinalIgnoreCase(ref MemoryMarshal.GetReference(span), ref MemoryMarshal.GetReference(value), value.Length); + /// /// Returns an enumeration of from the provided span. 
/// diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index a15d9b9..2fe2781 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -968,7 +968,7 @@ namespace System public string Replace(string oldValue, string? newValue, bool ignoreCase, CultureInfo? culture) { - return ReplaceCore(oldValue, newValue, culture, ignoreCase ? CompareOptions.IgnoreCase : CompareOptions.None); + return ReplaceCore(oldValue, newValue, culture?.CompareInfo, ignoreCase ? CompareOptions.IgnoreCase : CompareOptions.None); } public string Replace(string oldValue, string? newValue, StringComparison comparisonType) @@ -977,78 +977,92 @@ namespace System { case StringComparison.CurrentCulture: case StringComparison.CurrentCultureIgnoreCase: - return ReplaceCore(oldValue, newValue, CultureInfo.CurrentCulture, GetCaseCompareOfComparisonCulture(comparisonType)); + return ReplaceCore(oldValue, newValue, CultureInfo.CurrentCulture.CompareInfo, GetCaseCompareOfComparisonCulture(comparisonType)); case StringComparison.InvariantCulture: case StringComparison.InvariantCultureIgnoreCase: - return ReplaceCore(oldValue, newValue, CultureInfo.InvariantCulture, GetCaseCompareOfComparisonCulture(comparisonType)); + return ReplaceCore(oldValue, newValue, CompareInfo.Invariant, GetCaseCompareOfComparisonCulture(comparisonType)); case StringComparison.Ordinal: return Replace(oldValue, newValue); case StringComparison.OrdinalIgnoreCase: - return ReplaceCore(oldValue, newValue, CultureInfo.InvariantCulture, CompareOptions.OrdinalIgnoreCase); + return ReplaceCore(oldValue, newValue, CompareInfo.Invariant, CompareOptions.OrdinalIgnoreCase); default: throw new ArgumentException(SR.NotSupported_StringComparison, nameof(comparisonType)); } } - private unsafe string ReplaceCore(string oldValue, string? 
newValue, CultureInfo? culture, CompareOptions options) + private string ReplaceCore(string oldValue, string? newValue, CompareInfo? ci, CompareOptions options) { - if (oldValue == null) + if (oldValue is null) + { throw new ArgumentNullException(nameof(oldValue)); + } + if (oldValue.Length == 0) + { throw new ArgumentException(SR.Argument_StringZeroLength, nameof(oldValue)); + } // If they asked to replace oldValue with a null, replace all occurrences - // with the empty string. - newValue ??= string.Empty; + // with the empty string. AsSpan() will normalize appropriately. + // + // If inner ReplaceCore method returns null, it means no substitutions were + // performed, so as an optimization we'll return the original string. - CultureInfo referenceCulture = culture ?? CultureInfo.CurrentCulture; - var result = new ValueStringBuilder(stackalloc char[256]); - result.EnsureCapacity(this.Length); + return ReplaceCore(this, oldValue.AsSpan(), newValue.AsSpan(), ci ?? CultureInfo.CurrentCulture.CompareInfo, options) + ?? this; + } - int startIndex = 0; - int index = 0; + private static unsafe string? ReplaceCore(ReadOnlySpan searchSpace, ReadOnlySpan oldValue, ReadOnlySpan newValue, CompareInfo compareInfo, CompareOptions options) + { + Debug.Assert(!oldValue.IsEmpty); + Debug.Assert(compareInfo != null); - int matchLength = 0; + var result = new ValueStringBuilder(stackalloc char[256]); + result.EnsureCapacity(searchSpace.Length); + int matchLength = 0; bool hasDoneAnyReplacements = false; - CompareInfo ci = referenceCulture.CompareInfo; - do + while (true) { - index = ci.IndexOf(this, oldValue, startIndex, this.Length - startIndex, options, &matchLength); + int index = compareInfo.IndexOf(searchSpace, oldValue, &matchLength, options, fromBeginning: true); // There's the possibility that 'oldValue' has zero collation weight (empty string equivalent). // If this is the case, we behave as if there are no more substitutions to be made. 
- if (index >= 0 && matchLength > 0) + if (index < 0 || matchLength == 0) { - // append the unmodified portion of string - result.Append(this.AsSpan(startIndex, index - startIndex)); + break; + } - // append the replacement - result.Append(newValue); + // append the unmodified portion of search space + result.Append(searchSpace.Slice(0, index)); - startIndex = index + matchLength; - hasDoneAnyReplacements = true; - } - else if (!hasDoneAnyReplacements) - { - // small optimization, - // if we have not done any replacements, - // we will return the original string - result.Dispose(); - return this; - } - else - { - result.Append(this.AsSpan(startIndex, this.Length - startIndex)); - } - } while (index >= 0); + // append the replacement + result.Append(newValue); + + searchSpace = searchSpace.Slice(index + matchLength); + hasDoneAnyReplacements = true; + } + + // Didn't find 'oldValue' in the remaining search space, or the match + // consisted only of zero collation weight characters. As an optimization, + // if we have not yet performed any replacements, we'll save the + // allocation. + + if (!hasDoneAnyReplacements) + { + result.Dispose(); + return null; + } + + // Append what remains of the search space, then allocate the new string. 
+ result.Append(searchSpace); return result.ToString(); } diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/String.Searching.cs index 93de7fa..77162ee 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Searching.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Searching.cs @@ -310,16 +310,6 @@ namespace System public int IndexOf(string value, int startIndex, int count) { - if (startIndex < 0 || startIndex > this.Length) - { - throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index); - } - - if (count < 0 || count > this.Length - startIndex) - { - throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count); - } - return IndexOf(value, startIndex, count, StringComparison.CurrentCulture); } @@ -335,26 +325,7 @@ namespace System public int IndexOf(string value, int startIndex, int count, StringComparison comparisonType) { - // Validate inputs - if (value == null) - throw new ArgumentNullException(nameof(value)); - - if (startIndex < 0 || startIndex > this.Length) - throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index); - - if (count < 0 || startIndex > this.Length - count) - throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count); - - if (comparisonType == StringComparison.Ordinal) - { - int result = SpanHelpers.IndexOf( - ref Unsafe.Add(ref this._firstChar, startIndex), - count, - ref value._firstChar, - value.Length); - - return (result >= 0 ? startIndex : 0) + result; - } + // Parameter checking will be done by CompareInfo.IndexOf. 
switch (comparisonType) { @@ -366,11 +337,14 @@ namespace System case StringComparison.InvariantCultureIgnoreCase: return CompareInfo.Invariant.IndexOf(this, value, startIndex, count, GetCaseCompareOfComparisonCulture(comparisonType)); + case StringComparison.Ordinal: case StringComparison.OrdinalIgnoreCase: - return CompareInfo.IndexOfOrdinal(this, value, startIndex, count, GetCaseCompareOfComparisonCulture(comparisonType) != CompareOptions.None); + return CompareInfo.Invariant.IndexOf(this, value, startIndex, count, GetCompareOptionsFromOrdinalStringComparison(comparisonType)); default: - throw new ArgumentException(SR.NotSupported_StringComparison, nameof(comparisonType)); + throw (value is null) + ? new ArgumentNullException(nameof(value)) + : new ArgumentException(SR.NotSupported_StringComparison, nameof(comparisonType)); } } @@ -498,11 +472,6 @@ namespace System public int LastIndexOf(string value, int startIndex, int count) { - if (count < 0) - { - throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count); - } - return LastIndexOf(value, startIndex, count, StringComparison.CurrentCulture); } diff --git a/src/libraries/System.Private.CoreLib/src/System/String.cs b/src/libraries/System.Private.CoreLib/src/System/String.cs index f29d327..00582d5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.cs @@ -378,6 +378,28 @@ namespace System public static implicit operator ReadOnlySpan(string? value) => value != null ? new ReadOnlySpan(ref value.GetRawStringData(), value.Length) : default; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal bool TryGetSpan(int startIndex, int count, out ReadOnlySpan slice) + { +#if TARGET_64BIT + // See comment in Span.Slice for how this works. 
+ if ((ulong)(uint)startIndex + (ulong)(uint)count > (ulong)(uint)Length) + { + slice = default; + return false; + } +#else + if ((uint)startIndex > (uint)Length || (uint)count > (uint)(Length - startIndex)) + { + slice = default; + return false; + } +#endif + + slice = new ReadOnlySpan(ref Unsafe.Add(ref _firstChar, startIndex), count); + return true; + } + public object Clone() { return this; diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs index 3a703d6..b2cbe18 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs @@ -20,6 +20,9 @@ namespace System.Text [DebuggerDisplay("{DebuggerDisplay,nq}")] public readonly struct Rune : IComparable, IEquatable { + internal const int MaxUtf16CharsPerRune = 2; // supplementary plane code points are encoded as 2 UTF-16 code units + internal const int MaxUtf8BytesPerRune = 4; // supplementary plane code points are encoded as 4 UTF-8 code units + private const char HighSurrogateStart = '\ud800'; private const char LowSurrogateStart = '\udc00'; private const int HighSurrogateRange = 0x3FF; @@ -163,7 +166,15 @@ namespace System.Text /// /// The return value will be 1 or 2. /// - public int Utf16SequenceLength => UnicodeUtility.GetUtf16SequenceLength(_value); + public int Utf16SequenceLength + { + get + { + int codeUnitCount = UnicodeUtility.GetUtf16SequenceLength(_value); + Debug.Assert(codeUnitCount > 0 && codeUnitCount <= MaxUtf16CharsPerRune); + return codeUnitCount; + } + } /// /// Returns the length in code units of the @@ -172,7 +183,15 @@ namespace System.Text /// /// The return value will be 1 through 4, inclusive. 
/// - public int Utf8SequenceLength => UnicodeUtility.GetUtf8SequenceLength(_value); + public int Utf8SequenceLength + { + get + { + int codeUnitCount = UnicodeUtility.GetUtf8SequenceLength(_value); + Debug.Assert(codeUnitCount > 0 && codeUnitCount <= MaxUtf8BytesPerRune); + return codeUnitCount; + } + } /// /// Returns the Unicode scalar value as an integer. @@ -185,8 +204,8 @@ namespace System.Text Debug.Assert(!GlobalizationMode.Invariant, "This should've been checked by the caller."); Debug.Assert(textInfo != null, "This should've been checked by the caller."); - Span original = stackalloc char[2]; // worst case scenario = 2 code units (for a surrogate pair) - Span modified = stackalloc char[2]; // case change should preserve UTF-16 code unit count + Span original = stackalloc char[MaxUtf16CharsPerRune]; + Span modified = stackalloc char[MaxUtf16CharsPerRune]; int charCount = rune.EncodeToUtf16(original); original = original.Slice(0, charCount); @@ -220,8 +239,8 @@ namespace System.Text Debug.Assert(!GlobalizationMode.Invariant, "This should've been checked by the caller."); Debug.Assert(culture != null, "This should've been checked by the caller."); - Span original = stackalloc char[2]; // worst case scenario = 2 code units (for a surrogate pair) - Span modified = stackalloc char[2]; // case change should preserve UTF-16 code unit count + Span original = stackalloc char[MaxUtf16CharsPerRune]; // worst case scenario = 2 code units (for a surrogate pair) + Span modified = stackalloc char[MaxUtf16CharsPerRune]; // case change should preserve UTF-16 code unit count int charCount = rune.EncodeToUtf16(original); original = original.Slice(0, charCount); @@ -885,7 +904,7 @@ namespace System.Text } else { - Span buffer = stackalloc char[2]; + Span buffer = stackalloc char[MaxUtf16CharsPerRune]; UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar(_value, out buffer[0], out buffer[1]); return buffer.ToString(); } diff --git 
a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Searching.cs index ea295df..cfaaaf8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Searching.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Searching.cs @@ -281,7 +281,7 @@ namespace System.Text } else { - idx = compareInfo.IndexOf(thisTranscodedToUtf16, otherTranscodedToUtf16, 0, thisTranscodedToUtf16.Length, compareOptions, &matchLength, fromBeginning); + idx = compareInfo.IndexOf(thisTranscodedToUtf16, otherTranscodedToUtf16, &matchLength, compareOptions, fromBeginning); } #else Debug.Assert(comparisonType == StringComparison.OrdinalIgnoreCase); diff --git a/src/libraries/System.Private.CoreLib/src/System/ThrowHelper.cs b/src/libraries/System.Private.CoreLib/src/System/ThrowHelper.cs index cf01b03..30a11b6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/ThrowHelper.cs +++ b/src/libraries/System.Private.CoreLib/src/System/ThrowHelper.cs @@ -694,6 +694,12 @@ namespace System return "codePoint"; case ExceptionArgument.str: return "str"; + case ExceptionArgument.options: + return "options"; + case ExceptionArgument.prefix: + return "prefix"; + case ExceptionArgument.suffix: + return "suffix"; default: Debug.Fail("The enum value is not defined, please check the ExceptionArgument Enum."); return ""; @@ -846,6 +852,8 @@ namespace System return SR.Arg_TypeNotSupported; case ExceptionResource.Argument_SpansMustHaveSameLength: return SR.Argument_SpansMustHaveSameLength; + case ExceptionResource.Argument_InvalidFlag: + return SR.Argument_InvalidFlag; default: Debug.Fail("The enum value is not defined, please check the ExceptionResource Enum."); return ""; @@ -945,6 +953,9 @@ namespace System year, codePoint, str, + options, + prefix, + suffix, } // @@ -1017,5 +1028,6 @@ namespace System Rank_MultiDimNotSupported, Arg_TypeNotSupported, Argument_SpansMustHaveSameLength, + 
Argument_InvalidFlag, } } diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs index 3cea060..fa69202 100644 --- a/src/libraries/System.Runtime/ref/System.Runtime.cs +++ b/src/libraries/System.Runtime/ref/System.Runtime.cs @@ -5764,6 +5764,7 @@ namespace System.Globalization public int LCID { get { throw null; } } public string Name { get { throw null; } } public System.Globalization.SortVersion Version { get { throw null; } } + public int Compare(System.ReadOnlySpan string1, System.ReadOnlySpan string2, System.Globalization.CompareOptions options = System.Globalization.CompareOptions.None) { throw null; } public int Compare(string? string1, int offset1, int length1, string? string2, int offset2, int length2) { throw null; } public int Compare(string? string1, int offset1, int length1, string? string2, int offset2, int length2, System.Globalization.CompareOptions options) { throw null; } public int Compare(string? string1, int offset1, string? 
string2, int offset2) { throw null; } @@ -5780,6 +5781,8 @@ namespace System.Globalization public int GetHashCode(string source, System.Globalization.CompareOptions options) { throw null; } public System.Globalization.SortKey GetSortKey(string source) { throw null; } public System.Globalization.SortKey GetSortKey(string source, System.Globalization.CompareOptions options) { throw null; } + public int GetSortKey(System.ReadOnlySpan source, System.Span destination, System.Globalization.CompareOptions options = System.Globalization.CompareOptions.None) { throw null; } + public int GetSortKeyLength(System.ReadOnlySpan source, System.Globalization.CompareOptions options = System.Globalization.CompareOptions.None) { throw null; } public int IndexOf(string source, char value) { throw null; } public int IndexOf(string source, char value, System.Globalization.CompareOptions options) { throw null; } public int IndexOf(string source, char value, int startIndex) { throw null; } @@ -5792,12 +5795,18 @@ namespace System.Globalization public int IndexOf(string source, string value, int startIndex, System.Globalization.CompareOptions options) { throw null; } public int IndexOf(string source, string value, int startIndex, int count) { throw null; } public int IndexOf(string source, string value, int startIndex, int count, System.Globalization.CompareOptions options) { throw null; } + public int IndexOf(System.ReadOnlySpan source, System.ReadOnlySpan value, System.Globalization.CompareOptions options = System.Globalization.CompareOptions.None) { throw null; } + public int IndexOf(System.ReadOnlySpan source, System.Text.Rune value, System.Globalization.CompareOptions options = System.Globalization.CompareOptions.None) { throw null; } public bool IsPrefix(string source, string prefix) { throw null; } public bool IsPrefix(string source, string prefix, System.Globalization.CompareOptions options) { throw null; } + public bool IsPrefix(System.ReadOnlySpan source, System.ReadOnlySpan 
prefix, System.Globalization.CompareOptions options = System.Globalization.CompareOptions.None) { throw null; } public static bool IsSortable(char ch) { throw null; } + public static bool IsSortable(System.ReadOnlySpan text) { throw null; } public static bool IsSortable(string text) { throw null; } + public static bool IsSortable(System.Text.Rune value) { throw null; } public bool IsSuffix(string source, string suffix) { throw null; } public bool IsSuffix(string source, string suffix, System.Globalization.CompareOptions options) { throw null; } + public bool IsSuffix(System.ReadOnlySpan source, System.ReadOnlySpan suffix, System.Globalization.CompareOptions options = System.Globalization.CompareOptions.None) { throw null; } public int LastIndexOf(string source, char value) { throw null; } public int LastIndexOf(string source, char value, System.Globalization.CompareOptions options) { throw null; } public int LastIndexOf(string source, char value, int startIndex) { throw null; } @@ -5810,6 +5819,8 @@ namespace System.Globalization public int LastIndexOf(string source, string value, int startIndex, System.Globalization.CompareOptions options) { throw null; } public int LastIndexOf(string source, string value, int startIndex, int count) { throw null; } public int LastIndexOf(string source, string value, int startIndex, int count, System.Globalization.CompareOptions options) { throw null; } + public int LastIndexOf(System.ReadOnlySpan source, System.ReadOnlySpan value, System.Globalization.CompareOptions options = System.Globalization.CompareOptions.None) { throw null; } + public int LastIndexOf(System.ReadOnlySpan source, System.Text.Rune value, System.Globalization.CompareOptions options = System.Globalization.CompareOptions.None) { throw null; } void System.Runtime.Serialization.IDeserializationCallback.OnDeserialization(object sender) { } public override string ToString() { throw null; } }