From 480acec5f17e7022048fce678d1c51ec5c6bc554 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Fri, 27 Dec 2019 13:52:14 -0500 Subject: [PATCH] Remove early exit logic in GetSortKey-related code paths (#1078) --- .../tests/CompareInfo/CompareInfoTests.cs | 21 ++++--- .../tests/Invariant/InvariantMode.cs | 12 ++++ .../src/System/Globalization/CompareInfo.Unix.cs | 30 +++------- .../System/Globalization/CompareInfo.Windows.cs | 68 ++++++++++++---------- 4 files changed, 72 insertions(+), 59 deletions(-) diff --git a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.cs b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.cs index 6e6f67e..82e0e55 100644 --- a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.cs +++ b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.cs @@ -55,6 +55,7 @@ namespace System.Globalization.Tests new object[] { "abc", CompareOptions.Ordinal, "ABC", CompareOptions.Ordinal, false }, new object[] { "abc", CompareOptions.Ordinal, "abc", CompareOptions.Ordinal, true }, new object[] { "abc", CompareOptions.None, "abc", CompareOptions.None, true }, + new object[] { "", CompareOptions.None, "\u200c", CompareOptions.None, true }, // see comment at bottom of SortKey_TestData }; [Theory] @@ -66,12 +67,6 @@ namespace System.Globalization.Tests } [Fact] - public void GetHashCode_EmptyString() - { - Assert.Equal(0, CultureInfo.InvariantCulture.CompareInfo.GetHashCode("", CompareOptions.None)); - } - - [Fact] public void GetHashCode_Invalid() { AssertExtensions.Throws("source", () => CultureInfo.InvariantCulture.CompareInfo.GetHashCode(null, CompareOptions.None)); @@ -296,6 +291,12 @@ namespace System.Globalization.Tests // Spanish yield return new object[] { new CultureInfo("es-ES").CompareInfo, "llegar", "lugar", CompareOptions.None, -1 }; + + // Zero-weight code points + // In both NLS (Windows) and ICU the code point U+200C ZERO WIDTH NON-JOINER has a zero weight, + // so it's compared as equal to the empty string. This means that we can't special-case GetHashCode("") + // and return a fixed value; we actually need to call the underlying OS or ICU API to calculate the sort key. + yield return new object[] { s_invariantCompare, "", "\u200c", CompareOptions.None, 0 }; } public static IEnumerable IndexOf_TestData() @@ -439,9 +440,13 @@ namespace System.Globalization.Tests } [Fact] - public void GetHashCode_EmptySpan() + public void GetHashCode_NullAndEmptySpan() { - Assert.Equal(0, CultureInfo.InvariantCulture.CompareInfo.GetHashCode(ReadOnlySpan.Empty, CompareOptions.None)); + // Ensure that null spans and non-null empty spans produce the same hash code. + + int hashCodeOfNullSpan = CultureInfo.InvariantCulture.CompareInfo.GetHashCode(ReadOnlySpan.Empty, CompareOptions.None); + int hashCodeOfNotNullEmptySpan = CultureInfo.InvariantCulture.CompareInfo.GetHashCode("".AsSpan(), CompareOptions.None); + Assert.Equal(hashCodeOfNullSpan, hashCodeOfNotNullEmptySpan); } [Fact] diff --git a/src/libraries/System.Globalization/tests/Invariant/InvariantMode.cs b/src/libraries/System.Globalization/tests/Invariant/InvariantMode.cs index 2ea617b..1a186bd 100644 --- a/src/libraries/System.Globalization/tests/Invariant/InvariantMode.cs +++ b/src/libraries/System.Globalization/tests/Invariant/InvariantMode.cs @@ -627,6 +627,18 @@ namespace System.Globalization.Tests Assert.Equal(version, new CultureInfo(cultureName).CompareInfo.Version); } + [Fact] + public void TestSortKey_ZeroWeightCodePoints() + { + // In the invariant globalization mode, there's no such thing as a zero-weight code point, + // so the U+200C ZERO WIDTH NON-JOINER code point contributes to the final sort key value. + + CompareInfo compareInfo = CultureInfo.InvariantCulture.CompareInfo; + SortKey sortKeyForEmptyString = compareInfo.GetSortKey(""); + SortKey sortKeyForZeroWidthJoiner = compareInfo.GetSortKey("\u200c"); + Assert.NotEqual(0, SortKey.Compare(sortKeyForEmptyString, sortKeyForZeroWidthJoiner)); + } + private static StringComparison GetStringComparison(CompareOptions options) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Unix.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Unix.cs index f77c391..8f7a6e4 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Unix.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Unix.cs @@ -821,24 +821,17 @@ namespace System.Globalization throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options)); } - byte [] keyData; - if (source.Length == 0) + byte[] keyData; + fixed (char* pSource = source) { - keyData = Array.Empty(); - } - else - { - fixed (char* pSource = source) - { - int sortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, null, 0, options); - keyData = new byte[sortKeyLength]; + int sortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, null, 0, options); + keyData = new byte[sortKeyLength]; - fixed (byte* pSortKey = keyData) + fixed (byte* pSortKey = keyData) + { + if (Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pSortKey, sortKeyLength, options) != sortKeyLength) { - if (Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pSortKey, sortKeyLength, options) != sortKeyLength) - { - throw new ArgumentException(SR.Arg_ExternalException); - } + throw new ArgumentException(SR.Arg_ExternalException); } } } @@ -894,11 +887,6 @@ namespace System.Globalization Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0); - if (source.Length == 0) - { - return 0; - } - // according to ICU User Guide the performance of ucol_getSortKey is worse when it is called with null output buffer // the solution is to try to fill the sort key in a temporary buffer of size equal 4 x string length // 1MB is the biggest array that can be rented from ArrayPool.Shared without memory allocation @@ -909,7 +897,7 @@ namespace System.Globalization ? stackalloc byte[1024] : (borrowedArray = ArrayPool.Shared.Rent(sortKeyLength)); - fixed (char* pSource = &MemoryMarshal.GetReference(source)) + fixed (char* pSource = &MemoryMarshal.GetNonNullPinnableReference(source)) { fixed (byte* pSortKey = &MemoryMarshal.GetReference(sortKey)) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Windows.cs index 595f642..6ae7a7d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Windows.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Windows.cs @@ -129,18 +129,24 @@ namespace System.Globalization Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0); - if (source.Length == 0) + // LCMapStringEx doesn't support passing cchSrc = 0, so if given a null or empty input + // we'll normalize it to an empty null-terminated string and pass -1 to indicate that + // the underlying OS function should read until it encounters the null terminator. + + int sourceLength = source.Length; + if (sourceLength == 0) { - return 0; + source = string.Empty; + sourceLength = -1; } uint flags = LCMAP_SORTKEY | (uint)GetNativeCompareFlags(options); - fixed (char* pSource = source) + fixed (char* pSource = &MemoryMarshal.GetReference(source)) { int sortKeyLength = Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName, flags, - pSource, source.Length /* in chars */, + pSource, sourceLength /* in chars */, null, 0, null, null, _sortHandle); if (sortKeyLength == 0) @@ -162,7 +168,7 @@ namespace System.Globalization { if (Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName, flags, - pSource, source.Length /* in chars */, + pSource, sourceLength /* in chars */, pSortKey, sortKeyLength, null, null, _sortHandle) != sortKeyLength) { @@ -505,38 +511,40 @@ namespace System.Globalization } byte[] keyData; - if (source.Length == 0) + uint flags = LCMAP_SORTKEY | (uint)GetNativeCompareFlags(options); + + // LCMapStringEx doesn't support passing cchSrc = 0, so if given an empty string + // we'll instead pass -1 to indicate a null-terminated empty string. + + int sourceLength = source.Length; + if (sourceLength == 0) { - keyData = Array.Empty(); + sourceLength = -1; } - else - { - uint flags = LCMAP_SORTKEY | (uint)GetNativeCompareFlags(options); - fixed (char* pSource = source) + fixed (char* pSource = source) + { + int sortKeyLength = Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName, + flags, + pSource, sourceLength, + null, 0, + null, null, _sortHandle); + if (sortKeyLength == 0) { - int sortKeyLength = Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName, - flags, - pSource, source.Length, - null, 0, - null, null, _sortHandle); - if (sortKeyLength == 0) - { - throw new ArgumentException(SR.Arg_ExternalException); - } + throw new ArgumentException(SR.Arg_ExternalException); + } - keyData = new byte[sortKeyLength]; + keyData = new byte[sortKeyLength]; - fixed (byte* pBytes = keyData) + fixed (byte* pBytes = keyData) + { + if (Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName, + flags, + pSource, sourceLength, + pBytes, keyData.Length, + null, null, _sortHandle) != sortKeyLength) { - if (Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName, - flags, - pSource, source.Length, - pBytes, keyData.Length, - null, null, _sortHandle) != sortKeyLength) - { - throw new ArgumentException(SR.Arg_ExternalException); - } + throw new ArgumentException(SR.Arg_ExternalException); } } } -- 2.7.4