Remove early exit logic in GetSortKey-related code paths (#1078)
authorLevi Broderick <GrabYourPitchforks@users.noreply.github.com>
Fri, 27 Dec 2019 18:52:14 +0000 (13:52 -0500)
committerJan Kotas <jkotas@microsoft.com>
Fri, 27 Dec 2019 18:52:14 +0000 (10:52 -0800)
src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.cs
src/libraries/System.Globalization/tests/Invariant/InvariantMode.cs
src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Unix.cs
src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Windows.cs

index 6e6f67e..82e0e55 100644 (file)
@@ -55,6 +55,7 @@ namespace System.Globalization.Tests
             new object[] { "abc", CompareOptions.Ordinal, "ABC", CompareOptions.Ordinal, false },
             new object[] { "abc", CompareOptions.Ordinal, "abc", CompareOptions.Ordinal, true },
             new object[] { "abc", CompareOptions.None, "abc", CompareOptions.None, true },
+            new object[] { "", CompareOptions.None, "\u200c", CompareOptions.None, true }, // see comment at bottom of SortKey_TestData
         };
 
         [Theory]
@@ -66,12 +67,6 @@ namespace System.Globalization.Tests
         }
 
         [Fact]
-        public void GetHashCode_EmptyString()
-        {
-            Assert.Equal(0, CultureInfo.InvariantCulture.CompareInfo.GetHashCode("", CompareOptions.None));
-        }
-
-        [Fact]
         public void GetHashCode_Invalid()
         {
             AssertExtensions.Throws<ArgumentNullException>("source", () => CultureInfo.InvariantCulture.CompareInfo.GetHashCode(null, CompareOptions.None));
@@ -296,6 +291,12 @@ namespace System.Globalization.Tests
 
             // Spanish
             yield return new object[] { new CultureInfo("es-ES").CompareInfo, "llegar", "lugar", CompareOptions.None, -1 };
+
+            // Zero-weight code points
+            // In both NLS (Windows) and ICU the code point U+200C ZERO WIDTH NON-JOINER has a zero weight,
+            // so it's compared as equal to the empty string. This means that we can't special-case GetHashCode("")
+            // and return a fixed value; we actually need to call the underlying OS or ICU API to calculate the sort key.
+            yield return new object[] { s_invariantCompare, "", "\u200c", CompareOptions.None, 0 };
         }
 
         public static IEnumerable<object[]> IndexOf_TestData()
@@ -439,9 +440,13 @@ namespace System.Globalization.Tests
         }
 
         [Fact]
-        public void GetHashCode_EmptySpan()
+        public void GetHashCode_NullAndEmptySpan()
         {
-            Assert.Equal(0, CultureInfo.InvariantCulture.CompareInfo.GetHashCode(ReadOnlySpan<char>.Empty, CompareOptions.None));
+            // Ensure that null spans and non-null empty spans produce the same hash code.
+
+            int hashCodeOfNullSpan = CultureInfo.InvariantCulture.CompareInfo.GetHashCode(ReadOnlySpan<char>.Empty, CompareOptions.None);
+            int hashCodeOfNotNullEmptySpan = CultureInfo.InvariantCulture.CompareInfo.GetHashCode("".AsSpan(), CompareOptions.None);
+            Assert.Equal(hashCodeOfNullSpan, hashCodeOfNotNullEmptySpan);
         }
 
         [Fact]
index 2ea617b..1a186bd 100644 (file)
@@ -627,6 +627,18 @@ namespace System.Globalization.Tests
             Assert.Equal(version, new CultureInfo(cultureName).CompareInfo.Version);
         }
 
+        [Fact]
+        public void TestSortKey_ZeroWeightCodePoints()
+        {
+            // In the invariant globalization mode, there's no such thing as a zero-weight code point,
+            // so the U+200C ZERO WIDTH NON-JOINER code point contributes to the final sort key value.
+
+            CompareInfo compareInfo = CultureInfo.InvariantCulture.CompareInfo;
+            SortKey sortKeyForEmptyString = compareInfo.GetSortKey("");
+            SortKey sortKeyForZeroWidthJoiner = compareInfo.GetSortKey("\u200c");
+            Assert.NotEqual(0, SortKey.Compare(sortKeyForEmptyString, sortKeyForZeroWidthJoiner));
+        }
+
 
         private static StringComparison GetStringComparison(CompareOptions options)
         {
index f77c391..8f7a6e4 100644 (file)
@@ -821,24 +821,17 @@ namespace System.Globalization
                 throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options));
             }
 
-            byte [] keyData;
-            if (source.Length == 0)
+            byte[] keyData;
+            fixed (char* pSource = source)
             {
-                keyData = Array.Empty<byte>();
-            }
-            else
-            {
-                fixed (char* pSource = source)
-                {
-                    int sortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, null, 0, options);
-                    keyData = new byte[sortKeyLength];
+                int sortKeyLength = Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, null, 0, options);
+                keyData = new byte[sortKeyLength];
 
-                    fixed (byte* pSortKey = keyData)
+                fixed (byte* pSortKey = keyData)
+                {
+                    if (Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pSortKey, sortKeyLength, options) != sortKeyLength)
                     {
-                        if (Interop.Globalization.GetSortKey(_sortHandle, pSource, source.Length, pSortKey, sortKeyLength, options) != sortKeyLength)
-                        {
-                            throw new ArgumentException(SR.Arg_ExternalException);
-                        }
+                        throw new ArgumentException(SR.Arg_ExternalException);
                     }
                 }
             }
@@ -894,11 +887,6 @@ namespace System.Globalization
             Debug.Assert(!GlobalizationMode.Invariant);
             Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);
 
-            if (source.Length == 0)
-            {
-                return 0;
-            }
-
             // according to ICU User Guide the performance of ucol_getSortKey is worse when it is called with null output buffer
             // the solution is to try to fill the sort key in a temporary buffer of size equal 4 x string length
             // 1MB is the biggest array that can be rented from ArrayPool.Shared without memory allocation
@@ -909,7 +897,7 @@ namespace System.Globalization
                 ? stackalloc byte[1024]
                 : (borrowedArray = ArrayPool<byte>.Shared.Rent(sortKeyLength));
 
-            fixed (char* pSource = &MemoryMarshal.GetReference(source))
+            fixed (char* pSource = &MemoryMarshal.GetNonNullPinnableReference(source))
             {
                 fixed (byte* pSortKey = &MemoryMarshal.GetReference(sortKey))
                 {
index 595f642..6ae7a7d 100644 (file)
@@ -129,18 +129,24 @@ namespace System.Globalization
             Debug.Assert(!GlobalizationMode.Invariant);
             Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);
 
-            if (source.Length == 0)
+            // LCMapStringEx doesn't support passing cchSrc = 0, so if given a null or empty input
+            // we'll normalize it to an empty null-terminated string and pass -1 to indicate that
+            // the underlying OS function should read until it encounters the null terminator.
+
+            int sourceLength = source.Length;
+            if (sourceLength == 0)
             {
-                return 0;
+                source = string.Empty;
+                sourceLength = -1;
             }
 
             uint flags = LCMAP_SORTKEY | (uint)GetNativeCompareFlags(options);
 
-            fixed (char* pSource = source)
+            fixed (char* pSource = &MemoryMarshal.GetReference(source))
             {
                 int sortKeyLength = Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName,
                                                   flags,
-                                                  pSource, source.Length /* in chars */,
+                                                  pSource, sourceLength /* in chars */,
                                                   null, 0,
                                                   null, null, _sortHandle);
                 if (sortKeyLength == 0)
@@ -162,7 +168,7 @@ namespace System.Globalization
                 {
                     if (Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName,
                                                       flags,
-                                                      pSource, source.Length /* in chars */,
+                                                      pSource, sourceLength /* in chars */,
                                                       pSortKey, sortKeyLength,
                                                       null, null, _sortHandle) != sortKeyLength)
                     {
@@ -505,38 +511,40 @@ namespace System.Globalization
             }
 
             byte[] keyData;
-            if (source.Length == 0)
+            uint flags = LCMAP_SORTKEY | (uint)GetNativeCompareFlags(options);
+
+            // LCMapStringEx doesn't support passing cchSrc = 0, so if given an empty string
+            // we'll instead pass -1 to indicate a null-terminated empty string.
+
+            int sourceLength = source.Length;
+            if (sourceLength == 0)
             {
-                keyData = Array.Empty<byte>();
+                sourceLength = -1;
             }
-            else
-            {
-                uint flags = LCMAP_SORTKEY | (uint)GetNativeCompareFlags(options);
 
-                fixed (char* pSource = source)
+            fixed (char* pSource = source)
+            {
+                int sortKeyLength = Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName,
+                                            flags,
+                                            pSource, sourceLength,
+                                            null, 0,
+                                            null, null, _sortHandle);
+                if (sortKeyLength == 0)
                 {
-                    int sortKeyLength = Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName,
-                                                flags,
-                                                pSource, source.Length,
-                                                null, 0,
-                                                null, null, _sortHandle);
-                    if (sortKeyLength == 0)
-                    {
-                        throw new ArgumentException(SR.Arg_ExternalException);
-                    }
+                    throw new ArgumentException(SR.Arg_ExternalException);
+                }
 
-                    keyData = new byte[sortKeyLength];
+                keyData = new byte[sortKeyLength];
 
-                    fixed (byte* pBytes = keyData)
+                fixed (byte* pBytes = keyData)
+                {
+                    if (Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName,
+                                            flags,
+                                            pSource, sourceLength,
+                                            pBytes, keyData.Length,
+                                            null, null, _sortHandle) != sortKeyLength)
                     {
-                        if (Interop.Kernel32.LCMapStringEx(_sortHandle != IntPtr.Zero ? null : _sortName,
-                                                flags,
-                                                pSource, source.Length,
-                                                pBytes, keyData.Length,
-                                                null, null, _sortHandle) != sortKeyLength)
-                        {
-                            throw new ArgumentException(SR.Arg_ExternalException);
-                        }
+                        throw new ArgumentException(SR.Arg_ExternalException);
                     }
                 }
             }