Implement basic collation on top of ICU
authorMatt Ellis <matell@microsoft.com>
Wed, 15 Jul 2015 18:22:39 +0000 (11:22 -0700)
committerMatt Ellis <matell@microsoft.com>
Fri, 25 Sep 2015 21:41:23 +0000 (14:41 -0700)
This change adds basic Unicode collation support, built on top of ICU.
Windows and ICU have different collation models, and the Windows model
does not correspond 1:1 with the ICU model, so in addition to
differences in sort weights between the two platforms, the
CompareOptions enum does not map nicely to ICU options.

For now, we only map CompareOptions.None, CompareOptions.IgnoreCase,
CompareOptions.Ordinal and CompareOptions.OrdinalIgnoreCase; all other
CompareOptions are ignored during collation.

In addition to collation support, I have enabled the randomized string
hashing code (using Marvin32 + a per-app-domain seed) so that string
hash codes are not predictable across runs.

15 files changed:
CMakeLists.txt
clr.coreclr.props
src/corefx/System.Globalization.Native/CMakeLists.txt
src/corefx/System.Globalization.Native/collation.cpp [new file with mode: 0644]
src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs [new file with mode: 0644]
src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs
src/mscorlib/corefx/System/Globalization/TextInfo.Unix.cs
src/mscorlib/corefx/System/Globalization/TextInfo.cs
src/mscorlib/mscorlib.shared.sources.props
src/mscorlib/src/System/Collections/Hashtable.cs
src/mscorlib/src/System/StringComparer.cs
src/vm/CMakeLists.txt
src/vm/comutilnative.cpp
src/vm/comutilnative.h
src/vm/ecalllist.h

index 543e4ff..610e929 100644 (file)
@@ -573,9 +573,7 @@ add_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE)
 endif(WIN32)
 add_definitions(-DFEATURE_NORM_IDNA_ONLY)
 add_definitions(-DFEATURE_PREJIT)
-if(WIN32)
-    add_definitions(-DFEATURE_RANDOMIZED_STRING_HASHING)
-endif(WIN32)
+add_definitions(-DFEATURE_RANDOMIZED_STRING_HASHING)
 add_definitions(-DFEATURE_READYTORUN)
 add_definitions(-DFEATURE_STANDALONE_SN)
 add_definitions(-DFEATURE_STRONGNAME_DELAY_SIGNING_ALLOWED)
index b47de7b..819fd6f 100644 (file)
 
     <ProfilingSupportedBuild>false</ProfilingSupportedBuild>
 
-    <!-- UNIXTODO Enable randomized string hashing -->
-    <FeatureRandomizedStringHashing>false</FeatureRandomizedStringHashing>
-
     <!-- Windows specific features -->
     <FeatureWin32Registry>false</FeatureWin32Registry>
     <FeatureAppX>false</FeatureAppX>
 
     <FeatureCoreFxGlobalization>true</FeatureCoreFxGlobalization>
   </PropertyGroup>
-</Project>
\ No newline at end of file
+</Project>
index 2660e34..ecf65f2 100644 (file)
@@ -32,6 +32,7 @@ add_compile_options(-fPIC)
 set(NATIVEGLOBALIZATION_SOURCES
     calendarData.cpp
     casing.cpp
+    collation.cpp
     idna.cpp
     locale.cpp
     localeNumberData.cpp
diff --git a/src/corefx/System.Globalization.Native/collation.cpp b/src/corefx/System.Globalization.Native/collation.cpp
new file mode 100644 (file)
index 0000000..ab67026
--- /dev/null
@@ -0,0 +1,269 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+//
+
+#include <assert.h>
+#include <stdint.h>
+#include <unicode/uchar.h>
+#include <unicode/ucol.h>
+#include <unicode/usearch.h>
+#include <unicode/utf16.h>
+
+const int32_t CompareOptionsIgnoreCase = 1;
+// const int32_t CompareOptionsIgnoreNonSpace = 2;
+// const int32_t CompareOptionsIgnoreSymbols = 4;
+// const int32_t CompareOptionsIgnoreKanaType = 8;
+// const int32_t CompareOptionsIgnoreWidth = 0x10;
+// const int32_t CompareOptionsStringSort = 0x20000000;
+
+/*
+ * Opens an ICU collator for lpLocaleName and configures it from the managed CompareOptions bits in options.
+ *
+ * Only CompareOptionsIgnoreCase is honored: when it is set, the collator strength is set to UCOL_SECONDARY.
+ * All other option bits are ignored.
+ *
+ * The collator returned by this function is owned by the caller, who must release it with ucol_close when
+ * this function returns with a U_SUCCESS UErrorCode.
+ *
+ * On error (U_FAILURE(*pErr)) no collator is left open and nullptr is returned, so callers that only close
+ * the collator on success do not leak it.
+ */
+UCollator* GetCollatorForLocaleAndOptions(const char* lpLocaleName, int32_t options, UErrorCode* pErr)
+{
+    UCollator* pColl = ucol_open(lpLocaleName, pErr);
+
+    if (U_FAILURE(*pErr))
+    {
+        return nullptr;
+    }
+
+    if ((options & CompareOptionsIgnoreCase) == CompareOptionsIgnoreCase)
+    {
+        ucol_setAttribute(pColl, UCOL_STRENGTH, UCOL_SECONDARY, pErr);
+
+        if (U_FAILURE(*pErr))
+        {
+            // Callers skip ucol_close when the error code signals failure, so release the collator here.
+            ucol_close(pColl);
+            return nullptr;
+        }
+    }
+
+    return pColl;
+}
+
+
+/*
+Function:
+CompareString
+*/
+extern "C" int32_t CompareString(const char* lpLocaleName, const UChar* lpStr1, int32_t cwStr1Length, const UChar* lpStr2, int32_t cwStr2Length, int32_t options)
+{
+    static_assert(UCOL_EQUAL == 0, "managed side requires 0 for equal strings");
+    static_assert(UCOL_LESS < 0, "managed side requires less than zero for a < b");
+    static_assert(UCOL_GREATER > 0, "managed side requires greater than zero for a > b");
+
+    UCollationResult result = UCOL_EQUAL;
+    UErrorCode err = U_ZERO_ERROR;
+    UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err);
+
+    if (U_SUCCESS(err))
+    {
+        result = ucol_strcoll(pColl, lpStr1, cwStr1Length, lpStr2, cwStr2Length);
+        ucol_close(pColl);
+    }
+
+    return result;
+}
+
+/*
+Function:
+IndexOf
+*/
+extern "C" int32_t IndexOf(const char* lpLocaleName, const UChar* lpTarget, const UChar* lpSource, int32_t cwSourceLength, int32_t options)
+{
+    static_assert(USEARCH_DONE == -1, "managed side requires -1 for not found");
+
+    int32_t result = USEARCH_DONE;
+    UErrorCode err = U_ZERO_ERROR;
+    UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err);
+
+    if (U_SUCCESS(err))
+    {
+        UStringSearch* pSearch = usearch_openFromCollator(lpTarget, -1, lpSource, cwSourceLength, pColl, nullptr, &err);
+
+        if (U_SUCCESS(err))
+        {
+            result = usearch_first(pSearch, &err);
+            usearch_close(pSearch);
+        }
+
+        ucol_close(pColl);
+    }
+
+    return result;
+}
+
+/*
+Function:
+LastIndexOf
+*/
+extern "C" int32_t LastIndexOf(const char* lpLocaleName, const UChar* lpTarget, const UChar* lpSource, int32_t cwSourceLength, int32_t options)
+{
+    static_assert(USEARCH_DONE == -1, "managed side requires -1 for not found");
+
+    int32_t result = USEARCH_DONE;
+    UErrorCode err = U_ZERO_ERROR;
+    UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err);
+
+    if (U_SUCCESS(err))
+    {
+        UStringSearch* pSearch = usearch_openFromCollator(lpTarget, -1, lpSource, cwSourceLength, pColl, nullptr, &err);
+
+        if (U_SUCCESS(err))
+        {
+            result = usearch_last(pSearch, &err);
+            usearch_close(pSearch);
+        }
+
+        ucol_close(pColl);
+    }
+
+    return result;
+}
+
+/*
+ Return value is a "Win32 BOOL" (1 = true, 0 = false)
+ */
+extern "C" int32_t StartsWith(const char* lpLocaleName, const UChar* lpTarget, const UChar* lpSource, int32_t cwSourceLength, int32_t options)
+{
+    int32_t result = FALSE;
+    UErrorCode err = U_ZERO_ERROR;
+    UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err);
+
+    if (U_SUCCESS(err))
+    {
+        UStringSearch* pSearch = usearch_openFromCollator(lpTarget, -1, lpSource, cwSourceLength, pColl, nullptr, &err);
+        int32_t idx = USEARCH_DONE;
+
+        if (U_SUCCESS(err))
+        {
+            idx = usearch_first(pSearch, &err);
+
+            if (idx == 0)
+            {
+                result = TRUE;
+            }
+            else
+            {
+                UCollationElements* pCollElem = ucol_openElements(pColl, lpSource, idx, &err);
+
+                if (U_SUCCESS(err))
+                {
+                    int32_t curCollElem = UCOL_NULLORDER;
+
+                    result = TRUE;
+
+                    while((curCollElem = ucol_next(pCollElem, &err)) != UCOL_NULLORDER)
+                    {
+                        if (curCollElem != 0)
+                        {
+                            // Non ignorable collation element found between start of the string and the first match for lpTarget.
+                            result = FALSE;
+                            break;
+                        }
+                    }
+
+                    if (U_FAILURE(err))
+                    {
+                        result = FALSE;
+                    }
+
+                    ucol_closeElements(pCollElem);
+                }
+            }
+
+            usearch_close(pSearch);
+        }
+
+        ucol_close(pColl);
+    }
+
+    return result;
+}
+
+/*
+ Return value is a "Win32 BOOL" (1 = true, 0 = false)
+ */
+extern "C" int32_t EndsWith(const char* lpLocaleName, const UChar* lpTarget, const UChar* lpSource, int32_t cwSourceLength, int32_t options)
+{
+    int32_t result = FALSE;
+    UErrorCode err = U_ZERO_ERROR;
+    UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err);
+
+    if (U_SUCCESS(err))
+    {
+        UStringSearch* pSearch = usearch_openFromCollator(lpTarget, -1, lpSource, cwSourceLength, pColl, nullptr, &err);
+        int32_t idx = USEARCH_DONE;
+
+        if (U_SUCCESS(err))
+        {
+            idx = usearch_last(pSearch, &err);
+
+            if (idx != USEARCH_DONE)
+            {
+                if ((idx + usearch_getMatchedLength(pSearch)) == cwSourceLength)
+                {
+                    result = TRUE;
+                }
+
+                // TODO (dotnet/corefx#3467): We should do something similar to what StartsWith does where we can ignore
+                // some collation elements at the end of te string if they are zero.
+            }
+
+            usearch_close(pSearch);
+        }
+
+        ucol_close(pColl);
+    }
+
+    return result;
+}
+
+extern "C" int32_t GetSortKey(const char* lpLocaleName, const UChar* lpStr, int32_t cwStrLength, uint8_t* sortKey, int32_t cbSortKeyLength, int32_t options)
+{
+    UErrorCode err = U_ZERO_ERROR;
+    UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err);
+    int32_t result = 0;
+
+    if (U_SUCCESS(err))
+    {
+        result = ucol_getSortKey(pColl, lpStr, cwStrLength, sortKey, cbSortKeyLength);
+
+        ucol_close(pColl);
+    }
+
+    return result;
+}
+
+extern "C" int32_t CompareStringOrdinalIgnoreCase(const UChar* lpStr1, int32_t cwStr1Length, const UChar* lpStr2, int32_t cwStr2Length)
+{
+    assert(lpStr1 != nullptr);
+    assert(cwStr1Length >= 0);
+    assert(lpStr2 != nullptr);
+    assert(cwStr2Length >= 0);
+
+    int32_t str1Idx = 0;
+    int32_t str2Idx = 0;
+
+    while (str1Idx < cwStr1Length && str2Idx < cwStr2Length)
+    {
+        UChar32 str1Codepoint;
+        UChar32 str2Codepoint;
+
+        U16_NEXT(lpStr1, str1Idx, cwStr1Length, str1Codepoint);
+        U16_NEXT(lpStr2, str2Idx, cwStr2Length, str2Codepoint);
+
+        if (str1Codepoint != str2Codepoint && u_toupper(str1Codepoint) != u_toupper(str2Codepoint))
+        {
+            return str1Codepoint < str2Codepoint ? -1 : 1;
+        }
+    }
+
+    if (cwStr1Length < cwStr2Length)
+    {
+        return -1;
+    }
+
+    if (cwStr2Length < cwStr1Length)
+    {
+        return 1;
+    }
+
+    return 0;
+}
diff --git a/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs b/src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs
new file mode 100644 (file)
index 0000000..b600fa5
--- /dev/null
@@ -0,0 +1,35 @@
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Globalization;
+using System.Runtime.InteropServices;
+
+internal static partial class Interop
+{
+    internal static partial class GlobalizationInterop // P/Invoke declarations for the native collation entry points in collation.cpp.
+    {
+        [DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)] // Native CompareString: localeName is passed to ICU as a const char* locale name; result is <0, 0 or >0.
+        internal unsafe static extern int CompareString(byte[] localeName, char* lpStr1, int cwStr1Len, char* lpStr2, int cwStr2Len, CompareOptions options);
+
+        [DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)] // Native IndexOf: target is searched as a null-terminated string; returns -1 when not found.
+        internal unsafe static extern int IndexOf(byte[] localeName, string target, char* pSource, int cwSourceLength, CompareOptions options);
+
+        [DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)] // Native LastIndexOf: same contract as IndexOf but reports the last match.
+        internal unsafe static extern int LastIndexOf(byte[] localeName, string target, char* pSource, int cwSourceLength, CompareOptions options);
+
+        [DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)] // Native StartsWith: returns a Win32 BOOL (1 = true, 0 = false).
+        [return: MarshalAs(UnmanagedType.Bool)]
+        internal unsafe static extern bool StartsWith(byte[] localeName, string target, string source, int cwSourceLength, CompareOptions options);
+
+        [DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)] // Native EndsWith: returns a Win32 BOOL (1 = true, 0 = false).
+        [return: MarshalAs(UnmanagedType.Bool)]
+        internal unsafe static extern bool EndsWith(byte[] localeName, string target, string source, int cwSourceLength, CompareOptions options);
+
+        [DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)] // Native GetSortKey: returns the value of ucol_getSortKey, or 0 if no collator could be opened.
+        internal unsafe static extern int GetSortKey(byte[] localeName, string str, int strLength, byte* sortKey, int sortKeyLength, CompareOptions options);
+
+        [DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)] // Native CompareStringOrdinalIgnoreCase: locale-independent; returns -1, 0 or 1.
+        internal unsafe static extern int CompareStringOrdinalIgnoreCase(char* lpStr1, int cwStr1Len, char* lpStr2, int cwStr2Len);
+    }
+}
index 46fb25b..ba658d0 100644 (file)
@@ -2,14 +2,22 @@
 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
 
 using System.Diagnostics.Contracts;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Security;
 
 namespace System.Globalization
 {
     public partial class CompareInfo
-    {      
+    {
+        // ICU uses a char* (UTF-8) to represent a locale name.
+        private readonly byte[] m_sortNameAsUtf8;
+
+        // Captures the culture's name and sort name, and caches the sort name as UTF-8 bytes for the
+        // native collation shim.
         internal unsafe CompareInfo(CultureInfo culture)
         {
-            // TODO: Implement This Fully.
+            m_name = culture.m_name;
+            m_sortName = culture.SortName;
+
+            // The native shim hands this buffer to ICU as a C string (const char*), so it has to be
+            // null-terminated. Encoding.GetBytes does not append a terminator, so encode one explicitly.
+            m_sortNameAsUtf8 = System.Text.Encoding.UTF8.GetBytes(m_sortName + "\0");
         }
 
         internal static int IndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase)
@@ -17,13 +25,13 @@ namespace System.Globalization
             Contract.Assert(source != null);
             Contract.Assert(value != null);
 
-            // TODO: Implement This Fully.
-
             if (value.Length == 0)
             {
                 return startIndex;
             }
 
+            // TODO (dotnet/corefx#3468): Move this into the shim so we don't have to do the ToUpper or call substring.
+
             if (ignoreCase)
             {
                 source = source.ToUpper(CultureInfo.InvariantCulture);
@@ -53,13 +61,13 @@ namespace System.Globalization
             Contract.Assert(source != null);
             Contract.Assert(value != null);
 
-            // TODO: Implement This Fully.
-
             if (value.Length == 0)
             {
                 return startIndex;
             }
 
+            // TODO (dotnet/corefx#3468): Move this into the shim so we don't have to do the ToUpper or call substring.
+
             if (ignoreCase)
             {
                 source = source.ToUpper(CultureInfo.InvariantCulture);
@@ -76,8 +84,8 @@ namespace System.Globalization
                 last = cur;
             }
 
-            return last >= 0 ? 
-                last + startIndex - count + 1 : 
+            return last >= 0 ?
+                last + startIndex - count + 1 :
                 -1;
         }
 
@@ -86,25 +94,13 @@ namespace System.Globalization
             Contract.Assert(source != null);
             Contract.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);
 
-            // TODO: Implement This Fully.
-            int hash = 5381;
-
-            unchecked
-            {
-                for (int i = 0; i < source.Length; i++)
-                {
-                    hash = ((hash << 5) + hash) + ChangeCaseAscii(source[i]);
-                }
-            }
-
-            return hash;
+            return GetHashCodeOfStringCore(source, options, forceRandomizedHashing: false, additionalEntropy: 0);
         }
 
         [System.Security.SecuritySafeCritical]
         private static unsafe int CompareStringOrdinalIgnoreCase(char* string1, int count1, char* string2, int count2)
         {
-            // TODO: Implement This Fully.            
-            return CompareStringOrdinalAscii(string1, count1, string2, count2, ignoreCase: true);
+            return Interop.GlobalizationInterop.CompareStringOrdinalIgnoreCase(string1, count1, string2, count2); // Delegates to the native shim, which case-maps with ICU's u_toupper.
         }
 
         [System.Security.SecuritySafeCritical]
@@ -114,37 +110,66 @@ namespace System.Globalization
             Contract.Assert(string2 != null);
             Contract.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);
 
-            // TODO: Implement This Fully.
-            string s1 = string1.Substring(offset1, length1);
-            string s2 = string2.Substring(offset2, length2);
-
-            fixed (char* c1 = s1)
+            fixed (char* pString1 = string1)
             {
-                fixed (char* c2 = s2)
+                fixed (char* pString2 = string2)
                 {
-                    return CompareStringOrdinalAscii(c1, s1.Length, c2, s2.Length, IgnoreCase(options));
+                    return Interop.GlobalizationInterop.CompareString(m_sortNameAsUtf8, pString1 + offset1, length1, pString2 + offset2, length2, options);
                 }
             }
         }
 
-        private int IndexOfCore(string source, string target, int startIndex, int count, CompareOptions options)
+        [System.Security.SecuritySafeCritical]
+        private unsafe int IndexOfCore(string source, string target, int startIndex, int count, CompareOptions options) // Finds target within source[startIndex, startIndex + count); returns an index into source or -1.
         {
             Contract.Assert(!string.IsNullOrEmpty(source));
             Contract.Assert(target != null);
             Contract.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0);
 
-            // TODO: Implement This Fully.
-            return IndexOfOrdinal(source, target, startIndex, count, IgnoreCase(options));
+            if (target.Length == 0) // An empty target matches at the start of the search window.
+            {
+                return startIndex;
+            }
+
+            if (options == CompareOptions.Ordinal) // Ordinal search is handled in managed code, not by the native shim.
+            {
+                return IndexOfOrdinal(source, target, startIndex, count, ignoreCase: false);
+            }
+
+            fixed (char* pSource = source)
+            {
+                int lastIndex = Interop.GlobalizationInterop.IndexOf(m_sortNameAsUtf8, target, pSource + startIndex, count, options); // Despite its name this is the FIRST match, relative to pSource + startIndex.
+
+                return lastIndex != -1 ? lastIndex + startIndex : -1; // Rebase the window-relative index onto source.
+            }
         }
 
-        private int LastIndexOfCore(string source, string target, int startIndex, int count, CompareOptions options)
+        // Finds the last occurrence of target inside the count characters of source that end at startIndex.
+        // Returns an index into source, or -1 when there is no match. An empty target yields startIndex,
+        // and CompareOptions.Ordinal is handled by LastIndexOfOrdinal instead of the native shim.
+        [System.Security.SecuritySafeCritical]
+        private unsafe int LastIndexOfCore(string source, string target, int startIndex, int count, CompareOptions options)
         {
             Contract.Assert(!string.IsNullOrEmpty(source));
             Contract.Assert(target != null);
             Contract.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0);
 
-            // TODO: Implement This Fully.
-            return LastIndexOfOrdinal(source, target, startIndex, count, IgnoreCase(options));
+            if (target.Length == 0)
+            {
+                return startIndex;
+            }
+
+            if (options == CompareOptions.Ordinal)
+            {
+                return LastIndexOfOrdinal(source, target, startIndex, count, ignoreCase: false);
+            }
+
+            // startIndex is the index into source where we start search backwards from. leftStartIndex is the index into source
+            // of the start of the string that is count characters away from startIndex.
+            int leftStartIndex = (startIndex - count + 1);
+
+            fixed (char* pSource = source)
+            {
+                // The native call sees only the window [leftStartIndex, startIndex], so its result is window-relative.
+                int lastIndex = Interop.GlobalizationInterop.LastIndexOf(m_sortNameAsUtf8, target, pSource + leftStartIndex, count, options);
+
+                return lastIndex != -1 ? lastIndex + leftStartIndex : -1;
+            }
         }
 
         private bool StartsWith(string source, string prefix, CompareOptions options)
@@ -153,10 +178,7 @@ namespace System.Globalization
             Contract.Assert(!string.IsNullOrEmpty(prefix));
             Contract.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);
 
-            // TODO: Implement This Fully.
-            if(prefix.Length > source.Length) return false;
-
-            return StringEqualsAscii(source.Substring(0, prefix.Length), prefix, IgnoreCase(options));
+            return Interop.GlobalizationInterop.StartsWith(m_sortNameAsUtf8, prefix, source, source.Length, options);
         }
 
         private bool EndsWith(string source, string suffix, CompareOptions options)
@@ -165,75 +187,40 @@ namespace System.Globalization
             Contract.Assert(!string.IsNullOrEmpty(suffix));
             Contract.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);
 
-            // TODO: Implement This Fully.
-            if(suffix.Length > source.Length) return false;
-
-            return StringEqualsAscii(source.Substring(source.Length - suffix.Length), suffix, IgnoreCase(options));
+            return Interop.GlobalizationInterop.EndsWith(m_sortNameAsUtf8, suffix, source, source.Length, options);
         }
 
         // -----------------------------
         // ---- PAL layer ends here ----
         // -----------------------------
 
-        private static char ChangeCaseAscii(char c, bool toUpper = true)
+        // Hashes source by first asking the native shim for its sort key (under this CompareInfo's sort
+        // name and the given options) and then hashing the key bytes with InternalHashSortKey.
+        //
+        // The sort key is sized with a first GetSortKey call that passes a null buffer. Keys of at most
+        // 256 bytes are built on the stack; larger keys use a heap array.
+        //
+        // forceRandomizedHashing and additionalEntropy are forwarded to InternalHashSortKey unchanged.
+        internal unsafe int GetHashCodeOfStringCore(string source, CompareOptions options, bool forceRandomizedHashing, long additionalEntropy)
         {
-            if (toUpper && c >= 'a' && c <= 'z')
-            {
-                return (char)('A' + (c - 'a'));
-            }
-            else if (!toUpper && c >= 'A' && c <= 'Z')
-            {
-                return (char)('a' + (c - 'A'));
-            }
-
-            return c;
-        }
+            Contract.Assert(source != null);
+            Contract.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);
 
-        private static bool StringEqualsAscii(string s1, string s2, bool ignoreCase = true)
-        {
-            if (s1.Length != s2.Length) return false;
+            int sortKeyLength = Interop.GlobalizationInterop.GetSortKey(m_sortNameAsUtf8, source, source.Length, null, 0, options);
 
-            for (int i = 0; i < s1.Length; i++)
+            // As an optimization, for small sort keys we allocate the buffer on the stack.
+            if (sortKeyLength <= 256)
             {
-                char c1 = ignoreCase ? ChangeCaseAscii(s1[i]) : s1[i];
-                char c2 = ignoreCase ? ChangeCaseAscii(s2[i]) : s2[i];
-
-                if (c1 != c2) return false;
+                byte* pSortKey = stackalloc byte[sortKeyLength];
+                Interop.GlobalizationInterop.GetSortKey(m_sortNameAsUtf8, source, source.Length, pSortKey, sortKeyLength, options);
+                return InternalHashSortKey(pSortKey, sortKeyLength, forceRandomizedHashing, additionalEntropy);
             }
 
-            return true;
-        }
+            byte[] sortKey = new byte[sortKeyLength];
 
-        [System.Security.SecuritySafeCritical]
-        private static unsafe int CompareStringOrdinalAscii(char* s1, int count1, char* s2, int count2, bool ignoreCase)
-        {
-            int countMin = Math.Min(count1, count2);
+            fixed(byte* pSortKey = sortKey)
             {
-                for (int i = 0; i < countMin; i++)
-                {
-                    char c1 = ignoreCase ? ChangeCaseAscii(s1[i]) : s1[i];
-                    char c2 = ignoreCase ? ChangeCaseAscii(s2[i]) : s2[i];
-
-                    if (c1 < c2)
-                    {
-                        return -1;
-                    }
-                    else if (c1 > c2)
-                    {
-                        return 1;
-                    }
-                }
+                Interop.GlobalizationInterop.GetSortKey(m_sortNameAsUtf8, source, source.Length, pSortKey, sortKeyLength, options);
+                return InternalHashSortKey(pSortKey, sortKeyLength, forceRandomizedHashing, additionalEntropy);
             }
-
-            if (count1 == count2) return 0;
-            if (count1 > count2) return 1;
-
-            return -1;
         }
 
-        private static bool IgnoreCase(CompareOptions options)
-        {
-            return ((options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase);
-        }
+        [System.Security.SecurityCritical]
+        [DllImport(JitHelpers.QCall)] // QCall registered in ecalllist.h (gCompareInfoFuncs) as CoreFxGlobalization::HashSortKey.
+        [SuppressUnmanagedCodeSecurity]
+        private static unsafe extern int InternalHashSortKey(byte* sortKey, int sortKeyLength, [MarshalAs(UnmanagedType.Bool)] bool forceRandomizedHashing, long additionalEntropy);
     }
-}
\ No newline at end of file
+}
index 9a18175..9a64917 100644 (file)
@@ -19,11 +19,10 @@ namespace System.Globalization
         //////////////////////////////////////////////////////////////////////////
         internal unsafe TextInfo(CultureData cultureData)
         {
-            // TODO: Implement this fully.
             m_cultureData = cultureData;
             m_cultureName = m_cultureData.CultureName;
             m_textInfoName = m_cultureData.STEXTINFO;
-            m_needsTurkishCasing = NeedsTurkishCasing(this.m_textInfoName);
+            m_needsTurkishCasing = NeedsTurkishCasing(m_textInfoName);
         }
 
         [System.Security.SecuritySafeCritical]
@@ -61,9 +60,7 @@ namespace System.Globalization
         private bool NeedsTurkishCasing(string localeName)
         {
             Contract.Assert(localeName != null);
-
-            string lcName = CultureData.AnsiToLower(localeName);
-            return lcName.Length >= 2 && ((lcName[0] == 't' && lcName[1] == 'r') || (lcName[0] == 'a' && lcName[1] == 'z'));
+            return CultureInfo.GetCultureInfo(localeName).CompareInfo.Compare("i", "I", CompareOptions.IgnoreCase) != 0;
         }
     }
 }
index e99151d..7de2b67 100644 (file)
@@ -288,14 +288,6 @@ namespace System.Globalization
         {
             get
             {
-#if PLATFORM_UNIX
-                // UNIXTODO: This hack can be removed once collation works and the code after this correctly returns "false".
-                if (m_needsTurkishCasing)
-                {
-                    return false;
-                }
-#endif
-
                 if (m_IsAsciiCasingSameAsInvariant == null)
                 {
                     m_IsAsciiCasingSameAsInvariant = CultureInfo.GetCultureInfo(m_textInfoName).CompareInfo.Compare("abcdefghijklmnopqrstuvwxyz",
index 81749f0..13f58e2 100644 (file)
     <GlobalizationSources Include="$(CoreFxSourcesRoot)\Interop\Unix\Interop.Libraries.cs" />
     <GlobalizationSources Include="$(CoreFxSourcesRoot)\Interop\Unix\System.Globalization.Native\Interop.Calendar.cs" />
     <GlobalizationSources Include="$(CoreFxSourcesRoot)\Interop\Unix\System.Globalization.Native\Interop.Casing.cs" />
+    <GlobalizationSources Include="$(CoreFxSourcesRoot)\Interop\Unix\System.Globalization.Native\Interop.Collation.cs" />
     <GlobalizationSources Include="$(CoreFxSourcesRoot)\Interop\Unix\System.Globalization.Native\Interop.Locale.cs" />
     <GlobalizationSources Include="$(CoreFxSourcesRoot)\Interop\Unix\System.Globalization.Native\Interop.TimeZoneInfo.cs" />
     <GlobalizationSources Include="$(CoreFxSourcesRoot)\System\Globalization\CalendarData.Unix.cs" />
index 4c30b72..2c10ff0 100644 (file)
@@ -22,7 +22,7 @@ namespace System.Collections {
     using System.Runtime.CompilerServices;
     using System.Runtime.ConstrainedExecution;
     using System.Diagnostics.Contracts;
-#if FEATURE_RANDOMIZED_STRING_HASHING
+#if FEATURE_RANDOMIZED_STRING_HASHING && !FEATURE_PAL
     using System.Security.Cryptography;
 #endif
    
@@ -1802,9 +1802,11 @@ namespace System.Collections {
 
             return comparer;
         }
+
         private const int bufferSize = 1024;
+#if !FEATURE_PAL
         private static RandomNumberGenerator rng;
+#endif
         private static byte[] data;
         private static int currentIndex = bufferSize;
         private static readonly object lockObj = new Object();
@@ -1816,14 +1818,21 @@ namespace System.Collections {
 
                 if(currentIndex == bufferSize) 
                 {
-                    if(null == rng)
+                    if(data == null)
                     {
-                        rng = RandomNumberGenerator.Create();
                         data = new byte[bufferSize];
                         Contract.Assert(bufferSize % 8 == 0, "We increment our current index by 8, so our buffer size must be a multiple of 8");
+#if !FEATURE_PAL
+                        rng = RandomNumberGenerator.Create();
+#endif
+
                     }
 
+#if FEATURE_PAL
+                    Microsoft.Win32.Win32Native.Random(true, data, data.Length);
+#else
                     rng.GetBytes(data);
+#endif
                     currentIndex = 0;
                 }
 
index cc97650..9799a51 100644 (file)
@@ -239,8 +239,7 @@ namespace System {
             }
 
 #if FEATURE_COREFX_GLOBALIZATION
-            // TODO: Implement this fully.
-            throw new NotImplementedException();
+            return _compareInfo.GetHashCodeOfStringCore(obj, options, true, _entropy);
 #else
             return _compareInfo.GetHashCodeOfString(obj, options, true, _entropy);
 #endif
@@ -394,8 +393,7 @@ namespace System {
 
             if( _ignoreCase) {
 #if FEATURE_COREFX_GLOBALIZATION
-                // TODO: Implement this fully.
-                throw new NotImplementedException();
+                return CultureInfo.InvariantCulture.CompareInfo.GetHashCodeOfStringCore(obj, CompareOptions.IgnoreCase, true, _entropy);
 #else
                 return TextInfo.GetHashCodeOrdinalIgnoreCase(obj, true, _entropy);
 #endif
index 9282ce1..e1e6138 100644 (file)
@@ -207,6 +207,7 @@ set(VM_SOURCES_WKS
     listlock.cpp
     managedmdimport.cpp
     marshalnative.cpp
+    marvin32.cpp
     mdaassistants.cpp
     message.cpp
     methodtablebuilder.cpp
@@ -276,7 +277,6 @@ set(VM_SOURCES_DAC_AND_WKS_WIN32
 
 list(APPEND VM_SOURCES_WKS 
     ${VM_SOURCES_DAC_AND_WKS_WIN32}
-    marvin32.cpp # move out of win32 when FEATURE_RANDOMIZED_STRING_HASHING is enabled for linux
     # These should not be included for Linux
     appxutil.cpp
     assemblynativeresource.cpp
index 55a7a1f..9664bf9 100644 (file)
@@ -3067,7 +3067,7 @@ void COMNlsHashProvider::InitializeDefaultSeed()
     CONTRACTL_END;
 
     PCBYTE pEntropy = GetEntropy();
-    AllocMemHolder<SYMCRYPT_MARVIN32_EXPANDED_SEED> pSeed = GetAppDomain()->GetLowFrequencyHeap()->AllocMem(S_SIZE_T(sizeof(SYMCRYPT_MARVIN32_EXPANDED_SEED)));
+    AllocMemHolder<SYMCRYPT_MARVIN32_EXPANDED_SEED> pSeed(GetAppDomain()->GetLowFrequencyHeap()->AllocMem(S_SIZE_T(sizeof(SYMCRYPT_MARVIN32_EXPANDED_SEED))));
     SymCryptMarvin32ExpandSeed(pSeed, pEntropy, SYMCRYPT_MARVIN32_SEED_SIZE);
 
     if(InterlockedCompareExchangeT(&pDefaultSeed, (PCSYMCRYPT_MARVIN32_EXPANDED_SEED) pSeed, NULL) == NULL)
@@ -3104,12 +3104,16 @@ PCBYTE COMNlsHashProvider::GetEntropy()
 
     if(pEntropy == NULL)
     {
+        AllocMemHolder<BYTE> pNewEntropy(GetAppDomain()->GetLowFrequencyHeap()->AllocMem(S_SIZE_T(sizeof(SYMCRYPT_MARVIN32_SEED_SIZE))));
+
+#ifdef FEATURE_PAL
+        PAL_Random(TRUE, pNewEntropy, SYMCRYPT_MARVIN32_SEED_SIZE);
+#else
         HCRYPTPROV hCryptProv;
-        AllocMemHolder<BYTE> pNewEntropy = GetAppDomain()->GetLowFrequencyHeap()->AllocMem(S_SIZE_T(sizeof(SYMCRYPT_MARVIN32_SEED_SIZE)));
-        
         WszCryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT);
         CryptGenRandom(hCryptProv, SYMCRYPT_MARVIN32_SEED_SIZE, pNewEntropy);
         CryptReleaseContext(hCryptProv, 0);
+#endif
 
         if(InterlockedCompareExchangeT(&pEntropy, (PBYTE) pNewEntropy, NULL) == NULL)
         {
@@ -3138,3 +3142,20 @@ void COMNlsHashProvider::CreateMarvin32Seed(INT64 additionalEntropy, PSYMCRYPT_M
     SymCryptMarvin32ExpandSeed(pExpandedMarvinSeed, (PCBYTE) &entropy, SYMCRYPT_MARVIN32_SEED_SIZE);
 }
 #endif // FEATURE_RANDOMIZED_STRING_HASHING
+
+#ifdef FEATURE_COREFX_GLOBALIZATION
+INT32 QCALLTYPE CoreFxGlobalization::HashSortKey(PCBYTE pSortKey, INT32 cbSortKey, BOOL forceRandomizedHashing, INT64 additionalEntropy) // QCall exposed to managed CompareInfo as "InternalHashSortKey" (see ecalllist.h).
+{
+    QCALL_CONTRACT;
+
+    INT32 retVal = 0; // Result returned to the caller; assigned inside the QCALL region below.
+
+    BEGIN_QCALL;
+
+    retVal = COMNlsHashProvider::s_NlsHashProvider.HashSortKey(pSortKey, cbSortKey, forceRandomizedHashing, additionalEntropy); // Forwards every argument unchanged to the shared NLS hash provider.
+
+    END_QCALL;
+
+    return retVal;
+}
+#endif //FEATURE_COREFX_GLOBALIZATION
index 0f9c7c9..d234d7c 100644 (file)
@@ -309,4 +309,11 @@ private:
 #endif // FEATURE_RANDOMIZED_STRING_HASHING
 };
 
+#ifdef FEATURE_COREFX_GLOBALIZATION
+class CoreFxGlobalization { // Native QCall entry points used by the CoreFX globalization implementation.
+public:
+  static INT32 QCALLTYPE HashSortKey(PCBYTE pSortKey, INT32 cbSortKey, BOOL forceRandomizedHashing, INT64 additionalEntropy); // Hashes cbSortKey bytes at pSortKey via COMNlsHashProvider (defined in comutilnative.cpp).
+};
+#endif // FEATURE_COREFX_GLOBALIZATION
+
 #endif // _COMUTILNATIVE_H_
index b6bd8ee..27f8f75 100644 (file)
@@ -1524,6 +1524,12 @@ FCFuncStart(gTextInfoFuncs)
 FCFuncEnd()
 #endif // defined(FEATURE_LEGACYSURFACE) && !defined(FEATURE_COREFX_GLOBALIZATION)
 
+#ifdef FEATURE_COREFX_GLOBALIZATION
+FCFuncStart(gCompareInfoFuncs)
+    QCFuncElement("InternalHashSortKey", CoreFxGlobalization::HashSortKey)
+FCFuncEnd()
+#endif
+
 FCFuncStart(gArrayFuncs)
     FCFuncElement("get_Rank", ArrayNative::GetRank)
     FCFuncElement("GetLowerBound", ArrayNative::GetLowerBound)
@@ -2183,9 +2189,9 @@ FCClassElement("ChannelServices", "System.Runtime.Remoting.Channels", gChannelSe
 #ifdef FEATURE_CAS_POLICY
 FCClassElement("CodeAccessSecurityEngine", "System.Security", gCodeAccessSecurityEngineFuncs)
 #endif
-#if defined(FEATURE_LEGACYSURFACE) && !defined(FEATURE_COREFX_GLOBALIZATION)
+#if defined(FEATURE_LEGACYSURFACE) || defined(FEATURE_COREFX_GLOBALIZATION)
 FCClassElement("CompareInfo", "System.Globalization", gCompareInfoFuncs)
-#endif // defined(FEATURE_LEGACYSURFACE) && !defined(FEATURE_COREFX_GLOBALIZATION)
+#endif // defined(FEATURE_LEGACYSURFACE) || defined(FEATURE_COREFX_GLOBALIZATION)
 FCClassElement("CompatibilitySwitch", "System.Runtime.Versioning", gCompatibilitySwitchFuncs)
 #ifdef FEATURE_COMPRESSEDSTACK    
 FCClassElement("CompressedStack", "System.Threading", gCompressedStackFuncs)