[OSX] HybridGlobalization Implement casing functions (#87919)
authorMeri Khamoyan <96171496+mkhamoyan@users.noreply.github.com>
Thu, 29 Jun 2023 08:23:59 +0000 (12:23 +0400)
committerGitHub <noreply@github.com>
Thu, 29 Jun 2023 08:23:59 +0000 (12:23 +0400)
Implement GlobalizationNative_ChangeCaseNative , GlobalizationNative_ChangeCaseInvariantNative for OSX

15 files changed:
docs/design/features/globalization-hybrid-mode.md
src/libraries/Common/src/Interop/Interop.Casing.OSX.cs [new file with mode: 0644]
src/libraries/Common/src/Interop/Interop.ResultCode.cs
src/libraries/System.Globalization/tests/Hybrid/System.Globalization.IOS.Tests.csproj
src/libraries/System.Globalization/tests/System/Globalization/TextInfoTests.cs
src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.OSX.cs [new file with mode: 0644]
src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.cs
src/mono/mono/mini/CMakeLists.txt
src/native/libs/System.Globalization.Native/CMakeLists.txt
src/native/libs/System.Globalization.Native/entrypoints.c
src/native/libs/System.Globalization.Native/pal_casing.h
src/native/libs/System.Globalization.Native/pal_casing.m [new file with mode: 0644]
src/native/libs/System.Globalization.Native/pal_errors.h
src/native/libs/System.Globalization.Native/pal_locale.m

index 8b0be82..920f808 100644 (file)
@@ -408,4 +408,34 @@ Affected public APIs:
 - CompareInfo.GetSortKeyLength
 - CompareInfo.GetHashCode
 
-Apple Native API does not have an equivalent, so they throw `PlatformNotSupportedException`.
\ No newline at end of file
+Apple Native API does not have an equivalent, so they throw `PlatformNotSupportedException`.
+
+
+## Case change
+
+Affected public APIs:
+- TextInfo.ToLower
+- TextInfo.ToUpper
+
+The following Apple native functions are used:
+- [uppercaseString](https://developer.apple.com/documentation/foundation/nsstring/1409855-uppercasestring)
+- [lowercaseString](https://developer.apple.com/documentation/foundation/nsstring/1408467-lowercasestring)
+- [uppercaseStringWithLocale](https://developer.apple.com/documentation/foundation/nsstring/1413316-uppercasestringwithlocale?language=objc)
+- [lowercaseStringWithLocale](https://developer.apple.com/documentation/foundation/nsstring/1417298-lowercasestringwithlocale?language=objc)
+
+Behavioural changes compared to ICU
+
+   - Final sigma behavior correction:
+
+     ICU-based case change does not respect the final-sigma rule, but hybrid mode does, so "ΒΌΛΟΣ" -> "βόλος", not "βόλοσ".
+
+   - The following cases throw an exception, because the cased result does not fit in the destination buffer:
+
+      - Capitalizing the German letter ß (sharp S) gives SS when using Apple native functions.
+
+      - Capitalizing ligatures gives a different result on Apple platforms, e.g. "\uFB00" (ff) is uppercased to "FF".
+
+      - Capitalizing "\u0149" (ʼn) on Apple platforms returns a combination of "\u02BC" (ʼ) and N -> (ʼN)
+
+
+
diff --git a/src/libraries/Common/src/Interop/Interop.Casing.OSX.cs b/src/libraries/Common/src/Interop/Interop.Casing.OSX.cs
new file mode 100644 (file)
index 0000000..5e0140f
--- /dev/null
@@ -0,0 +1,16 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.InteropServices;
+
+internal static partial class Interop
+{
+    internal static partial class Globalization
+    {
+        // Locale-aware case change. Binds to the native export
+        // GlobalizationNative_ChangeCaseNative; the returned int is a status code
+        // (callers compare it against Interop.Globalization.ResultCode).
+        [LibraryImport(
+            Libraries.GlobalizationNative,
+            EntryPoint = "GlobalizationNative_ChangeCaseNative",
+            StringMarshalling = StringMarshalling.Utf16)]
+        internal static unsafe partial int ChangeCaseNative(
+            string localeName,
+            int lNameLen,
+            char* src,
+            int srcLen,
+            char* dstBuffer,
+            int dstBufferCapacity,
+            [MarshalAs(UnmanagedType.Bool)] bool bToUpper);
+
+        // Locale-independent case change. Binds to the native export
+        // GlobalizationNative_ChangeCaseInvariantNative; the returned int is a status code.
+        [LibraryImport(
+            Libraries.GlobalizationNative,
+            EntryPoint = "GlobalizationNative_ChangeCaseInvariantNative",
+            StringMarshalling = StringMarshalling.Utf8)]
+        internal static unsafe partial int ChangeCaseInvariantNative(
+            char* src,
+            int srcLen,
+            char* dstBuffer,
+            int dstBufferCapacity,
+            [MarshalAs(UnmanagedType.Bool)] bool bToUpper);
+    }
+}
index b99787f..c55bba6 100644 (file)
@@ -11,7 +11,8 @@ internal static partial class Interop
             Success = 0,
             UnknownError = 1,
             InsufficientBuffer = 2,
-            OutOfMemory = 3
+            OutOfMemory = 3,
+            InvalidCodePoint = 4,
         }
     }
 }
index 1c680de..da9a1a7 100644 (file)
@@ -38,5 +38,6 @@
     <Compile Include="..\CompareInfo\CompareInfoTests.LastIndexOf.cs" />
     <Compile Include="..\CompareInfo\CompareInfoTests.IsPrefix.cs" />
     <Compile Include="..\CompareInfo\CompareInfoTests.IsSuffix.cs" />
+    <Compile Include="..\System\Globalization\TextInfoTests.cs" />
   </ItemGroup>
 </Project>
index fcc4c53..6426a2a 100644 (file)
@@ -274,9 +274,9 @@ namespace System.Globalization.Tests
                 // we also don't preform.
                 // Greek Capital Letter Sigma (does not case to U+03C2 with "final sigma" rule).
                 yield return new object[] { cultureName, "\u03A3", "\u03C3" };
-                if (PlatformDetection.IsHybridGlobalizationOnBrowser)
+                if (PlatformDetection.IsHybridGlobalizationOnBrowser || PlatformDetection.IsHybridGlobalizationOnOSX)
                 {
-                    // JS is using "final sigma" rule correctly - it's costly to unify it with ICU's behavior
+                    // JS and Apple platforms are using "final sigma" rule correctly - it's costly to unify it with ICU's behavior
                     yield return new object[] { cultureName, "O\u03A3", "o\u03C2" };
                 }
                 else
@@ -396,23 +396,29 @@ namespace System.Globalization.Tests
                 // RAINBOW (outside the BMP and does not case)
                 yield return new object[] { cultureName, "\U0001F308", "\U0001F308" };
 
-                // Unicode defines some codepoints which expand into multiple codepoints
-                // when cased (see SpecialCasing.txt from UNIDATA for some examples). We have never done
-                // these sorts of expansions, since it would cause string lengths to change when cased,
-                // which is non-intuitive. In addition, there are some context sensitive mappings which
-                // we also don't preform.
-                // es-zed does not case to SS when uppercased.
-                yield return new object[] { cultureName, "\u00DF", "\u00DF" };                
-                yield return new object[] { cultureName, "stra\u00DFe", "STRA\u00DFE" };
-                if (!PlatformDetection.IsNlsGlobalization)
-                    yield return new object[] { cultureName, "st\uD801\uDC37ra\u00DFe", "ST\uD801\uDC0FRA\u00DFE" };
-
-                // Ligatures do not expand when cased.
-                yield return new object[] { cultureName, "\uFB00", "\uFB00" };
-
-                // Precomposed character with no uppercase variant, we don't want to "decompose" this
-                // as part of casing.
-                yield return new object[] { cultureName, "\u0149", "\u0149" };
+                if (!PlatformDetection.IsHybridGlobalizationOnOSX)
+                {
+                    // Unicode defines some codepoints which expand into multiple codepoints
+                    // when cased (see SpecialCasing.txt from UNIDATA for some examples). We have never done
+                    // these sorts of expansions, since it would cause string lengths to change when cased,
+                    // which is non-intuitive. In addition, there are some context sensitive mappings which
+                    // we also don't preform.
+                    // es-zed does not case to SS when uppercased.
+                    // on OSX, capitalizing the German letter ß (sharp S) gives SS
+                    yield return new object[] { cultureName, "\u00DF", "\u00DF" };
+                    yield return new object[] { cultureName, "stra\u00DFe", "STRA\u00DFE" };
+                    if (!PlatformDetection.IsNlsGlobalization)
+                        yield return new object[] { cultureName, "st\uD801\uDC37ra\u00DFe", "ST\uD801\uDC0FRA\u00DFE" };
+
+                    // Ligatures do not expand when cased.
+                    // on OSX, this is uppercase to "FF"
+                    yield return new object[] { cultureName, "\uFB00", "\uFB00" };
+
+                    // Precomposed character with no uppercase variant, we don't want to "decompose" this
+                    // as part of casing.
+                    // on OSX, this is uppercased to "ʼN"
+                    yield return new object[] { cultureName, "\u0149", "\u0149" };
+                }
             }
 
             // Turkish i
index 0912d9b..5cc1e48 100644 (file)
     <Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.Icu.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.Nls.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.OSX.cs" Condition="'$(IsOSXLike)' == 'true'" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.WebAssembly.cs" Condition="'$(TargetsBrowser)' == 'true'" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Globalization\ThaiBuddhistCalendar.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TimeSpanFormat.cs" />
     <Compile Include="$(CommonPath)Interop\Interop.Casing.cs">
       <Link>Common\Interop\Interop.Casing.cs</Link>
     </Compile>
+    <Compile Include="$(CommonPath)Interop\Interop.Casing.OSX.cs" Condition="'$(IsOSXLike)' == 'true'">
+      <Link>Common\Interop\Interop.Casing.OSX.cs</Link>
+    </Compile>
     <Compile Include="$(CommonPath)Interop\Interop.Collation.cs">
       <Link>Common\Interop\Interop.Collation.cs</Link>
     </Compile>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.OSX.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.OSX.cs
new file mode 100644 (file)
index 0000000..4bfd34e
--- /dev/null
@@ -0,0 +1,27 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+
+namespace System.Globalization
+{
+    public partial class TextInfo
+    {
+        /// <summary>
+        /// Changes the case of <paramref name="src"/> into <paramref name="dstBuffer"/> by calling the
+        /// native casing entry points. Only expected to run in hybrid globalization mode.
+        /// </summary>
+        /// <param name="src">Pointer to the source characters.</param>
+        /// <param name="srcLen">Number of source characters.</param>
+        /// <param name="dstBuffer">Pointer to the destination buffer.</param>
+        /// <param name="dstBufferCapacity">Capacity of the destination buffer, in characters.</param>
+        /// <param name="toUpper"><c>true</c> to uppercase, <c>false</c> to lowercase.</param>
+        /// <exception cref="Exception">The native call returned a non-success result code.</exception>
+        internal unsafe void ChangeCaseNative(char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, bool toUpper)
+        {
+            Debug.Assert(!GlobalizationMode.Invariant);
+            Debug.Assert(!GlobalizationMode.UseNls);
+            Debug.Assert(GlobalizationMode.Hybrid);
+
+            // An empty culture name selects the locale-independent native routine.
+            int status = HasEmptyCultureName
+                ? Interop.Globalization.ChangeCaseInvariantNative(src, srcLen, dstBuffer, dstBufferCapacity, toUpper)
+                : Interop.Globalization.ChangeCaseNative(_cultureName, _cultureName.Length, src, srcLen, dstBuffer, dstBufferCapacity, toUpper);
+
+            if (status == (int)Interop.Globalization.ResultCode.Success)
+                return;
+
+            // Map the native result code to the message carried by the exception.
+            string message;
+            if (status == (int)Interop.Globalization.ResultCode.InvalidCodePoint)
+                message = "Invalid code point while case changing";
+            else if (status == (int)Interop.Globalization.ResultCode.InsufficientBuffer)
+                message = "Insufficiently sized destination buffer";
+            else
+                message = "Exception occurred while case changing";
+
+            throw new Exception(message);
+        }
+    }
+}
index 018724a..d868e0d 100644 (file)
@@ -692,6 +692,12 @@ namespace System.Globalization
                 JsChangeCase(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper);
                 return;
             }
+#elif TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
+            if (GlobalizationMode.Hybrid)
+            {
+                ChangeCaseNative(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper);
+                return;
+            }
 #endif
             IcuChangeCase(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper);
         }
index 6f5e850..54f2052 100644 (file)
@@ -71,7 +71,8 @@ if(HAVE_SYS_ICU)
     set(icu_shim_sources_base
         ${icu_shim_sources_base}
         pal_locale.m
-        pal_collation.m)
+        pal_collation.m
+        pal_casing.m)
   endif()
 
   addprefix(icu_shim_sources "${ICU_SHIM_PATH}" "${icu_shim_sources_base}")
index 8f528be..1c9a25f 100644 (file)
@@ -93,7 +93,7 @@ else()
 endif()
 
 if (CLR_CMAKE_TARGET_APPLE)
-    set(NATIVEGLOBALIZATION_SOURCES ${NATIVEGLOBALIZATION_SOURCES} pal_locale.m pal_collation.m)
+    set(NATIVEGLOBALIZATION_SOURCES ${NATIVEGLOBALIZATION_SOURCES} pal_locale.m pal_collation.m pal_casing.m)
 endif()
 
 # time zone names are filtered out of icu data for the browser and associated functionality is disabled
index 9e0e4f4..37246e1 100644 (file)
@@ -59,16 +59,18 @@ static const Entry s_globalizationNative[] =
     DllImportEntry(GlobalizationNative_ToUnicode)
     DllImportEntry(GlobalizationNative_WindowsIdToIanaId)
 #ifdef __APPLE__
+    DllImportEntry(GlobalizationNative_ChangeCaseInvariantNative)
+    DllImportEntry(GlobalizationNative_ChangeCaseNative)
     DllImportEntry(GlobalizationNative_CompareStringNative)
-    DllImportEntry(GlobalizationNative_GetLocaleNameNative)
-    DllImportEntry(GlobalizationNative_GetLocaleInfoStringNative)
+    DllImportEntry(GlobalizationNative_EndsWithNative)
     DllImportEntry(GlobalizationNative_GetLocaleInfoIntNative)
     DllImportEntry(GlobalizationNative_GetLocaleInfoPrimaryGroupingSizeNative)
     DllImportEntry(GlobalizationNative_GetLocaleInfoSecondaryGroupingSizeNative)
+    DllImportEntry(GlobalizationNative_GetLocaleInfoStringNative)
+    DllImportEntry(GlobalizationNative_GetLocaleNameNative)
     DllImportEntry(GlobalizationNative_GetLocaleTimeFormatNative)
     DllImportEntry(GlobalizationNative_IndexOfNative)
     DllImportEntry(GlobalizationNative_StartsWithNative)
-    DllImportEntry(GlobalizationNative_EndsWithNative)
 #endif
 };
 
index b49a775..e39d38e 100644 (file)
@@ -23,3 +23,19 @@ PALEXPORT void GlobalizationNative_ChangeCaseTurkish(const UChar* lpSrc,
                                                      int32_t bToUpper);
 
 PALEXPORT void GlobalizationNative_InitOrdinalCasingPage(int32_t pageNumber, UChar* pTarget);
+
+#ifdef __APPLE__
+// Locale-aware case change, declared for Apple targets only.
+// localeName/lNameLength give the locale name as UTF-16 code units; lpSrc/cwSrcLength
+// give the source text; lpDst/cwDstLength give the destination buffer and its capacity.
+// bToUpper is non-zero for uppercasing. Returns a ResultCode value as int32_t.
+PALEXPORT int32_t GlobalizationNative_ChangeCaseNative(const uint16_t* localeName,
+                                                       int32_t lNameLength,
+                                                       const uint16_t* lpSrc,
+                                                       int32_t cwSrcLength,
+                                                       uint16_t* lpDst,
+                                                       int32_t cwDstLength,
+                                                       int32_t bToUpper);
+
+// Case change without an explicit locale, declared for Apple targets only.
+// Parameters mirror GlobalizationNative_ChangeCaseNative minus the locale name.
+// Returns a ResultCode value as int32_t.
+PALEXPORT int32_t GlobalizationNative_ChangeCaseInvariantNative(const uint16_t* lpSrc,
+                                                                int32_t cwSrcLength,
+                                                                uint16_t* lpDst,
+                                                                int32_t cwDstLength,
+                                                                int32_t bToUpper);
+#endif
diff --git a/src/native/libs/System.Globalization.Native/pal_casing.m b/src/native/libs/System.Globalization.Native/pal_casing.m
new file mode 100644 (file)
index 0000000..7aeacb5
--- /dev/null
@@ -0,0 +1,103 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "pal_icushim_internal.h"
+#include "pal_casing.h"
+#include "pal_errors.h"
+
+#import <Foundation/Foundation.h>
+
+#if defined(TARGET_OSX) || defined(TARGET_MACCATALYST) || defined(TARGET_IOS) || defined(TARGET_TVOS)
+
+
+/**
+ * Append a code point to a UTF-16 buffer, writing 1 or 2 code units.
+ * The offset points to the current end of the buffer contents
+ * and is advanced (post-increment) by the number of code units written.
+ * Values up to 0xFFFF are stored as a single code unit. Values above the
+ * Basic Multilingual Plane are stored as a surrogate pair:
+ *   High surrogate range: 0xD800 - 0xDBFF
+ *   Low surrogate range:  0xDC00 - 0xDFFF
+ * Nothing is written when an error is reported; in particular a surrogate
+ * pair is only written when BOTH code units fit in the buffer.
+ *
+ * The macro arguments are evaluated more than once, so they must not have
+ * side effects. The body is wrapped in do { } while (0) so that the macro
+ * behaves as a single statement at its use site.
+ *
+ * @param buffer uint16_t * destination buffer (written to)
+ * @param offset current write offset into buffer, advanced on success
+ * @param capacity size of the buffer in code units
+ * @param codePoint code point to append
+ * @param isError output, set to InsufficientBuffer when the code point does not fit,
+ *                or to InvalidCodePoint when codePoint is above 0x10FFFF; otherwise not modified
+ */
+#define Append(buffer, offset, capacity, codePoint, isError) do { \
+    if ((offset) >= (capacity)) /* insufficiently sized destination buffer */ { \
+        (isError) = InsufficientBuffer; \
+    } else if ((uint32_t)(codePoint) > 0x10ffff) /* invalid code point */  { \
+        (isError) = InvalidCodePoint; \
+    } else if ((uint32_t)(codePoint) <= 0xffff) { \
+        (buffer)[(offset)++] = (uint16_t)(codePoint); \
+    } else if ((offset) + 1 >= (capacity)) /* no room for the trail surrogate */ { \
+        (isError) = InsufficientBuffer; \
+    } else { \
+        (buffer)[(offset)++] = (uint16_t)(((codePoint) >> 10) + 0xd7c0); \
+        (buffer)[(offset)++] = (uint16_t)(((codePoint)&0x3ff) | 0xdc00); \
+    } \
+} while (0)
+
+/*
+Function:
+ChangeCaseNative
+
+Performs upper or lower casing of a string into a new buffer, taking into account the specified locale.
+
+localeName / lNameLength: locale identifier as UTF-16 code units; when NULL or empty,
+                          [NSLocale systemLocale] is used instead.
+lpSrc / cwSrcLength:      source text as UTF-16 code units and its length.
+lpDst / cwDstLength:      destination buffer and its capacity in code units.
+bToUpper:                 non-zero to uppercase, zero to lowercase.
+
+Returns 0 for success, non-zero on failure see ErrorCodes.
+lpDst may have been partially written when a failure is returned.
+*/
+int32_t GlobalizationNative_ChangeCaseNative(const uint16_t* localeName, int32_t lNameLength,
+                                             const uint16_t* lpSrc, int32_t cwSrcLength, uint16_t* lpDst, int32_t cwDstLength, int32_t bToUpper)
+{
+    // The Foundation factory methods below return autoreleased objects. This function is an
+    // exported native entry point, so an enclosing autorelease pool cannot be assumed;
+    // create one locally so the temporaries are released before returning.
+    @autoreleasepool
+    {
+        NSLocale *currentLocale;
+        // Treat a non-positive length like a missing name; a negative value would otherwise
+        // be converted to a huge unsigned length.
+        if (localeName == NULL || lNameLength <= 0)
+        {
+            currentLocale = [NSLocale systemLocale];
+        }
+        else
+        {
+            NSString *locName = [NSString stringWithCharacters: localeName length: (NSUInteger)lNameLength];
+            currentLocale = [NSLocale localeWithLocaleIdentifier:locName];
+        }
+        NSString *source = [NSString stringWithCharacters: lpSrc length: (NSUInteger)cwSrcLength];
+        NSString *result = bToUpper ? [source uppercaseStringWithLocale:currentLocale] : [source lowercaseStringWithLocale:currentLocale];
+
+        // Read the length once and index with NSUInteger to avoid a signed/unsigned comparison.
+        NSUInteger resultLength = result.length;
+        int32_t dstIdx = 0, isError = 0;
+        for (NSUInteger srcIdx = 0; srcIdx < resultLength; srcIdx++)
+        {
+            // Copy the cased text one UTF-16 code unit at a time; Append reports
+            // a non-zero code in isError when the destination buffer is exhausted.
+            uint16_t dstCodepoint = [result characterAtIndex:srcIdx];
+            Append(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
+            if (isError)
+                return isError;
+        }
+        return Success;
+    }
+}
+
+/*
+Function:
+ChangeCaseInvariantNative
+
+Performs upper or lower casing of a string into a new buffer, without an explicit locale
+(uses NSString uppercaseString / lowercaseString).
+
+lpSrc / cwSrcLength: source text as UTF-16 code units and its length.
+lpDst / cwDstLength: destination buffer and its capacity in code units.
+bToUpper:            non-zero to uppercase, zero to lowercase.
+
+Returns 0 for success, non-zero on failure see ErrorCodes.
+lpDst may have been partially written when a failure is returned.
+*/
+int32_t GlobalizationNative_ChangeCaseInvariantNative(const uint16_t* lpSrc, int32_t cwSrcLength, uint16_t* lpDst, int32_t cwDstLength, int32_t bToUpper)
+{
+    // The Foundation calls below return autoreleased objects. This function is an exported
+    // native entry point, so an enclosing autorelease pool cannot be assumed; create one
+    // locally so the temporaries are released before returning.
+    @autoreleasepool
+    {
+        NSString *source = [NSString stringWithCharacters: lpSrc length: (NSUInteger)cwSrcLength];
+        NSString *result = bToUpper ? source.uppercaseString : source.lowercaseString;
+
+        // Read the length once and index with NSUInteger to avoid a signed/unsigned comparison.
+        NSUInteger resultLength = result.length;
+        int32_t dstIdx = 0, isError = 0;
+        for (NSUInteger srcIdx = 0; srcIdx < resultLength; srcIdx++)
+        {
+            // Copy the cased text one UTF-16 code unit at a time; Append reports
+            // a non-zero code in isError when the destination buffer is exhausted.
+            uint16_t dstCodepoint = [result characterAtIndex:srcIdx];
+            Append(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
+            if (isError)
+                return isError;
+        }
+        return Success;
+    }
+}
+
+#endif
index 535092a..84fe004 100644 (file)
@@ -12,5 +12,6 @@ typedef enum
     Success = 0,
     UnknownError = 1,
     InsufficientBuffer = 2,
-    OutOfMemory = 3
+    OutOfMemory = 3,
+    InvalidCodePoint = 4
 } ResultCode;
index c26e89b..7856a8c 100644 (file)
@@ -459,7 +459,7 @@ int32_t GlobalizationNative_GetLocaleInfoIntNative(const char* localeName, Local
         }        
         case LocaleNumber_ReadingLayout:
         {
-            NSLocaleLanguageDirection langDir = [NSLocale characterDirectionForLanguage:[[NSLocale currentLocale] objectForKey:NSLocaleLanguageCode]];
+            NSLocaleLanguageDirection langDir = [NSLocale characterDirectionForLanguage:[currentLocale objectForKey:NSLocaleLanguageCode]];
             //  0 - Left to right (such as en-US)
             //  1 - Right to left (such as arabic locales)
             value = NSLocaleLanguageDirectionRightToLeft == langDir ? 1 : 0;