Implement GlobalizationNative_ChangeCaseNative , GlobalizationNative_ChangeCaseInvariantNative for OSX
- CompareInfo.GetSortKeyLength
- CompareInfo.GetHashCode
-Apple Native API does not have an equivalent, so they throw `PlatformNotSupportedException`.
\ No newline at end of file
+Apple Native API does not have an equivalent, so they throw `PlatformNotSupportedException`.
+
+
+## Case change
+
+Affected public APIs:
+- TextInfo.ToLower
+- TextInfo.ToUpper
+
+The following Apple native functions are used:
+- [uppercaseString](https://developer.apple.com/documentation/foundation/nsstring/1409855-uppercasestring)
+- [lowercaseString](https://developer.apple.com/documentation/foundation/nsstring/1408467-lowercasestring)
+- [uppercaseStringWithLocale](https://developer.apple.com/documentation/foundation/nsstring/1413316-uppercasestringwithlocale?language=objc)
+- [lowercaseStringWithLocale](https://developer.apple.com/documentation/foundation/nsstring/1417298-lowercasestringwithlocale?language=objc)
+
+Behavioural changes compared to ICU:
+
+ - Final sigma behavior correction:
+
+ ICU-based case change does not respect the final-sigma rule, but hybrid mode does, so "ΒΌΛΟΣ" -> "βόλος", not "βόλοσ".
+
+ - The cases below expand into more characters when upper-cased, so they throw an exception because of an insufficiently sized destination buffer:
+
+ - Capitalizing the German letter ß (sharp S) gives SS when using Apple native functions.
+
+ - Capitalizing ligatures gives a different result on Apple platforms, e.g. "\uFB00" (ﬀ) uppercases to "FF".
+
+ - Capitalizing "\u0149" (ʼn) on Apple platforms returns combination of "\u02BC" (ʼ) and N -> (ʼN)
+
+
+
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.InteropServices;
+
+internal static partial class Interop
+{
+    internal static partial class Globalization
+    {
+        /// <summary>
+        /// Changes the case of a UTF-16 buffer using the Apple native casing routines for the given locale.
+        /// </summary>
+        /// <param name="localeName">Name of the locale whose casing rules are applied.</param>
+        /// <param name="lNameLen">Length of <paramref name="localeName"/> in UTF-16 code units.</param>
+        /// <param name="src">Pointer to the source characters.</param>
+        /// <param name="srcLen">Number of UTF-16 code units in <paramref name="src"/>.</param>
+        /// <param name="dstBuffer">Pointer to the buffer that receives the cased characters.</param>
+        /// <param name="dstBufferCapacity">Capacity of <paramref name="dstBuffer"/> in UTF-16 code units.</param>
+        /// <param name="bToUpper"><see langword="true"/> to upper-case, <see langword="false"/> to lower-case.</param>
+        /// <returns>A <c>ResultCode</c> value as an integer; 0 means success.</returns>
+        [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ChangeCaseNative", StringMarshalling = StringMarshalling.Utf16)]
+        internal static unsafe partial int ChangeCaseNative(string localeName, int lNameLen, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, [MarshalAs(UnmanagedType.Bool)] bool bToUpper);
+
+        /// <summary>
+        /// Changes the case of a UTF-16 buffer using the Apple native locale-independent casing routines.
+        /// </summary>
+        /// <param name="src">Pointer to the source characters.</param>
+        /// <param name="srcLen">Number of UTF-16 code units in <paramref name="src"/>.</param>
+        /// <param name="dstBuffer">Pointer to the buffer that receives the cased characters.</param>
+        /// <param name="dstBufferCapacity">Capacity of <paramref name="dstBuffer"/> in UTF-16 code units.</param>
+        /// <param name="bToUpper"><see langword="true"/> to upper-case, <see langword="false"/> to lower-case.</param>
+        /// <returns>A <c>ResultCode</c> value as an integer; 0 means success.</returns>
+        // No string parameter is marshalled here (all text travels through char* buffers),
+        // so the marshalling mode is kept at Utf16 purely for consistency with ChangeCaseNative.
+        [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ChangeCaseInvariantNative", StringMarshalling = StringMarshalling.Utf16)]
+        internal static unsafe partial int ChangeCaseInvariantNative(char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, [MarshalAs(UnmanagedType.Bool)] bool bToUpper);
+    }
+}
Success = 0,
UnknownError = 1,
InsufficientBuffer = 2,
- OutOfMemory = 3
+ OutOfMemory = 3,
+ InvalidCodePoint = 4,
}
}
}
<Compile Include="..\CompareInfo\CompareInfoTests.LastIndexOf.cs" />
<Compile Include="..\CompareInfo\CompareInfoTests.IsPrefix.cs" />
<Compile Include="..\CompareInfo\CompareInfoTests.IsSuffix.cs" />
+ <Compile Include="..\System\Globalization\TextInfoTests.cs" />
</ItemGroup>
</Project>
// we also don't preform.
// Greek Capital Letter Sigma (does not case to U+03C2 with "final sigma" rule).
yield return new object[] { cultureName, "\u03A3", "\u03C3" };
- if (PlatformDetection.IsHybridGlobalizationOnBrowser)
+ if (PlatformDetection.IsHybridGlobalizationOnBrowser || PlatformDetection.IsHybridGlobalizationOnOSX)
{
- // JS is using "final sigma" rule correctly - it's costly to unify it with ICU's behavior
+ // JS and Apple platforms are using "final sigma" rule correctly - it's costly to unify it with ICU's behavior
yield return new object[] { cultureName, "O\u03A3", "o\u03C2" };
}
else
// RAINBOW (outside the BMP and does not case)
yield return new object[] { cultureName, "\U0001F308", "\U0001F308" };
- // Unicode defines some codepoints which expand into multiple codepoints
- // when cased (see SpecialCasing.txt from UNIDATA for some examples). We have never done
- // these sorts of expansions, since it would cause string lengths to change when cased,
- // which is non-intuitive. In addition, there are some context sensitive mappings which
- // we also don't preform.
- // es-zed does not case to SS when uppercased.
- yield return new object[] { cultureName, "\u00DF", "\u00DF" };
- yield return new object[] { cultureName, "stra\u00DFe", "STRA\u00DFE" };
- if (!PlatformDetection.IsNlsGlobalization)
- yield return new object[] { cultureName, "st\uD801\uDC37ra\u00DFe", "ST\uD801\uDC0FRA\u00DFE" };
-
- // Ligatures do not expand when cased.
- yield return new object[] { cultureName, "\uFB00", "\uFB00" };
-
- // Precomposed character with no uppercase variant, we don't want to "decompose" this
- // as part of casing.
- yield return new object[] { cultureName, "\u0149", "\u0149" };
+ if (!PlatformDetection.IsHybridGlobalizationOnOSX)
+ {
+ // Unicode defines some codepoints which expand into multiple codepoints
+ // when cased (see SpecialCasing.txt from UNIDATA for some examples). We have never done
+ // these sorts of expansions, since it would cause string lengths to change when cased,
+ // which is non-intuitive. In addition, there are some context sensitive mappings which
+ // we also don't preform.
+ // es-zed does not case to SS when uppercased.
+ // on OSX, capitalizing the German letter ß (sharp S) gives SS
+ yield return new object[] { cultureName, "\u00DF", "\u00DF" };
+ yield return new object[] { cultureName, "stra\u00DFe", "STRA\u00DFE" };
+ if (!PlatformDetection.IsNlsGlobalization)
+ yield return new object[] { cultureName, "st\uD801\uDC37ra\u00DFe", "ST\uD801\uDC0FRA\u00DFE" };
+
+ // Ligatures do not expand when cased.
+ // on OSX, this is uppercase to "FF"
+ yield return new object[] { cultureName, "\uFB00", "\uFB00" };
+
+ // Precomposed character with no uppercase variant, we don't want to "decompose" this
+ // as part of casing.
+ // on OSX, this is uppercased to "ʼN"
+ yield return new object[] { cultureName, "\u0149", "\u0149" };
+ }
}
// Turkish i
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.Icu.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.Nls.cs" />
+ <Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.OSX.cs" Condition="'$(IsOSXLike)' == 'true'" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.WebAssembly.cs" Condition="'$(TargetsBrowser)' == 'true'" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\ThaiBuddhistCalendar.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TimeSpanFormat.cs" />
<Compile Include="$(CommonPath)Interop\Interop.Casing.cs">
<Link>Common\Interop\Interop.Casing.cs</Link>
</Compile>
+ <Compile Include="$(CommonPath)Interop\Interop.Casing.OSX.cs" Condition="'$(IsOSXLike)' == 'true'">
+ <Link>Common\Interop\Interop.Casing.OSX.cs</Link>
+ </Compile>
<Compile Include="$(CommonPath)Interop\Interop.Collation.cs">
<Link>Common\Interop\Interop.Collation.cs</Link>
</Compile>
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+
+namespace System.Globalization
+{
+    public partial class TextInfo
+    {
+        /// <summary>
+        /// Upper- or lower-cases <paramref name="src"/> into <paramref name="dstBuffer"/> through the
+        /// native globalization library, using the invariant entry point when this instance has an
+        /// empty culture name and the culture-aware entry point otherwise.
+        /// </summary>
+        /// <param name="src">Pointer to the source characters.</param>
+        /// <param name="srcLen">Number of characters in <paramref name="src"/>.</param>
+        /// <param name="dstBuffer">Pointer to the destination buffer.</param>
+        /// <param name="dstBufferCapacity">Capacity of <paramref name="dstBuffer"/> in characters.</param>
+        /// <param name="toUpper"><see langword="true"/> to upper-case, <see langword="false"/> to lower-case.</param>
+        /// <exception cref="Exception">The native call returned a result code other than success.</exception>
+        internal unsafe void ChangeCaseNative(char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, bool toUpper)
+        {
+            Debug.Assert(!GlobalizationMode.Invariant);
+            Debug.Assert(!GlobalizationMode.UseNls);
+            Debug.Assert(GlobalizationMode.Hybrid);
+
+            int status = HasEmptyCultureName
+                ? Interop.Globalization.ChangeCaseInvariantNative(src, srcLen, dstBuffer, dstBufferCapacity, toUpper)
+                : Interop.Globalization.ChangeCaseNative(_cultureName, _cultureName.Length, src, srcLen, dstBuffer, dstBufferCapacity, toUpper);
+
+            if (status == (int)Interop.Globalization.ResultCode.Success)
+            {
+                return;
+            }
+
+            // Translate the native result code into the message carried by the exception.
+            string message;
+            if (status == (int)Interop.Globalization.ResultCode.InvalidCodePoint)
+            {
+                message = "Invalid code point while case changing";
+            }
+            else if (status == (int)Interop.Globalization.ResultCode.InsufficientBuffer)
+            {
+                message = "Insufficiently sized destination buffer";
+            }
+            else
+            {
+                message = "Exception occurred while case changing";
+            }
+
+            throw new Exception(message);
+        }
+    }
+}
JsChangeCase(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper);
return;
}
+#elif TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
+ if (GlobalizationMode.Hybrid)
+ {
+ ChangeCaseNative(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper);
+ return;
+ }
#endif
IcuChangeCase(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper);
}
set(icu_shim_sources_base
${icu_shim_sources_base}
pal_locale.m
- pal_collation.m)
+ pal_collation.m
+ pal_casing.m)
endif()
addprefix(icu_shim_sources "${ICU_SHIM_PATH}" "${icu_shim_sources_base}")
endif()
if (CLR_CMAKE_TARGET_APPLE)
- set(NATIVEGLOBALIZATION_SOURCES ${NATIVEGLOBALIZATION_SOURCES} pal_locale.m pal_collation.m)
+ set(NATIVEGLOBALIZATION_SOURCES ${NATIVEGLOBALIZATION_SOURCES} pal_locale.m pal_collation.m pal_casing.m)
endif()
# time zone names are filtered out of icu data for the browser and associated functionality is disabled
DllImportEntry(GlobalizationNative_ToUnicode)
DllImportEntry(GlobalizationNative_WindowsIdToIanaId)
#ifdef __APPLE__
+ DllImportEntry(GlobalizationNative_ChangeCaseInvariantNative)
+ DllImportEntry(GlobalizationNative_ChangeCaseNative)
DllImportEntry(GlobalizationNative_CompareStringNative)
- DllImportEntry(GlobalizationNative_GetLocaleNameNative)
- DllImportEntry(GlobalizationNative_GetLocaleInfoStringNative)
+ DllImportEntry(GlobalizationNative_EndsWithNative)
DllImportEntry(GlobalizationNative_GetLocaleInfoIntNative)
DllImportEntry(GlobalizationNative_GetLocaleInfoPrimaryGroupingSizeNative)
DllImportEntry(GlobalizationNative_GetLocaleInfoSecondaryGroupingSizeNative)
+ DllImportEntry(GlobalizationNative_GetLocaleInfoStringNative)
+ DllImportEntry(GlobalizationNative_GetLocaleNameNative)
DllImportEntry(GlobalizationNative_GetLocaleTimeFormatNative)
DllImportEntry(GlobalizationNative_IndexOfNative)
DllImportEntry(GlobalizationNative_StartsWithNative)
- DllImportEntry(GlobalizationNative_EndsWithNative)
#endif
};
int32_t bToUpper);
PALEXPORT void GlobalizationNative_InitOrdinalCasingPage(int32_t pageNumber, UChar* pTarget);
+
+#ifdef __APPLE__
+/*
+Locale-aware case change backed by the Apple native string APIs.
+
+localeName/lNameLength  UTF-16 locale name and its length in code units
+lpSrc/cwSrcLength       UTF-16 source text and its length in code units
+lpDst/cwDstLength       destination buffer and its capacity in code units
+bToUpper                non-zero to upper-case, zero to lower-case
+
+Returns 0 on success, a non-zero ResultCode on failure.
+*/
+PALEXPORT int32_t GlobalizationNative_ChangeCaseNative(const uint16_t* localeName,
+ int32_t lNameLength,
+ const uint16_t* lpSrc,
+ int32_t cwSrcLength,
+ uint16_t* lpDst,
+ int32_t cwDstLength,
+ int32_t bToUpper);
+
+/*
+Locale-independent counterpart of GlobalizationNative_ChangeCaseNative.
+
+lpSrc/cwSrcLength       UTF-16 source text and its length in code units
+lpDst/cwDstLength       destination buffer and its capacity in code units
+bToUpper                non-zero to upper-case, zero to lower-case
+
+Returns 0 on success, a non-zero ResultCode on failure.
+*/
+PALEXPORT int32_t GlobalizationNative_ChangeCaseInvariantNative(const uint16_t* lpSrc,
+ int32_t cwSrcLength,
+ uint16_t* lpDst,
+ int32_t cwDstLength,
+ int32_t bToUpper);
+#endif
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "pal_icushim_internal.h"
+#include "pal_casing.h"
+#include "pal_errors.h"
+
+#import <Foundation/Foundation.h>
+
+#if defined(TARGET_OSX) || defined(TARGET_MACCATALYST) || defined(TARGET_IOS) || defined(TARGET_TVOS)
+
+
+/**
+ * Append a code point to a UTF-16 string, writing 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment) by the number of code units written.
+ * "Safe" macro: validates the code point and the remaining capacity
+ * before anything is written.
+ * Code points outside of the Basic Multilingual Plane are converted into
+ * the corresponding surrogate pair, provided both units fit.
+ * High surrogate range: 0xD800 - 0xDBFF
+ * Low surrogate range: 0xDC00 - 0xDFFF
+ * On failure nothing is written, the offset is left untouched and
+ * isError receives InsufficientBuffer or InvalidCodePoint.
+ *
+ * Arguments may be evaluated more than once; pass side-effect-free expressions.
+ *
+ * @param buffer uint16_t * destination string buffer (written to)
+ * @param offset current end of the string contents, advanced on success
+ * @param capacity size of the string buffer in code units
+ * @param codePoint code point to append
+ * @param isError integer lvalue set to a non-zero ResultCode if an error occurs, otherwise not modified
+ */
+#define Append(buffer, offset, capacity, codePoint, isError) do { \
+    if ((offset) >= (capacity)) /* insufficiently sized destination buffer */ { \
+        (isError) = InsufficientBuffer; \
+    } else if ((uint32_t)(codePoint) > 0x10ffff) /* invalid code point */ { \
+        (isError) = InvalidCodePoint; \
+    } else if ((uint32_t)(codePoint) <= 0xffff) { \
+        (buffer)[(offset)++] = (uint16_t)(codePoint); \
+    } else if ((capacity) - (offset) < 2) /* trail surrogate does not fit */ { \
+        (isError) = InsufficientBuffer; \
+    } else { \
+        (buffer)[(offset)++] = (uint16_t)(((codePoint) >> 10) + 0xd7c0); \
+        (buffer)[(offset)++] = (uint16_t)(((codePoint)&0x3ff) | 0xdc00); \
+    } \
+} while (0)
+
+/*
+Function:
+ChangeCaseNative
+
+Performs upper or lower casing of a string into a new buffer, taking into account the specified locale.
+When localeName is NULL or lNameLength is 0 the system locale is used.
+The cased text is copied into lpDst one UTF-16 code unit at a time; if it does not fit in
+cwDstLength code units, the units that did fit remain in lpDst and an error code is returned.
+Returns 0 for success, non-zero on failure see ResultCode.
+*/
+int32_t GlobalizationNative_ChangeCaseNative(const uint16_t* localeName, int32_t lNameLength,
+    const uint16_t* lpSrc, int32_t cwSrcLength, uint16_t* lpDst, int32_t cwDstLength, int32_t bToUpper)
+{
+    // The Foundation convenience constructors used below hand back autoreleased objects.
+    // This entry point is invoked through P/Invoke, so the calling thread may not have an
+    // autorelease pool of its own; a local pool makes sure the temporaries are released
+    // when the function returns.
+    @autoreleasepool
+    {
+        NSLocale *currentLocale;
+        if (localeName == NULL || lNameLength == 0)
+        {
+            currentLocale = [NSLocale systemLocale];
+        }
+        else
+        {
+            NSString *locName = [NSString stringWithCharacters: localeName length: lNameLength];
+            currentLocale = [NSLocale localeWithLocaleIdentifier:locName];
+        }
+        NSString *source = [NSString stringWithCharacters: lpSrc length: cwSrcLength];
+        NSString *result = bToUpper ? [source uppercaseStringWithLocale:currentLocale] : [source lowercaseStringWithLocale:currentLocale];
+
+        int32_t dstIdx = 0, isError = 0;
+        uint16_t dstCodepoint;
+        // NSString lengths are unsigned; index with the same type to avoid a signed/unsigned comparison.
+        NSUInteger resultLength = result.length;
+        for (NSUInteger srcIdx = 0; srcIdx < resultLength; srcIdx++)
+        {
+            dstCodepoint = [result characterAtIndex:srcIdx];
+            Append(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
+            if (isError)
+                return isError;
+        }
+        return Success;
+    }
+}
+
+/*
+Function:
+ChangeCaseInvariantNative
+
+Performs upper or lower casing of a string into a new buffer without applying a locale.
+The cased text is copied into lpDst one UTF-16 code unit at a time; if it does not fit in
+cwDstLength code units, the units that did fit remain in lpDst and an error code is returned.
+Returns 0 for success, non-zero on failure see ResultCode.
+*/
+int32_t GlobalizationNative_ChangeCaseInvariantNative(const uint16_t* lpSrc, int32_t cwSrcLength, uint16_t* lpDst, int32_t cwDstLength, int32_t bToUpper)
+{
+    // The Foundation convenience constructors used below hand back autoreleased objects.
+    // This entry point is invoked through P/Invoke, so the calling thread may not have an
+    // autorelease pool of its own; a local pool makes sure the temporaries are released
+    // when the function returns.
+    @autoreleasepool
+    {
+        NSString *source = [NSString stringWithCharacters: lpSrc length: cwSrcLength];
+        NSString *result = bToUpper ? source.uppercaseString : source.lowercaseString;
+
+        int32_t dstIdx = 0, isError = 0;
+        uint16_t dstCodepoint;
+        // NSString lengths are unsigned; index with the same type to avoid a signed/unsigned comparison.
+        NSUInteger resultLength = result.length;
+        for (NSUInteger srcIdx = 0; srcIdx < resultLength; srcIdx++)
+        {
+            dstCodepoint = [result characterAtIndex:srcIdx];
+            Append(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
+            if (isError)
+                return isError;
+        }
+        return Success;
+    }
+}
+
+#endif
Success = 0,
UnknownError = 1,
InsufficientBuffer = 2,
- OutOfMemory = 3
+ OutOfMemory = 3,
+ InvalidCodePoint = 4
} ResultCode;
}
case LocaleNumber_ReadingLayout:
{
- NSLocaleLanguageDirection langDir = [NSLocale characterDirectionForLanguage:[[NSLocale currentLocale] objectForKey:NSLocaleLanguageCode]];
+ NSLocaleLanguageDirection langDir = [NSLocale characterDirectionForLanguage:[currentLocale objectForKey:NSLocaleLanguageCode]];
// 0 - Left to right (such as en-US)
// 1 - Right to left (such as arabic locales)
value = NSLocaleLanguageDirectionRightToLeft == langDir ? 1 : 0;