Add wrappers for ICU Casing
authorMatt Ellis <matell@microsoft.com>
Thu, 16 Jul 2015 20:33:03 +0000 (13:33 -0700)
committerMatt Ellis <matell@microsoft.com>
Tue, 22 Sep 2015 18:48:35 +0000 (11:48 -0700)
Unlike ICU, the CLR only wants to perform simple casing, so we can't
use the ICU APIs that work over strings.  Instead we have to do
codepoint by codepoint casing ourselves.

src/corefx/System.Globalization.Native/CMakeLists.txt
src/corefx/System.Globalization.Native/casing.cpp [new file with mode: 0644]

index 9fc28d3..47b5dd5 100644 (file)
@@ -15,6 +15,7 @@ endif()
 add_compile_options(-fPIC)
 
 set(NATIVEGLOBALIZATION_SOURCES
+    casing.cpp
     idna.cpp
     normalization.cpp
 )
diff --git a/src/corefx/System.Globalization.Native/casing.cpp b/src/corefx/System.Globalization.Native/casing.cpp
new file mode 100644 (file)
index 0000000..e7cd85b
--- /dev/null
@@ -0,0 +1,120 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+//
+
+#include <assert.h>
+#include <stdint.h>
+#include <unicode/uchar.h>
+#include <unicode/utf16.h>
+
+/*
+Function: ToUpperSimple
+Upper-cases lpSrc (cwSrcLength UTF-16 units) into lpDst (capacity cwDstLength) one code point at a time via u_toupper.
+*/
+extern "C" void ToUpperSimple(const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength)
+{
+    int32_t srcIdx = 0;
+    int32_t dstIdx = 0;
+
+    UBool isError = FALSE;
+
+    while (srcIdx < cwSrcLength)
+    {
+        UChar32 srcCodepoint;
+        UChar32 dstCodepoint;
+
+        U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
+        dstCodepoint = u_toupper(srcCodepoint);
+
+        U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
+
+        // Checked only by assert: the append must not have flagged an error, and the cased codepoint must take
+        // the same number of UTF-16 code units as the source codepoint, keeping srcIdx and dstIdx in lockstep.
+        assert(isError == FALSE && srcIdx == dstIdx);
+    }
+}
+
+/*
+Function: ToLowerSimple
+Lower-cases lpSrc (cwSrcLength UTF-16 units) into lpDst (capacity cwDstLength) one code point at a time via u_tolower.
+*/
+extern "C" void ToLowerSimple(const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength)
+{
+    int32_t srcIdx = 0;
+    int32_t dstIdx = 0;
+
+    UBool isError = FALSE;
+
+    while (srcIdx < cwSrcLength)
+    {
+        UChar32 srcCodepoint;
+        UChar32 dstCodepoint;
+
+        U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
+        dstCodepoint = u_tolower(srcCodepoint);
+
+        U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
+
+        // Checked only by assert: the append must not have flagged an error, and the cased codepoint must take
+        // the same number of UTF-16 code units as the source codepoint, keeping srcIdx and dstIdx in lockstep.
+        assert(isError == FALSE && srcIdx == dstIdx);
+    }
+}
+
+/*
+Function: ToUpperSimpleTurkishAzeri
+Upper-cases lpSrc into lpDst one code point at a time via u_toupper, except that U+0069 (i) becomes U+0130 (dotted capital I).
+*/
+extern "C" void ToUpperSimpleTurkishAzeri(const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength)
+{
+    int32_t srcIdx = 0;
+    int32_t dstIdx = 0;
+
+    UBool isError = FALSE;
+
+    while (srcIdx < cwSrcLength)
+    {
+        UChar32 srcCodepoint;
+        UChar32 dstCodepoint;
+
+        U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
+
+        dstCodepoint = ((srcCodepoint == (UChar32)0x0069) ? (UChar32)0x0130 : u_toupper(srcCodepoint));
+
+        U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
+
+        // Checked only by assert: the append must not have flagged an error, and the cased codepoint must take
+        // the same number of UTF-16 code units as the source codepoint, keeping srcIdx and dstIdx in lockstep.
+        assert(isError == FALSE && srcIdx == dstIdx);
+    }
+}
+
+/*
+Function: ToLowerSimpleTurkishAzeri
+Lower-cases lpSrc into lpDst one code point at a time via u_tolower, except that U+0049 (I) becomes U+0131 (dotless small i).
+*/
+extern "C" void ToLowerSimpleTurkishAzeri(const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength)
+{
+    int32_t srcIdx = 0;
+    int32_t dstIdx = 0;
+
+    UBool isError = FALSE;
+
+    while (srcIdx < cwSrcLength)
+    {
+        UChar32 srcCodepoint;
+        UChar32 dstCodepoint;
+
+        U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
+
+        dstCodepoint = ((srcCodepoint == (UChar32)0x0049) ? (UChar32)0x0131 : u_tolower(srcCodepoint));
+
+        U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
+
+        // Checked only by assert: the append must not have flagged an error, and the cased codepoint must take
+        // the same number of UTF-16 code units as the source codepoint, keeping srcIdx and dstIdx in lockstep.
+        assert(isError == FALSE && srcIdx == dstIdx);
+    }
+}
+