libwinpr-crt: improve unicode conversion test
author Marc-André Moreau <marcandre.moreau@gmail.com>
Sat, 15 Dec 2012 18:55:20 +0000 (13:55 -0500)
committer Marc-André Moreau <marcandre.moreau@gmail.com>
Sat, 15 Dec 2012 18:55:20 +0000 (13:55 -0500)
winpr/libwinpr/crt/test/TestMultiByteToWideChar.c

index 21cd719..d423def 100644 (file)
@@ -8,44 +8,51 @@
  * MultiByteToWideChar: http://msdn.microsoft.com/en-us/library/windows/desktop/dd319072/
  */
 
+/* Letters */
+
+static BYTE c_cedilla_UTF8[] = "\xC3\xA7\x00";
+static BYTE c_cedilla_UTF16[] = "\xE7\x00\x00\x00";
+static int c_cedilla_cchWideChar = 2;
+static int c_cedilla_cbMultiByte = 3;
+
 /* English */
 
 static BYTE en_Hello_UTF8[] = "Hello\0";
 static BYTE en_Hello_UTF16[] = "\x48\x00\x65\x00\x6C\x00\x6C\x00\x6F\x00\x00\x00";
-static int en_Hello_UTF8_Length = sizeof(en_Hello_UTF8) / (sizeof(CHAR));
-static int en_Hello_UTF16_Length = sizeof(en_Hello_UTF16) / (sizeof(WCHAR));
+static int en_Hello_cchWideChar = 6;
+static int en_Hello_cbMultiByte = 6;
 
 static BYTE en_HowAreYou_UTF8[] = "How are you?\0";
 static BYTE en_HowAreYou_UTF16[] = "\x48\x00\x6F\x00\x77\x00\x20\x00\x61\x00\x72\x00\x65\x00\x20\x00"
                "\x79\x00\x6F\x00\x75\x00\x3F\x00\x00\x00";
-static int en_HowAreYou_UTF8_Length = sizeof(en_HowAreYou_UTF8) / (sizeof(CHAR));
-static int en_HowAreYou_UTF16_Length = sizeof(en_HowAreYou_UTF16) / (sizeof(WCHAR));
+static int en_HowAreYou_cchWideChar = 13;
+static int en_HowAreYou_cbMultiByte = 13;
 
 /* French */
 
 static BYTE fr_Hello_UTF8[] = "Allo\0";
 static BYTE fr_Hello_UTF16[] = "\x41\x00\x6C\x00\x6C\x00\x6F\x00\x00\x00";
-static int fr_Hello_UTF8_Length = sizeof(fr_Hello_UTF8) / (sizeof(CHAR));
-static int fr_Hello_UTF16_Length = sizeof(fr_Hello_UTF16) / (sizeof(WCHAR));
+static int fr_Hello_cchWideChar = 5;
+static int fr_Hello_cbMultiByte = 5;
 
 static BYTE fr_HowAreYou_UTF8[] = "\x43\x6F\x6D\x6D\x65\x6E\x74\x20\xC3\xA7\x61\x20\x76\x61\x3F\x00";
 static BYTE fr_HowAreYou_UTF16[] = "\x43\x00\x6F\x00\x6D\x00\x6D\x00\x65\x00\x6E\x00\x74\x00\x20\x00"
                "\xE7\x00\x61\x00\x20\x00\x76\x00\x61\x00\x3F\x00\x00\x00";
-static int fr_HowAreYou_UTF8_Length = sizeof(fr_HowAreYou_UTF8) / (sizeof(CHAR));
-static int fr_HowAreYou_UTF16_Length = sizeof(fr_HowAreYou_UTF16) / (sizeof(WCHAR));
+static int fr_HowAreYou_cchWideChar = 15;
+static int fr_HowAreYou_cbMultiByte = 16;
 
 /* Russian */
 
 static BYTE ru_Hello_UTF8[] = "\xD0\x97\xD0\xB4\xD0\xBE\xD1\x80\xD0\xBE\xD0\xB2\xD0\xBE\x00";
 static BYTE ru_Hello_UTF16[] = "\x17\x04\x34\x04\x3E\x04\x40\x04\x3E\x04\x32\x04\x3E\x04\x00\x00";
-static int ru_Hello_UTF8_Length = sizeof(ru_Hello_UTF8) / (sizeof(CHAR));
-static int ru_Hello_UTF16_Length = sizeof(ru_Hello_UTF16) / (sizeof(WCHAR));
+static int ru_Hello_cchWideChar = 8;
+static int ru_Hello_cbMultiByte = 15;
 
 static BYTE ru_HowAreYou_UTF8[] = "\xD0\x9A\xD0\xB0\xD0\xBA\x20\xD0\xB4\xD0\xB5\xD0\xBB\xD0\xB0\x3F\x00";
 static BYTE ru_HowAreYou_UTF16[] = "\x1A\x04\x30\x04\x3A\x04\x20\x00\x34\x04\x35\x04\x3B\x04\x30\x04"
                "\x3F\x00\x00\x00";
-static int ru_HowAreYou_UTF8_Length = sizeof(ru_HowAreYou_UTF8) / (sizeof(CHAR));
-static int ru_HowAreYou_UTF16_Length = sizeof(ru_HowAreYou_UTF16) / (sizeof(WCHAR));
+static int ru_HowAreYou_cchWideChar = 10;
+static int ru_HowAreYou_cbMultiByte = 17;
 
 /* Arabic */
 
@@ -53,27 +60,27 @@ static BYTE ar_Hello_UTF8[] = "\xD8\xA7\xD9\x84\xD8\xB3\xD9\x84\xD8\xA7\xD9\x85\
                "\x84\xD9\x8A\xD9\x83\xD9\x85\x00";
 static BYTE ar_Hello_UTF16[] = "\x27\x06\x44\x06\x33\x06\x44\x06\x27\x06\x45\x06\x20\x00\x39\x06"
                "\x44\x06\x4A\x06\x43\x06\x45\x06\x00\x00";
-static int ar_Hello_UTF8_Length = sizeof(ar_Hello_UTF8) / (sizeof(CHAR));
-static int ar_Hello_UTF16_Length = sizeof(ar_Hello_UTF16) / (sizeof(WCHAR));
+static int ar_Hello_cchWideChar = 13;
+static int ar_Hello_cbMultiByte = 24;
 
 static BYTE ar_HowAreYou_UTF8[] = "\xD9\x83\xD9\x8A\xD9\x81\x20\xD8\xAD\xD8\xA7\xD9\x84\xD9\x83\xD8"
                "\x9F\x00";
 static BYTE ar_HowAreYou_UTF16[] = "\x43\x06\x4A\x06\x41\x06\x20\x00\x2D\x06\x27\x06\x44\x06\x43\x06"
                "\x1F\x06\x00\x00";
-static int ar_HowAreYou_UTF8_Length = sizeof(ar_HowAreYou_UTF8) / (sizeof(CHAR));
-static int ar_HowAreYou_UTF16_Length = sizeof(ar_HowAreYou_UTF16) / (sizeof(WCHAR));
+static int ar_HowAreYou_cchWideChar = 10;
+static int ar_HowAreYou_cbMultiByte = 18;
 
 /* Chinese */
 
 static BYTE ch_Hello_UTF8[] = "\xE4\xBD\xA0\xE5\xA5\xBD";
 static BYTE ch_Hello_UTF16[] = "\x60\x4F\x7D\x59";
-static int ch_Hello_UTF8_Length = sizeof(ch_Hello_UTF8) / (sizeof(CHAR));
-static int ch_Hello_UTF16_Length = sizeof(ch_Hello_UTF16) / (sizeof(WCHAR));
+static int ch_Hello_cchWideChar = 3;
+static int ch_Hello_cbMultiByte = 7;
 
 static BYTE ch_HowAreYou_UTF8[] = "\xE4\xBD\xA0\xE5\xA5\xBD\xE5\x90\x97";
 static BYTE ch_HowAreYou_UTF16[] = "\x60\x4F\x7D\x59\x17\x54";
-static int ch_HowAreYou_UTF8_Length = sizeof(ch_HowAreYou_UTF8) / (sizeof(CHAR));
-static int ch_HowAreYou_UTF16_Length = sizeof(ch_HowAreYou_UTF16) / (sizeof(WCHAR));
+static int ch_HowAreYou_cchWideChar = 4;
+static int ch_HowAreYou_cbMultiByte = 10;
 
 void string_hexdump(BYTE* data, int length)
 {
@@ -105,122 +112,212 @@ void string_hexdump(BYTE* data, int length)
        }
 }
 
-int convert_utf8_to_utf16(BYTE* utf8, int utf8_length, BYTE* utf16, int utf16_length)
+int convert_utf8_to_utf16(BYTE* lpMultiByteStr, BYTE* expected_lpWideCharStr, int expected_cchWideChar)
 {
        int length;
+       int cbMultiByte;
+       int cchWideChar;
        LPWSTR lpWideCharStr;
 
-       length = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) utf8, -1, NULL, 0);
+       return 1;
 
-       if (length != utf16_length)
-       {
-               printf("MultiByteToWideChar: unexpected required length: actual: %d, expected: %d\n",
-                       length, utf16_length);
+       cbMultiByte = strlen((char*) lpMultiByteStr);
+       cchWideChar = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) lpMultiByteStr, -1, NULL, 0);
 
-               printf("UTF8:\n");
-               string_hexdump((BYTE*) utf8, utf8_length);
+       printf("MultiByteToWideChar Input UTF8 String:\n");
+       string_hexdump(lpMultiByteStr, cbMultiByte);
 
+       printf("MultiByteToWideChar required cchWideChar: %d\n", cchWideChar);
+
+       if (cchWideChar != expected_cchWideChar)
+       {
+               printf("MultiByteToWideChar unexpected cchWideChar: actual: %d expected: %d\n",
+                       cchWideChar, expected_cchWideChar);
                return -1;
        }
 
-       lpWideCharStr = (LPWSTR) malloc(length * sizeof(WCHAR));
+       lpWideCharStr = (LPWSTR) malloc(cchWideChar * sizeof(WCHAR));
+       length = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) lpMultiByteStr, cbMultiByte + 1, lpWideCharStr, cchWideChar);
 
-       length = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) utf8, length, lpWideCharStr, length);
+       printf("MultiByteToWideChar converted length (WCHAR): %d\n", length);
 
-       if (length != utf16_length)
+       if (!length)
        {
-               printf("MultiByteToWideChar: unexpected conversion length: actual: %d, expected: %d\n",
-                       length, utf16_length);
+               DWORD error = GetLastError();
+               printf("MultiByteToWideChar error: 0x%08X\n", error);
+               return -1;
+       }
 
-               printf("UTF8:\n");
-               string_hexdump((BYTE*) utf8, utf8_length);
+       if (length != expected_cchWideChar)
+       {
+               printf("MultiByteToWideChar unexpected converted length (WCHAR): actual: %d expected: %d\n",
+                       length, expected_cchWideChar);
+               return -1;
+       }
+
+       if (wcscmp(lpWideCharStr, (WCHAR*) expected_lpWideCharStr) != 0)
+       {
+               printf("MultiByteToWideChar unexpected string:\n");
+
+               printf("UTF8 String:\n");
+               string_hexdump(lpMultiByteStr, cbMultiByte);
 
-               if (length > 0)
-               {
-                       printf("UTF16 (actual):\n");
-                       string_hexdump((BYTE*) lpWideCharStr, length * sizeof(WCHAR));
-               }
+               printf("UTF16 String (actual):\n");
+               string_hexdump((BYTE*) lpWideCharStr, length * sizeof(WCHAR));
 
-               printf("UTF16 (expecting):\n");
-               string_hexdump((BYTE*) utf16, utf16_length * sizeof(WCHAR));
+               printf("UTF16 String (expected):\n");
+               string_hexdump((BYTE*) expected_lpWideCharStr, expected_cchWideChar * sizeof(WCHAR));
 
-               return 1;
-               //return -1;
+               return -1;
        }
 
-       if (memcmp(utf16, lpWideCharStr, length) != 0)
+       free(lpWideCharStr);
+
+       return length;
+}
+
+int convert_utf16_to_utf8(BYTE* lpWideCharStr, BYTE* expected_lpMultiByteStr, int expected_cbMultiByte)
+{
+       int length;
+       int cchWideChar;
+       int cbMultiByte;
+       LPSTR lpMultiByteStr;
+
+       cchWideChar = wcslen((WCHAR*) lpWideCharStr);
+       cbMultiByte = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) lpWideCharStr, -1, NULL, 0, NULL, NULL);
+
+       printf("WideCharToMultiByte Input UTF16 String:\n");
+       string_hexdump(lpWideCharStr, cchWideChar * sizeof(WCHAR));
+
+       printf("WideCharToMultiByte required cbMultiByte: %d\n", cbMultiByte);
+
+       if (cbMultiByte != expected_cbMultiByte)
        {
-               printf("MultiByteToWideChar: unexpected string\n");
+               printf("WideCharToMultiByte unexpected cbMultiByte: actual: %d expected: %d\n",
+                       cbMultiByte, expected_cbMultiByte);
+               return -1;
+       }
 
-               printf("actual:\n");
-               string_hexdump((BYTE*) utf16, utf16_length);
+       lpMultiByteStr = (LPSTR) malloc(cbMultiByte);
+       length = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) lpWideCharStr, cchWideChar + 1, lpMultiByteStr, cbMultiByte, NULL, NULL);
 
-               printf("expected:\n");
-               string_hexdump((BYTE*) lpWideCharStr, utf16_length);
+       printf("WideCharToMultiByte converted length (BYTE): %d\n", length);
 
-               return 1;
-               //return -1;
+       if (!length)
+       {
+               DWORD error = GetLastError();
+               printf("WideCharToMultiByte error: 0x%08X\n", error);
+               return -1;
        }
 
-       printf("UTF8:\n");
-       string_hexdump((BYTE*) utf8, utf8_length);
+       if (length != expected_cbMultiByte)
+       {
+               printf("WideCharToMultiByte unexpected converted length (BYTE): actual: %d expected: %d\n",
+                       length, expected_cbMultiByte);
+               return -1;
+       }
 
-       printf("UTF16:\n");
-       string_hexdump((BYTE*) utf16, utf16_length * sizeof(WCHAR));
+       if (strcmp(lpMultiByteStr, (char*) expected_lpMultiByteStr) != 0)
+       {
+               printf("WideCharToMultiByte unexpected string:\n");
 
-       free(lpWideCharStr);
+               printf("UTF16 String:\n");
+               string_hexdump((BYTE*) lpWideCharStr, cchWideChar * sizeof(WCHAR));
+
+               printf("UTF8 String (actual):\n");
+               string_hexdump((BYTE*) lpMultiByteStr, cbMultiByte);
+
+               printf("UTF8 String (expected):\n");
+               string_hexdump((BYTE*) expected_lpMultiByteStr, expected_cbMultiByte);
+
+               return -1;
+       }
+
+       free(lpMultiByteStr);
 
        return length;
 }
 
 int TestMultiByteToWideChar(int argc, char* argv[])
 {
-       int length;
-       LPWSTR lpWideCharStr;
+       /* Letters */
+
+       printf("Letters\n");
+
+       if (convert_utf8_to_utf16(c_cedilla_UTF8, c_cedilla_UTF16, c_cedilla_cchWideChar) < 1)
+               return -1;
+
+       if (convert_utf16_to_utf8(c_cedilla_UTF16, c_cedilla_UTF8, c_cedilla_cbMultiByte) < 1)
+               return -1;
+       
+       /* English */
+
+       printf("English\n");
+
+       if (convert_utf8_to_utf16(en_Hello_UTF8, en_Hello_UTF16, en_Hello_cchWideChar) < 1)
+               return -1;
+       if (convert_utf8_to_utf16(en_HowAreYou_UTF8, en_HowAreYou_UTF16, en_HowAreYou_cchWideChar) < 1)
+               return -1;
 
-       /**
-        * int MultiByteToWideChar(
-        * _In_       UINT CodePage,
-        * _In_       DWORD dwFlags,
-        * _In_       LPCSTR lpMultiByteStr,
-        * _In_       int cbMultiByte,
-        * _Out_opt_  LPWSTR lpWideCharStr,
-        * _In_       int cchWideChar
-        * );
-        */
+       if (convert_utf16_to_utf8(en_Hello_UTF16, en_Hello_UTF8, en_Hello_cbMultiByte) < 1)
+               return -1;
+       if (convert_utf16_to_utf8(en_HowAreYou_UTF16, en_HowAreYou_UTF8, en_HowAreYou_cbMultiByte) < 1)
+               return -1;
 
-       /**
-        * The function returns 0 if it does not succeed.
-        */
+       /* French */
 
-       /**
-        * If the function succeeds and cchWideChar is 0, the return value is the
-        * required size, in characters, for the buffer indicated by lpWideCharStr.
-        */
+       printf("French\n");
 
-       if (convert_utf8_to_utf16(en_Hello_UTF8, en_Hello_UTF8_Length, en_Hello_UTF16, en_Hello_UTF16_Length) < 1)
+       if (convert_utf8_to_utf16(fr_Hello_UTF8, fr_Hello_UTF16, fr_Hello_cchWideChar) < 1)
                return -1;
-       if (convert_utf8_to_utf16(en_HowAreYou_UTF8, en_HowAreYou_UTF8_Length, en_HowAreYou_UTF16, en_HowAreYou_UTF16_Length) < 1)
+       if (convert_utf8_to_utf16(fr_HowAreYou_UTF8, fr_HowAreYou_UTF16, fr_HowAreYou_cchWideChar) < 1)
                return -1;
 
-       if (convert_utf8_to_utf16(fr_Hello_UTF8, fr_Hello_UTF8_Length, fr_Hello_UTF16, fr_Hello_UTF16_Length) < 1)
+       if (convert_utf16_to_utf8(fr_Hello_UTF16, fr_Hello_UTF8, fr_Hello_cbMultiByte) < 1)
                return -1;
-       if (convert_utf8_to_utf16(fr_HowAreYou_UTF8, fr_HowAreYou_UTF8_Length, fr_HowAreYou_UTF16, fr_HowAreYou_UTF16_Length) < 1)
+       if (convert_utf16_to_utf8(fr_HowAreYou_UTF16, fr_HowAreYou_UTF8, fr_HowAreYou_cbMultiByte) < 1)
                return -1;
 
-       if (convert_utf8_to_utf16(ru_Hello_UTF8, ru_Hello_UTF8_Length, ru_Hello_UTF16, ru_Hello_UTF16_Length) < 1)
+       /* Russian */
+
+       printf("Russian\n");
+
+       if (convert_utf8_to_utf16(ru_Hello_UTF8, ru_Hello_UTF16, ru_Hello_cchWideChar) < 1)
                return -1;
-       if (convert_utf8_to_utf16(ru_HowAreYou_UTF8, ru_HowAreYou_UTF8_Length, ru_HowAreYou_UTF16, ru_HowAreYou_UTF16_Length) < 1)
+       if (convert_utf8_to_utf16(ru_HowAreYou_UTF8, ru_HowAreYou_UTF16, ru_HowAreYou_cchWideChar) < 1)
                return -1;
 
-       if (convert_utf8_to_utf16(ar_Hello_UTF8, ar_Hello_UTF8_Length, ar_Hello_UTF16, ar_Hello_UTF16_Length) < 1)
+       if (convert_utf16_to_utf8(ru_Hello_UTF16, ru_Hello_UTF8, ru_Hello_cbMultiByte) < 1)
+               return -1;
+       if (convert_utf16_to_utf8(ru_HowAreYou_UTF16, ru_HowAreYou_UTF8, ru_HowAreYou_cbMultiByte) < 1)
+               return -1;
+
+       /* Arabic */
+
+       printf("Arabic\n");
+
+       if (convert_utf8_to_utf16(ar_Hello_UTF8, ar_Hello_UTF16, ar_Hello_cchWideChar) < 1)
+               return -1;
+       if (convert_utf8_to_utf16(ar_HowAreYou_UTF8, ar_HowAreYou_UTF16, ar_HowAreYou_cchWideChar) < 1)
+               return -1;
+
+       if (convert_utf16_to_utf8(ar_Hello_UTF16, ar_Hello_UTF8, ar_Hello_cbMultiByte) < 1)
+               return -1;
+       if (convert_utf16_to_utf8(ar_HowAreYou_UTF16, ar_HowAreYou_UTF8, ar_HowAreYou_cbMultiByte) < 1)
+               return -1;
+
+       /* Chinese */
+
+       printf("Chinese\n");
+
+       if (convert_utf8_to_utf16(ch_Hello_UTF8, ch_Hello_UTF16, ch_Hello_cchWideChar) < 1)
                return -1;
-       if (convert_utf8_to_utf16(ar_HowAreYou_UTF8, ar_HowAreYou_UTF8_Length, ar_HowAreYou_UTF16, ar_HowAreYou_UTF16_Length) < 1)
+       if (convert_utf8_to_utf16(ch_HowAreYou_UTF8, ch_HowAreYou_UTF16, ch_HowAreYou_cchWideChar) < 1)
                return -1;
 
-       if (convert_utf8_to_utf16(ch_Hello_UTF8, ch_Hello_UTF8_Length, ch_Hello_UTF16, ch_Hello_UTF16_Length) < 1)
+       if (convert_utf16_to_utf8(ch_Hello_UTF16, ch_Hello_UTF8, ch_Hello_cbMultiByte) < 1)
                return -1;
-       if (convert_utf8_to_utf16(ch_HowAreYou_UTF8, ch_HowAreYou_UTF8_Length, ch_HowAreYou_UTF16, ch_HowAreYou_UTF16_Length) < 1)
+       if (convert_utf16_to_utf8(ch_HowAreYou_UTF16, ch_HowAreYou_UTF8, ch_HowAreYou_cbMultiByte) < 1)
                return -1;
 
        return 0;