1 /********************************************************************
3 * Copyright (c) 1997-2013, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*****************************************************************************
10 * Modification History:
12 * Madhu Katragadda Ported for C API
13 ******************************************************************************
19 #include "unicode/uloc.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/putil.h"
23 #include "unicode/uset.h"
24 #include "unicode/ustring.h"
25 #include "ucnv_bld.h" /* for sizeof(UConverter) */
26 #include "cmemory.h" /* for UAlignedMemory */
/* Element count of a true array (not a pointer); the quotient is cast to int32_t. */
31 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
/* Number of entries in each per-codepage table declared in TestConvert(). */
33 #define NUM_CODEPAGE 1
/* Element count of the work buffers malloc'ed in TestConvert().
   NOTE(review): the expansion 1024*20 is not parenthesized. The uses visible in
   this file (MAX_FILE_LEN * sizeof(...), pointer + MAX_FILE_LEN, assignment)
   are unaffected, but e.g. x / MAX_FILE_LEN or x % MAX_FILE_LEN would not
   mean what it appears to. */
34 #define MAX_FILE_LEN 1024*20
/* Size in chars of the ucs_file_name path buffer in TestConvert(). */
35 #define UCS_FILE_NAME_SIZE 512
37 /* Each helper returns a callback other than the one passed in; used by TestConvert() to swap callbacks. */
38 #if !UCONFIG_NO_LEGACY_CONVERSION
39 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA);
40 static UConverterToUCallback otherCharAction(UConverterToUCallback MIA);
/*
 * Opens a converter by name.
 * A name whose first character is '*' is opened with ucnv_openPackage(),
 * using the package returned by loadTestData() and the remainder of the name
 * after the '*'. Any other name (including NULL) is handed to ucnv_open().
 * pErrorCode is passed through to whichever call is made.
 */
44 cnv_open(const char *name, UErrorCode *pErrorCode) {
45 if(name!=NULL && name[0]=='*') {
46 return ucnv_openPackage(loadTestData(pErrorCode), name+1, pErrorCode);
48 return ucnv_open(name, pErrorCode);
/* Prototypes of the test functions that addTestConvert() registers below. */
53 static void ListNames(void);
54 static void TestFlushCache(void);
55 static void TestDuplicateAlias(void);
56 static void TestCCSID(void);
57 static void TestJ932(void);
58 static void TestJ1968(void);
59 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
60 static void TestLMBCSMaxChar(void);
63 #if !UCONFIG_NO_LEGACY_CONVERSION
64 static void TestConvertSafeCloneCallback(void);
67 static void TestEBCDICSwapLFNL(void);
68 static void TestConvertEx(void);
69 static void TestConvertExFromUTF8(void);
70 static void TestConvertExFromUTF8_C5F0(void);
71 static void TestConvertAlgorithmic(void);
72 void TestDefaultConverterError(void); /* defined in cctest.c */
73 void TestDefaultConverterSet(void); /* defined in cctest.c */
74 static void TestToUCountPending(void);
75 static void TestFromUCountPending(void);
76 static void TestDefaultName(void);
77 static void TestCompareNames(void);
78 static void TestSubstString(void);
79 static void InvalidArguments(void);
80 static void TestGetName(void);
81 static void TestUTFBOM(void);
/* Entry point with external linkage: adds this file's tests to a test tree. */
83 void addTestConvert(TestNode** root);
/*
 * Registers the converter C API tests on the test tree at root.
 * Every test is added with addTest() under the "tsconv/ccapitst/" path prefix.
 * Some registrations sit under UCONFIG_NO_LEGACY_CONVERSION / UCONFIG_NO_FILE_IO
 * preprocessor conditions, so the registered set depends on the build
 * configuration.
 */
85 void addTestConvert(TestNode** root)
87 addTest(root, &ListNames, "tsconv/ccapitst/ListNames");
88 addTest(root, &TestConvert, "tsconv/ccapitst/TestConvert");
89 addTest(root, &TestFlushCache, "tsconv/ccapitst/TestFlushCache");
90 addTest(root, &TestAlias, "tsconv/ccapitst/TestAlias");
91 addTest(root, &TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias");
92 addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone");
93 #if !UCONFIG_NO_LEGACY_CONVERSION
94 addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback");
96 addTest(root, &TestCCSID, "tsconv/ccapitst/TestCCSID");
97 addTest(root, &TestJ932, "tsconv/ccapitst/TestJ932");
98 addTest(root, &TestJ1968, "tsconv/ccapitst/TestJ1968");
99 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
100 addTest(root, &TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar");
102 addTest(root, &TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL");
103 addTest(root, &TestConvertEx, "tsconv/ccapitst/TestConvertEx");
104 addTest(root, &TestConvertExFromUTF8, "tsconv/ccapitst/TestConvertExFromUTF8");
105 addTest(root, &TestConvertExFromUTF8_C5F0, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0");
106 addTest(root, &TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic");
107 addTest(root, &TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError");
108 addTest(root, &TestDefaultConverterSet, "tsconv/ccapitst/TestDefaultConverterSet");
109 #if !UCONFIG_NO_FILE_IO
110 addTest(root, &TestToUCountPending, "tsconv/ccapitst/TestToUCountPending");
111 addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending");
113 addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName");
114 addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames");
115 addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString");
116 addTest(root, &InvalidArguments, "tsconv/ccapitst/InvalidArguments");
117 addTest(root, &TestGetName, "tsconv/ccapitst/TestGetName");
118 addTest(root, &TestUTFBOM, "tsconv/ccapitst/TestUTFBOM");
/*
 * Exercises the converter name and alias listing APIs:
 *  - walks the ucnv_openAllNames() enumeration twice, the second time after
 *    uenum_reset() and trying to open each name, and compares the two counts;
 *  - calls ucnv_countAvailable() and ucnv_getAvailableName(), including an
 *    index of -1 that must yield NULL;
 *  - cross-checks ucnv_countAliases(), ucnv_getAlias() and ucnv_getAliases()
 *    for the name "utf-8".
 * Failures are reported through log_err()/log_data_err(); nothing is returned.
 */
121 static void ListNames(void) {
122 UErrorCode err = U_ZERO_ERROR;
123 int32_t testLong1 = 0;
124 const char* available_conv;
125 UEnumeration *allNamesEnum = NULL;
126 int32_t allNamesCount = 0;
129 log_verbose("Testing ucnv_openAllNames()...");
130 allNamesEnum = ucnv_openAllNames(&err);
132 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err));
135 const char *string = NULL;
139 allNamesCount = uenum_count(allNamesEnum, &err);
/* first pass: just read every name */
140 while ((string = uenum_next(allNamesEnum, &len, &err))) {
142 log_verbose("read \"%s\", length %i\n", string, len);
144 if (U_FAILURE(err)) {
145 log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err));
/* second pass after a reset: additionally try to open (and immediately close) each converter */
148 uenum_reset(allNamesEnum, &err);
149 while ((string = uenum_next(allNamesEnum, &len, &err))) {
151 ucnv_close(ucnv_open(string, &err));
152 log_verbose("read \"%s\", length %i (%s)\n", string, len, U_SUCCESS(err) ? "available" : "unavailable");
/* both passes must have seen the same number of names */
155 if (count1 != count2) {
156 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n");
159 uenum_close(allNamesEnum);
162 /*Tests ucnv_getAvailableName(), ucnv_countAvailable()*/
164 log_verbose("Testing ucnv_countAvailable()...");
166 testLong1=ucnv_countAvailable();
167 log_info("Number of available codepages: %d/%d\n", testLong1, allNamesCount);
169 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */
/* NOTE(review): the index passed here equals the value returned by
   ucnv_countAvailable(), and the result is overwritten by the next call
   without being inspected, so this call only checks that it does not crash. */
171 available_conv = ucnv_getAvailableName(testLong1);
172 /*test ucnv_getAvailableName with err condition*/
173 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 ");
174 available_conv = ucnv_getAvailableName(-1);
175 if(available_conv != NULL){
176 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n");
179 /* Test ucnv_countAliases() etc. */
180 count = ucnv_countAliases("utf-8", &err);
182 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err));
183 } else if(count <= 0) {
184 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count);
186 /* try to get the aliases individually */
/* alias 0 is expected to be exactly "UTF-8" */
188 alias = ucnv_getAlias("utf-8", 0, &err);
190 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err));
191 } else if(strcmp("UTF-8", alias) != 0) {
192 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias);
195 for(aliasNum = 0; aliasNum < count; ++aliasNum) {
196 alias = ucnv_getAlias("utf-8", aliasNum, &err);
198 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err));
/* sanity limit: an alias longer than 20 chars is reported as likely corruption */
199 } else if(strlen(alias) > 20) {
201 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum, alias);
203 log_verbose("alias %d for utf-8: %s\n", aliasNum, alias);
207 /* try to fill an array with all aliases */
208 const char **aliases;
209 aliases=(const char **)malloc(count * sizeof(const char *));
211 ucnv_getAliases("utf-8", aliases, &err);
213 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err));
215 for(aliasNum = 0; aliasNum < count; ++aliasNum) {
216 /* compare the pointers with the ones returned individually */
217 alias = ucnv_getAlias("utf-8", aliasNum, &err);
219 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err));
220 } else if(aliases[aliasNum] != alias) {
221 log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum, aliasNum);
/* only the pointer array is freed; the alias strings themselves are never freed here */
225 free((char **)aliases);
/*
 * Broad test of the converter C API, driven mainly by the "ibm-949" converter
 * and a BOM-prefixed UChar test file. In order it covers: ucnv_openU(),
 * ucnv_open(), ucnv_convert(), ucnv_openCCSID(), ucnv_getName(),
 * ucnv_getMaxCharSize()/ucnv_getMinCharSize(), get/set of substitution chars,
 * ucnv_getDisplayName(), get/set of the from-Unicode and to-Unicode callbacks,
 * ucnv_getCCSID(), ucnv_getPlatform(), and finally round-trip conversion with
 * ucnv_fromUChars()/ucnv_toUChars() and ucnv_fromUnicode()/ucnv_toUnicode().
 * Most calls are repeated with a pre-set failure code or invalid arguments to
 * check that they fail without side effects.
 * The body begins under #if !UCONFIG_NO_LEGACY_CONVERSION.
 */
233 static void TestConvert()
235 #if !UCONFIG_NO_LEGACY_CONVERSION
238 int32_t testLong1 = 0;
242 FILE* ucs_file_in = NULL;
244 UChar myUChar = 0x0000;
245 char* mytarget; /* [MAX_FILE_LEN] */
248 UChar* consumedUni = NULL;
249 char* consumed = NULL;
250 char* output_cp_buffer; /* [MAX_FILE_LEN] */
251 UChar* ucs_file_buffer; /* [MAX_FILE_LEN] */
252 UChar* ucs_file_buffer_use;
253 UChar* my_ucs_file_buffer; /* [MAX_FILE_LEN] */
254 UChar* my_ucs_file_buffer_1;
256 uint16_t codepage_index = 0;
258 UErrorCode err = U_ZERO_ERROR;
259 char ucs_file_name[UCS_FILE_NAME_SIZE];
260 UConverterFromUCallback MIA1, MIA1_2;
261 UConverterToUCallback MIA2, MIA2_2;
262 const void *MIA1Context, *MIA1Context2, *MIA2Context, *MIA2Context2;
263 UConverter* someConverters[5];
264 UConverter* myConverter = 0;
265 UChar* displayname = 0;
272 int32_t targetcapacity2;
273 int32_t targetcapacity;
277 const UChar* tmp_ucs_buf;
278 const UChar* tmp_consumedUni=NULL;
279 const char* tmp_mytarget_use;
280 const char* tmp_consumed;
282 /******************************************************************
283 Checking Unicode -> ksc
284 ******************************************************************/
/* per-codepage expectation tables, each with NUM_CODEPAGE entries and indexed by codepage_index */
286 const char* CodePagesToTest[NUM_CODEPAGE] =
292 const uint16_t CodePageNumberToTest[NUM_CODEPAGE] =
298 const int8_t CodePagesMinChars[NUM_CODEPAGE] =
304 const int8_t CodePagesMaxChars[NUM_CODEPAGE] =
310 const uint16_t CodePagesSubstitutionChars[NUM_CODEPAGE] =
315 const char* CodePagesTestFiles[NUM_CODEPAGE] =
321 const UConverterPlatform CodePagesPlatform[NUM_CODEPAGE] =
327 const char* CodePagesLocale[NUM_CODEPAGE] =
332 UConverterFromUCallback oldFromUAction = NULL;
333 UConverterToUCallback oldToUAction = NULL;
334 const void* oldFromUContext = NULL;
335 const void* oldToUContext = NULL;
337 /* Allocate memory */
338 mytarget = (char*) malloc(MAX_FILE_LEN * sizeof(mytarget[0]));
339 output_cp_buffer = (char*) malloc(MAX_FILE_LEN * sizeof(output_cp_buffer[0]));
340 ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(ucs_file_buffer[0]));
341 my_ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(my_ucs_file_buffer[0]));
343 ucs_file_buffer_use = ucs_file_buffer;
345 mytarget_use = mytarget;
346 my_ucs_file_buffer_1=my_ucs_file_buffer;
348 /* flush the converter cache to get a consistent state before the flushing is tested */
351 /*Testing ucnv_openU()*/
353 UChar converterName[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/
354 UChar firstSortedName[]={ 0x0021, 0x0000}; /* ! */
355 UChar lastSortedName[]={ 0x007E, 0x0000}; /* ~ */
356 const char *illegalNameChars={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"};
357 UChar illegalName[100];
358 UConverter *converter=NULL;
360 converter=ucnv_openU(converterName, &err);
362 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err));
364 ucnv_close(converter);
366 converter=ucnv_openU(NULL, &err);
368 log_err("FAILURE! ucnv_openU(NULL, err) failed. %s\n", myErrorName(err));
370 ucnv_close(converter);
371 /*testing with error value*/
372 err=U_ILLEGAL_ARGUMENT_ERROR;
373 converter=ucnv_openU(converterName, &err);
374 if(!(converter == NULL)){
375 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n");
377 ucnv_close(converter);
/* over-long name built from illegalNameChars: expected to be rejected with U_ILLEGAL_ARGUMENT_ERROR */
379 u_uastrcpy(illegalName, "");
380 u_uastrcpy(illegalName, illegalNameChars);
381 ucnv_openU(illegalName, &err);
382 if(!(err==U_ILLEGAL_ARGUMENT_ERROR)){
383 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n");
/* "!" and "~" are not converter names: both opens are expected to end in U_FILE_ACCESS_ERROR */
387 ucnv_openU(firstSortedName, &err);
388 if(err!=U_FILE_ACCESS_ERROR){
389 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n");
393 ucnv_openU(lastSortedName, &err);
394 if(err!=U_FILE_ACCESS_ERROR){
395 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n");
400 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n");
402 UConverter *cnv=NULL;
404 cnv=ucnv_open("ibm-949,Madhu", &err);
406 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. %s\n", myErrorName(err));
411 /*Testing ucnv_convert()*/
413 int32_t targetLimit=0, sourceLimit=0, i=0, targetCapacity=0;
414 const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00};
415 const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00};
417 sourceLimit=sizeof(source)/sizeof(source[0]);
/* preflight with a NULL target to learn the required length, then convert for real into a buffer one char larger */
421 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", NULL, targetLimit , (const char*)source, sourceLimit, &err);
422 if(err == U_BUFFER_OVERFLOW_ERROR){
424 targetLimit=targetCapacity+1;
425 target=(char*)malloc(sizeof(char) * targetLimit);
426 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
429 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err));
/* NOTE(review): the log_err() below uses %lx for arguments cast to UChar and
   uint8_t, which are promoted to int, not long; whether the test logger
   tolerates this cannot be told from here. */
432 for(i=0; i<targetCapacity; i++){
433 if(target[i] != expectedTarget[i]){
434 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]);
/* sourceLimit of -1: the source (starting after its leading 0x00 byte) is treated as NUL-terminated */
438 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source+1, -1, &err);
439 if(U_FAILURE(err) || i!=7){
440 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n",
441 u_errorName(err), i);
444 /*Test error conditions*/
446 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, 0, &err);
448 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n");
451 err=U_ILLEGAL_ARGUMENT_ERROR;
452 sourceLimit=sizeof(source)/sizeof(source[0]);
453 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
455 log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n");
459 sourceLimit=sizeof(source)/sizeof(source[0]);
461 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
462 if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){
463 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n");
470 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/
471 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n");
472 err=U_ILLEGAL_ARGUMENT_ERROR;
473 if(ucnv_open(NULL, &err) != NULL){
474 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
476 if(ucnv_openCCSID(1051, UCNV_IBM, &err) != NULL){
477 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
481 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */
482 log_verbose("\n---Testing ucnv_open default...\n");
483 someConverters[0] = ucnv_open(NULL,&err);
484 someConverters[1] = ucnv_open(NULL,&err);
485 someConverters[2] = ucnv_open("utf8", &err);
486 someConverters[3] = ucnv_openCCSID(949,UCNV_IBM,&err);
487 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM, &err)); /* test for j350; ucnv_close(NULL) is safe */
488 if (U_FAILURE(err)){ log_data_err("FAILURE! %s\n", myErrorName(err));}
490 /* Testing ucnv_getName()*/
491 /*default code page */
492 ucnv_getName(someConverters[0], &err);
494 log_data_err("getName[0] failed\n");
496 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters[0], &err));
498 ucnv_getName(someConverters[1], &err);
500 log_data_err("getName[1] failed\n");
502 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err));
505 ucnv_close(someConverters[0]);
506 ucnv_close(someConverters[1]);
507 ucnv_close(someConverters[2]);
508 ucnv_close(someConverters[3]);
511 for (codepage_index=0; codepage_index < NUM_CODEPAGE; ++codepage_index)
/* NOTE(review): the strcpy()/strcat() calls that build the test file path write
   into ucs_file_name[UCS_FILE_NAME_SIZE]; no length check is visible here. */
517 strcpy(ucs_file_name, U_TOPSRCDIR U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING);
519 strcpy(ucs_file_name, loadTestData(&err));
522 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err));
527 char* index = strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR);
/* true when the last separator is not the final character of the path */
529 if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){
534 strcat(ucs_file_name,".."U_FILE_SEP_STRING);
536 strcat(ucs_file_name, CodePagesTestFiles[codepage_index]);
538 ucs_file_in = fopen(ucs_file_name,"rb");
541 log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name);
545 /*Creates a converter and testing ucnv_openCCSID(u_int code_page, platform, errstatus*/
547 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */
548 /* ucnv_flushCache(); */
549 myConverter =ucnv_open( "ibm-949", &err);
550 if (!myConverter || U_FAILURE(err))
552 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err));
557 /*testing for ucnv_getName() */
558 log_verbose("Testing ucnv_getName()...\n");
559 ucnv_getName(myConverter, &err);
561 log_err("Error in getName\n");
564 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err));
/* the reported name must match the table entry, compared case-insensitively */
566 if (uprv_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index]))
567 log_err("getName failed\n");
569 log_verbose("getName ok\n");
570 /*Test getName with error condition*/
573 err=U_ILLEGAL_ARGUMENT_ERROR;
574 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR");
575 name=ucnv_getName(myConverter, &err);
577 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail");
583 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/
585 log_verbose("Testing ucnv_getMaxCharSize()...\n");
586 if (ucnv_getMaxCharSize(myConverter)==CodePagesMaxChars[codepage_index])
587 log_verbose("Max byte per character OK\n");
589 log_err("Max byte per character failed\n");
591 log_verbose("\n---Testing ucnv_getMinCharSize()...\n");
592 if (ucnv_getMinCharSize(myConverter)==CodePagesMinChars[codepage_index])
593 log_verbose("Min byte per character OK\n");
595 log_err("Min byte per character failed\n");
598 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/
599 log_verbose("\n---Testing ucnv_getSubstChars...\n");
601 ucnv_getSubstChars(myConverter, myptr, &ii, &err);
603 log_err("ucnv_getSubstChars returned a negative number %d\n", ii);
/* fold the substitution bytes, most significant first, into one 16-bit value for comparison */
607 rest = (uint16_t)(((unsigned char)rest << 8) + (unsigned char)myptr[x]);
608 if (rest==CodePagesSubstitutionChars[codepage_index])
609 log_verbose("Substitution character ok\n");
611 log_err("Substitution character failed.\n");
613 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n");
614 ucnv_setSubstChars(myConverter, myptr, ii, &err);
617 log_err("FAILURE! %s\n", myErrorName(err));
619 ucnv_getSubstChars(myConverter,save, &ii, &err);
622 log_err("FAILURE! %s\n", myErrorName(err));
625 if (strncmp(save, myptr, ii))
626 log_err("Saved substitution character failed\n");
628 log_verbose("Saved substitution character ok\n");
630 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/
631 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n");
633 ucnv_getSubstChars(myConverter, myptr, &ii, &err);
634 if(err != U_INDEX_OUTOFBOUNDS_ERROR){
635 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err));
639 ucnv_getSubstChars(myConverter, myptr, &ii, &err);
640 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n");
641 ucnv_setSubstChars(myConverter, myptr, 0, &err);
642 if(err != U_ILLEGAL_ARGUMENT_ERROR){
643 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err));
645 log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n");
646 strcpy(myptr, "abc");
647 ucnv_setSubstChars(myConverter, myptr, ii, &err);
649 ucnv_getSubstChars(myConverter, save, &ii, &err);
650 if(strncmp(save, myptr, ii) == 0){
651 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n");
653 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n");
655 strcpy(myptr, "abc");
656 ucnv_setSubstChars(myConverter, myptr, ii, &err);
657 err=U_ILLEGAL_ARGUMENT_ERROR;
658 ucnv_getSubstChars(myConverter, save, &ii, &err);
659 if(strncmp(save, myptr, ii) == 0){
660 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n");
665 #ifdef U_ENABLE_GENERIC_ISO_2022
666 /*resetState ucnv_reset()*/
667 log_verbose("\n---Testing ucnv_reset()..\n");
668 ucnv_reset(myConverter);
671 const uint8_t in[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80};
672 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
673 UConverter *cnv=ucnv_open("ISO_2022", &err);
675 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
677 c=ucnv_getNextUChar(cnv, &source, limit, &err);
678 if((U_FAILURE(err) || c != (UChar32)0x0031)) {
679 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err));
688 log_verbose("\n---Testing ucnv_getDisplayName()...\n");
689 locale=CodePagesLocale[codepage_index];
/* preflight first; on U_BUFFER_OVERFLOW_ERROR allocate disnamelen+1 UChars and call again */
692 disnamelen = ucnv_getDisplayName(myConverter, locale, displayname, len, &err);
693 if(err==U_BUFFER_OVERFLOW_ERROR) {
695 displayname=(UChar*)malloc((disnamelen+1) * sizeof(UChar));
696 ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err);
698 log_err("getDisplayName failed. The error is %s\n", myErrorName(err));
701 log_verbose(" getDisplayName o.k.\n");
707 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err));
709 /*test ucnv_getDisplayName with error condition*/
710 err= U_ILLEGAL_ARGUMENT_ERROR;
711 len=ucnv_getDisplayName(myConverter,locale,NULL,0, &err);
713 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n");
715 /*test ucnv_getDisplayName with a NULL converter*/
717 len=ucnv_getDisplayName(NULL,locale,NULL,0, &err);
718 if( len !=0 || U_SUCCESS(err)){
719 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n");
723 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/
724 ucnv_getFromUCallBack(myConverter, &MIA1, &MIA1Context);
726 log_verbose("\n---Testing ucnv_setFromUCallBack...\n");
/* install a different callback with &BOM as context; the previous callback/context must come back as the "old" values */
727 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err);
728 if (U_FAILURE(err) || oldFromUAction != MIA1 || oldFromUContext != MIA1Context)
730 log_err("FAILURE! %s\n", myErrorName(err));
733 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
734 if (MIA1_2 != otherUnicodeAction(MIA1) || MIA1Context2 != &BOM)
735 log_err("get From UCallBack failed\n");
737 log_verbose("get From UCallBack ok\n");
739 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n");
740 ucnv_setFromUCallBack(myConverter,MIA1, MIA1Context, &oldFromUAction, &oldFromUContext, &err);
741 if (U_FAILURE(err) || oldFromUAction != otherUnicodeAction(MIA1) || oldFromUContext != &BOM)
743 log_err("FAILURE! %s\n", myErrorName(err));
746 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
747 if (MIA1_2 != MIA1 || MIA1Context2 != MIA1Context)
748 log_err("get From UCallBack action failed\n");
750 log_verbose("get From UCallBack action ok\n");
752 /*testing ucnv_setFromUCallBack with error conditions*/
753 err=U_ILLEGAL_ARGUMENT_ERROR;
754 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n");
755 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err);
756 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
757 if(MIA1_2 == otherUnicodeAction(MIA1) || MIA1Context2 == &BOM){
758 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
763 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/
764 ucnv_getToUCallBack(myConverter, &MIA2, &MIA2Context);
766 log_verbose("\n---Testing setTo UCallBack...\n");
767 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), &BOM, &oldToUAction, &oldToUContext, &err);
768 if (U_FAILURE(err) || oldToUAction != MIA2 || oldToUContext != MIA2Context)
770 log_err("FAILURE! %s\n", myErrorName(err));
773 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
774 if (MIA2_2 != otherCharAction(MIA2) || MIA2Context2 != &BOM)
775 log_err("To UCallBack failed\n");
777 log_verbose("To UCallBack ok\n");
779 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n");
780 ucnv_setToUCallBack(myConverter,MIA2, MIA2Context, &oldToUAction, &oldToUContext, &err);
781 if (U_FAILURE(err) || oldToUAction != otherCharAction(MIA2) || oldToUContext != &BOM)
782 { log_err("FAILURE! %s\n", myErrorName(err)); }
784 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
785 if (MIA2_2 != MIA2 || MIA2Context2 != MIA2Context)
786 log_err("To UCallBack failed\n");
788 log_verbose("To UCallBack ok\n");
790 /*testing ucnv_setToUCallBack with error conditions*/
791 err=U_ILLEGAL_ARGUMENT_ERROR;
792 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n");
793 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), NULL, &oldToUAction, &oldToUContext, &err);
794 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
795 if (MIA2_2 == otherCharAction(MIA2) || MIA2Context2 == &BOM){
796 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
801 /*getcodepageid testing ucnv_getCCSID() */
802 log_verbose("\n----Testing getCCSID....\n");
803 cp = ucnv_getCCSID(myConverter,&err);
806 log_err("FAILURE!..... %s\n", myErrorName(err));
808 if (cp != CodePageNumberToTest[codepage_index])
809 log_err("Codepage number test failed\n");
811 log_verbose("Codepage number test OK\n");
813 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/
814 err=U_ILLEGAL_ARGUMENT_ERROR;
815 if( ucnv_getCCSID(myConverter,&err) != -1){
816 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n");
820 /*getCodepagePlatform testing ucnv_getPlatform()*/
821 log_verbose("\n---Testing getCodepagePlatform ..\n");
822 if (CodePagesPlatform[codepage_index]!=ucnv_getPlatform(myConverter, &err))
823 log_err("Platform codepage test failed\n");
825 log_verbose("Platform codepage test ok\n");
829 log_err("FAILURE! %s\n", myErrorName(err));
831 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/
832 err= U_ILLEGAL_ARGUMENT_ERROR;
833 if(ucnv_getPlatform(myConverter, &err) != UCNV_UNKNOWN){
834 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n");
/* NOTE(review): the fread() return value is discarded; a short read leaves BOM
   with whatever value it had before the call. */
840 fread(&BOM, sizeof(UChar), 1, ucs_file_in);
841 if (BOM!=0xFEFF && BOM!=0xFFFE)
843 log_err("File Missing BOM...Bailing!\n");
849 /*Reads in the file*/
/* NOTE(review): reads one UChar per iteration until feof() or a zero-length
   read; the loop condition contains no bound against the MAX_FILE_LEN
   capacity of ucs_file_buffer. */
850 while(!feof(ucs_file_in)&&(i+=fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in)))
852 myUChar = ucs_file_buffer[i-1];
854 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN*/
857 myUChar = ucs_file_buffer[i-1];
858 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN Corner Case*/
861 /*testing ucnv_fromUChars() and ucnv_toUChars() */
862 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/
864 uchar1=(UChar*)malloc(sizeof(UChar) * (i+1));
865 u_uastrcpy(uchar1,"");
866 u_strncpy(uchar1,ucs_file_buffer,i);
869 uchar3=(UChar*)malloc(sizeof(UChar)*(i+1));
870 u_uastrcpy(uchar3,"");
871 u_strncpy(uchar3,ucs_file_buffer,i);
874 /*Calls the Conversion Routine */
875 testLong1 = MAX_FILE_LEN;
876 log_verbose("\n---Testing ucnv_fromUChars()\n");
877 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err);
880 log_err("\nFAILURE...%s\n", myErrorName(err));
883 log_verbose(" ucnv_fromUChars() o.k.\n");
885 /*test the conversion routine */
886 log_verbose("\n---Testing ucnv_toUChars()\n");
887 /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */
889 targetsize = ucnv_toUChars(myConverter,
893 strlen(output_cp_buffer),
895 /*if there is an buffer overflow then trap the values and pass them and make the actual call*/
897 if(err==U_BUFFER_OVERFLOW_ERROR)
900 uchar2=(UChar*)malloc((targetsize+1) * sizeof(UChar));
901 targetsize = ucnv_toUChars(myConverter,
905 strlen(output_cp_buffer),
909 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err));
911 log_verbose(" ucnv_toUChars() o.k.\n");
913 if(u_strcmp(uchar1,uchar2)!=0)
914 log_err("equality test failed with conversion routine\n");
918 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n");
920 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/
921 err=U_ILLEGAL_ARGUMENT_ERROR;
922 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n");
923 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err);
924 if (targetcapacity !=0) {
925 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
928 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n");
929 targetcapacity = ucnv_fromUChars(NULL, output_cp_buffer, testLong1, uchar1, -1, &err);
930 if (targetcapacity !=0 || err != U_ILLEGAL_ARGUMENT_ERROR) {
931 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n");
934 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n");
935 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, 0, &err);
936 if (targetcapacity !=0) {
937 log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n");
939 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n");
940 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, 0, uchar1, -1, &err);
941 if (err != U_BUFFER_OVERFLOW_ERROR) {
942 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n");
944 /*toUChars with error conditions*/
945 targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, strlen(output_cp_buffer), &err);
947 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
950 targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, strlen(output_cp_buffer), &err);
951 if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){
952 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n");
955 targetsize = ucnv_toUChars(myConverter, uchar2, 0, output_cp_buffer, 0, &err);
956 if (targetsize !=0) {
957 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n");
960 targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, strlen(output_cp_buffer), &err);
961 if (err != U_STRING_NOT_TERMINATED_WARNING) {
962 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n",
969 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */
970 /*Clean up re-usable vars*/
971 log_verbose("Testing ucnv_fromUnicode().....\n");
972 tmp_ucs_buf=ucs_file_buffer_use;
973 ucnv_fromUnicode(myConverter, &mytarget_1,
974 mytarget + MAX_FILE_LEN,
976 ucs_file_buffer_use+i,
980 consumedUni = (UChar*)tmp_consumedUni;
981 (void)consumedUni; /* Suppress set but not used warning. */
985 log_err("FAILURE! %s\n", myErrorName(err));
988 log_verbose("ucnv_fromUnicode() o.k.\n");
990 /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */
991 log_verbose("Testing ucnv_toUnicode().....\n");
992 tmp_mytarget_use=mytarget_use;
993 tmp_consumed = consumed;
994 ucnv_toUnicode(myConverter, &my_ucs_file_buffer_1,
995 my_ucs_file_buffer + MAX_FILE_LEN,
997 mytarget_use + (mytarget_1 - mytarget),
1001 consumed = (char*)tmp_consumed;
1004 log_err("FAILURE! %s\n", myErrorName(err));
1007 log_verbose("ucnv_toUnicode() o.k.\n");
1010 log_verbose("\n---Testing RoundTrip ...\n");
1013 u_strncpy(uchar3, my_ucs_file_buffer,i);
1016 if(u_strcmp(uchar1,uchar3)==0)
1017 log_verbose("Equality test o.k.\n");
1019 log_err("Equality test failed\n");
1024 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__);
1028 if(u_strcmp(uchar2, uchar3)==0)
1029 log_verbose("Equality test o.k.\n");
1031 log_err("Equality test failed\n");
/* release the per-codepage resources */
1034 fclose(ucs_file_in);
1035 ucnv_close(myConverter);
1036 if (uchar1 != 0) free(uchar1);
1037 if (uchar2 != 0) free(uchar2);
1038 if (uchar3 != 0) free(uchar3);
/* release the four work buffers allocated at the top of the function */
1041 free((void*)mytarget);
1042 free((void*)output_cp_buffer);
1043 free((void*)ucs_file_buffer);
1044 free((void*)my_ucs_file_buffer);
1048 #if !UCONFIG_NO_LEGACY_CONVERSION
1049 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA)
1051 return (MIA==(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP)?(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE:(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP;
1054 static UConverterToUCallback otherCharAction(UConverterToUCallback MIA)
1056 return (MIA==(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP)?(UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE:(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP;
/*
 * Exercises ucnv_flushCache(): opens five converters (three on "ibm-1047",
 * one "gb18030", one "ibm-954") and checks the count returned by
 * ucnv_flushCache() while all are open and again after closing them in
 * stages. The expected counts, in order, are 0, 0, 2 and 1.
 */
1060 static void TestFlushCache(void) {
1061 #if !UCONFIG_NO_LEGACY_CONVERSION
1062 UErrorCode err = U_ZERO_ERROR;
1063 UConverter* someConverters[5];
1066 /* flush the converter cache to get a consistent state before the flushing is tested */
1069 /*Testing ucnv_open()*/
1070 /* Note: These converters have been chosen because they do NOT
1071 encode the Latin characters (U+0041, ...), and therefore are
1072 highly unlikely to be chosen as system default codepages */
/* Slots 0, 1 and 2 deliberately open the same converter name three times. */
1074 someConverters[0] = ucnv_open("ibm-1047", &err);
1075 if (U_FAILURE(err)) {
1076 log_data_err("FAILURE! %s\n", myErrorName(err));
1079 someConverters[1] = ucnv_open("ibm-1047", &err);
1080 if (U_FAILURE(err)) {
1081 log_data_err("FAILURE! %s\n", myErrorName(err));
1084 someConverters[2] = ucnv_open("ibm-1047", &err);
1085 if (U_FAILURE(err)) {
1086 log_data_err("FAILURE! %s\n", myErrorName(err));
1089 someConverters[3] = ucnv_open("gb18030", &err);
1090 if (U_FAILURE(err)) {
1091 log_data_err("FAILURE! %s\n", myErrorName(err));
1094 someConverters[4] = ucnv_open("ibm-954", &err);
1095 if (U_FAILURE(err)) {
1096 log_data_err("FAILURE! %s\n", myErrorName(err));
1100 /* Testing ucnv_flushCache() */
1101 log_verbose("\n---Testing ucnv_flushCache...\n");
/* All five converters are still open here: the flush is expected to report 0. */
1102 if ((flushCount=ucnv_flushCache())==0)
1103 log_verbose("Flush cache ok\n");
1105 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount);
1107 /*testing ucnv_close() and ucnv_flushCache() */
/* Closing slots 0 and 1 still leaves "ibm-1047" open through slot 2, so 0 is expected again. */
1108 ucnv_close(someConverters[0]);
1109 ucnv_close(someConverters[1]);
1111 if ((flushCount=ucnv_flushCache())==0)
1112 log_verbose("Flush cache ok\n");
1114 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount);
/* After slots 2 and 3 are closed, neither "ibm-1047" nor "gb18030" is open: 2 is expected. */
1116 ucnv_close(someConverters[2]);
1117 ucnv_close(someConverters[3]);
1119 if ((flushCount=ucnv_flushCache())==2)
1120 log_verbose("Flush cache ok\n"); /*because first, second and third are same */
1122 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n",
/* Closing the last converter ("ibm-954") should make exactly one more entry flushable. */
1126 ucnv_close(someConverters[4]);
1127 if ( (flushCount=ucnv_flushCache())==1)
1128 log_verbose("Flush cache ok\n");
1130 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushCount);
1135 * Test the converter alias API, specifically the fuzzy matching of
1136 * alias names and the alias table integrity. Make sure each
1137 * converter has at least one alias (itself), and that its listed
1138 * aliases map back to itself. Check some hard-coded UTF-8 and
1139 * ISO_2022 aliases to make sure they work.
/*
 * Alias-table integrity test.
 *
 * Part 1 walks every available converter: it must report at least one alias,
 * must open, and (unless its name contains "PlatformEndian" or
 * "OppositeEndian") ucnv_getName() must return the same name. Every further
 * alias must map back to alias 0; a mismatch is tolerated only when
 * U_AMBIGUOUS_ALIAS_WARNING was set and the other converter lists the alias.
 *
 * Part 2 checks three hard-coded tables (ISO_2022, UTF-8, UTF-32 family)
 * against the canonical name each alias is expected to resolve to.
 */
1141 static void TestAlias() {
1143 UErrorCode status = U_ZERO_ERROR;
1145 /* Predetermined aliases that we expect to map back to ISO_2022
1146 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */
/* In both name tables, element 0 is the expected canonical name; the loops below start at index 1. */
1147 const char* ISO_2022_NAMES[] =
1148 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2",
1149 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"};
1150 int32_t ISO_2022_NAMES_LENGTH =
1151 sizeof(ISO_2022_NAMES) / sizeof(ISO_2022_NAMES[0]);
1152 const char *UTF8_NAMES[] =
1153 { "UTF-8", "utf-8", "utf8", "ibm-1208",
1154 "utf_8", "ibm1208", "cp1208" };
1155 int32_t UTF8_NAMES_LENGTH =
1156 sizeof(UTF8_NAMES) / sizeof(UTF8_NAMES[0]);
/* Pairs of { expected canonical name, alias to look up }, as used by the last loop. */
1161 } CONVERTERS_NAMES[] = {
1162 { "UTF-32BE", "UTF32_BigEndian" },
1163 { "UTF-32LE", "UTF32_LittleEndian" },
1164 { "UTF-32", "ISO-10646-UCS-4" },
1165 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" },
1166 { "UTF-32", "ucs-4" }
1168 int32_t CONVERTERS_NAMES_LENGTH = sizeof(CONVERTERS_NAMES) / sizeof(*CONVERTERS_NAMES);
1170 /* When there are bugs in gencnval or in ucnv_io, converters can
1171 appear to have no aliases. */
1172 ncnv = ucnv_countAvailable();
1173 log_verbose("%d converters\n", ncnv);
1174 for (i=0; i<ncnv; ++i) {
1175 const char *name = ucnv_getAvailableName(i);
1177 uint16_t na = ucnv_countAliases(name, &status);
1182 log_err("FAIL: Converter \"%s\" (i=%d)"
1183 " has no aliases; expect at least one\n",
1187 cnv = ucnv_open(name, &status);
1188 if (U_FAILURE(status)) {
1189 log_data_err("FAIL: Converter \"%s\" (i=%d)"
1190 " can't be opened.\n",
/* The PlatformEndian/OppositeEndian names are exempt from the getName() round-trip check. */
1194 if (strcmp(ucnv_getName(cnv, &status), name) != 0
1195 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) {
1196 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. "
1197 "The should be the same\n",
1198 name, ucnv_getName(cnv, &status));
1203 status = U_ZERO_ERROR;
1204 alias0 = ucnv_getAlias(name, 0, &status);
1205 for (j=1; j<na; ++j) {
1207 /* Make sure each alias maps back to the same list of
1208 aliases. Assume that if alias 0 is the same, the whole
1209 list is the same (this should always be true). */
1210 const char *mapBack;
1212 status = U_ZERO_ERROR;
1213 alias = ucnv_getAlias(name, j, &status);
1214 if (status == U_AMBIGUOUS_ALIAS_WARNING) {
1215 log_err("FAIL: Converter \"%s\"is ambiguous\n", name);
1218 if (alias == NULL) {
1219 log_err("FAIL: Converter \"%s\" -> "
1225 mapBack = ucnv_getAlias(alias, 0, &status);
1227 if (mapBack == NULL) {
1228 log_err("FAIL: Converter \"%s\" -> "
1229 "alias[%d]=\"%s\" -> "
1230 "alias[0]=NULL, exp. \"%s\"\n",
1231 name, j, alias, alias0);
1235 if (0 != strcmp(alias0, mapBack)) {
1237 UBool foundAlias = FALSE;
1238 if (status == U_AMBIGUOUS_ALIAS_WARNING) {
1239 /* Make sure that we only get this mismapping when there is
1240 an ambiguous alias, and the other converter has this alias too. */
1241 for (idx = 0; idx < ucnv_countAliases(mapBack, &status); idx++) {
1242 if (strcmp(ucnv_getAlias(mapBack, (uint16_t)idx, &status), alias) == 0) {
1248 /* else not ambiguous, and this is a real problem. foundAlias = FALSE */
1251 log_err("FAIL: Converter \"%s\" -> "
1252 "alias[%d]=\"%s\" -> "
1253 "alias[0]=\"%s\", exp. \"%s\"\n",
1254 name, j, alias, mapBack, alias0);
1261 /* Check a list of predetermined aliases that we expect to map
1262 * back to ISO_2022 and UTF-8. */
1263 for (i=1; i<ISO_2022_NAMES_LENGTH; ++i) {
1264 const char* mapBack = ucnv_getAlias(ISO_2022_NAMES[i], 0, &status);
1266 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES[i]);
1269 if (0 != strcmp(mapBack, ISO_2022_NAMES[0])) {
1270 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n",
1271 ISO_2022_NAMES[i], mapBack);
1276 for (i=1; i<UTF8_NAMES_LENGTH; ++i) {
1277 const char* mapBack = ucnv_getAlias(UTF8_NAMES[i], 0, &status);
1279 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES[i]);
1282 if (mapBack && 0 != strcmp(mapBack, UTF8_NAMES[0])) {
1283 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n",
1284 UTF8_NAMES[i], mapBack);
1289 * Check a list of predetermined aliases that we expect to map
1290 * back to predermined converter names.
1293 for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) {
1294 const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status);
/* Report the alias that was actually looked up, as the two loops above do. */
1296 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].alias);
1299 if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) {
1300 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n",
1301 CONVERTERS_NAMES[i].alias, mapBack, CONVERTERS_NAMES[i].name);
/*
 * Checks ucnv_getStandardName() for names that appear under more than one
 * converter: "Shift_JIS" under the IBM standard must give "ibm-943" and
 * "ibm-943" under IANA must give "Shift_JIS", both with
 * U_AMBIGUOUS_ALIAS_WARNING. The fully qualified "ibm-943_P130-2000" must give
 * NULL under IANA without that warning.
 */
1307 static void TestDuplicateAlias(void) {
1309 UErrorCode status = U_ZERO_ERROR;
1311 status = U_ZERO_ERROR;
1312 alias = ucnv_getStandardName("Shift_JIS", "IBM", &status);
1313 if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) {
/* alias may be NULL on this path; never hand NULL to %s. */
1314 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias != NULL ? alias : "(null)");
1316 status = U_ZERO_ERROR;
1317 alias = ucnv_getStandardName("ibm-943", "IANA", &status);
1318 if (alias == NULL || strcmp(alias, "Shift_JIS") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) {
1319 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias != NULL ? alias : "(null)");
1321 status = U_ZERO_ERROR;
1322 alias = ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status);
1323 if (alias != NULL || status == U_AMBIGUOUS_ALIAS_WARNING) {
/* The message names the converter that was actually queried. */
1324 log_data_err("FAIL: Didn't get NULL for ibm-943_P130-2000 {IANA}. Got %s\n", alias != NULL ? alias : "(null)");
1329 /* Test safe clone callback */
/*
 * Serial-number source for TSCCContext objects (used by TSCC_clone and
 * TSCC_init). The counter is function-local static state starting at 1.
 * NOTE(review): presumably returns the counter and advances it - confirm.
 */
1331 static uint32_t TSCC_nextSerial()
1333 static uint32_t n = 1;
/* Fields of the callback context (TSCCContext) handed to TSCC_fromU / TSCC_toU. */
1340 uint32_t magic; /* 0xC0FFEE to identify that the object is OK */
1341 uint32_t serial; /* minted from nextSerial, above */
1342 UBool wasClosed; /* close happened on the object */
1345 static TSCCContext *TSCC_clone(TSCCContext *ctx)
1347 TSCCContext *newCtx = (TSCCContext *)malloc(sizeof(TSCCContext));
1349 newCtx->serial = TSCC_nextSerial();
1350 newCtx->wasClosed = 0;
1351 newCtx->magic = 0xC0FFEE;
1353 log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx, ctx->serial, newCtx, newCtx->serial);
1358 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * From-Unicode callback used by the safe-clone callback test.
 *
 * context is a TSCCContext. Every call is logged and the context's magic value
 * is verified. For reason UCNV_CLONE a new context is made with TSCC_clone()
 * and installed on fromUArgs->converter, keeping the callback function the
 * converter already had. For reason UCNV_CLOSE the context is marked closed.
 */
1359 static void TSCC_fromU(const void *context,
1360 UConverterFromUnicodeArgs *fromUArgs,
1361 const UChar* codeUnits,
1364 UConverterCallbackReason reason,
1367 TSCCContext *ctx = (TSCCContext*)context;
1368 UConverterFromUCallback junkFrom;
1370 log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, fromUArgs->converter);
1372 if(ctx->magic != 0xC0FFEE) {
1373 log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic);
1377 if(reason == UCNV_CLONE) {
1378 UErrorCode subErr = U_ZERO_ERROR;
1379 TSCCContext *newCtx;
1380 TSCCContext *junkCtx;
1381 TSCCContext **pjunkCtx = &junkCtx;
1384 log_verbose("TSCC_fromU: cloning..\n");
1385 newCtx = TSCC_clone(ctx);
1387 if(newCtx == NULL) {
1388 log_err("TSCC_fromU: internal clone failed on %p\n", ctx);
/* Fetch the current callback function only so it can be re-installed with the new context. */
1392 ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)pjunkCtx);
1393 ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr);
1395 if(U_FAILURE(subErr)) {
1400 if(reason == UCNV_CLOSE) {
1401 log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx, ctx->serial);
1402 ctx->wasClosed = TRUE;
/*
 * To-Unicode callback used by the safe-clone callback test; the counterpart
 * of TSCC_fromU.
 *
 * context is a TSCCContext. Every call is logged and the context's magic value
 * is verified. For reason UCNV_CLONE a new context is made with TSCC_clone()
 * and installed on toUArgs->converter, keeping the callback function the
 * converter already had. For reason UCNV_CLOSE the context is marked closed.
 */
1406 static void TSCC_toU(const void *context,
1407 UConverterToUnicodeArgs *toUArgs,
1408 const char* codeUnits,
1410 UConverterCallbackReason reason,
1413 TSCCContext *ctx = (TSCCContext*)context;
1414 UConverterToUCallback junkFrom;
1416 log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, toUArgs->converter);
1418 if(ctx->magic != 0xC0FFEE) {
1419 log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic);
1423 if(reason == UCNV_CLONE) {
1424 UErrorCode subErr = U_ZERO_ERROR;
1425 TSCCContext *newCtx;
1426 TSCCContext *junkCtx;
1427 TSCCContext **pjunkCtx = &junkCtx;
1430 log_verbose("TSCC_toU: cloning..\n");
1431 newCtx = TSCC_clone(ctx);
1433 if(newCtx == NULL) {
1434 log_err("TSCC_toU: internal clone failed on %p\n", ctx);
/* Despite its name, junkFrom holds the to-Unicode callback here; it is re-installed with the new context. */
1438 ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)pjunkCtx);
1439 ucnv_setToUCallBack(toUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr);
1441 if(U_FAILURE(subErr)) {
1446 if(reason == UCNV_CLOSE) {
1447 log_verbose("TSCC_toU: Context %p:%d closing\n", ctx, ctx->serial);
1448 ctx->wasClosed = TRUE;
/*
 * Initializes a caller-provided context: sets the 0xC0FFEE magic value and
 * assigns a new serial number from TSCC_nextSerial().
 * NOTE(review): wasClosed is not assigned in the lines shown - confirm it is
 * reset here as well.
 */
1452 static void TSCC_init(TSCCContext *q)
1454 q->magic = 0xC0FFEE;
1455 q->serial = TSCC_nextSerial();
/*
 * Logs the state of context q under the label name: reports an error when the
 * magic value is not 0xC0FFEE, and verbosely prints pointer, serial, magic and
 * whether the context is CLOSED or open.
 * NOTE(review): the first message is evidently meant for a NULL q - confirm
 * the guard around it.
 */
1459 static void TSCC_print_log(TSCCContext *q, const char *name)
1462 log_verbose("TSCContext: %s is NULL!!\n", name);
1464 if(q->magic != 0xC0FFEE) {
1465 log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n",
1466 q,q->serial, q->magic);
1468 log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n",
1469 q, q->serial, name, q->magic, q->wasClosed?"CLOSED":"open");
/*
 * Verifies callback-context handling across ucnv_safeClone().
 *
 * conv1 ("iso-8859-3") gets TSCC_fromU/TSCC_toU with the stack contexts from1
 * and to1, then is cloned into conv2. The test checks that conv1 keeps its
 * original contexts, that conv2 received different ones, and that no context
 * is closed yet. It then re-checks the wasClosed flags at the two points
 * logged as "ucnv_closed (conv1)" and "ucnv_closed (conv2)": first only
 * from1/to1 must be closed, then from2/to2 as well. Finally the cloned
 * contexts are freed.
 */
1473 static void TestConvertSafeCloneCallback()
1475 UErrorCode err = U_ZERO_ERROR;
1476 TSCCContext from1, to1;
1477 TSCCContext *from2, *from3, *to2, *to3;
/* Pointer-to-pointer aliases, later cast to (const void**) for the ucnv_get*CallBack() calls. */
1478 TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3;
1480 int32_t hunkSize = 8192;
1481 UConverterFromUCallback junkFrom;
1482 UConverterToUCallback junkTo;
1483 UConverter *conv1, *conv2 = NULL;
1485 conv1 = ucnv_open("iso-8859-3", &err);
1487 if(U_FAILURE(err)) {
1488 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err));
1492 log_verbose("Opened conv1=%p\n", conv1);
1497 TSCC_print_log(&from1, "from1");
1498 TSCC_print_log(&to1, "to1");
1500 ucnv_setFromUCallBack(conv1, TSCC_fromU, &from1, NULL, NULL, &err);
1501 log_verbose("Set from1 on conv1\n");
1502 TSCC_print_log(&from1, "from1");
1504 ucnv_setToUCallBack(conv1, TSCC_toU, &to1, NULL, NULL, &err);
1505 log_verbose("Set to1 on conv1\n");
1506 TSCC_print_log(&to1, "to1");
1508 conv2 = ucnv_safeClone(conv1, hunk, &hunkSize, &err);
1509 if(U_FAILURE(err)) {
1510 log_err("safeClone failed: %s\n", u_errorName(err));
1513 log_verbose("Cloned to conv2=%p.\n", conv2);
1515 /********** from *********************/
/* from2 = context now installed on the clone, from3 = context still installed on the original. */
1516 ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2);
1517 ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3);
1519 TSCC_print_log(from2, "from2");
1520 TSCC_print_log(from3, "from3(==from1)");
1523 log_err("FAIL! from2 is null \n");
1528 log_err("FAIL! from3 is null \n");
1532 if(from3 != (&from1) ) {
1533 log_err("FAIL! conv1's FROM context changed!\n");
1536 if(from2 == (&from1) ) {
1537 log_err("FAIL! conv1's FROM context is the same as conv2's!\n");
1540 if(from1.wasClosed) {
1541 log_err("FAIL! from1 is closed \n");
1544 if(from2->wasClosed) {
1545 log_err("FAIL! from2 was closed\n");
1548 /********** to *********************/
/* Same checks for the to-Unicode side: to2 belongs to the clone, to3 to the original. */
1549 ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2);
1550 ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3);
1552 TSCC_print_log(to2, "to2");
1553 TSCC_print_log(to3, "to3(==to1)");
1556 log_err("FAIL! to2 is null \n");
1561 log_err("FAIL! to3 is null \n");
1565 if(to3 != (&to1) ) {
1566 log_err("FAIL! conv1's TO context changed!\n");
1569 if(to2 == (&to1) ) {
1570 log_err("FAIL! conv1's TO context is the same as conv2's!\n");
1574 log_err("FAIL! to1 is closed \n");
1577 if(to2->wasClosed) {
1578 log_err("FAIL! to2 was closed\n");
1581 /*************************************/
/* From here on conv1 is reported closed: only its own contexts (from1/to1) may be marked closed. */
1584 log_verbose("ucnv_closed (conv1)\n");
1585 TSCC_print_log(&from1, "from1");
1586 TSCC_print_log(from2, "from2");
1587 TSCC_print_log(&to1, "to1");
1588 TSCC_print_log(to2, "to2");
1590 if(from1.wasClosed == FALSE) {
1591 log_err("FAIL! from1 is NOT closed \n");
1594 if(from2->wasClosed) {
1595 log_err("FAIL! from2 was closed\n");
1598 if(to1.wasClosed == FALSE) {
1599 log_err("FAIL! to1 is NOT closed \n");
1602 if(to2->wasClosed) {
1603 log_err("FAIL! to2 was closed\n");
/* From here on conv2 is reported closed too: all four contexts must now be marked closed. */
1607 log_verbose("ucnv_closed (conv2)\n");
1609 TSCC_print_log(&from1, "from1");
1610 TSCC_print_log(from2, "from2");
1612 if(from1.wasClosed == FALSE) {
1613 log_err("FAIL! from1 is NOT closed \n");
1616 if(from2->wasClosed == FALSE) {
1617 log_err("FAIL! from2 was NOT closed\n");
1620 TSCC_print_log(&to1, "to1");
1621 TSCC_print_log(to2, "to2");
1623 if(to1.wasClosed == FALSE) {
1624 log_err("FAIL! to1 is NOT closed \n");
1627 if(to2->wasClosed == FALSE) {
1628 log_err("FAIL! to2 was NOT closed\n");
/* The cloned contexts came from malloc() in TSCC_clone and are released here. */
1632 free(to2); /* to1 is stack based */
1634 if(from2 != (&from1)) {
1635 free(from2); /* from1 is stack based */
/*
 * Helper for the safe-clone buffer checks: takes a byte range (p, length) and
 * a fill byte b.
 * NOTE(review): callers pass the 0xaa fill value and treat a true result as
 * "bytes were overwritten", so this presumably reports whether any byte in
 * the range differs from b - confirm against the body.
 */
1641 containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) {
/*
 * Tests ucnv_safeClone().
 *
 * Part 1 (on names[0]) checks the argument/error contract: NULL status,
 * incoming error status, NULL bufferSize pointer, preflighting, a run-time
 * sized buffer, heap fallback with U_SAFECLONE_ALLOCATED_WARNING, and a NULL
 * converter.
 *
 * Part 2 clones every converter in names[] into each size from bufferSizes[].
 * It verifies that the heap is used only when the stack buffer is too small,
 * that no bytes outside the permitted region of the 0xaa-filled buffers are
 * touched, and that the clone can convert "XYZ" from Unicode and back.
 *
 * Finally the largest preflighted clone size is compared against
 * U_CNV_SAFECLONE_BUFFERSIZE.
 */
1652 static void TestConvertSafeClone()
1654 /* one 'regular' & all the 'private stateful' converters */
1655 static const char *const names[] = {
1656 #if !UCONFIG_NO_LEGACY_CONVERSION
1658 "ISO_2022,locale=zh,version=1",
1661 #if !UCONFIG_NO_LEGACY_CONVERSION
1665 "ISO_2022,locale=kr,version=1",
1666 "ISO_2022,locale=jp,version=2",
1670 #if !UCONFIG_NO_LEGACY_CONVERSION
1671 "IMAP-mailbox-name",
1678 /* store the actual sizes of each converter */
1679 int32_t actualSizes[LENGTHOF(names)];
1681 static const int32_t bufferSizes[] = {
1682 U_CNV_SAFECLONE_BUFFERSIZE,
1683 (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */
1684 (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */
1687 char charBuffer[21]; /* Leave at an odd number for alignment testing */
/* Three adjacent buffers: the clone goes into buffer[1]; buffer[0] and the rest act as guard areas. */
1688 uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE];
1689 int32_t bufferSize, maxBufferSize;
1690 const char *maxName;
1691 UConverter * cnv, *cnv2;
1695 const char *pConstCharBuffer;
1696 const char *charBufferLimit = charBuffer + sizeof(charBuffer)/sizeof(*charBuffer);
1697 UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */
1698 UChar uniCharBuffer[20];
1699 char charSourceBuffer[] = { 0x1b, 0x24, 0x42 };
1700 const char *pCharSource = charSourceBuffer;
1701 const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer);
1702 UChar *pUCharTarget = uniCharBuffer;
1703 UChar *pUCharTargetLimit = uniCharBuffer + sizeof(uniCharBuffer)/sizeof(*uniCharBuffer);
1704 const UChar * pUniBuffer;
1705 const UChar *uniBufferLimit = uniBuffer + sizeof(uniBuffer)/sizeof(*uniBuffer);
1709 cnv = ucnv_open(names[0], &err);
1710 if(U_SUCCESS(err)) {
1711 /* Check the various error & informational states: */
1713 /* Null status - just returns NULL */
1714 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
1715 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, NULL))
1717 log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
1719 /* error status - should return 0 & keep error the same */
1720 err = U_MEMORY_ALLOCATION_ERROR;
1721 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR)
1723 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
1727 /* Null buffer size pointer is ok */
1728 if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], NULL, &err)) || U_FAILURE(err))
1730 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n");
1735 /* buffer size pointer is 0 - fill in pbufferSize with a size */
1737 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0)
1739 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n");
1741 /* Verify our define is large enough */
1742 if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize)
1744 log_err("FAIL: Pre-calculated buffer size is too small\n");
1746 /* Verify we can use this run-time calculated size */
1747 if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err))
1749 log_err("FAIL: Converter can't be cloned with run-time size\n");
1755 /* size one byte too small - should allocate & let us know */
1757 if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
1759 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n");
1766 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
1768 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_WARNING */
1769 if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
1771 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n");
1779 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
1780 if (NULL != ucnv_safeClone(NULL, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
1782 log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
1791 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/
1793 for(j = 0; j < LENGTHOF(bufferSizes); ++j) {
1794 for (idx = 0; idx < LENGTHOF(names); idx++)
1797 cnv = ucnv_open(names[idx], &err);
1798 if(U_FAILURE(err)) {
1799 log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err));
1804 /* preflight to get maxBufferSize */
1805 actualSizes[idx] = 0;
1806 ucnv_safeClone(cnv, NULL, &actualSizes[idx], &err);
1807 if(actualSizes[idx] > maxBufferSize) {
1808 maxBufferSize = actualSizes[idx];
1809 maxName = names[idx];
/* Fill all three buffers with a known pattern so stray writes can be detected afterwards. */
1813 memset(buffer, 0xaa, sizeof(buffer));
1815 bufferSize = bufferSizes[j];
1816 cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err);
1818 /* close the original immediately to make sure that the clone works by itself */
1821 if( actualSizes[idx] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) &&
1822 err == U_SAFECLONE_ALLOCATED_WARNING
1824 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[idx]);
1827 /* check if the clone function overwrote any bytes that it is not supposed to touch */
1828 if(bufferSize <= bufferSizes[j]) {
1829 /* used the stack buffer */
1830 if( containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) ||
1831 containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa)
1833 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n",
1834 names[idx], bufferSize, bufferSizes[j]);
1837 /* heap-allocated the clone */
1838 if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) {
1839 log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n",
1840 names[idx], bufferSize, bufferSizes[j]);
1844 pCharBuffer = charBuffer;
1845 pUniBuffer = uniBuffer;
1847 ucnv_fromUnicode(cnv2,
1856 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err));
1858 ucnv_toUnicode(cnv2,
1869 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err));
1872 pConstCharBuffer = charBuffer;
1873 if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err))
1875 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err));
/* sizeof yields size_t; cast so the argument matches %lu on every platform. */
1881 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1882 (unsigned long)sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE);
1883 if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) {
1884 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1885 maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE);
/*
 * For each number in ccsids[], opens a converter with
 * ucnv_openCCSID(ccsid, UCNV_IBM, ...) and checks that ucnv_getCCSID()
 * reports the same number and that ucnv_getPlatform() reports UCNV_IBM.
 * The platform check is skipped for 1392 (gb18030).
 */
1889 static void TestCCSID() {
1890 #if !UCONFIG_NO_LEGACY_CONVERSION
1892 UErrorCode errorCode;
1893 int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 };
1896 for(i=0; i<(int32_t)(sizeof(ccsids)/sizeof(int32_t)); ++i) {
1899 errorCode=U_ZERO_ERROR;
1900 cnv=ucnv_openCCSID(ccsid, UCNV_IBM, &errorCode);
1901 if(U_FAILURE(errorCode)) {
/* %ld requires a long argument; the values are cast explicitly in the three messages below. */
1902 log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", (long)ccsid, u_errorName(errorCode));
1906 if(ccsid!=ucnv_getCCSID(cnv, &errorCode)) {
1907 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", (long)ccsid, (long)ucnv_getCCSID(cnv, &errorCode));
1910 /* skip gb18030(ccsid 1392) */
1911 if(ccsid != 1392 && UCNV_IBM!=ucnv_getPlatform(cnv, &errorCode)) {
1912 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", (long)ccsid, (long)ucnv_getPlatform(cnv, &errorCode));
1920 /* jitterbug 932: ucnv_convert() bugs --------------------------------------- */
1922 /* CHUNK_SIZE defined in common\ucnv.c: */
1923 #define CHUNK_SIZE 1024
/* Forward declarations of the three jitterbug 932 sub-tests, followed by the calls that run them in order. */
1925 static void bug1(void);
1926 static void bug2(void);
1927 static void bug3(void);
1932 bug1(); /* Unicode intermediate buffer straddle bug */
1933 bug2(); /* pre-flighting size incorrect caused by simple overflow */
1934 bug3(); /* pre-flighting size incorrect caused by expansion overflow */
1938 * jitterbug 932: test chunking boundary conditions in
1940 int32_t ucnv_convert(const char *toConverterName,
1941 const char *fromConverterName,
1948 * See discussions on the icu mailing list in
1949 * 2001-April with the subject "converter 'flush' question".
1951 * Bug report and test code provided by Edward J. Batutis.
/*
 * bug1 body: fills a CHUNK_SIZE+32 byte input with 'a', places the 4-byte
 * GB 18030 sequence for U+10000 at offsets CHUNK_SIZE-i for i from 4 down to
 * 0, converts with ucnv_convert() to "us-ascii", and reports a failure when
 * the result is U_TRUNCATED_CHAR_FOUND.
 */
1955 #if !UCONFIG_NO_LEGACY_CONVERSION
1956 char char_in[CHUNK_SIZE+32];
1957 char char_out[CHUNK_SIZE*2];
1959 /* GB 18030 equivalent of U+10000 is 90308130 */
1960 static const char test_seq[]={ (char)0x90u, 0x30, (char)0x81u, 0x30 };
1962 UErrorCode err = U_ZERO_ERROR;
1963 int32_t i, test_seq_len = sizeof(test_seq);
1966 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward
1967 * until the straddle bug appears. I didn't want to hard-code everything so this test could
1968 * be expanded - however this is the only type of straddle bug I can think of at the moment -
1969 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no
1970 * other Unicode sequences cause a bug since combining sequences are not supported by the
/* Each iteration moves the sequence one byte further forward, starting CHUNK_SIZE-4 bytes into the input. */
1974 for (i = test_seq_len; i >= 0; i--) {
1975 /* put character sequence into input buffer */
1976 memset(char_in, 0x61, sizeof(char_in)); /* GB 18030 'a' */
1977 memcpy(char_in + (CHUNK_SIZE - i), test_seq, test_seq_len);
1979 /* do the conversion */
1980 ucnv_convert("us-ascii", /* out */
1989 if (err == U_TRUNCATED_CHAR_FOUND) {
1990 /* this happens when surrogate pair straddles the intermediate buffer in
1991 * T_UConverter_fromCodepageToCodepage */
1992 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n");
/*
 * Three ucnv_convert() calls write into the deliberately small target[5] and
 * check the returned size. Per the failure messages the expected sizes are 10
 * (us-ascii -> iso-8859-1), 32 (UTF-8 -> UTF-32BE) and 12 (UTF-32BE -> UTF-8).
 */
1998 /* bug2: pre-flighting loop bug: simple overflow causes bug */
2001 /* US-ASCII "1234567890" */
2002 static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 };
2003 static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 };
2004 static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30,
2005 0x00, 0x00, 0x00, 0x31,
2006 0x00, 0x00, 0x00, 0x32,
2007 0x00, 0x00, 0x00, 0x33,
2008 0x00, 0x00, 0x00, 0x34,
2009 0x00, 0x00, 0x00, 0x35,
2010 0x00, 0x00, 0x00, 0x36,
2011 0x00, 0x00, 0x00, 0x37,
2012 0x00, 0x00, 0x00, 0x38,
2013 0x00, 0x00, (char)0xf0, 0x00};
/* Smaller than every expected output, so each conversion overflows and only the reported size is checked. */
2014 static char target[5];
2016 UErrorCode err = U_ZERO_ERROR;
2019 /* do the conversion */
2020 size = ucnv_convert("iso-8859-1", /* out */
2021 "us-ascii", /* in */
2029 /* bug2: size is 5, should be 10 */
2030 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size);
2034 /* do the conversion */
2035 size = ucnv_convert("UTF-32BE", /* out */
2044 /* bug2: size is 5, should be 32 */
2045 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size);
2049 /* do the conversion */
2050 size = ucnv_convert("UTF-8", /* out */
2051 "UTF-32BE", /* in */
2055 sizeof(sourceUTF32),
2059 /* bug2: size is 5, should be 12 */
2060 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size);
2065 * bug3: when the characters expand going from source to target codepage
2066 * you get bug3 in addition to bug2
2070 #if !UCONFIG_NO_LEGACY_CONVERSION
2071 char char_in[CHUNK_SIZE*4];
2073 UErrorCode err = U_ZERO_ERROR;
2077 * first get the buggy size from bug2 then
2078 * compare it to buggy size with an expansion
/* First pass: input is all 'a', which does not expand, so the expected size equals sizeof(char_in). */
2080 memset(char_in, 0x61, sizeof(char_in)); /* US-ASCII 'a' */
2082 /* do the conversion */
2083 size = ucnv_convert("lmbcs", /* out */
2084 "us-ascii", /* in */
2091 if ( size != sizeof(char_in) ) {
2093 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer
2094 * in the converter?), should be CHUNK_SIZE*4
2096 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize...
/* sizeof yields size_t; cast so the argument matches the %x conversion. */
2098 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", (unsigned int)sizeof(char_in), size);
2102 * now do the conversion with expansion
2103 * ascii 0x08 expands to 0x0F 0x28 in lmbcs
2105 memset(char_in, 8, sizeof(char_in));
2108 /* do the conversion */
2109 size = ucnv_convert("lmbcs", /* out */
2110 "us-ascii", /* in */
2117 /* expect 2X expansion */
2118 if ( size != sizeof(char_in) * 2 ) {
2121 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05:
/* Same size_t-to-%x mismatch as above; cast the expected value. */
2123 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", (unsigned int)(sizeof(char_in) * 2), size);
/*
 * Drives ucnv_convertEx() in streaming fashion from srcCnv to targetCnv,
 * offering at most chunkSize (capped at CHUNK_SIZE) source, pivot and target
 * units per call, and compares the outcome with the expectation.
 *
 * src/srcLength                   - input bytes in srcCnv's charset
 * expectTarget/expectTargetLength - expected output bytes
 * testName                        - label used in failure messages
 * expectCode                      - expected final UErrorCode; when it is
 *                                   U_ZERO_ERROR, U_STRING_NOT_TERMINATED_WARNING
 *                                   is accepted as well
 *
 * Both converters are reset before the conversion starts.
 */
2129 convertExStreaming(UConverter *srcCnv, UConverter *targetCnv,
2130 const char *src, int32_t srcLength,
2131 const char *expectTarget, int32_t expectTargetLength,
2133 const char *testName,
2134 UErrorCode expectCode) {
2135 UChar pivotBuffer[CHUNK_SIZE];
2136 UChar *pivotSource, *pivotTarget;
2137 const UChar *pivotLimit;
2139 char targetBuffer[CHUNK_SIZE];
2141 const char *srcLimit, *finalSrcLimit, *targetLimit;
2143 int32_t targetLength;
2147 UErrorCode errorCode;
2150 if(chunkSize>CHUNK_SIZE) {
2151 chunkSize=CHUNK_SIZE;
/* Only the first chunkSize units of the pivot and target buffers are exposed initially. */
2154 pivotSource=pivotTarget=pivotBuffer;
2155 pivotLimit=pivotBuffer+chunkSize;
2157 finalSrcLimit=src+srcLength;
2158 target=targetBuffer;
2159 targetLimit=targetBuffer+chunkSize;
2161 ucnv_resetToUnicode(srcCnv);
2162 ucnv_resetFromUnicode(targetCnv);
2164 errorCode=U_ZERO_ERROR;
2167 /* convert, streaming-style (both converters and pivot keep state) */
2169 /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */
2170 if(src+chunkSize<=finalSrcLimit) {
2171 srcLimit=src+chunkSize;
2173 srcLimit=finalSrcLimit;
2175 ucnv_convertEx(targetCnv, srcCnv,
2176 &target, targetLimit,
2178 pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,
2179 FALSE, flush, &errorCode);
2180 targetLength=(int32_t)(target-targetBuffer);
2181 if(target>targetLimit) {
2182 log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n",
2183 testName, chunkSize, target, targetLimit);
2184 break; /* TODO: major problem! */
2186 if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
2187 /* continue converting another chunk */
2188 errorCode=U_ZERO_ERROR;
/* Open up another chunkSize of target space, never going past the end of targetBuffer. */
2189 if(targetLength+chunkSize<=sizeof(targetBuffer)) {
2190 targetLimit=target+chunkSize;
2192 targetLimit=targetBuffer+sizeof(targetBuffer);
2194 } else if(U_FAILURE(errorCode)) {
2200 } else if(src==finalSrcLimit && pivotSource==pivotTarget) {
2201 /* all consumed, now flush without input (separate from conversion for testing) */
/* Verify in order: final error code, number of bytes written, then the bytes themselves. */
2206 if(!(errorCode==expectCode || (expectCode==U_ZERO_ERROR && errorCode==U_STRING_NOT_TERMINATED_WARNING))) {
2207 log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n",
2208 testName, chunkSize, u_errorName(errorCode), u_errorName(expectCode));
2209 } else if(targetLength!=expectTargetLength) {
2210 log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n",
2211 testName, chunkSize, targetLength, expectTargetLength);
2212 } else if(memcmp(targetBuffer, expectTarget, targetLength)!=0) {
2213 log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n",
2214 testName, chunkSize);
/*
 * Runs convertExStreaming() three times on the same converters and expected
 * output, passing 1, 3 and 7 as the argument before testName - presumably the
 * chunk size (confirm against convertExStreaming's parameter list).
 */
2219 convertExMultiStreaming(UConverter *srcCnv, UConverter *targetCnv,
2220 const char *src, int32_t srcLength,
2221 const char *expectTarget, int32_t expectTargetLength,
2222 const char *testName,
2223 UErrorCode expectCode) {
2224 convertExStreaming(srcCnv, targetCnv,
2226 expectTarget, expectTargetLength,
2227 1, testName, expectCode);
2228 convertExStreaming(srcCnv, targetCnv,
2230 expectTarget, expectTargetLength,
2231 3, testName, expectCode);
2232 convertExStreaming(srcCnv, targetCnv,
2234 expectTarget, expectTargetLength,
2235 7, testName, expectCode);
/*
 * Exercises ucnv_convertEx() between a UTF-8 converter (cnv1) and a
 * Shift-JIS converter (cnv2):
 *  - streaming conversions in both directions via convertExMultiStreaming(),
 *  - single whole-buffer calls with and without room for a terminating NUL,
 *  - a call entered with a failure code already set, which must be a no-op,
 *  - argument combinations that must yield U_ILLEGAL_ARGUMENT_ERROR.
 * The two "writes %d bytes, expect %d" messages cast their pointer-difference
 * and sizeof arguments to int so that they match the %d conversions.
 */
2238 static void TestConvertEx() {
2239 #if !UCONFIG_NO_LEGACY_CONVERSION
2240 static const uint8_t
2242 /* 4e00 30a1 ff61 0410 */
2243 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2246 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2250 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2251 * SUB, SUB, 0x40, SUB, SUB, 0x40
2253 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40
2256 char srcBuffer[100], targetBuffer[100];
2261 UChar pivotBuffer[100];
2262 UChar *pivotSource, *pivotTarget;
2264 UConverter *cnv1, *cnv2;
2265 UErrorCode errorCode;
/* cnv1 is the UTF-8 converter, cnv2 the Shift-JIS converter */
2267 errorCode=U_ZERO_ERROR;
2268 cnv1=ucnv_open("UTF-8", &errorCode);
2269 if(U_FAILURE(errorCode)) {
2270 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode));
2274 cnv2=ucnv_open("Shift-JIS", &errorCode);
2275 if(U_FAILURE(errorCode)) {
2276 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode));
2281 /* test ucnv_convertEx() with streaming conversion style */
2282 convertExMultiStreaming(cnv1, cnv2,
2283 (const char *)utf8, sizeof(utf8), (const char *)shiftJIS, sizeof(shiftJIS),
2284 "UTF-8 -> Shift-JIS", U_ZERO_ERROR);
2286 convertExMultiStreaming(cnv2, cnv1,
2287 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8),
2288 "Shift-JIS -> UTF-8", U_ZERO_ERROR);
2290 /* U_ZERO_ERROR because by default the SUB callbacks are set */
2291 convertExMultiStreaming(cnv1, cnv2,
2292 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)errorTarget, sizeof(errorTarget),
2293 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR);
2295 /* test some simple conversions */
2297 /* NUL-terminated source and target */
/* a warning code on entry must not prevent the conversion; the result is checked for U_ZERO_ERROR */
2298 errorCode=U_STRING_NOT_TERMINATED_WARNING;
2299 memcpy(srcBuffer, utf8, sizeof(utf8));
2300 srcBuffer[sizeof(utf8)]=0;
2302 target=targetBuffer;
2303 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2304 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode);
2305 if( errorCode!=U_ZERO_ERROR ||
2306 target-targetBuffer!=sizeof(shiftJIS) ||
2308 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0
2310 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n",
2311 u_errorName(errorCode), (int)(target-targetBuffer), (int)sizeof(shiftJIS));
2314 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */
2315 errorCode=U_AMBIGUOUS_ALIAS_WARNING;
/* pre-fill with 0xff so that the byte just past the output can be checked for being untouched */
2316 memset(targetBuffer, 0xff, sizeof(targetBuffer));
2318 target=targetBuffer;
/* the target limit leaves exactly sizeof(shiftJIS) bytes: no room for a terminating NUL */
2319 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(shiftJIS), &src, NULL,
2320 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode);
2321 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2322 target-targetBuffer!=sizeof(shiftJIS) ||
2323 *target!=(char)0xff ||
2324 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0
2326 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n",
2327 u_errorName(errorCode), (int)(target-targetBuffer), (int)sizeof(shiftJIS));
/* a failure code on entry must be left unchanged by the call */
2331 errorCode=U_MESSAGE_PARSE_ERROR;
2333 target=targetBuffer;
2334 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2335 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode);
2336 if(errorCode!=U_MESSAGE_PARSE_ERROR) {
2337 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode));
2340 /* pivotLimit==pivotStart */
2341 errorCode=U_ZERO_ERROR;
2342 pivotSource=pivotTarget=pivotBuffer;
2343 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2344 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer, TRUE, TRUE, &errorCode);
2345 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2346 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode));
2349 /* *pivotSource==NULL */
2350 errorCode=U_ZERO_ERROR;
2352 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2353 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode);
2354 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2355 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode));
/* *source==NULL case, going by the message below; the assignment to src is not visible here */
2359 errorCode=U_ZERO_ERROR;
2361 pivotSource=pivotBuffer;
2362 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2363 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode);
2364 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2365 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode));
2368 /* streaming conversion without a pivot buffer */
2369 errorCode=U_ZERO_ERROR;
2371 pivotSource=pivotBuffer;
/* pivotStart is NULL and flush is FALSE: expected to be rejected */
2372 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2373 NULL, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, FALSE, &errorCode);
2374 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2375 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode));
2383 /* Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8(). */
/*
 * Table of ill-formed UTF-8 byte sequences, stored as NUL-terminated C strings
 * (the users below take their length with strlen()). It is grouped into
 * truncated multi-byte sequences and complete-but-invalid sequences.
 */
2384 static const char *const badUTF8[]={
2388 /* truncated multi-byte sequences */
2425 "\xfc\x80\x80\x80\x80",
2427 /* complete sequences but non-shortest forms or out of range etc. */
2433 "\xf8\x80\x80\x80\x80",
2434 "\xfc\x80\x80\x80\x80\x80",
/* capacity, in bytes, of the char0[] and char1[] output arrays of getTestChar() */
2439 #define ARG_CHAR_ARR_SIZE 8
2441 /* get some character that can be converted and convert it */
/*
 * Picks the code point in the middle of cnv's roundtrip set and produces:
 *   charUTF8 / *pCharUTF8Length - its UTF-8 form,
 *   char0 / *pChar0Length       - output of a first ucnv_fromUnicode() call,
 *   char1 / *pChar1Length       - output of a second ucnv_fromUnicode() call
 *                                 on the same converter.
 * Both conversions are called with flush=FALSE. A failure is logged with
 * converterName. The return statements are not visible here; the caller
 * treats a false result as "skip this converter".
 */
2442 static UBool getTestChar(UConverter *cnv, const char *converterName,
2443 char charUTF8[4], int32_t *pCharUTF8Length,
2444 char char0[ARG_CHAR_ARR_SIZE], int32_t *pChar0Length,
2445 char char1[ARG_CHAR_ARR_SIZE], int32_t *pChar1Length) {
2446 UChar utf16[U16_MAX_LENGTH];
2447 int32_t utf16Length;
2449 const UChar *utf16Source;
2454 UErrorCode errorCode;
2456 errorCode=U_ZERO_ERROR;
2457 set=uset_open(1, 0);
2458 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
/* take the element at the middle index of the roundtrip set */
2459 c=uset_charAt(set, uset_size(set)/2);
2463 U16_APPEND_UNSAFE(utf16, utf16Length, c);
2465 U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c);
/* first conversion of the character */
2469 ucnv_fromUnicode(cnv,
2470 &target, char0+ARG_CHAR_ARR_SIZE,
2471 &utf16Source, utf16+utf16Length,
2472 NULL, FALSE, &errorCode);
2473 *pChar0Length=(int32_t)(target-char0);
/* second conversion of the same UTF-16 text with the same converter */
2477 ucnv_fromUnicode(cnv,
2478 &target, char1+ARG_CHAR_ARR_SIZE,
2479 &utf16Source, utf16+utf16Length,
2480 NULL, FALSE, &errorCode);
2481 *pChar1Length=(int32_t)(target-char1);
/* errorCode is checked once here, after both conversions */
2483 if(U_FAILURE(errorCode)) {
2484 log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode));
/*
 * For entries of badUTF8[], converts "test character + bad sequence" from
 * UTF-8 to cnv with ucnv_convertEx() (reset and flush both TRUE) while the
 * UTF-8 converter uses the STOP to-Unicode callback. The conversion is expected
 * to end with U_TRUNCATED_CHAR_FOUND and with pivotSource still at the start of
 * the pivot buffer. ucnv_getInvalidChars() on the UTF-8 converter must then
 * return exactly the bytes of the bad sequence.
 * Entries are filtered by a length test against
 * 1+U8_COUNT_TRAIL_BYTES(lead byte); the statement controlled by that test is
 * not visible here (presumably it skips complete sequences -- confirm).
 */
2490 static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName,
2491 char charUTF8[4], int32_t charUTF8Length,
2492 char char0[8], int32_t char0Length,
2493 char char1[8], int32_t char1Length) {
2498 int32_t outputLength;
2500 char invalidChars[8];
2501 int8_t invalidLength;
2506 UChar pivotBuffer[8];
2507 UChar *pivotSource, *pivotTarget;
2509 UErrorCode errorCode;
2512 /* test truncated sequences */
2513 errorCode=U_ZERO_ERROR;
2514 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
/* the test character stays at the front of utf8[] for every iteration */
2516 memcpy(utf8, charUTF8, charUTF8Length);
2518 for(i=0; i<LENGTHOF(badUTF8); ++i) {
2519 /* truncated sequence? */
2520 int32_t length=strlen(badUTF8[i]);
2521 if(length>=(1+U8_COUNT_TRAIL_BYTES(badUTF8[i][0]))) {
2525 /* assemble a string with the test character and the truncated sequence */
2526 memcpy(utf8+charUTF8Length, badUTF8[i], length);
2527 utf8Length=charUTF8Length+length;
2529 /* convert and check the invalidChars */
2532 pivotSource=pivotTarget=pivotBuffer;
2533 errorCode=U_ZERO_ERROR;
2534 ucnv_convertEx(cnv, utf8Cnv,
2535 &target, output+sizeof(output),
2536 &source, utf8+utf8Length,
2537 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer),
2538 TRUE, TRUE, /* reset & flush */
2540 outputLength=(int32_t)(target-output);
2541 (void)outputLength; /* Suppress set but not used warning. */
2542 if(errorCode!=U_TRUNCATED_CHAR_FOUND || pivotSource!=pivotBuffer) {
2543 log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode), converterName, (long)i);
/* ask the UTF-8 converter which bytes it rejected; they must equal badUTF8[i] */
2547 errorCode=U_ZERO_ERROR;
2548 invalidLength=(int8_t)sizeof(invalidChars);
2549 ucnv_getInvalidChars(utf8Cnv, invalidChars, &invalidLength, &errorCode);
2550 if(invalidLength!=length || 0!=memcmp(invalidChars, badUTF8[i], length)) {
2551 log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName, (long)i);
/*
 * Builds one long UTF-8 input consisting of the test character followed, for
 * every badUTF8[] entry, by that bad sequence and the test character again.
 * In parallel it builds the expected output: char0 once, then char1 once per
 * badUTF8[] entry. With the SKIP to-Unicode callback set on the UTF-8
 * converter, the input is then run through convertExMultiStreaming() under the
 * test name "from bad UTF-8 to <converterName>".
 */
2556 static void testFromBadUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName,
2557 char charUTF8[4], int32_t charUTF8Length,
2558 char char0[8], int32_t char0Length,
2559 char char1[8], int32_t char1Length) {
2560 char utf8[600], expect[600];
2561 int32_t utf8Length, expectLength;
2565 UErrorCode errorCode;
2568 errorCode=U_ZERO_ERROR;
2569 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &errorCode);
2572 * assemble an input string with the test character between each
2574 * and an expected string with repeated test character output
2576 memcpy(utf8, charUTF8, charUTF8Length);
2577 utf8Length=charUTF8Length;
2579 memcpy(expect, char0, char0Length);
2580 expectLength=char0Length;
2582 for(i=0; i<LENGTHOF(badUTF8); ++i) {
2583 int32_t length=strlen(badUTF8[i]);
2584 memcpy(utf8+utf8Length, badUTF8[i], length);
2587 memcpy(utf8+utf8Length, charUTF8, charUTF8Length);
2588 utf8Length+=charUTF8Length;
2590 memcpy(expect+expectLength, char1, char1Length);
2591 expectLength+=char1Length;
2594 /* expect that each bad UTF-8 sequence is detected and skipped */
2595 strcpy(testName, "from bad UTF-8 to ");
2596 strcat(testName, converterName);
2598 convertExMultiStreaming(utf8Cnv, cnv,
2600 expect, expectLength,
2605 /* Test illegal UTF-8 input. */
/*
 * Opens a UTF-8 converter once, then for each name in converterNames[] opens
 * that converter, obtains a test character with getTestChar(), and runs
 * testFromTruncatedUTF8() and testFromBadUTF8() with it. Converters that cannot
 * be opened are reported with log_data_err().
 */
2606 static void TestConvertExFromUTF8() {
2607 static const char *const converterNames[]={
2608 #if !UCONFIG_NO_LEGACY_CONVERSION
2617 UConverter *utf8Cnv, *cnv;
2618 UErrorCode errorCode;
2621 /* fromUnicode versions of some character, from initial state and later */
2622 char charUTF8[4], char0[8], char1[8];
2623 int32_t charUTF8Length, char0Length, char1Length;
2625 errorCode=U_ZERO_ERROR;
2626 utf8Cnv=ucnv_open("UTF-8", &errorCode);
2627 if(U_FAILURE(errorCode)) {
2628 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode));
2632 for(i=0; i<LENGTHOF(converterNames); ++i) {
/* errorCode is reset per converter so that one failure does not affect the next */
2633 errorCode=U_ZERO_ERROR;
2634 cnv=ucnv_open(converterNames[i], &errorCode);
2635 if(U_FAILURE(errorCode)) {
2636 log_data_err("unable to open %s converter - %s\n", converterNames[i], u_errorName(errorCode));
2639 if(!getTestChar(cnv, converterNames[i], charUTF8, &charUTF8Length, char0, &char0Length, char1, &char1Length)) {
2642 testFromTruncatedUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length);
2643 testFromBadUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length);
2646 ucnv_close(utf8Cnv);
/*
 * Converts the two-byte ill-formed UTF-8 input C5 F0 to each converter in
 * converterNames[] with ucnv_convertEx(), after setting the from-Unicode
 * ESCAPE callback with the XML-decimal option on the target converter.
 * The last table entry is handled as the UTF-8 target (expects two U+FFFD
 * encoded in UTF-8); the other entries expect two decimal NCRs.
 * The target limit is dest+expectedLength, so the call is expected to end with
 * U_STRING_NOT_TERMINATED_WARNING and to leave dest[expectedLength] untouched.
 */
2649 static void TestConvertExFromUTF8_C5F0() {
2650 static const char *const converterNames[]={
2651 #if !UCONFIG_NO_LEGACY_CONVERSION
2660 UConverter *utf8Cnv, *cnv;
2661 UErrorCode errorCode;
2664 static const char bad_utf8[2]={ (char)0xC5, (char)0xF0 };
2665 /* Expect "&#65533;&#65533;" (2x U+FFFD as decimal NCRs) */
2666 static const char twoNCRs[16]={
2667 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B,
2668 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B
/* UTF-8 encoding of U+FFFD, twice */
2670 static const char twoFFFD[6]={
2671 (char)0xef, (char)0xbf, (char)0xbd,
2672 (char)0xef, (char)0xbf, (char)0xbd
2674 const char *expected;
2675 int32_t expectedLength;
2676 char dest[20]; /* longer than longest expectedLength */
2681 UChar pivotBuffer[128];
2682 UChar *pivotSource, *pivotTarget;
2684 errorCode=U_ZERO_ERROR;
2685 utf8Cnv=ucnv_open("UTF-8", &errorCode);
2686 if(U_FAILURE(errorCode)) {
2687 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode));
2691 for(i=0; i<LENGTHOF(converterNames); ++i) {
2692 errorCode=U_ZERO_ERROR;
2693 cnv=ucnv_open(converterNames[i], &errorCode);
/* the open and the callback setup share one error check below */
2694 ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
2695 NULL, NULL, &errorCode);
2696 if(U_FAILURE(errorCode)) {
2697 log_data_err("unable to open %s converter - %s\n",
2698 converterNames[i], u_errorName(errorCode));
/* fill dest with the sentinel value 9; dest[expectedLength] is checked against it afterwards */
2703 uprv_memset(dest, 9, sizeof(dest));
2704 if(i==LENGTHOF(converterNames)-1) {
2705 /* conversion to UTF-8 yields two U+FFFD directly */
2709 /* conversion to a non-Unicode charset yields two NCRs */
2716 pivotSource=pivotTarget=pivotBuffer;
2719 &target, dest+expectedLength,
2720 &src, bad_utf8+sizeof(bad_utf8),
2721 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer),
2722 TRUE, TRUE, &errorCode);
/* all input consumed, output exactly fills the limited target, sentinel byte intact */
2723 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || src!=bad_utf8+2 ||
2724 target!=dest+expectedLength || 0!=uprv_memcmp(dest, expected, expectedLength) ||
2725 dest[expectedLength]!=9
2727 log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames[i]);
2731 ucnv_close(utf8Cnv);
/*
 * Exercises the convenience functions ucnv_fromAlgorithmic() and
 * ucnv_toAlgorithmic() with a Shift-JIS converter and the algorithmic types
 * UCNV_UTF8, UCNV_UTF16 and UCNV_UTF32:
 *  - explicit-length input into a target with no room for a NUL,
 *  - NUL-terminated input (length -1) into a large target,
 *  - empty and BOM-only inputs,
 *  - a call entered with a failure code, a NULL source and an invalid
 *    algorithmic type.
 * The failure message for the Shift-JIS -> UTF-8 case reports sizeof(utf8) as
 * the expected length, matching the condition it belongs to, and sizeof values
 * are cast to int for the %d conversions.
 */
2735 TestConvertAlgorithmic() {
2736 #if !UCONFIG_NO_LEGACY_CONVERSION
2737 static const uint8_t
2739 /* 4e00 30a1 ff61 0410 */
2740 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2743 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2747 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2748 * SUB, SUB, 0x40, SUB, SUB, 0x40
2750 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/
2753 0xfe, 0xff /* BOM only, no text */
2756 0xff, 0xfe, 0, 0 /* BOM only, no text */
2759 char target[100], utf8NUL[100], shiftJISNUL[100];
2762 UErrorCode errorCode;
2766 errorCode=U_ZERO_ERROR;
2767 cnv=ucnv_open("Shift-JIS", &errorCode);
2768 if(U_FAILURE(errorCode)) {
2769 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode));
/* NUL-terminated copies of the test data for the length==-1 calls */
2774 memcpy(utf8NUL, utf8, sizeof(utf8));
2775 utf8NUL[sizeof(utf8)]=0;
2776 memcpy(shiftJISNUL, shiftJIS, sizeof(shiftJIS));
2777 shiftJISNUL[sizeof(shiftJIS)]=0;
2780 * The to/from algorithmic convenience functions share a common implementation,
2781 * so we need not test all permutations of them.
2784 /* length in, not terminated out */
2785 errorCode=U_ZERO_ERROR;
2786 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF8, target, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), &errorCode);
2787 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2788 length!=sizeof(shiftJIS) ||
2789 memcmp(target, shiftJIS, length)!=0
2791 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n",
2792 u_errorName(errorCode), length, (int)sizeof(shiftJIS));
2795 /* terminated in and out */
2796 memset(target, 0x55, sizeof(target));
/* a warning on entry must be replaced by U_ZERO_ERROR on success */
2797 errorCode=U_STRING_NOT_TERMINATED_WARNING;
2798 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, -1, &errorCode);
2799 if( errorCode!=U_ZERO_ERROR ||
2800 length!=sizeof(utf8) ||
2801 memcmp(target, utf8, length)!=0
2803 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n",
2804 u_errorName(errorCode), length, (int)sizeof(utf8));
2807 /* empty string, some target buffer */
2808 errorCode=U_STRING_NOT_TERMINATED_WARNING;
2809 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, 0, &errorCode);
2810 if( errorCode!=U_ZERO_ERROR ||
2813 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n",
2814 u_errorName(errorCode), length);
2817 /* pseudo-empty string, no target buffer */
2818 errorCode=U_ZERO_ERROR;
2819 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode);
2820 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2823 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
2824 u_errorName(errorCode), length);
2827 errorCode=U_ZERO_ERROR;
2828 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode);
2829 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2832 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
2833 u_errorName(errorCode), length);
/* a failure code on entry must be left unchanged by the call */
2837 errorCode=U_MESSAGE_PARSE_ERROR;
2838 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode);
2839 if(errorCode!=U_MESSAGE_PARSE_ERROR) {
2840 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode));
/* NULL source with a non-zero length must be rejected */
2844 errorCode=U_ZERO_ERROR;
2845 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, NULL, 2, &errorCode);
2846 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2847 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode));
2850 /* illegal alg. type */
2851 errorCode=U_ZERO_ERROR;
2852 length=ucnv_fromAlgorithmic(cnv, (UConverterType)99, target, 0, (const char *)utf16, 2, &errorCode);
2853 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2854 log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode));
2860 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Table-driven check of ucnv_getMaxCharSize(): each table row pairs an
 * expected maximum character size with a converter name. Converters are opened
 * through cnv_open(), which opens from the test data package when the name
 * starts with '*'. A final check uses the UCNV_GET_MAX_BYTES_FOR_STRING macro.
 */
2861 static void TestLMBCSMaxChar(void) {
2862 static const struct {
2866 /* some non-LMBCS converters - perfect test setup here */
2877 { 4, "IMAP-mailbox-name"},
2880 { 1, "windows-1256"},
2892 { 3, "ISO-2022-KR"},
2893 { 6, "ISO-2022-JP"},
2894 { 8, "ISO-2022-CN"},
2912 for (idx = 0; idx < LENGTHOF(converter); idx++) {
/* status is local to the iteration, so every converter starts from U_ZERO_ERROR */
2913 UErrorCode status = U_ZERO_ERROR;
2914 UConverter *cnv = cnv_open(converter[idx].name, &status);
2915 if (U_FAILURE(status)) {
2918 if (converter[idx].maxSize != ucnv_getMaxCharSize(cnv)) {
2919 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n",
2920 converter[idx].name, converter[idx].maxSize, ucnv_getMaxCharSize(cnv));
2925 /* mostly test that the macro compiles */
2926 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) {
2927 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n");
/*
 * Checks how ucnv_openU() and ucnv_open() react to a very long, non-existent
 * converter name that is cut at positions around
 * UCNV_MAX_CONVERTER_NAME_LENGTH. Every call must return NULL. The expected
 * error is U_ILLEGAL_ARGUMENT_ERROR in some cases and U_FILE_ACCESS_ERROR in
 * others, as spelled out by each check.
 * NOTE(review): statements between the visible checks (for example resets of
 * err) are not visible here.
 */
2932 static void TestJ1968(void) {
2933 UErrorCode err = U_ZERO_ERROR;
2935 char myConvName[] = "My really really really really really really really really really really really"
2936 " really really really really really really really really really really really"
2937 " really really really really really really really really long converter name";
2938 UChar myConvNameU[sizeof(myConvName)];
/* UChar copy of the name, including its terminating NUL */
2940 u_charsToUChars(myConvName, myConvNameU, sizeof(myConvName));
/* UChar name terminated at MAX+1 and at MAX: both must be rejected as illegal arguments */
2943 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH+1] = 0;
2944 cnv = ucnv_openU(myConvNameU, &err);
2945 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2946 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
2950 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0;
2951 cnv = ucnv_openU(myConvNameU, &err);
2952 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2953 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
/* terminated at MAX-1: the name is looked up and not found */
2957 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0;
2958 cnv = ucnv_openU(myConvNameU, &err);
2959 if (cnv || err != U_FILE_ACCESS_ERROR) {
2960 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
/* the same kind of checks with the char * API, using ',' as option separator */
2967 cnv = ucnv_open(myConvName, &err);
2968 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2969 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
2973 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = ',';
2974 cnv = ucnv_open(myConvName, &err);
2975 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2976 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
2980 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ',';
2981 cnv = ucnv_open(myConvName, &err);
2982 if (cnv || err != U_FILE_ACCESS_ERROR) {
2983 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
2987 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ',';
/* overwrite 7 bytes in place with "locale=" (no terminator is copied) */
2988 strncpy(myConvName + UCNV_MAX_CONVERTER_NAME_LENGTH, "locale=", 7);
2989 cnv = ucnv_open(myConvName, &err);
2990 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2991 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
2994 /* The comma isn't really a part of the converter name. */
2996 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0;
2997 cnv = ucnv_open(myConvName, &err);
2998 if (cnv || err != U_FILE_ACCESS_ERROR) {
2999 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
3003 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ' ';
3004 cnv = ucnv_open(myConvName, &err);
3005 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3006 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3010 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0;
3011 cnv = ucnv_open(myConvName, &err);
3012 if (cnv || err != U_FILE_ACCESS_ERROR) {
3013 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
3018 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Compares a converter opened as `name` with the same converter opened as
 * `name` + UCNV_SWAP_LFNL_OPTION_STRING.
 *   name - converter name to test
 *   swap - whether the option is expected to take effect; the canonical name
 *          of the option converter must contain the option string if and only
 *          if swap is true
 * The test text is converted from Unicode with both converters and the byte
 * outputs are compared, then converted back to Unicode and compared again.
 * The per-element swap logic of both comparison loops is only partly visible
 * here.
 */
3020 testSwap(const char *name, UBool swap) {
3022 * Test Unicode text.
3023 * Contains characters that are the highest for some of the
3024 * tested conversions, to make sure that the ucnvmbcs.c code that modifies the
3025 * tables copies the entire tables.
3027 static const UChar text[]={
3028 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a
3031 UChar uNormal[32], uSwapped[32];
3032 char normal[32], swapped[32];
3036 int32_t i, normalLength, swappedLength;
3040 const char *swappedName;
3041 UConverter *cnv, *swapCnv;
3042 UErrorCode errorCode;
3044 /* if the swap flag is FALSE, then the test encoding is not EBCDIC and must not swap */
3046 /* open both the normal and the LF/NL-swapping converters */
/* swapped[] first serves as scratch space for the option-suffixed name; it is reused as an output buffer later */
3047 strcpy(swapped, name);
3048 strcat(swapped, UCNV_SWAP_LFNL_OPTION_STRING);
3050 errorCode=U_ZERO_ERROR;
3051 swapCnv=ucnv_open(swapped, &errorCode);
3052 cnv=ucnv_open(name, &errorCode);
3053 if(U_FAILURE(errorCode)) {
3054 log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name, swapped, u_errorName(errorCode));
3058 /* the name must contain the swap option if and only if we expect the converter to swap */
3059 swappedName=ucnv_getName(swapCnv, &errorCode);
3060 if(U_FAILURE(errorCode)) {
3061 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name, u_errorName(errorCode));
3065 pc=strstr(swappedName, UCNV_SWAP_LFNL_OPTION_STRING);
3066 if(swap != (pc!=NULL)) {
3067 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name, swappedName, swap);
3071 /* convert to EBCDIC */
3074 ucnv_fromUnicode(cnv, &pc, normal+LENGTHOF(normal), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode);
3075 normalLength=(int32_t)(pc-normal);
3079 ucnv_fromUnicode(swapCnv, &pc, swapped+LENGTHOF(swapped), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode);
3080 swappedLength=(int32_t)(pc-swapped);
3082 if(U_FAILURE(errorCode)) {
3083 log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name, u_errorName(errorCode));
3087 /* compare EBCDIC output */
3088 if(normalLength!=swappedLength) {
3089 log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name, normalLength, swappedLength);
3092 for(i=0; i<normalLength; ++i) {
3093 /* swap EBCDIC LF/NL for comparison */
3098 } else if(c==0x25) {
3104 log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name, i, (uint8_t)swapped[i]);
3109 /* convert back to Unicode (may not roundtrip) */
/* from here on normalLength and swappedLength are reused as UChar counts */
3112 ucnv_toUnicode(cnv, &pu, uNormal+LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode);
3113 normalLength=(int32_t)(pu-uNormal);
/* NOTE(review): the source limit below is normal+swappedLength, i.e. inside normal[]; the assignment to pc is not visible here -- confirm which buffer is meant to be read */
3117 ucnv_toUnicode(swapCnv, &pu, uSwapped+LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode);
3118 swappedLength=(int32_t)(pu-uSwapped);
3120 if(U_FAILURE(errorCode)) {
3121 log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name, u_errorName(errorCode));
3125 /* compare Unicode output */
3126 if(normalLength!=swappedLength) {
3127 log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name, normalLength, swappedLength);
3130 for(i=0; i<normalLength; ++i) {
3131 /* swap EBCDIC LF/NL for comparison */
3136 } else if(u==0x85) {
3141 if(u!=uSwapped[i]) {
3142 log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name, i, uSwapped[i]);
3150 ucnv_close(swapCnv);
/*
 * Runs testSwap() for every entry of the tests[] table. Each entry pairs a
 * converter name with a flag saying whether the swap option is expected to
 * take effect (TRUE for the ibm-* entries, FALSE for iso-8859-3).
 */
3154 TestEBCDICSwapLFNL() {
3155 static const struct {
3160 { "ibm-1047", TRUE },
3161 { "ibm-1140", TRUE },
3162 { "ibm-930", TRUE },
3163 { "iso-8859-3", FALSE }
3168 for(i=0; i<LENGTHOF(tests); ++i) {
3169 testSwap(tests[i].name, tests[i].swap);
/* Second definition with the same name: an empty stub. Presumably the alternative branch of a preprocessor conditional whose directive lines are not visible here. */
3174 TestEBCDICSwapLFNL() {
3175 /* test nothing... */
/* Version quadruple 3.4.0.0. NOTE(review): no use of this constant is visible in this part of the file. */
3179 static const UVersionInfo ICU_34 = {3,4,0,0};
/*
 * Tests ucnv_fromUCountPending() with the "test3" converter from the test data
 * package.
 * Part 1: for each row of fromUnicodeTests[] (input UChars, input length,
 * expected pending count) the input is converted with flush=FALSE and the
 * pending count is compared with the expected value.
 * Part 2: three pieces (head, middle, tail) are fed one after another into a
 * target that is only 2 bytes long, and the pending count is queried after each
 * step; the last conversion is expected to end with U_BUFFER_OVERFLOW_ERROR.
 * NOTE(review): several messages name ucnv_fromUInputConsumed or
 * ucnv_fromUInputHeld although the function called is
 * ucnv_fromUCountPending().
 */
3181 static void TestFromUCountPending(){
3182 #if !UCONFIG_NO_LEGACY_CONVERSION
3183 UErrorCode status = U_ZERO_ERROR;
3184 /* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */
3185 static const struct {
3189 }fromUnicodeTests[] = {
3192 {{ 0xdbc4, 0xde34, 0xd84d},3,1},
3193 {{ 0xdbc4, 0xde34, 0xd900},3,3},
3196 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status);
3197 if(U_FAILURE(status)){
3198 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3201 for(i=0; i<LENGTHOF(fromUnicodeTests); ++i) {
3204 char* targetLimit = target + 10;
3205 const UChar* source = fromUnicodeTests[i].input;
3206 const UChar* sourceLimit = source + fromUnicodeTests[i].len;
/* flush is FALSE so that incomplete input stays pending inside the converter */
3209 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3210 len = ucnv_fromUCountPending(cnv, &status);
3211 if(U_FAILURE(status)){
3212 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3213 status = U_ZERO_ERROR;
3216 if(len != fromUnicodeTests[i].exp){
3217 log_err("Did not get the expeced output for ucnv_fromUInputConsumed.\n");
3220 status = U_ZERO_ERROR;
3223 * The converter has to read the tail before it knows that
3224 * only head alone matches.
3225 * At the end, the output for head will overflow the target,
3226 * middle will be pending, and tail will not have been consumed.
3229 \U00101234 -> x (<U101234> \x07 |0)
3230 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0)
3231 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0)
3232 \U00060007 -> unassigned
3234 static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */
3235 static const UChar middle[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */
3236 static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */
3239 char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */
3240 const UChar* source = head;
3241 const UChar* sourceLimit = source + u_strlen(head);
/* step 1: feed head */
3244 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3245 len = ucnv_fromUCountPending(cnv, &status);
3246 if(U_FAILURE(status)){
3247 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3248 status = U_ZERO_ERROR;
3251 log_err("ucnv_fromUInputHeld did not return correct length for head\n");
/* step 2: feed middle (the assignment of source is not visible here) */
3254 sourceLimit = source + u_strlen(middle);
3255 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3256 len = ucnv_fromUCountPending(cnv, &status);
3257 if(U_FAILURE(status)){
3258 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3259 status = U_ZERO_ERROR;
3262 log_err("ucnv_fromUInputHeld did not return correct length for middle\n");
/* step 3: feed tail; this conversion is expected to overflow the target */
3265 sourceLimit = source + u_strlen(tail);
3266 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3267 if(status != U_BUFFER_OVERFLOW_ERROR){
3268 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
/* clear the expected overflow before querying the pending count */
3270 status = U_ZERO_ERROR;
3271 len = ucnv_fromUCountPending(cnv, &status);
3272 /* middle[1] is pending, tail has not been consumed */
3273 if(U_FAILURE(status)){
3274 log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status));
3277 log_err("ucnv_fromUInputHeld did not return correct length for tail\n");
/*
 * Tests ucnv_toUCountPending().
 * Part 1 uses the "test3" converter from the test data package: for each row
 * of toUnicodeTests[] (input bytes, input length, expected pending count) the
 * input is converted with flush=FALSE and the STOP to-Unicode callback, and the
 * pending count is compared with the expected value.
 * Part 2 opens the "test4" converter and feeds head, mid and tail one after
 * another into a one-UChar target, querying the pending count after each step;
 * the last conversion is expected to end with U_BUFFER_OVERFLOW_ERROR.
 * The message reported when "test4" cannot be opened names test4.
 */
3285 TestToUCountPending(){
3286 #if !UCONFIG_NO_LEGACY_CONVERSION
3287 UErrorCode status = U_ZERO_ERROR;
3288 static const struct {
3292 }toUnicodeTests[] = {
3294 {{0x05, 0x01, 0x02},3,3},
3296 {{0x07, 0x00, 0x01, 0x02},4,4},
/* always NULL: the previous callback is not retrieved by the ucnv_setToUCallBack() calls below */
3300 UConverterToUCallback *oldToUAction= NULL;
3301 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status);
3302 if(U_FAILURE(status)){
3303 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3306 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status);
3307 for(i=0; i<LENGTHOF(toUnicodeTests); ++i) {
3309 UChar* target = tgt;
3310 UChar* targetLimit = target + 20;
3311 const char* source = toUnicodeTests[i].input;
3312 const char* sourceLimit = source + toUnicodeTests[i].len;
/* flush is FALSE so that incomplete input stays pending inside the converter */
3315 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3316 len = ucnv_toUCountPending(cnv,&status);
3317 if(U_FAILURE(status)){
3318 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3319 status = U_ZERO_ERROR;
3322 if(len != toUnicodeTests[i].exp){
3323 log_err("Did not get the expeced output for ucnv_toUInputConsumed.\n");
3326 status = U_ZERO_ERROR;
3331 * The converter has to read the tail before it knows that
3332 * only head alone matches.
3333 * At the end, the output for head will overflow the target,
3334 * mid will be pending, and tail will not have been consumed.
3336 char head[] = { 0x01, 0x02, 0x03, 0x0a , 0x00};
3337 char mid[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 };
3338 char tail[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 };
3340 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0)
3341 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0)
3342 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3)
3343 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar")
3346 UChar* target = tgt;
3347 UChar* targetLimit = target + 1; /* expect overflow from converting */
3348 const char* source = head;
3349 const char* sourceLimit = source + strlen(head);
3351 cnv = ucnv_openPackage(loadTestData(&status), "test4", &status);
3352 if(U_FAILURE(status)){
3353 log_err("Could not create converter for test4. Error: %s\n", u_errorName(status));
3356 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status);
/* step 1: feed head */
3357 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3358 len = ucnv_toUCountPending(cnv,&status);
3359 if(U_FAILURE(status)){
3360 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3363 log_err("Did not get the expected len for head.\n");
/* step 2: feed mid (the assignment of source is not visible here) */
3366 sourceLimit = source+strlen(mid);
3367 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3368 len = ucnv_toUCountPending(cnv,&status);
3369 if(U_FAILURE(status)){
3370 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3373 log_err("Did not get the expected len for mid.\n");
/* step 3: feed tail with no target space left, so the conversion must overflow */
3377 sourceLimit = source+strlen(tail);
3378 targetLimit = target;
3379 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3380 if(status != U_BUFFER_OVERFLOW_ERROR){
3381 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
/* clear the expected overflow before querying the pending count */
3383 status = U_ZERO_ERROR;
3384 len = ucnv_toUCountPending(cnv,&status);
3385 /* mid[4] is pending, tail has not been consumed */
3386 if(U_FAILURE(status)){
3387 log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status));
3390 log_err("Did not get the expected len for tail.\n");
/*
 * Sets the default converter name to `name` and verifies two things:
 * ucnv_getDefaultName() must equal `expected`, and a converter opened with a
 * NULL name must report `expected` from ucnv_getName().
 *   name     - value passed to ucnv_setDefaultName()
 *   expected - name that must be reported afterwards
 * The previous default name is not restored by the code visible here.
 */
3397 static void TestOneDefaultNameChange(const char *name, const char *expected) {
3398 UErrorCode status = U_ZERO_ERROR;
3400 ucnv_setDefaultName(name);
3401 if(strcmp(ucnv_getDefaultName(), expected)==0)
3402 log_verbose("setDefaultName of %s works.\n", name);
3404 log_err("setDefaultName of %s failed\n", name);
/* a NULL name opens the default converter */
3405 cnv=ucnv_open(NULL, &status);
3406 if (U_FAILURE(status) || cnv == NULL) {
3407 log_err("opening the default converter of %s failed\n", name);
3410 if(strcmp(ucnv_getName(cnv, &status), expected)==0)
3411 log_verbose("ucnv_getName of %s works.\n", name);
3413 log_err("ucnv_getName of %s failed\n", name);
/*
 * Exercises ucnv_getDefaultName() and ucnv_setDefaultName(): remembers the
 * current default converter name, switches the default through several names
 * via TestOneDefaultNameChange(), and finally restores the remembered name.
 *
 * NOTE(review): the #else / #endif lines of the conditionals below are not
 * visible in this extract.
 */
3417 static void TestDefaultName(void) {
3418 /*Testing ucnv_getDefaultName() and ucnv_setDefaultName()*/
/* Private copy of the original default name so it can be restored at the end.
 * NOTE(review): strcpy() is unbounded; this relies on the default name fitting
 * in UCNV_MAX_CONVERTER_NAME_LENGTH characters -- confirm. */
3419 static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1];
3420 strcpy(defaultName, ucnv_getDefaultName());
3422 log_verbose("getDefaultName returned %s\n", defaultName);
3424 /*change the default name by setting it */
3425 TestOneDefaultNameChange("UTF-8", "UTF-8");
/* With U_CHARSET_IS_UTF8 set, every requested name is expected to leave the
 * default at UTF-8. */
3426 #if U_CHARSET_IS_UTF8
3427 TestOneDefaultNameChange("ISCII,version=1", "UTF-8");
3428 TestOneDefaultNameChange("ISCII,version=2", "UTF-8");
3429 TestOneDefaultNameChange("ISO-8859-1", "UTF-8");
/* Presumably the #else branch: here the default is expected to follow the
 * requested name. The ISCII cases are only compiled when legacy conversion
 * is available. */
3431 # if !UCONFIG_NO_LEGACY_CONVERSION
3432 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1");
3433 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2");
3435 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1");
3438 /*set the default name back*/
3439 ucnv_setDefaultName(defaultName);
3442 /* Test that ucnv_compareNames() matches names according to spec. ----------- */
/*
 * Checks a table of converter names against ucnv_compareNames().
 *
 * names - NULL-terminated array of strings. Judging from the tables passed in
 *         by TestCompareNames(), the first entry is a relation marker ("=" or
 *         "<") and the remaining entries are the names to compare.
 *
 * NOTE(review): the lines that initialize `relation`, `rel` and `name1` (and
 * any update of `name1` inside the loop) are not visible in this extract, so
 * the exact pairing of names is not documented here -- confirm against the
 * full file.
 */
3456 compareNames(const char **names) {
3457 const char *relation, *name1, *name2;
/* Choose the expected comparison sign from the relation marker. */
3461 if(*relation=='=') {
3463 } else if(*relation=='<') {
/* Consume entries up to the NULL terminator; each comparison result is
 * reduced with sign() and must equal the expected sign `rel`. */
3473 while((name2=*names++)!=NULL) {
3474 result=ucnv_compareNames(name1, name2);
3475 if(sign(result)!=rel) {
3476 log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1, name2, result, rel);
/*
 * Runs compareNames() over four fixed tables of converter names.
 * Each table starts with a relation marker ("=" or "<") and is terminated by
 * NULL. The "=" tables list spelling variants of one name (case, punctuation,
 * spacing and leading-zero differences); the "<" tables list names that are
 * presumably expected to compare in ascending order -- confirm against
 * compareNames().
 */
3483 TestCompareNames() {
3484 static const char *equalUTF8[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL };
3485 static const char *equalIBM[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL };
3486 static const char *lessMac[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL };
3487 static const char *lessUTF080[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL };
3489 compareNames(equalUTF8);
3490 compareNames(equalIBM);
3491 compareNames(lessMac);
3492 compareNames(lessUTF080);
/*
 * Substitution-string test body (the function header line is not visible in
 * this extract). It covers three things:
 *   1. UTF-16 and UTF-32: converting a lone surrogate must still produce
 *      output that ucnv_detectUnicodeSignature() recognizes as starting with
 *      a BOM.
 *   2. ISO-8859-1: a string set with ucnv_setSubstString() must be returned
 *      by ucnv_getSubstChars() as the same five bytes.
 *   3. HZ (only with legacy conversion): after ucnv_setSubstString(),
 *      ucnv_getSubstChars() must report a length of 0.
 *
 * NOTE(review): some declarations, condition fragments, `return`/close calls
 * and closing braces are not visible in this extract.
 */
/* Unpaired surrogate used as unconvertible input. */
3497 static const UChar surrogate[1]={ 0xd900 };
/* The same five code values (0x61..0x65) as UChars and as bytes. */
3500 static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3501 static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3503 UErrorCode errorCode;
3507 /* UTF-16/32: test that the BOM is output before the sub character */
3508 errorCode=U_ZERO_ERROR;
3509 cnv=ucnv_open("UTF-16", &errorCode);
3510 if(U_FAILURE(errorCode)) {
3511 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode));
3514 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode);
/* Fail if conversion errored or no Unicode signature is detected in the output. */
3516 if(U_FAILURE(errorCode) ||
3518 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode)
3520 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n");
/* Same check for UTF-32. */
3523 errorCode=U_ZERO_ERROR;
3524 cnv=ucnv_open("UTF-32", &errorCode);
3525 if(U_FAILURE(errorCode)) {
3526 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode));
3529 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode);
3531 if(U_FAILURE(errorCode) ||
3533 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode)
3535 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n");
3538 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */
3539 errorCode=U_ZERO_ERROR;
3540 cnv=ucnv_open("ISO-8859-1", &errorCode);
3541 if(U_FAILURE(errorCode)) {
3542 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode));
3545 ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode);
3546 if(U_FAILURE(errorCode)) {
3547 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode));
/* len8 is in/out: buffer capacity on entry, substitution length on return. */
3549 len8 = sizeof(buffer);
3550 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode);
3551 /* Stateless converter, we expect the string converted to charset bytes. */
3552 if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) {
3553 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode));
/* The HZ part is compiled only when legacy conversion is available. */
3558 #if !UCONFIG_NO_LEGACY_CONVERSION
3559 errorCode=U_ZERO_ERROR;
3560 cnv=ucnv_open("HZ", &errorCode);
3561 if(U_FAILURE(errorCode)) {
3562 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode));
3565 ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode);
3566 if(U_FAILURE(errorCode)) {
3567 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode));
3569 len8 = sizeof(buffer);
3570 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode);
3571 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */
3572 if(U_FAILURE(errorCode) || len8!=0) {
3573 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode));
3579 * Further testing of ucnv_setSubstString() is done via intltest convert.
3580 * We do not test edge cases of illegal arguments and similar because the
3581 * function implementation uses all of its parameters in calls to other
3582 * functions with UErrorCode parameters.
/*
 * Verifies that ucnv_fromUnicode() and ucnv_toUnicode() reject inconsistent
 * buffer pointers with U_ILLEGAL_ARGUMENT_ERROR, and that none of the
 * rejected calls wrote into the caller's buffers.
 *
 * NOTE(review): a declaration, `return` statements and closing braces are not
 * visible in this extract.
 */
3587 InvalidArguments() {
3589 UErrorCode errorCode;
/* Both buffers hold sentinel values (1 and 2) that are re-checked at the end. */
3590 char charBuffer[2] = {1, 1};
3591 char ucharAsCharBuffer[2] = {2, 2};
3592 char *charsPtr = charBuffer;
3593 UChar *ucharsPtr = (UChar *)ucharAsCharBuffer;
/* Deliberately one byte past ucharsPtr, so the two pointers are not a whole
 * UChar apart. */
3594 UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1);
3596 errorCode=U_ZERO_ERROR;
3597 cnv=ucnv_open("UTF-8", &errorCode);
3598 if(U_FAILURE(errorCode)) {
3599 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode));
3603 errorCode=U_ZERO_ERROR;
3604 /* This one should fail because an incomplete UChar is being passed in */
3605 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, TRUE, &errorCode);
3606 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3607 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode));
3610 errorCode=U_ZERO_ERROR;
3611 /* This one should fail because the source pointer (ucharsBadPtr) is greater than the source limit (ucharsPtr) */
3612 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, TRUE, &errorCode);
3613 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3614 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode));
3617 errorCode=U_ZERO_ERROR;
3618 /* This one should fail because an incomplete UChar is being passed in */
3619 ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode);
3620 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3621 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode));
3624 errorCode=U_ZERO_ERROR;
3625 /* This one should fail because the target pointer (ucharsBadPtr) is greater than the target limit (ucharsPtr) */
3626 ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode);
3627 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3628 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode));
/* The sentinel bytes must be untouched: a rejected call must not write output. */
3631 if (charBuffer[0] != 1 || charBuffer[1] != 1
3632 || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2)
3634 log_err("Data was incorrectly written to buffers\n");
/*
 * Checks that converters opened by alias report the expected name from
 * ucnv_getName(). `names` is a flat list of pairs: the name passed to
 * ucnv_open() followed by the name ucnv_getName() is expected to return.
 *
 * NOTE(review): the loop-index declaration, any cleanup call and the closing
 * braces are not visible in this extract.
 */
3640 static void TestGetName() {
3641 static const char *const names[] = {
3642 "Unicode", "UTF-16",
3643 "UnicodeBigUnmarked", "UTF-16BE",
3644 "UnicodeBig", "UTF-16BE,version=1",
3645 "UnicodeLittleUnmarked", "UTF-16LE",
3646 "UnicodeLittle", "UTF-16LE,version=1",
3647 "x-UTF-16LE-BOM", "UTF-16LE,version=1"
/* Step by two: names[i] is the alias to open, names[i+1] the expected name. */
3650 for(i = 0; i < LENGTHOF(names); i += 2) {
3651 UErrorCode errorCode = U_ZERO_ERROR;
3652 UConverter *cnv = ucnv_open(names[i], &errorCode);
/* The name check only runs when the converter opened successfully. */
3653 if(U_SUCCESS(errorCode)) {
3654 const char *name = ucnv_getName(cnv, &errorCode);
3655 if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) {
3656 log_err("ucnv_getName(%s) = %s != %s -- %s\n",
3657 names[i], name, names[i+1], u_errorName(errorCode));
3664 static void TestUTFBOM() {
3665 static const UChar a16[] = { 0x61 };
3666 static const char *const names[] = {
3674 static const uint8_t expected[][5] = {
3676 { 4, 0xfe, 0xff, 0, 0x61 },
3677 { 4, 0xfe, 0xff, 0, 0x61 },
3679 { 4, 0xff, 0xfe, 0x61, 0 },
3680 { 4, 0xff, 0xfe, 0x61, 0 },
3684 { 4, 0xfe, 0xff, 0, 0x61 },
3687 { 4, 0xff, 0xfe, 0x61, 0 }
3693 for(i = 0; i < LENGTHOF(names); ++i) {
3694 UErrorCode errorCode = U_ZERO_ERROR;
3695 UConverter *cnv = ucnv_open(names[i], &errorCode);
3697 const uint8_t *exp = expected[i];
3698 if (U_FAILURE(errorCode)) {
3699 log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode));
3702 length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode);
3704 if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) {
3705 log_err("unexpected %s BOM writing behavior -- %s\n",
3706 names[i], u_errorName(errorCode));