1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*****************************************************************************
12 * Modification History:
14 * Madhu Katragadda Ported for C API
15 ******************************************************************************
21 #include "unicode/uloc.h"
22 #include "unicode/ucnv.h"
23 #include "unicode/ucnv_err.h"
24 #include "unicode/putil.h"
25 #include "unicode/uset.h"
26 #include "unicode/ustring.h"
27 #include "ucnv_bld.h" /* for sizeof(UConverter) */
28 #include "cmemory.h" /* for UAlignedMemory */
33 #define NUM_CODEPAGE 1
34 #define MAX_FILE_LEN 1024*20
35 #define UCS_FILE_NAME_SIZE 512
37 /* Forward declarations: each helper returns a callback action other than the one passed in as MIA. */
38 #if !UCONFIG_NO_LEGACY_CONVERSION
39 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA);
40 static UConverterToUCallback otherCharAction(UConverterToUCallback MIA);
/*
 * Opens a converter by name.
 * A name that is non-NULL and starts with '*' is opened with
 * ucnv_openPackage() from the package returned by loadTestData(), using the
 * name with the leading '*' removed. Every other name (including NULL) is
 * passed to ucnv_open(). Errors are reported through pErrorCode.
 */
44 cnv_open(const char *name, UErrorCode *pErrorCode) {
45 if(name!=NULL && name[0]=='*') {
46 return ucnv_openPackage(loadTestData(pErrorCode), name+1, pErrorCode);
48 return ucnv_open(name, pErrorCode);
/* Forward declarations of the test functions that addTestConvert() registers. */
53 static void ListNames(void);
54 static void TestFlushCache(void);
55 static void TestDuplicateAlias(void);
56 static void TestCCSID(void);
57 static void TestJ932(void);
58 static void TestJ1968(void);
/* Declared only when both file I/O and legacy conversion are configured in. */
59 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
60 static void TestLMBCSMaxChar(void);
/* Declared only when legacy conversion is configured in. */
63 #if !UCONFIG_NO_LEGACY_CONVERSION
64 static void TestConvertSafeCloneCallback(void);
67 static void TestEBCDICSwapLFNL(void);
68 static void TestConvertEx(void);
69 static void TestConvertExFromUTF8(void);
70 static void TestConvertExFromUTF8_C5F0(void);
71 static void TestConvertAlgorithmic(void);
/* The next two are not static: they are implemented in another file. */
72 void TestDefaultConverterError(void); /* defined in cctest.c */
73 void TestDefaultConverterSet(void); /* defined in cctest.c */
74 static void TestToUCountPending(void);
75 static void TestFromUCountPending(void);
76 static void TestDefaultName(void);
77 static void TestCompareNames(void);
78 static void TestSubstString(void);
79 static void InvalidArguments(void);
80 static void TestGetName(void);
81 static void TestUTFBOM(void);
/*
 * Registers the converter C API tests of this file.
 * Each addTest() call attaches one test function to *root under a path that
 * starts with "tsconv/ccapitst/". Some registrations are conditional on the
 * UCONFIG_NO_LEGACY_CONVERSION / UCONFIG_NO_FILE_IO build switches.
 *
 * root: test tree to which the tests are added.
 */
83 void addTestConvert(TestNode** root);
85 void addTestConvert(TestNode** root)
87 addTest(root, &ListNames, "tsconv/ccapitst/ListNames");
88 addTest(root, &TestConvert, "tsconv/ccapitst/TestConvert");
89 addTest(root, &TestFlushCache, "tsconv/ccapitst/TestFlushCache");
90 addTest(root, &TestAlias, "tsconv/ccapitst/TestAlias");
91 addTest(root, &TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias");
92 addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone");
/* Registered only when legacy conversion is configured in. */
93 #if !UCONFIG_NO_LEGACY_CONVERSION
94 addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback");
96 addTest(root, &TestCCSID, "tsconv/ccapitst/TestCCSID");
97 addTest(root, &TestJ932, "tsconv/ccapitst/TestJ932");
98 addTest(root, &TestJ1968, "tsconv/ccapitst/TestJ1968");
/* Registered only when both file I/O and legacy conversion are configured in. */
99 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
100 addTest(root, &TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar");
102 addTest(root, &TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL");
103 addTest(root, &TestConvertEx, "tsconv/ccapitst/TestConvertEx");
104 addTest(root, &TestConvertExFromUTF8, "tsconv/ccapitst/TestConvertExFromUTF8");
105 addTest(root, &TestConvertExFromUTF8_C5F0, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0");
106 addTest(root, &TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic");
107 addTest(root, &TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError");
108 addTest(root, &TestDefaultConverterSet, "tsconv/ccapitst/TestDefaultConverterSet");
/* Registered only when file I/O is configured in. */
109 #if !UCONFIG_NO_FILE_IO
110 addTest(root, &TestToUCountPending, "tsconv/ccapitst/TestToUCountPending");
111 addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending");
113 addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName");
114 addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames");
115 addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString");
116 addTest(root, &InvalidArguments, "tsconv/ccapitst/InvalidArguments");
117 addTest(root, &TestGetName, "tsconv/ccapitst/TestGetName");
118 addTest(root, &TestUTFBOM, "tsconv/ccapitst/TestUTFBOM");
/*
 * Exercises the converter-name listing APIs:
 *  - ucnv_openAllNames() together with uenum_count(), uenum_next(),
 *    uenum_reset() and uenum_close();
 *  - ucnv_countAvailable() and ucnv_getAvailableName();
 *  - ucnv_countAliases(), ucnv_getAlias() and ucnv_getAliases() for "utf-8".
 * Failures are reported through log_err()/log_data_err().
 */
121 static void ListNames(void) {
122 UErrorCode err = U_ZERO_ERROR;
123 int32_t testLong1 = 0;
124 const char* available_conv;
125 UEnumeration *allNamesEnum = NULL;
126 int32_t allNamesCount = 0;
129 log_verbose("Testing ucnv_openAllNames()...");
130 allNamesEnum = ucnv_openAllNames(&err);
132 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err));
135 const char *string = NULL;
139 allNamesCount = uenum_count(allNamesEnum, &err);
/* First pass: walk every name the enumeration yields. */
140 while ((string = uenum_next(allNamesEnum, &len, &err))) {
142 log_verbose("read \"%s\", length %i\n", string, len);
144 if (U_FAILURE(err)) {
145 log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err));
/* Second pass after a reset: additionally try to open and close each named converter. */
148 uenum_reset(allNamesEnum, &err);
149 while ((string = uenum_next(allNamesEnum, &len, &err))) {
151 ucnv_close(ucnv_open(string, &err));
152 log_verbose("read \"%s\", length %i (%s)\n", string, len, U_SUCCESS(err) ? "available" : "unavailable");
/* NOTE(review): count1/count2 are presumably the per-pass name counts; their updates are not visible here - confirm. */
155 if (count1 != count2) {
156 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n");
159 uenum_close(allNamesEnum);
162 /* Tests ucnv_countAvailable() and ucnv_getAvailableName() */
164 log_verbose("Testing ucnv_countAvailable()...");
166 testLong1=ucnv_countAvailable();
167 log_info("Number of available codepages: %d/%d\n", testLong1, allNamesCount);
169 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */
/* The index passed here is the value just returned by ucnv_countAvailable(). */
171 available_conv = ucnv_getAvailableName(testLong1);
172 /*test ucnv_getAvailableName with err condition*/
173 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 ");
174 available_conv = ucnv_getAvailableName(-1);
175 if(available_conv != NULL){
176 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n");
179 /* Test ucnv_countAliases() etc. */
180 count = ucnv_countAliases("utf-8", &err);
182 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err));
183 } else if(count <= 0) {
184 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count);
186 /* try to get the aliases individually */
/* Alias 0 of "utf-8" is expected to be exactly "UTF-8". */
188 alias = ucnv_getAlias("utf-8", 0, &err);
190 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err));
191 } else if(strcmp("UTF-8", alias) != 0) {
192 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias);
195 for(aliasNum = 0; aliasNum < count; ++aliasNum) {
196 alias = ucnv_getAlias("utf-8", aliasNum, &err);
198 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err));
/* Sanity limit: an alias longer than 20 characters is reported as likely corruption. */
199 } else if(strlen(alias) > 20) {
201 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum, alias);
203 log_verbose("alias %d for utf-8: %s\n", aliasNum, alias);
207 /* try to fill an array with all aliases */
208 const char **aliases;
/* One pointer slot per alias; only the pointer array is allocated here. */
209 aliases=(const char **)malloc(count * sizeof(const char *));
211 ucnv_getAliases("utf-8", aliases, &err);
213 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err));
215 for(aliasNum = 0; aliasNum < count; ++aliasNum) {
216 /* compare the pointers with the ones returned individually */
217 alias = ucnv_getAlias("utf-8", aliasNum, &err);
219 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err));
220 } else if(aliases[aliasNum] != alias) {
221 log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum, aliasNum);
/* Frees only the pointer array allocated above. */
225 free((char **)aliases);
/*
 * Broad test of the UConverter C API, centred on the ibm-949 converter and a
 * Unicode test file. In order, it covers:
 *  - opening converters: ucnv_openU(), ucnv_open(), ucnv_openCCSID();
 *  - ucnv_convert() including preflighting and error conditions;
 *  - ucnv_getName(), ucnv_getMaxCharSize()/ucnv_getMinCharSize(),
 *    ucnv_getSubstChars()/ucnv_setSubstChars(), ucnv_getDisplayName();
 *  - getting and setting the from-Unicode and to-Unicode callbacks;
 *  - ucnv_getCCSID() and ucnv_getPlatform();
 *  - round trips through ucnv_fromUChars()/ucnv_toUChars() and
 *    ucnv_fromUnicode()/ucnv_toUnicode().
 * Most APIs are also called with an error code that is already a failure, to
 * check that they return without doing anything.
 * The body is guarded by #if !UCONFIG_NO_LEGACY_CONVERSION.
 */
233 static void TestConvert()
235 #if !UCONFIG_NO_LEGACY_CONVERSION
238 int32_t testLong1 = 0;
242 FILE* ucs_file_in = NULL;
244 UChar myUChar = 0x0000;
245 char* mytarget; /* heap buffer of MAX_FILE_LEN chars, allocated below */
248 UChar* consumedUni = NULL;
249 char* consumed = NULL;
250 char* output_cp_buffer; /* heap buffer of MAX_FILE_LEN chars, allocated below */
251 UChar* ucs_file_buffer; /* heap buffer of MAX_FILE_LEN UChars, allocated below */
252 UChar* ucs_file_buffer_use;
253 UChar* my_ucs_file_buffer; /* heap buffer of MAX_FILE_LEN UChars, allocated below */
254 UChar* my_ucs_file_buffer_1;
256 uint16_t codepage_index = 0;
258 UErrorCode err = U_ZERO_ERROR;
259 char ucs_file_name[UCS_FILE_NAME_SIZE];
/* MIA1/MIA2 hold the converter's original callbacks; the *_2 variables hold what is read back after a set. */
260 UConverterFromUCallback MIA1, MIA1_2;
261 UConverterToUCallback MIA2, MIA2_2;
262 const void *MIA1Context, *MIA1Context2, *MIA2Context, *MIA2Context2;
263 UConverter* someConverters[5];
264 UConverter* myConverter = 0;
265 UChar* displayname = 0;
272 int32_t targetcapacity2;
273 int32_t targetcapacity;
277 const UChar* tmp_ucs_buf;
278 const UChar* tmp_consumedUni=NULL;
279 const char* tmp_mytarget_use;
280 const char* tmp_consumed;
282 /******************************************************************
283 Checking Unicode -> ksc
284 ******************************************************************/
/* Per-code-page expectation tables, each with NUM_CODEPAGE entries and indexed by codepage_index. */
286 const char* CodePagesToTest[NUM_CODEPAGE] =
292 const uint16_t CodePageNumberToTest[NUM_CODEPAGE] =
298 const int8_t CodePagesMinChars[NUM_CODEPAGE] =
304 const int8_t CodePagesMaxChars[NUM_CODEPAGE] =
310 const uint16_t CodePagesSubstitutionChars[NUM_CODEPAGE] =
315 const char* CodePagesTestFiles[NUM_CODEPAGE] =
321 const UConverterPlatform CodePagesPlatform[NUM_CODEPAGE] =
327 const char* CodePagesLocale[NUM_CODEPAGE] =
332 UConverterFromUCallback oldFromUAction = NULL;
333 UConverterToUCallback oldToUAction = NULL;
334 const void* oldFromUContext = NULL;
335 const void* oldToUContext = NULL;
337 /* Allocate memory */
338 mytarget = (char*) malloc(MAX_FILE_LEN * sizeof(mytarget[0]));
339 output_cp_buffer = (char*) malloc(MAX_FILE_LEN * sizeof(output_cp_buffer[0]));
340 ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(ucs_file_buffer[0]));
341 my_ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(my_ucs_file_buffer[0]));
/* Working aliases of the buffers; the base pointers above are the ones freed at the end. */
343 ucs_file_buffer_use = ucs_file_buffer;
345 mytarget_use = mytarget;
346 my_ucs_file_buffer_1=my_ucs_file_buffer;
348 /* flush the converter cache to get a consistent state before the flushing is tested */
351 /*Testing ucnv_openU()*/
353 UChar converterName[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/
354 UChar firstSortedName[]={ 0x0021, 0x0000}; /* ! */
355 UChar lastSortedName[]={ 0x007E, 0x0000}; /* ~ */
356 const char *illegalNameChars={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"};
357 UChar illegalName[100];
358 UConverter *converter=NULL;
360 converter=ucnv_openU(converterName, &err);
362 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err));
364 ucnv_close(converter);
366 converter=ucnv_openU(NULL, &err);
368 log_err("FAILURE! ucnv_openU(NULL, err) failed. %s\n", myErrorName(err));
370 ucnv_close(converter);
371 /*testing with error value*/
/* With a failure already stored in err, ucnv_openU() must return NULL. */
372 err=U_ILLEGAL_ARGUMENT_ERROR;
373 converter=ucnv_openU(converterName, &err);
374 if(!(converter == NULL)){
375 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n");
377 ucnv_close(converter);
/* The long name built from illegalNameChars is expected to be rejected with U_ILLEGAL_ARGUMENT_ERROR. */
379 u_uastrcpy(illegalName, "");
380 u_uastrcpy(illegalName, illegalNameChars);
381 ucnv_openU(illegalName, &err);
382 if(!(err==U_ILLEGAL_ARGUMENT_ERROR)){
383 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n");
/* The names "!" and "~" are expected to fail with U_FILE_ACCESS_ERROR. */
387 ucnv_openU(firstSortedName, &err);
388 if(err!=U_FILE_ACCESS_ERROR){
389 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n");
393 ucnv_openU(lastSortedName, &err);
394 if(err!=U_FILE_ACCESS_ERROR){
395 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n");
400 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n");
402 UConverter *cnv=NULL;
404 cnv=ucnv_open("ibm-949,Madhu", &err);
406 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. %s\n", myErrorName(err));
411 /*Testing ucnv_convert()*/
413 int32_t targetLimit=0, sourceLimit=0, i=0, targetCapacity=0;
414 const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00};
415 const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00};
417 sourceLimit=UPRV_LENGTHOF(source);
/* Preflight with a NULL target to learn the needed capacity; on U_BUFFER_OVERFLOW_ERROR allocate and convert again. */
421 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", NULL, targetLimit , (const char*)source, sourceLimit, &err);
422 if(err == U_BUFFER_OVERFLOW_ERROR){
424 targetLimit=targetCapacity+1;
425 target=(char*)malloc(sizeof(char) * targetLimit);
426 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
429 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err));
432 for(i=0; i<targetCapacity; i++){
433 if(target[i] != expectedTarget[i]){
434 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]);
/* Source length -1 with source+1 (skipping the leading 0x00 byte of source[]); 7 output bytes are expected. */
438 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source+1, -1, &err);
439 if(U_FAILURE(err) || i!=7){
440 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n",
441 u_errorName(err), i);
444 /*Test error conditions*/
446 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, 0, &err);
448 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n");
451 err=U_ILLEGAL_ARGUMENT_ERROR;
452 sourceLimit=UPRV_LENGTHOF(source);
453 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
455 log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n");
459 sourceLimit=UPRV_LENGTHOF(source);
461 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
462 if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){
463 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n");
470 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/
471 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n");
472 err=U_ILLEGAL_ARGUMENT_ERROR;
473 if(ucnv_open(NULL, &err) != NULL){
474 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
476 if(ucnv_openCCSID(1051, UCNV_IBM, &err) != NULL){
477 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
481 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */
482 log_verbose("\n---Testing ucnv_open default...\n");
483 someConverters[0] = ucnv_open(NULL,&err);
484 someConverters[1] = ucnv_open(NULL,&err);
485 someConverters[2] = ucnv_open("utf8", &err);
486 someConverters[3] = ucnv_openCCSID(949,UCNV_IBM,&err);
487 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM, &err)); /* test for j350; ucnv_close(NULL) is safe */
488 if (U_FAILURE(err)){ log_data_err("FAILURE! %s\n", myErrorName(err));}
490 /* Testing ucnv_getName()*/
491 /*default code page */
492 ucnv_getName(someConverters[0], &err);
494 log_data_err("getName[0] failed\n");
496 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters[0], &err));
498 ucnv_getName(someConverters[1], &err);
500 log_data_err("getName[1] failed\n");
502 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err));
505 ucnv_close(someConverters[0]);
506 ucnv_close(someConverters[1]);
507 ucnv_close(someConverters[2]);
508 ucnv_close(someConverters[3]);
/* Main loop: one iteration per entry of the code-page tables (NUM_CODEPAGE entries). */
511 for (codepage_index=0; codepage_index < NUM_CODEPAGE; ++codepage_index)
/* Build the path of the Unicode test file in ucs_file_name. */
517 strcpy(ucs_file_name, U_TOPSRCDIR U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING);
519 strcpy(ucs_file_name, loadTestData(&err));
522 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err));
527 char* index = strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR);
/* True when the last separator is not the final character of the path. */
529 if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){
534 strcat(ucs_file_name,".."U_FILE_SEP_STRING);
536 strcat(ucs_file_name, CodePagesTestFiles[codepage_index]);
538 ucs_file_in = fopen(ucs_file_name,"rb");
541 log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name);
545 /*Creates a converter and testing ucnv_openCCSID(u_int code_page, platform, errstatus*/
547 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */
548 /* ucnv_flushCache(); */
/* The converter under test is opened by the fixed name "ibm-949"; the CCSID-based open above is commented out. */
549 myConverter =ucnv_open( "ibm-949", &err);
550 if (!myConverter || U_FAILURE(err))
552 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err));
557 /*testing for ucnv_getName() */
558 log_verbose("Testing ucnv_getName()...\n");
559 ucnv_getName(myConverter, &err);
561 log_err("Error in getName\n");
564 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err));
/* Case-insensitive comparison of the converter name with the expected table entry. */
566 if (uprv_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index]))
567 log_err("getName failed\n");
569 log_verbose("getName ok\n");
570 /*Test getName with error condition*/
573 err=U_ILLEGAL_ARGUMENT_ERROR;
574 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR");
575 name=ucnv_getName(myConverter, &err);
577 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail");
583 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/
585 log_verbose("Testing ucnv_getMaxCharSize()...\n");
586 if (ucnv_getMaxCharSize(myConverter)==CodePagesMaxChars[codepage_index])
587 log_verbose("Max byte per character OK\n");
589 log_err("Max byte per character failed\n");
591 log_verbose("\n---Testing ucnv_getMinCharSize()...\n");
592 if (ucnv_getMinCharSize(myConverter)==CodePagesMinChars[codepage_index])
593 log_verbose("Min byte per character OK\n");
595 log_err("Min byte per character failed\n");
598 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/
599 log_verbose("\n---Testing ucnv_getSubstChars...\n");
601 ucnv_getSubstChars(myConverter, myptr, &ii, &err);
603 log_err("ucnv_getSubstChars returned a negative number %d\n", ii);
/* Folds the substitution bytes into a 16-bit value, most recent byte in the low 8 bits. */
607 rest = (uint16_t)(((unsigned char)rest << 8) + (unsigned char)myptr[x]);
608 if (rest==CodePagesSubstitutionChars[codepage_index])
609 log_verbose("Substitution character ok\n");
611 log_err("Substitution character failed.\n");
613 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n");
/* Write back the bytes just read, read them again into save, and compare. */
614 ucnv_setSubstChars(myConverter, myptr, ii, &err);
617 log_err("FAILURE! %s\n", myErrorName(err));
619 ucnv_getSubstChars(myConverter,save, &ii, &err);
622 log_err("FAILURE! %s\n", myErrorName(err));
625 if (strncmp(save, myptr, ii))
626 log_err("Saved substitution character failed\n");
628 log_verbose("Saved substitution character ok\n");
630 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/
631 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n");
633 ucnv_getSubstChars(myConverter, myptr, &ii, &err);
634 if(err != U_INDEX_OUTOFBOUNDS_ERROR){
635 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err));
639 ucnv_getSubstChars(myConverter, myptr, &ii, &err);
640 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n");
641 ucnv_setSubstChars(myConverter, myptr, 0, &err);
642 if(err != U_ILLEGAL_ARGUMENT_ERROR){
643 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err));
645 log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n");
/* A set attempted under an error must not take effect, so reading back must not yield "abc". */
646 strcpy(myptr, "abc");
647 ucnv_setSubstChars(myConverter, myptr, ii, &err);
649 ucnv_getSubstChars(myConverter, save, &ii, &err);
650 if(strncmp(save, myptr, ii) == 0){
651 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n");
653 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n");
655 strcpy(myptr, "abc");
656 ucnv_setSubstChars(myConverter, myptr, ii, &err);
/* A get attempted under an error must leave the save buffer unfilled. */
657 err=U_ILLEGAL_ARGUMENT_ERROR;
658 ucnv_getSubstChars(myConverter, save, &ii, &err);
659 if(strncmp(save, myptr, ii) == 0){
660 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n");
665 #ifdef U_ENABLE_GENERIC_ISO_2022
666 /*resetState ucnv_reset()*/
667 log_verbose("\n---Testing ucnv_reset()..\n");
668 ucnv_reset(myConverter);
671 const uint8_t in[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80};
672 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
673 UConverter *cnv=ucnv_open("ISO_2022", &err);
675 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
/* The first character decoded from in[] is expected to be U+0031. */
677 c=ucnv_getNextUChar(cnv, &source, limit, &err);
678 if((U_FAILURE(err) || c != (UChar32)0x0031)) {
679 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err));
688 log_verbose("\n---Testing ucnv_getDisplayName()...\n");
689 locale=CodePagesLocale[codepage_index];
/* Preflight: the first call is expected to report U_BUFFER_OVERFLOW_ERROR and the needed length; then allocate and call again. */
692 disnamelen = ucnv_getDisplayName(myConverter, locale, displayname, len, &err);
693 if(err==U_BUFFER_OVERFLOW_ERROR) {
695 displayname=(UChar*)malloc((disnamelen+1) * sizeof(UChar));
696 ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err);
698 log_err("getDisplayName failed. The error is %s\n", myErrorName(err));
701 log_verbose(" getDisplayName o.k.\n");
707 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err));
709 /*test ucnv_getDisplayName with error condition*/
710 err= U_ILLEGAL_ARGUMENT_ERROR;
711 len=ucnv_getDisplayName(myConverter,locale,NULL,0, &err);
713 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n");
715 /*test ucnv_getDisplayName with a NULL converter*/
717 len=ucnv_getDisplayName(NULL,locale,NULL,0, &err);
718 if( len !=0 || U_SUCCESS(err)){
719 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n");
723 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/
/* Remember the original from-Unicode callback and context so they can be compared and restored. */
724 ucnv_getFromUCallBack(myConverter, &MIA1, &MIA1Context);
726 log_verbose("\n---Testing ucnv_setFromUCallBack...\n");
/* Install a different action with &BOM as a recognizable context; the set call must hand back the originals. */
727 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err);
728 if (U_FAILURE(err) || oldFromUAction != MIA1 || oldFromUContext != MIA1Context)
730 log_err("FAILURE! %s\n", myErrorName(err));
733 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
734 if (MIA1_2 != otherUnicodeAction(MIA1) || MIA1Context2 != &BOM)
735 log_err("get From UCallBack failed\n");
737 log_verbose("get From UCallBack ok\n");
739 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n");
/* Restore the original action and context, and check that the replaced values are the ones installed above. */
740 ucnv_setFromUCallBack(myConverter,MIA1, MIA1Context, &oldFromUAction, &oldFromUContext, &err);
741 if (U_FAILURE(err) || oldFromUAction != otherUnicodeAction(MIA1) || oldFromUContext != &BOM)
743 log_err("FAILURE! %s\n", myErrorName(err));
746 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
747 if (MIA1_2 != MIA1 || MIA1Context2 != MIA1Context)
748 log_err("get From UCallBack action failed\n");
750 log_verbose("get From UCallBack action ok\n");
752 /*testing ucnv_setFromUCallBack with error conditions*/
753 err=U_ILLEGAL_ARGUMENT_ERROR;
754 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n");
/* With err already a failure, the set must not change the converter's callback or context. */
755 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err);
756 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
757 if(MIA1_2 == otherUnicodeAction(MIA1) || MIA1Context2 == &BOM){
758 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
763 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/
/* Same sequence as for the from-Unicode callback, now for the to-Unicode callback. */
764 ucnv_getToUCallBack(myConverter, &MIA2, &MIA2Context);
766 log_verbose("\n---Testing setTo UCallBack...\n");
767 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), &BOM, &oldToUAction, &oldToUContext, &err);
768 if (U_FAILURE(err) || oldToUAction != MIA2 || oldToUContext != MIA2Context)
770 log_err("FAILURE! %s\n", myErrorName(err));
773 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
774 if (MIA2_2 != otherCharAction(MIA2) || MIA2Context2 != &BOM)
775 log_err("To UCallBack failed\n");
777 log_verbose("To UCallBack ok\n");
779 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n");
780 ucnv_setToUCallBack(myConverter,MIA2, MIA2Context, &oldToUAction, &oldToUContext, &err);
781 if (U_FAILURE(err) || oldToUAction != otherCharAction(MIA2) || oldToUContext != &BOM)
782 { log_err("FAILURE! %s\n", myErrorName(err)); }
784 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
785 if (MIA2_2 != MIA2 || MIA2Context2 != MIA2Context)
786 log_err("To UCallBack failed\n");
788 log_verbose("To UCallBack ok\n");
790 /*testing ucnv_setToUCallBack with error conditions*/
791 err=U_ILLEGAL_ARGUMENT_ERROR;
792 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n");
/* With err already a failure, the set must not change the converter's callback or context. */
793 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), NULL, &oldToUAction, &oldToUContext, &err);
794 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
795 if (MIA2_2 == otherCharAction(MIA2) || MIA2Context2 == &BOM){
796 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
801 /*getcodepageid testing ucnv_getCCSID() */
802 log_verbose("\n----Testing getCCSID....\n");
803 cp = ucnv_getCCSID(myConverter,&err);
806 log_err("FAILURE!..... %s\n", myErrorName(err));
808 if (cp != CodePageNumberToTest[codepage_index])
809 log_err("Codepage number test failed\n");
811 log_verbose("Codepage number test OK\n");
813 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/
814 err=U_ILLEGAL_ARGUMENT_ERROR;
815 if( ucnv_getCCSID(myConverter,&err) != -1){
816 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n");
820 /*getCodepagePlatform testing ucnv_getPlatform()*/
821 log_verbose("\n---Testing getCodepagePlatform ..\n");
822 if (CodePagesPlatform[codepage_index]!=ucnv_getPlatform(myConverter, &err))
823 log_err("Platform codepage test failed\n");
825 log_verbose("Platform codepage test ok\n");
829 log_err("FAILURE! %s\n", myErrorName(err));
831 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/
832 err= U_ILLEGAL_ARGUMENT_ERROR;
833 if(ucnv_getPlatform(myConverter, &err) != UCNV_UNKNOWN){
834 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n");
/* Read the first UChar of the file as the byte order mark. */
841 // Note: gcc produces a compile warning if the return value from fread() is ignored.
842 size_t numRead = fread(&BOM, sizeof(UChar), 1, ucs_file_in);
845 if (BOM!=0xFEFF && BOM!=0xFFFE)
847 log_err("File Missing BOM...Bailing!\n");
853 /*Reads in the file*/
/* Reads one UChar per iteration; a unit is byte-swapped unless the BOM was read as 0xFEFF. */
854 while(!feof(ucs_file_in)&&(i+=fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in)))
856 myUChar = ucs_file_buffer[i-1];
858 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN*/
861 myUChar = ucs_file_buffer[i-1];
862 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN Corner Case*/
865 /*testing ucnv_fromUChars() and ucnv_toUChars() */
866 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/
/* uchar1 and uchar3 each receive the first i UChars of the file data, in buffers of i+1 UChars. */
868 uchar1=(UChar*)malloc(sizeof(UChar) * (i+1));
869 u_uastrcpy(uchar1,"");
870 u_strncpy(uchar1,ucs_file_buffer,i);
873 uchar3=(UChar*)malloc(sizeof(UChar)*(i+1));
874 u_uastrcpy(uchar3,"");
875 u_strncpy(uchar3,ucs_file_buffer,i);
878 /*Calls the Conversion Routine */
879 testLong1 = MAX_FILE_LEN;
880 log_verbose("\n---Testing ucnv_fromUChars()\n");
881 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err);
884 log_err("\nFAILURE...%s\n", myErrorName(err));
887 log_verbose(" ucnv_fromUChars() o.k.\n");
889 /*test the conversion routine */
890 log_verbose("\n---Testing ucnv_toUChars()\n");
891 /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */
893 targetsize = ucnv_toUChars(myConverter,
897 strlen(output_cp_buffer),
899 /*if there is an buffer overflow then trap the values and pass them and make the actual call*/
901 if(err==U_BUFFER_OVERFLOW_ERROR)
904 uchar2=(UChar*)malloc((targetsize+1) * sizeof(UChar));
905 targetsize = ucnv_toUChars(myConverter,
909 strlen(output_cp_buffer),
913 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err));
915 log_verbose(" ucnv_toUChars() o.k.\n");
/* Round trip: the text converted back into uchar2 must equal the original uchar1. */
917 if(u_strcmp(uchar1,uchar2)!=0)
918 log_err("equality test failed with conversion routine\n");
922 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n");
924 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/
925 err=U_ILLEGAL_ARGUMENT_ERROR;
926 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n");
927 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err);
928 if (targetcapacity !=0) {
929 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
932 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n");
933 targetcapacity = ucnv_fromUChars(NULL, output_cp_buffer, testLong1, uchar1, -1, &err);
934 if (targetcapacity !=0 || err != U_ILLEGAL_ARGUMENT_ERROR) {
935 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n");
938 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n");
939 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, 0, &err);
940 if (targetcapacity !=0) {
941 log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n");
943 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n");
944 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, 0, uchar1, -1, &err);
945 if (err != U_BUFFER_OVERFLOW_ERROR) {
946 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n");
948 /*toUChars with error conditions*/
949 targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, strlen(output_cp_buffer), &err);
951 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
954 targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, strlen(output_cp_buffer), &err);
955 if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){
956 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n");
959 targetsize = ucnv_toUChars(myConverter, uchar2, 0, output_cp_buffer, 0, &err);
960 if (targetsize !=0) {
961 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n");
964 targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, strlen(output_cp_buffer), &err);
965 if (err != U_STRING_NOT_TERMINATED_WARNING) {
966 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n",
973 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */
974 /*Clean up re-usable vars*/
975 log_verbose("Testing ucnv_fromUnicode().....\n");
/* Convert the file data into mytarget; mytarget_1 is passed by address as the moving output pointer. */
976 tmp_ucs_buf=ucs_file_buffer_use;
977 ucnv_fromUnicode(myConverter, &mytarget_1,
978 mytarget + MAX_FILE_LEN,
980 ucs_file_buffer_use+i,
984 consumedUni = (UChar*)tmp_consumedUni;
985 (void)consumedUni; /* Suppress set but not used warning. */
989 log_err("FAILURE! %s\n", myErrorName(err));
992 log_verbose("ucnv_fromUnicode() o.k.\n");
994 /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */
995 log_verbose("Testing ucnv_toUnicode().....\n");
/* Convert back into my_ucs_file_buffer; the input end is mytarget_use plus the byte count written above (mytarget_1 - mytarget). */
996 tmp_mytarget_use=mytarget_use;
997 tmp_consumed = consumed;
998 ucnv_toUnicode(myConverter, &my_ucs_file_buffer_1,
999 my_ucs_file_buffer + MAX_FILE_LEN,
1001 mytarget_use + (mytarget_1 - mytarget),
1005 consumed = (char*)tmp_consumed;
1008 log_err("FAILURE! %s\n", myErrorName(err));
1011 log_verbose("ucnv_toUnicode() o.k.\n");
1014 log_verbose("\n---Testing RoundTrip ...\n");
/* uchar3 now takes the round-tripped text and is compared with uchar1, then with uchar2. */
1017 u_strncpy(uchar3, my_ucs_file_buffer,i);
1020 if(u_strcmp(uchar1,uchar3)==0)
1021 log_verbose("Equality test o.k.\n");
1023 log_err("Equality test failed\n");
1028 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__);
1032 if(u_strcmp(uchar2, uchar3)==0)
1033 log_verbose("Equality test o.k.\n");
1035 log_err("Equality test failed\n");
/* Release the file, the converter and the text buffers. */
1038 fclose(ucs_file_in);
1039 ucnv_close(myConverter);
1040 if (uchar1 != 0) free(uchar1);
1041 if (uchar2 != 0) free(uchar2);
1042 if (uchar3 != 0) free(uchar3);
/* Release the four MAX_FILE_LEN work buffers allocated near the top of the function. */
1045 free((void*)mytarget);
1046 free((void*)output_cp_buffer);
1047 free((void*)ucs_file_buffer);
1048 free((void*)my_ucs_file_buffer);
1052 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Returns a from-Unicode callback that differs from MIA:
 * UCNV_FROM_U_CALLBACK_SUBSTITUTE when MIA is UCNV_FROM_U_CALLBACK_STOP,
 * and UCNV_FROM_U_CALLBACK_STOP for any other value.
 */
1053 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA)
1055 return (MIA==(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP)?(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE:(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP;
/*
 * Returns a to-Unicode callback that differs from MIA:
 * UCNV_TO_U_CALLBACK_SUBSTITUTE when MIA is UCNV_TO_U_CALLBACK_STOP,
 * and UCNV_TO_U_CALLBACK_STOP for any other value.
 */
1058 static UConverterToUCallback otherCharAction(UConverterToUCallback MIA)
1060 return (MIA==(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP)?(UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE:(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP;
/*
 * Tests ucnv_flushCache() against converters that are still open.
 *
 * Five converters are opened: "ibm-1047" three times, "gb18030" once and
 * "ibm-954" once. The value returned by ucnv_flushCache() is then checked
 * after each group of ucnv_close() calls:
 *   - nothing closed yet                         -> expects 0
 *   - handles [0] and [1] (ibm-1047) closed      -> expects 0
 *   - handles [2] (ibm-1047) and [3] closed      -> expects 2
 *   - handle [4] (ibm-954) closed                -> expects 1
 * Open failures and count mismatches are reported with log_data_err().
 */
1064 static void TestFlushCache(void) {
1065 #if !UCONFIG_NO_LEGACY_CONVERSION
1066 UErrorCode err = U_ZERO_ERROR;
1067 UConverter* someConverters[5];
1070 /* flush the converter cache to get a consistent state before the flushing is tested */
1073 /*Testing ucnv_open()*/
1074 /* Note: These converters have been chosen because they do NOT
1075 encode the Latin characters (U+0041, ...), and therefore are
1076 highly unlikely to be chosen as system default codepages */
1078 someConverters[0] = ucnv_open("ibm-1047", &err);
1079 if (U_FAILURE(err)) {
1080 log_data_err("FAILURE! %s\n", myErrorName(err));
1083 someConverters[1] = ucnv_open("ibm-1047", &err);
1084 if (U_FAILURE(err)) {
1085 log_data_err("FAILURE! %s\n", myErrorName(err));
1088 someConverters[2] = ucnv_open("ibm-1047", &err);
1089 if (U_FAILURE(err)) {
1090 log_data_err("FAILURE! %s\n", myErrorName(err));
1093 someConverters[3] = ucnv_open("gb18030", &err);
1094 if (U_FAILURE(err)) {
1095 log_data_err("FAILURE! %s\n", myErrorName(err));
1098 someConverters[4] = ucnv_open("ibm-954", &err);
1099 if (U_FAILURE(err)) {
1100 log_data_err("FAILURE! %s\n", myErrorName(err));
1104 /* Testing ucnv_flushCache() */
1105 log_verbose("\n---Testing ucnv_flushCache...\n");
1106 if ((flushCount=ucnv_flushCache())==0)
1107 log_verbose("Flush cache ok\n");
1109 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount);
1111 /*testing ucnv_close() and ucnv_flushCache() */
/* Two of the three ibm-1047 handles are closed here; handle [2] is still open, and the next check expects 0. */
1112 ucnv_close(someConverters[0]);
1113 ucnv_close(someConverters[1]);
1115 if ((flushCount=ucnv_flushCache())==0)
1116 log_verbose("Flush cache ok\n");
1118 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount);
/* The last ibm-1047 handle and the gb18030 handle are closed; the next check expects 2. */
1120 ucnv_close(someConverters[2]);
1121 ucnv_close(someConverters[3]);
1123 if ((flushCount=ucnv_flushCache())==2)
1124 log_verbose("Flush cache ok\n"); /*because first, second and third are same */
1126 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n",
1130 ucnv_close(someConverters[4]);
1131 if ( (flushCount=ucnv_flushCache())==1)
1132 log_verbose("Flush cache ok\n");
1134 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushCount);
1139 * Test the converter alias API, specifically the fuzzy matching of
1140 * alias names and the alias table integrity. Make sure each
1141 * converter has at least one alias (itself), and that its listed
1142 * aliases map back to itself. Check some hard-coded UTF-8 and
1143 * ISO_2022 aliases to make sure they work.
/*
 * Walks every available converter and checks the alias table:
 *   - ucnv_countAliases() must report at least one alias per converter;
 *   - the converter must open, and ucnv_getName() must return the available
 *     name (names containing "PlatformEndian"/"OppositeEndian" are exempt);
 *   - every alias j>=1 must map back, via ucnv_getAlias(alias, 0), to the
 *     same alias[0]; a mismatch is tolerated only when the status is
 *     U_AMBIGUOUS_ALIAS_WARNING and the other converter lists the alias too.
 * Afterwards three hard-coded tables (ISO_2022_NAMES, UTF8_NAMES,
 * CONVERTERS_NAMES) are checked against ucnv_getAlias(..., 0, ...).
 */
1145 static void TestAlias() {
1147 UErrorCode status = U_ZERO_ERROR;
1149 /* Predetermined aliases that we expect to map back to ISO_2022
1150 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */
1151 const char* ISO_2022_NAMES[] =
1152 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2",
1153 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"};
1154 int32_t ISO_2022_NAMES_LENGTH = UPRV_LENGTHOF(ISO_2022_NAMES);
1155 const char *UTF8_NAMES[] =
1156 { "UTF-8", "utf-8", "utf8", "ibm-1208",
1157 "utf_8", "ibm1208", "cp1208" };
1158 int32_t UTF8_NAMES_LENGTH = UPRV_LENGTHOF(UTF8_NAMES);
1163 } CONVERTERS_NAMES[] = {
1164 { "UTF-32BE", "UTF32_BigEndian" },
1165 { "UTF-32LE", "UTF32_LittleEndian" },
1166 { "UTF-32", "ISO-10646-UCS-4" },
1167 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" },
1168 { "UTF-32", "ucs-4" }
1170 int32_t CONVERTERS_NAMES_LENGTH = UPRV_LENGTHOF(CONVERTERS_NAMES);
1172 /* When there are bugs in gencnval or in ucnv_io, converters can
1173 appear to have no aliases. */
1174 ncnv = ucnv_countAvailable();
1175 log_verbose("%d converters\n", ncnv);
1176 for (i=0; i<ncnv; ++i) {
1177 const char *name = ucnv_getAvailableName(i);
1179 uint16_t na = ucnv_countAliases(name, &status);
1184 log_err("FAIL: Converter \"%s\" (i=%d)"
1185 " has no aliases; expect at least one\n",
1189 cnv = ucnv_open(name, &status);
1190 if (U_FAILURE(status)) {
1191 log_data_err("FAIL: Converter \"%s\" (i=%d)"
1192 " can't be opened.\n",
1196 if (strcmp(ucnv_getName(cnv, &status), name) != 0
1197 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) {
1198 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. "
1199 "They should be the same\n",
1200 name, ucnv_getName(cnv, &status));
1205 status = U_ZERO_ERROR;
1206 alias0 = ucnv_getAlias(name, 0, &status);
1207 for (j=1; j<na; ++j) {
1209 /* Make sure each alias maps back to the same list of
1210 aliases. Assume that if alias 0 is the same, the whole
1211 list is the same (this should always be true). */
1212 const char *mapBack;
1214 status = U_ZERO_ERROR;
1215 alias = ucnv_getAlias(name, j, &status);
1216 if (status == U_AMBIGUOUS_ALIAS_WARNING) {
1217 log_err("FAIL: Converter \"%s\"is ambiguous\n", name);
1220 if (alias == NULL) {
1221 log_err("FAIL: Converter \"%s\" -> "
1227 mapBack = ucnv_getAlias(alias, 0, &status);
1229 if (mapBack == NULL) {
1230 log_err("FAIL: Converter \"%s\" -> "
1231 "alias[%d]=\"%s\" -> "
1232 "alias[0]=NULL, exp. \"%s\"\n",
1233 name, j, alias, alias0);
1237 if (0 != strcmp(alias0, mapBack)) {
1239 UBool foundAlias = FALSE;
1240 if (status == U_AMBIGUOUS_ALIAS_WARNING) {
1241 /* Make sure that we only get this mismapping when there is
1242 an ambiguous alias, and the other converter has this alias too. */
1243 for (idx = 0; idx < ucnv_countAliases(mapBack, &status); idx++) {
1244 if (strcmp(ucnv_getAlias(mapBack, (uint16_t)idx, &status), alias) == 0) {
1250 /* else not ambiguous, and this is a real problem. foundAlias = FALSE */
1253 log_err("FAIL: Converter \"%s\" -> "
1254 "alias[%d]=\"%s\" -> "
1255 "alias[0]=\"%s\", exp. \"%s\"\n",
1256 name, j, alias, mapBack, alias0);
1263 /* Check a list of predetermined aliases that we expect to map
1264 * back to ISO_2022 and UTF-8. */
/* Entry 0 of each table is the expected target name, so both loops below start at index 1. */
1265 for (i=1; i<ISO_2022_NAMES_LENGTH; ++i) {
1266 const char* mapBack = ucnv_getAlias(ISO_2022_NAMES[i], 0, &status);
1268 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES[i]);
1271 if (0 != strcmp(mapBack, ISO_2022_NAMES[0])) {
1272 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n",
1273 ISO_2022_NAMES[i], mapBack);
1278 for (i=1; i<UTF8_NAMES_LENGTH; ++i) {
1279 const char* mapBack = ucnv_getAlias(UTF8_NAMES[i], 0, &status);
1281 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES[i]);
1284 if (mapBack && 0 != strcmp(mapBack, UTF8_NAMES[0])) {
1285 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n",
1286 UTF8_NAMES[i], mapBack);
1291 * Check a list of predetermined aliases that we expect to map
1292 * back to predermined converter names.
1295 for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) {
1296 const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status);
/* NOTE(review): the lookup above uses CONVERTERS_NAMES[i].alias, but the message below prints .name - confirm which one is intended. */
1298 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].name);
1301 if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) {
1302 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n",
1303 CONVERTERS_NAMES[i].alias, mapBack, CONVERTERS_NAMES[i].name);
1309 static void TestDuplicateAlias(void) {
1311 UErrorCode status = U_ZERO_ERROR;
1313 status = U_ZERO_ERROR;
1314 alias = ucnv_getStandardName("Shift_JIS", "IBM", &status);
1315 if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) {
1316 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias);
1318 status = U_ZERO_ERROR;
1319 alias = ucnv_getStandardName("ibm-943", "IANA", &status);
1320 if (alias == NULL || strcmp(alias, "Shift_JIS") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) {
1321 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias);
1323 status = U_ZERO_ERROR;
1324 alias = ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status);
1325 if (alias != NULL || status == U_AMBIGUOUS_ALIAS_WARNING) {
1326 log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias);
1331 /* Test safe clone callback */
/*
 * Source of serial numbers for TSCCContext objects (TSCC_clone and TSCC_init
 * store its return value in the serial field). The counter n is a
 * function-local static that starts at 1.
 */
1333 static uint32_t TSCC_nextSerial()
1335 static uint32_t n = 1;
/* Fields of the context object that TSCC_fromU / TSCC_toU receive as their context argument. */
1342 uint32_t magic; /* 0xC0FFEE to identify that the object is OK */
1343 uint32_t serial; /* minted from nextSerial, above */
1344 UBool wasClosed; /* close happened on the object */
1347 static TSCCContext *TSCC_clone(TSCCContext *ctx)
1349 TSCCContext *newCtx = (TSCCContext *)malloc(sizeof(TSCCContext));
1351 newCtx->serial = TSCC_nextSerial();
1352 newCtx->wasClosed = 0;
1353 newCtx->magic = 0xC0FFEE;
1355 log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx, ctx->serial, newCtx, newCtx->serial);
1360 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * From-Unicode callback used by TestConvertSafeCloneCallback.
 *
 * context is a TSCCContext. Every call is logged and the magic value of the
 * context is verified. Two reasons are acted on:
 *   - UCNV_CLONE: a new context is created with TSCC_clone() and installed on
 *     fromUArgs->converter through ucnv_setFromUCallBack(), reusing the
 *     callback function currently reported by ucnv_getFromUCallBack();
 *   - UCNV_CLOSE: ctx->wasClosed is set to TRUE.
 */
1361 static void TSCC_fromU(const void *context,
1362 UConverterFromUnicodeArgs *fromUArgs,
1363 const UChar* codeUnits,
1366 UConverterCallbackReason reason,
1369 TSCCContext *ctx = (TSCCContext*)context;
1370 UConverterFromUCallback junkFrom;
1372 log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, fromUArgs->converter);
1374 if(ctx->magic != 0xC0FFEE) {
1375 log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic);
1379 if(reason == UCNV_CLONE) {
1380 UErrorCode subErr = U_ZERO_ERROR;
1381 TSCCContext *newCtx;
1382 TSCCContext *junkCtx;
1383 TSCCContext **pjunkCtx = &junkCtx;
1386 log_verbose("TSCC_fromU: cloning..\n");
1387 newCtx = TSCC_clone(ctx);
1389 if(newCtx == NULL) {
1390 log_err("TSCC_fromU: internal clone failed on %p\n", ctx);
/* Fetch the callback function currently set on the converter, then set it again with newCtx as its context. */
1394 ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)pjunkCtx);
1395 ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr);
1397 if(U_FAILURE(subErr)) {
1402 if(reason == UCNV_CLOSE) {
1403 log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx, ctx->serial);
1404 ctx->wasClosed = TRUE;
/*
 * To-Unicode callback used by TestConvertSafeCloneCallback; the counterpart
 * of TSCC_fromU.
 *
 * context is a TSCCContext. Every call is logged and the magic value of the
 * context is verified. Two reasons are acted on:
 *   - UCNV_CLONE: a new context is created with TSCC_clone() and installed on
 *     toUArgs->converter through ucnv_setToUCallBack(), reusing the callback
 *     function currently reported by ucnv_getToUCallBack();
 *   - UCNV_CLOSE: ctx->wasClosed is set to TRUE.
 */
1408 static void TSCC_toU(const void *context,
1409 UConverterToUnicodeArgs *toUArgs,
1410 const char* codeUnits,
1412 UConverterCallbackReason reason,
1415 TSCCContext *ctx = (TSCCContext*)context;
1416 UConverterToUCallback junkFrom;
1418 log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, toUArgs->converter);
1420 if(ctx->magic != 0xC0FFEE) {
1421 log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic);
1425 if(reason == UCNV_CLONE) {
1426 UErrorCode subErr = U_ZERO_ERROR;
1427 TSCCContext *newCtx;
1428 TSCCContext *junkCtx;
1429 TSCCContext **pjunkCtx = &junkCtx;
1432 log_verbose("TSCC_toU: cloning..\n");
1433 newCtx = TSCC_clone(ctx);
1435 if(newCtx == NULL) {
1436 log_err("TSCC_toU: internal clone failed on %p\n", ctx);
/* Fetch the callback function currently set on the converter, then set it again with newCtx as its context. */
1440 ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)pjunkCtx);
1441 ucnv_setToUCallBack(toUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr);
1443 if(U_FAILURE(subErr)) {
1448 if(reason == UCNV_CLOSE) {
1449 log_verbose("TSCC_toU: Context %p:%d closing\n", ctx, ctx->serial);
1450 ctx->wasClosed = TRUE;
/* Prepares a caller-provided context q: stamps the 0xC0FFEE magic value and assigns a fresh serial number. */
1454 static void TSCC_init(TSCCContext *q)
1456 q->magic = 0xC0FFEE;
1457 q->serial = TSCC_nextSerial();
/*
 * Logs the state of context q under the label name.
 * A NULL context is reported through log_verbose(), a wrong magic value
 * through log_err(); the regular line shows the address, serial, magic and
 * whether wasClosed is set ("CLOSED" or "open").
 */
1461 static void TSCC_print_log(TSCCContext *q, const char *name)
1464 log_verbose("TSCContext: %s is NULL!!\n", name);
1466 if(q->magic != 0xC0FFEE) {
1467 log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n",
1468 q,q->serial, q->magic);
1470 log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n",
1471 q, q->serial, name, q->magic, q->wasClosed?"CLOSED":"open");
/*
 * Verifies that ucnv_safeClone() notifies the converter callbacks.
 *
 * conv1 ("iso-8859-3") gets TSCC_fromU / TSCC_toU installed with the
 * stack-based contexts from1 / to1 and is then cloned into conv2. The test
 * checks, for both directions, that
 *   - conv1 still reports &from1 / &to1 as its context,
 *   - conv2 reports a different, non-NULL context (created by the callbacks
 *     on UCNV_CLONE), and neither context is marked closed yet;
 *   - in the section logged as "ucnv_closed (conv1)" only from1 / to1 are
 *     marked closed;
 *   - in the section logged as "ucnv_closed (conv2)" from2 / to2 are marked
 *     closed as well.
 * The heap-allocated clone contexts are released with free() at the end.
 */
1475 static void TestConvertSafeCloneCallback()
1477 UErrorCode err = U_ZERO_ERROR;
1478 TSCCContext from1, to1;
1479 TSCCContext *from2, *from3, *to2, *to3;
1480 TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3;
1482 int32_t hunkSize = 8192;
1483 UConverterFromUCallback junkFrom;
1484 UConverterToUCallback junkTo;
1485 UConverter *conv1, *conv2 = NULL;
1487 conv1 = ucnv_open("iso-8859-3", &err);
1489 if(U_FAILURE(err)) {
1490 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err));
1494 log_verbose("Opened conv1=%p\n", conv1);
1499 TSCC_print_log(&from1, "from1");
1500 TSCC_print_log(&to1, "to1");
1502 ucnv_setFromUCallBack(conv1, TSCC_fromU, &from1, NULL, NULL, &err);
1503 log_verbose("Set from1 on conv1\n");
1504 TSCC_print_log(&from1, "from1");
1506 ucnv_setToUCallBack(conv1, TSCC_toU, &to1, NULL, NULL, &err);
1507 log_verbose("Set to1 on conv1\n");
1508 TSCC_print_log(&to1, "to1");
1510 conv2 = ucnv_safeClone(conv1, hunk, &hunkSize, &err);
1511 if(U_FAILURE(err)) {
1512 log_err("safeClone failed: %s\n", u_errorName(err));
1515 log_verbose("Cloned to conv2=%p.\n", conv2);
1517 /********** from *********************/
/* from2 receives the context of the clone, from3 the context of the original converter. */
1518 ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2);
1519 ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3);
1521 TSCC_print_log(from2, "from2");
1522 TSCC_print_log(from3, "from3(==from1)");
1525 log_err("FAIL! from2 is null \n");
1530 log_err("FAIL! from3 is null \n");
1534 if(from3 != (&from1) ) {
1535 log_err("FAIL! conv1's FROM context changed!\n");
1538 if(from2 == (&from1) ) {
1539 log_err("FAIL! conv1's FROM context is the same as conv2's!\n");
1542 if(from1.wasClosed) {
1543 log_err("FAIL! from1 is closed \n");
1546 if(from2->wasClosed) {
1547 log_err("FAIL! from2 was closed\n");
1550 /********** to *********************/
/* to2 receives the context of the clone, to3 the context of the original converter. */
1551 ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2);
1552 ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3);
1554 TSCC_print_log(to2, "to2");
1555 TSCC_print_log(to3, "to3(==to1)");
1558 log_err("FAIL! to2 is null \n");
1563 log_err("FAIL! to3 is null \n");
1567 if(to3 != (&to1) ) {
1568 log_err("FAIL! conv1's TO context changed!\n");
1571 if(to2 == (&to1) ) {
1572 log_err("FAIL! conv1's TO context is the same as conv2's!\n");
1576 log_err("FAIL! to1 is closed \n");
1579 if(to2->wasClosed) {
1580 log_err("FAIL! to2 was closed\n");
1583 /*************************************/
1586 log_verbose("ucnv_closed (conv1)\n");
1587 TSCC_print_log(&from1, "from1");
1588 TSCC_print_log(from2, "from2");
1589 TSCC_print_log(&to1, "to1");
1590 TSCC_print_log(to2, "to2");
1592 if(from1.wasClosed == FALSE) {
1593 log_err("FAIL! from1 is NOT closed \n");
1596 if(from2->wasClosed) {
1597 log_err("FAIL! from2 was closed\n");
1600 if(to1.wasClosed == FALSE) {
1601 log_err("FAIL! to1 is NOT closed \n");
1604 if(to2->wasClosed) {
1605 log_err("FAIL! to2 was closed\n");
1609 log_verbose("ucnv_closed (conv2)\n");
1611 TSCC_print_log(&from1, "from1");
1612 TSCC_print_log(from2, "from2");
1614 if(from1.wasClosed == FALSE) {
1615 log_err("FAIL! from1 is NOT closed \n");
1618 if(from2->wasClosed == FALSE) {
1619 log_err("FAIL! from2 was NOT closed\n");
1622 TSCC_print_log(&to1, "to1");
1623 TSCC_print_log(to2, "to2");
1625 if(to1.wasClosed == FALSE) {
1626 log_err("FAIL! to1 is NOT closed \n");
1629 if(to2->wasClosed == FALSE) {
1630 log_err("FAIL! to2 was NOT closed\n");
1634 free(to2); /* to1 is stack based */
1636 if(from2 != (&from1)) {
1637 free(from2); /* from1 is stack based */
/*
 * Helper used by TestConvertSafeClone to examine the guard regions that were
 * pre-filled with 0xaa.
 * NOTE(review): presumably reports whether any of the length bytes at p
 * differs from b - confirm against the function body.
 */
1643 containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) {
/*
 * Tests ucnv_safeClone().
 *
 * Part 1 (on names[0]) checks the argument handling: NULL status, incoming
 * error status, NULL bufferSize pointer, preflighting with bufferSize 0,
 * U_CNV_SAFECLONE_BUFFERSIZE versus the preflighted size, cloning with the
 * run-time size, too-small buffer, NULL buffer and NULL converter.
 *
 * Part 2 loops over every entry of bufferSizes[] and every converter in
 * names[]: it preflights the clone size (tracking the maximum), clones into
 * buffer[1] of a 0xaa-filled three-row buffer, checks that no heap clone
 * happened when the buffer was large enough, checks that bytes outside the
 * used region still hold 0xaa, and finally runs ucnv_fromUnicode(),
 * ucnv_toUnicode() and ucnv_getNextUChar() on the clone.
 *
 * At the end the largest preflighted size must not exceed
 * U_CNV_SAFECLONE_BUFFERSIZE.
 */
1654 static void TestConvertSafeClone()
1656 /* one 'regular' & all the 'private stateful' converters */
1657 static const char *const names[] = {
1658 #if !UCONFIG_NO_LEGACY_CONVERSION
1660 "ISO_2022,locale=zh,version=1",
1663 #if !UCONFIG_NO_LEGACY_CONVERSION
1667 "ISO_2022,locale=kr,version=1",
1668 "ISO_2022,locale=jp,version=2",
1672 #if !UCONFIG_NO_LEGACY_CONVERSION
1673 "IMAP-mailbox-name",
1680 /* store the actual sizes of each converter */
1681 int32_t actualSizes[UPRV_LENGTHOF(names)];
1683 static const int32_t bufferSizes[] = {
1684 U_CNV_SAFECLONE_BUFFERSIZE,
1685 (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */
1686 (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */
1689 char charBuffer[21]; /* Leave at an odd number for alignment testing */
1690 uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE];
1691 int32_t bufferSize, maxBufferSize;
1692 const char *maxName;
1693 UConverter * cnv, *cnv2;
1697 const char *pConstCharBuffer;
1698 const char *charBufferLimit = charBuffer + UPRV_LENGTHOF(charBuffer);
1699 UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */
1700 UChar uniCharBuffer[20];
1701 char charSourceBuffer[] = { 0x1b, 0x24, 0x42 };
1702 const char *pCharSource = charSourceBuffer;
1703 const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer);
1704 UChar *pUCharTarget = uniCharBuffer;
1705 UChar *pUCharTargetLimit = uniCharBuffer + UPRV_LENGTHOF(uniCharBuffer);
1706 const UChar * pUniBuffer;
1707 const UChar *uniBufferLimit = uniBuffer + UPRV_LENGTHOF(uniBuffer);
1711 cnv = ucnv_open(names[0], &err);
1712 if(U_SUCCESS(err)) {
1713 /* Check the various error & informational states: */
1715 /* Null status - just returns NULL */
1716 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
1717 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, NULL))
1719 log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
1721 /* error status - should return 0 & keep error the same */
1722 err = U_MEMORY_ALLOCATION_ERROR;
1723 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR)
1725 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
1729 /* Null buffer size pointer is ok */
1730 if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], NULL, &err)) || U_FAILURE(err))
1732 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n");
1737 /* buffer size pointer is 0 - fill in pbufferSize with a size */
1739 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0)
1741 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n");
1743 /* Verify our define is large enough */
1744 if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize)
1746 log_err("FAIL: Pre-calculated buffer size is too small\n");
1748 /* Verify we can use this run-time calculated size */
1749 if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err))
1751 log_err("FAIL: Converter can't be cloned with run-time size\n");
1757 /* size one byte too small - should allocate & let us know */
1759 if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
1761 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n");
1768 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
1770 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_WARNING */
1771 if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
1773 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n");
1781 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
1782 if (NULL != ucnv_safeClone(NULL, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
1784 log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
1793 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/
1795 for(j = 0; j < UPRV_LENGTHOF(bufferSizes); ++j) {
1796 for (idx = 0; idx < UPRV_LENGTHOF(names); idx++)
1799 cnv = ucnv_open(names[idx], &err);
1800 if(U_FAILURE(err)) {
1801 log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err));
1806 /* preflight to get maxBufferSize */
1807 actualSizes[idx] = 0;
1808 ucnv_safeClone(cnv, NULL, &actualSizes[idx], &err);
1809 if(actualSizes[idx] > maxBufferSize) {
1810 maxBufferSize = actualSizes[idx];
1811 maxName = names[idx];
/* Fill all three rows with 0xaa; the clone goes into the middle row so that rows 0 and 2 act as guard regions. */
1815 memset(buffer, 0xaa, sizeof(buffer));
1817 bufferSize = bufferSizes[j];
1818 cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err);
1820 /* close the original immediately to make sure that the clone works by itself */
1823 if( actualSizes[idx] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) &&
1824 err == U_SAFECLONE_ALLOCATED_WARNING
1826 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[idx]);
1829 /* check if the clone function overwrote any bytes that it is not supposed to touch */
1830 if(bufferSize <= bufferSizes[j]) {
1831 /* used the stack buffer */
1832 if( containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) ||
1833 containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa)
1835 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n",
1836 names[idx], bufferSize, bufferSizes[j]);
1839 /* heap-allocated the clone */
1840 if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) {
1841 log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n",
1842 names[idx], bufferSize, bufferSizes[j]);
1846 pCharBuffer = charBuffer;
1847 pUniBuffer = uniBuffer;
1849 ucnv_fromUnicode(cnv2,
1858 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err));
1860 ucnv_toUnicode(cnv2,
1871 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err));
1874 pConstCharBuffer = charBuffer;
1875 if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err))
1877 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err));
/* NOTE(review): sizeof(UConverter) has type size_t but is printed with %lu; the two differ where size_t is not unsigned long. */
1883 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1884 sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE);
1885 if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) {
1886 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1887 maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE);
1891 static void TestCCSID() {
1892 #if !UCONFIG_NO_LEGACY_CONVERSION
1894 UErrorCode errorCode;
1895 int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 };
1898 for(i=0; i<UPRV_LENGTHOF(ccsids); ++i) {
1901 errorCode=U_ZERO_ERROR;
1902 cnv=ucnv_openCCSID(ccsid, UCNV_IBM, &errorCode);
1903 if(U_FAILURE(errorCode)) {
1904 log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid, u_errorName(errorCode));
1908 if(ccsid!=ucnv_getCCSID(cnv, &errorCode)) {
1909 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid, ucnv_getCCSID(cnv, &errorCode));
1912 /* skip gb18030(ccsid 1392) */
1913 if(ccsid != 1392 && UCNV_IBM!=ucnv_getPlatform(cnv, &errorCode)) {
1914 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid, ucnv_getPlatform(cnv, &errorCode));
1922 /* jitterbug 932: ucnv_convert() bugs --------------------------------------- */
1924 /* CHUNK_SIZE defined in common\ucnv.c: */
/*
 * Jitterbug 932 driver.
 * CHUNK_SIZE sizes the buffers in bug1(), bug3() and convertExStreaming()
 * below. The three bugN() helpers are declared here and called in order.
 */
1925 #define CHUNK_SIZE 1024
1927 static void bug1(void);
1928 static void bug2(void);
1929 static void bug3(void);
1934 bug1(); /* Unicode intermediate buffer straddle bug */
1935 bug2(); /* pre-flighting size incorrect caused by simple overflow */
1936 bug3(); /* pre-flighting size incorrect caused by expansion overflow */
1940 * jitterbug 932: test chunking boundary conditions in
1942 int32_t ucnv_convert(const char *toConverterName,
1943 const char *fromConverterName,
1950 * See discussions on the icu mailing list in
1951 * 2001-April with the subject "converter 'flush' question".
1953 * Bug report and test code provided by Edward J. Batutis.
/*
 * bug1 body: slides the four-byte GB 18030 sequence 90 30 81 30 (U+10000)
 * across the CHUNK_SIZE boundary of an input buffer otherwise filled with
 * 0x61, converts to "us-ascii" with ucnv_convert() at each position, and
 * reports an error if the conversion ends with U_TRUNCATED_CHAR_FOUND.
 */
1957 #if !UCONFIG_NO_LEGACY_CONVERSION
1958 char char_in[CHUNK_SIZE+32];
1959 char char_out[CHUNK_SIZE*2];
1961 /* GB 18030 equivalent of U+10000 is 90308130 */
1962 static const char test_seq[]={ (char)0x90u, 0x30, (char)0x81u, 0x30 };
1964 UErrorCode err = U_ZERO_ERROR;
1965 int32_t i, test_seq_len = sizeof(test_seq);
1968 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward
1969 * until the straddle bug appears. I didn't want to hard-code everything so this test could
1970 * be expanded - however this is the only type of straddle bug I can think of at the moment -
1971 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no
1972 * other Unicode sequences cause a bug since combining sequences are not supported by the
1976 for (i = test_seq_len; i >= 0; i--) {
1977 /* put character sequence into input buffer */
1978 memset(char_in, 0x61, sizeof(char_in)); /* GB 18030 'a' */
1979 memcpy(char_in + (CHUNK_SIZE - i), test_seq, test_seq_len);
1981 /* do the conversion */
1982 ucnv_convert("us-ascii", /* out */
1991 if (err == U_TRUNCATED_CHAR_FOUND) {
1992 /* this happens when surrogate pair straddles the intermediate buffer in
1993 * T_UConverter_fromCodepageToCodepage */
1994 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n");
/*
 * bug2 body: calls ucnv_convert() with a five-byte target buffer and checks
 * the returned preflighting size for three conversions:
 *   us-ascii -> iso-8859-1 (expects 10),
 *   UTF-8    -> UTF-32BE   (expects 32),
 *   UTF-32BE -> UTF-8      (expects 12).
 * The two UTF cases are compiled only when UCONFIG_ONLY_HTML_CONVERSION is
 * not set.
 */
2000 /* bug2: pre-flighting loop bug: simple overflow causes bug */
2003 /* US-ASCII "0123456789" */
2004 static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 };
2005 #if !UCONFIG_ONLY_HTML_CONVERSION
2006 static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 };
2007 static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30,
2008 0x00, 0x00, 0x00, 0x31,
2009 0x00, 0x00, 0x00, 0x32,
2010 0x00, 0x00, 0x00, 0x33,
2011 0x00, 0x00, 0x00, 0x34,
2012 0x00, 0x00, 0x00, 0x35,
2013 0x00, 0x00, 0x00, 0x36,
2014 0x00, 0x00, 0x00, 0x37,
2015 0x00, 0x00, 0x00, 0x38,
2016 0x00, 0x00, (char)0xf0, 0x00};
2019 static char target[5];
2021 UErrorCode err = U_ZERO_ERROR;
2024 /* do the conversion */
2025 size = ucnv_convert("iso-8859-1", /* out */
2026 "us-ascii", /* in */
2034 /* bug2: size is 5, should be 10 */
2035 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size);
2038 #if !UCONFIG_ONLY_HTML_CONVERSION
2040 /* do the conversion */
2041 size = ucnv_convert("UTF-32BE", /* out */
2050 /* bug2: size is 5, should be 32 */
2051 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size);
2055 /* do the conversion */
2056 size = ucnv_convert("UTF-8", /* out */
2057 "UTF-32BE", /* in */
2061 sizeof(sourceUTF32),
2065 /* bug2: size is 5, should be 12 */
2066 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size);
2072 * bug3: when the characters expand going from source to target codepage
2073 * you get bug3 in addition to bug2
/*
 * bug3 body: preflights two us-ascii -> lmbcs conversions of a
 * CHUNK_SIZE*4 byte input with ucnv_convert().
 *   - input filled with 0x61: the returned size must equal sizeof(char_in);
 *   - input filled with 0x08: the returned size must equal
 *     sizeof(char_in) * 2.
 * NOTE(review): both log_data_err() calls pass a sizeof expression (size_t)
 * for a %x conversion; a cast to unsigned int would make the argument type
 * match the format.
 */
2077 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
2078 char char_in[CHUNK_SIZE*4];
2080 UErrorCode err = U_ZERO_ERROR;
2084 * first get the buggy size from bug2 then
2085 * compare it to buggy size with an expansion
2087 memset(char_in, 0x61, sizeof(char_in)); /* US-ASCII 'a' */
2089 /* do the conversion */
2090 size = ucnv_convert("lmbcs", /* out */
2091 "us-ascii", /* in */
2098 if ( size != sizeof(char_in) ) {
2100 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer
2101 * in the converter?), should be CHUNK_SIZE*4
2103 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize...
2105 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in), size);
2109 * now do the conversion with expansion
2110 * ascii 0x08 expands to 0x0F 0x28 in lmbcs
2112 memset(char_in, 8, sizeof(char_in));
2115 /* do the conversion */
2116 size = ucnv_convert("lmbcs", /* out */
2117 "us-ascii", /* in */
2124 /* expect 2X expansion */
2125 if ( size != sizeof(char_in) * 2 ) {
2128 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05:
2130 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in) * 2, size);
/*
 * Runs ucnv_convertEx() in streaming fashion and compares the result with an
 * expected byte sequence.
 *
 * srcCnv / targetCnv   converters for the input and output side; both are
 *                      reset before the conversion starts
 * src, srcLength       input bytes
 * expectTarget,
 * expectTargetLength   expected output bytes
 * chunkSize            upper bound on the input, pivot and output units
 *                      offered per call; clamped to CHUNK_SIZE
 * testName             label used in failure messages
 * expectCode           expected final error code
 *                      (U_STRING_NOT_TERMINATED_WARNING is accepted when
 *                      U_ZERO_ERROR is expected)
 *
 * Mismatches in the final error code, the output length or the output bytes
 * are reported with log_err().
 */
2136 convertExStreaming(UConverter *srcCnv, UConverter *targetCnv,
2137 const char *src, int32_t srcLength,
2138 const char *expectTarget, int32_t expectTargetLength,
2140 const char *testName,
2141 UErrorCode expectCode) {
2142 UChar pivotBuffer[CHUNK_SIZE];
2143 UChar *pivotSource, *pivotTarget;
2144 const UChar *pivotLimit;
2146 char targetBuffer[CHUNK_SIZE];
2148 const char *srcLimit, *finalSrcLimit, *targetLimit;
2150 int32_t targetLength;
2154 UErrorCode errorCode;
2157 if(chunkSize>CHUNK_SIZE) {
2158 chunkSize=CHUNK_SIZE;
2161 pivotSource=pivotTarget=pivotBuffer;
2162 pivotLimit=pivotBuffer+chunkSize;
2164 finalSrcLimit=src+srcLength;
2165 target=targetBuffer;
2166 targetLimit=targetBuffer+chunkSize;
2168 ucnv_resetToUnicode(srcCnv);
2169 ucnv_resetFromUnicode(targetCnv);
2171 errorCode=U_ZERO_ERROR;
2174 /* convert, streaming-style (both converters and pivot keep state) */
2176 /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */
2177 if(src+chunkSize<=finalSrcLimit) {
2178 srcLimit=src+chunkSize;
2180 srcLimit=finalSrcLimit;
2182 ucnv_convertEx(targetCnv, srcCnv,
2183 &target, targetLimit,
2185 pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,
2186 FALSE, flush, &errorCode);
2187 targetLength=(int32_t)(target-targetBuffer);
2188 if(target>targetLimit) {
2189 log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n",
2190 testName, chunkSize, target, targetLimit);
2191 break; /* TODO: major problem! */
2193 if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
2194 /* continue converting another chunk */
2195 errorCode=U_ZERO_ERROR;
/* Extend the output window by one chunk, but never beyond the end of targetBuffer. */
2196 if(targetLength+chunkSize<=sizeof(targetBuffer)) {
2197 targetLimit=target+chunkSize;
2199 targetLimit=targetBuffer+sizeof(targetBuffer);
2201 } else if(U_FAILURE(errorCode)) {
2207 } else if(src==finalSrcLimit && pivotSource==pivotTarget) {
2208 /* all consumed, now flush without input (separate from conversion for testing) */
2213 if(!(errorCode==expectCode || (expectCode==U_ZERO_ERROR && errorCode==U_STRING_NOT_TERMINATED_WARNING))) {
2214 log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n",
2215 testName, chunkSize, u_errorName(errorCode), u_errorName(expectCode));
2216 } else if(targetLength!=expectTargetLength) {
2217 log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n",
2218 testName, chunkSize, targetLength, expectTargetLength);
2219 } else if(memcmp(targetBuffer, expectTarget, targetLength)!=0) {
2220 log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n",
2221 testName, chunkSize);
/*
 * Runs convertExStreaming() three times on the same input, with chunk sizes
 * 1, 3 and 7; all other arguments are passed through unchanged.
 */
2226 convertExMultiStreaming(UConverter *srcCnv, UConverter *targetCnv,
2227 const char *src, int32_t srcLength,
2228 const char *expectTarget, int32_t expectTargetLength,
2229 const char *testName,
2230 UErrorCode expectCode) {
2231 convertExStreaming(srcCnv, targetCnv,
2233 expectTarget, expectTargetLength,
2234 1, testName, expectCode);
2235 convertExStreaming(srcCnv, targetCnv,
2237 expectTarget, expectTargetLength,
2238 3, testName, expectCode);
2239 convertExStreaming(srcCnv, targetCnv,
2241 expectTarget, expectTargetLength,
2242 7, testName, expectCode);
/*
 * Tests ucnv_convertEx() with a UTF-8 converter (cnv1) and a Shift-JIS
 * converter (cnv2):
 *  - streaming conversion in both directions via convertExMultiStreaming(),
 *  - simple one-shot conversions with NUL-terminated input,
 *  - argument checking (pre-set error code, bad pivot/source arguments).
 * Failures are reported through log_err()/log_data_err().
 */
2245 static void TestConvertEx() {
2246 #if !UCONFIG_NO_LEGACY_CONVERSION
2247 static const uint8_t
2249 /* 4e00 30a1 ff61 0410 */
2250 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2253 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2257 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2258 * SUB, SUB, 0x40, SUB, SUB, 0x40
2260 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40
2263 char srcBuffer[100], targetBuffer[100];
2268 UChar pivotBuffer[100];
2269 UChar *pivotSource, *pivotTarget;
2271 UConverter *cnv1, *cnv2;
2272 UErrorCode errorCode;
/* cnv1 = UTF-8, cnv2 = Shift-JIS */
2274 errorCode=U_ZERO_ERROR;
2275 cnv1=ucnv_open("UTF-8", &errorCode);
2276 if(U_FAILURE(errorCode)) {
2277 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode));
2281 cnv2=ucnv_open("Shift-JIS", &errorCode);
2282 if(U_FAILURE(errorCode)) {
2283 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode));
2288 /* test ucnv_convertEx() with streaming conversion style */
2289 convertExMultiStreaming(cnv1, cnv2,
2290 (const char *)utf8, sizeof(utf8), (const char *)shiftJIS, sizeof(shiftJIS),
2291 "UTF-8 -> Shift-JIS", U_ZERO_ERROR);
2293 convertExMultiStreaming(cnv2, cnv1,
2294 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8),
2295 "Shift-JIS -> UTF-8", U_ZERO_ERROR);
2297 /* U_ZERO_ERROR because by default the SUB callbacks are set */
2298 convertExMultiStreaming(cnv1, cnv2,
2299 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)errorTarget, sizeof(errorTarget),
2300 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR);
2302 /* test some simple conversions */
2304 /* NUL-terminated source and target */
/* errorCode starts as a warning; the check below requires U_ZERO_ERROR afterwards */
2305 errorCode=U_STRING_NOT_TERMINATED_WARNING;
2306 memcpy(srcBuffer, utf8, sizeof(utf8));
2307 srcBuffer[sizeof(utf8)]=0;
2309 target=targetBuffer;
2310 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2311 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode);
2312 if( errorCode!=U_ZERO_ERROR ||
2313 target-targetBuffer!=sizeof(shiftJIS) ||
2315 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0
2317 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n",
2318 u_errorName(errorCode), (int)(target-targetBuffer), (int)sizeof(shiftJIS));
2321 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */
/* the target capacity is exactly sizeof(shiftJIS), so there is no room for a terminator; */
/* targetBuffer is pre-filled with 0xff so that a write past the limit can be detected */
2322 errorCode=U_AMBIGUOUS_ALIAS_WARNING;
2323 memset(targetBuffer, 0xff, sizeof(targetBuffer));
2325 target=targetBuffer;
2326 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(shiftJIS), &src, NULL,
2327 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode);
2328 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2329 target-targetBuffer!=sizeof(shiftJIS) ||
2330 *target!=(char)0xff ||
2331 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0
2333 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n",
2334 u_errorName(errorCode), (int)(target-targetBuffer), (int)sizeof(shiftJIS));
/* an error code that is already set on entry must come back unchanged */
2338 errorCode=U_MESSAGE_PARSE_ERROR;
2340 target=targetBuffer;
2341 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2342 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode);
2343 if(errorCode!=U_MESSAGE_PARSE_ERROR) {
2344 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode));
2347 /* pivotLimit==pivotStart */
2348 errorCode=U_ZERO_ERROR;
2349 pivotSource=pivotTarget=pivotBuffer;
2350 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2351 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer, TRUE, TRUE, &errorCode);
2352 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2353 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode));
2356 /* *pivotSource==NULL */
2357 errorCode=U_ZERO_ERROR;
2359 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2360 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode);
2361 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2362 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode));
/* *source==NULL case (per the message below); U_ILLEGAL_ARGUMENT_ERROR is expected */
2366 errorCode=U_ZERO_ERROR;
2368 pivotSource=pivotBuffer;
2369 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2370 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode);
2371 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2372 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode));
2375 /* streaming conversion without a pivot buffer */
/* pivotStart is NULL and flush is FALSE here; U_ILLEGAL_ARGUMENT_ERROR is expected */
2376 errorCode=U_ZERO_ERROR;
2378 pivotSource=pivotBuffer;
2379 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2380 NULL, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, FALSE, &errorCode);
2381 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2382 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode));
2390 /* Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8(). */
/*
 * Table of ill-formed UTF-8 byte strings. Each entry is a NUL-terminated C
 * string; testFromTruncatedUTF8() and testFromBadUTF8() take the length of
 * an entry with strlen().
 */
2391 static const char *const badUTF8[]={
2395 /* truncated multi-byte sequences */
2432 "\xfc\x80\x80\x80\x80",
2434 /* complete sequences but non-shortest forms or out of range etc. */
2440 "\xf8\x80\x80\x80\x80",
2441 "\xfc\x80\x80\x80\x80\x80",
/* capacity of the char0[] and char1[] output buffers that getTestChar() fills */
2446 #define ARG_CHAR_ARR_SIZE 8
2448 /* get some character that can be converted and convert it */
/*
 * Picks the code point at the middle index of the converter's roundtrip set
 * (UCNV_ROUNDTRIP_SET), stores its UTF-8 form in charUTF8 and converts its
 * UTF-16 form with ucnv_fromUnicode() twice: the first output goes to char0,
 * the second to char1 (both limited to ARG_CHAR_ARR_SIZE bytes). The byte
 * counts are returned through the three length pointers.
 * On a failing errorCode an error naming converterName is logged.
 * NOTE(review): the return statements are not visible in this listing;
 * the caller tests the result with '!', so a false result presumably
 * signals failure - confirm.
 */
2449 static UBool getTestChar(UConverter *cnv, const char *converterName,
2450 char charUTF8[4], int32_t *pCharUTF8Length,
2451 char char0[ARG_CHAR_ARR_SIZE], int32_t *pChar0Length,
2452 char char1[ARG_CHAR_ARR_SIZE], int32_t *pChar1Length) {
2453 UChar utf16[U16_MAX_LENGTH];
2454 int32_t utf16Length;
2456 const UChar *utf16Source;
2461 UErrorCode errorCode;
2463 errorCode=U_ZERO_ERROR;
2464 set=uset_open(1, 0);
2465 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
/* take the element in the middle of the set as the test character */
2466 c=uset_charAt(set, uset_size(set)/2);
2470 U16_APPEND_UNSAFE(utf16, utf16Length, c);
2472 U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c);
/* first conversion: output into char0 */
2476 ucnv_fromUnicode(cnv,
2477 &target, char0+ARG_CHAR_ARR_SIZE,
2478 &utf16Source, utf16+utf16Length,
2479 NULL, FALSE, &errorCode);
2480 *pChar0Length=(int32_t)(target-char0);
/* second conversion of the same text: output into char1 */
2484 ucnv_fromUnicode(cnv,
2485 &target, char1+ARG_CHAR_ARR_SIZE,
2486 &utf16Source, utf16+utf16Length,
2487 NULL, FALSE, &errorCode);
2488 *pChar1Length=(int32_t)(target-char1);
2490 if(U_FAILURE(errorCode)) {
2491 log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode));
/*
 * For entries of badUTF8[] that are truncated sequences, builds the input
 * "test character + truncated sequence", converts it from UTF-8 (utf8Cnv,
 * with the STOP to-Unicode callback) to cnv using ucnv_convertEx() with
 * reset and flush set, and checks that
 *  - the result is U_TRUNCATED_CHAR_FOUND with pivotSource still at the
 *    start of the pivot buffer, and
 *  - ucnv_getInvalidChars(utf8Cnv) returns exactly the truncated bytes.
 * converterName is only used in the log messages.
 * NOTE(review): char0/char1 and their lengths are not referenced in the
 * visible lines of this function.
 */
2497 static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName,
2498 char charUTF8[4], int32_t charUTF8Length,
2499 char char0[8], int32_t char0Length,
2500 char char1[8], int32_t char1Length) {
2505 int32_t outputLength;
2507 char invalidChars[8];
2508 int8_t invalidLength;
2513 UChar pivotBuffer[8];
2514 UChar *pivotSource, *pivotTarget;
2516 UErrorCode errorCode;
2519 /* test truncated sequences */
2520 errorCode=U_ZERO_ERROR;
2521 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
/* the test character is copied once; each bad sequence is appended after it */
2523 memcpy(utf8, charUTF8, charUTF8Length);
2525 for(i=0; i<UPRV_LENGTHOF(badUTF8); ++i) {
2526 /* truncated sequence? */
2527 int32_t length=strlen(badUTF8[i]);
/* true when the entry has at least lead byte + all trail bytes, i.e. it is not truncated; */
/* the body of this branch is not visible here - presumably such entries are skipped */
2528 if(length>=(1+U8_COUNT_TRAIL_BYTES(badUTF8[i][0]))) {
2532 /* assemble a string with the test character and the truncated sequence */
2533 memcpy(utf8+charUTF8Length, badUTF8[i], length);
2534 utf8Length=charUTF8Length+length;
2536 /* convert and check the invalidChars */
2539 pivotSource=pivotTarget=pivotBuffer;
2540 errorCode=U_ZERO_ERROR;
2541 ucnv_convertEx(cnv, utf8Cnv,
2542 &target, output+sizeof(output),
2543 &source, utf8+utf8Length,
2544 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+UPRV_LENGTHOF(pivotBuffer),
2545 TRUE, TRUE, /* reset & flush */
2547 outputLength=(int32_t)(target-output);
2548 (void)outputLength; /* Suppress set but not used warning. */
2549 if(errorCode!=U_TRUNCATED_CHAR_FOUND || pivotSource!=pivotBuffer) {
2550 log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode), converterName, (long)i);
/* the bytes reported as invalid by the UTF-8 converter must equal the truncated sequence */
2554 errorCode=U_ZERO_ERROR;
2555 invalidLength=(int8_t)sizeof(invalidChars);
2556 ucnv_getInvalidChars(utf8Cnv, invalidChars, &invalidLength, &errorCode);
2557 if(invalidLength!=length || 0!=memcmp(invalidChars, badUTF8[i], length)) {
2558 log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName, (long)i);
/*
 * Builds one long UTF-8 input in which every badUTF8[] entry is followed by
 * the test character (charUTF8), preceded by one leading test character,
 * and an expected output made of char0 followed by one char1 per entry.
 * With the SKIP to-Unicode callback set on utf8Cnv, the input is then run
 * through convertExMultiStreaming() from utf8Cnv to cnv under the test name
 * "from bad UTF-8 to <converterName>".
 */
2563 static void testFromBadUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName,
2564 char charUTF8[4], int32_t charUTF8Length,
2565 char char0[8], int32_t char0Length,
2566 char char1[8], int32_t char1Length) {
2567 char utf8[600], expect[600];
2568 int32_t utf8Length, expectLength;
2572 UErrorCode errorCode;
2575 errorCode=U_ZERO_ERROR;
2576 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &errorCode);
2579 * assemble an input string with the test character between each
2581 * and an expected string with repeated test character output
2583 memcpy(utf8, charUTF8, charUTF8Length);
2584 utf8Length=charUTF8Length;
2586 memcpy(expect, char0, char0Length);
2587 expectLength=char0Length;
2589 for(i=0; i<UPRV_LENGTHOF(badUTF8); ++i) {
2590 int32_t length=strlen(badUTF8[i]);
2591 memcpy(utf8+utf8Length, badUTF8[i], length);
2594 memcpy(utf8+utf8Length, charUTF8, charUTF8Length);
2595 utf8Length+=charUTF8Length;
2597 memcpy(expect+expectLength, char1, char1Length);
2598 expectLength+=char1Length;
2601 /* expect that each bad UTF-8 sequence is detected and skipped */
2602 strcpy(testName, "from bad UTF-8 to ");
2603 strcat(testName, converterName);
2605 convertExMultiStreaming(utf8Cnv, cnv,
2607 expect, expectLength,
2612 /* Test illegal UTF-8 input. */
/*
 * Opens a UTF-8 converter once, then for every name in converterNames[]
 * opens that converter, obtains a convertible test character with
 * getTestChar() and runs testFromTruncatedUTF8() and testFromBadUTF8()
 * against it. Converters that cannot be opened are reported with
 * log_data_err().
 */
2613 static void TestConvertExFromUTF8() {
2614 static const char *const converterNames[]={
2615 #if !UCONFIG_NO_LEGACY_CONVERSION
2624 UConverter *utf8Cnv, *cnv;
2625 UErrorCode errorCode;
2628 /* fromUnicode versions of some character, from initial state and later */
2629 char charUTF8[4], char0[8], char1[8];
2630 int32_t charUTF8Length, char0Length, char1Length;
2632 errorCode=U_ZERO_ERROR;
2633 utf8Cnv=ucnv_open("UTF-8", &errorCode);
2634 if(U_FAILURE(errorCode)) {
2635 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode));
2639 for(i=0; i<UPRV_LENGTHOF(converterNames); ++i) {
2640 errorCode=U_ZERO_ERROR;
2641 cnv=ucnv_open(converterNames[i], &errorCode);
2642 if(U_FAILURE(errorCode)) {
2643 log_data_err("unable to open %s converter - %s\n", converterNames[i], u_errorName(errorCode));
/* NOTE(review): the handling of a failed getTestChar() is not visible here; */
/* verify that cnv is closed on that path */
2646 if(!getTestChar(cnv, converterNames[i], charUTF8, &charUTF8Length, char0, &char0Length, char1, &char1Length)) {
2649 testFromTruncatedUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length);
2650 testFromBadUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length);
2653 ucnv_close(utf8Cnv);
/*
 * Converts the ill-formed two-byte UTF-8 input C5 F0 to each converter in
 * converterNames[] with ucnv_convertEx(), after setting the from-Unicode
 * callback of the target converter to ESCAPE with UCNV_ESCAPE_XML_DEC.
 * The destination capacity is exactly expectedLength, so the call must end
 * with U_STRING_NOT_TERMINATED_WARNING, consume both source bytes, produce
 * the expected bytes and leave dest[expectedLength] (pre-filled with 9)
 * untouched. For the last table entry the visible comment says the
 * expectation is two U+FFFD in UTF-8; for the others it is two decimal NCRs.
 */
2656 static void TestConvertExFromUTF8_C5F0() {
2657 static const char *const converterNames[]={
2658 #if !UCONFIG_NO_LEGACY_CONVERSION
2667 UConverter *utf8Cnv, *cnv;
2668 UErrorCode errorCode;
2671 static const char bad_utf8[2]={ (char)0xC5, (char)0xF0 };
2672 /* Expect "&#65533;&#65533;" (2x U+FFFD as decimal NCRs) */
2673 static const char twoNCRs[16]={
2674 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B,
2675 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B
/* U+FFFD encoded in UTF-8 (EF BF BD), twice */
2677 static const char twoFFFD[6]={
2678 (char)0xef, (char)0xbf, (char)0xbd,
2679 (char)0xef, (char)0xbf, (char)0xbd
2681 const char *expected;
2682 int32_t expectedLength;
2683 char dest[20]; /* longer than longest expectedLength */
2688 UChar pivotBuffer[128];
2689 UChar *pivotSource, *pivotTarget;
2691 errorCode=U_ZERO_ERROR;
2692 utf8Cnv=ucnv_open("UTF-8", &errorCode);
2693 if(U_FAILURE(errorCode)) {
2694 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode));
2698 for(i=0; i<UPRV_LENGTHOF(converterNames); ++i) {
2699 errorCode=U_ZERO_ERROR;
2700 cnv=ucnv_open(converterNames[i], &errorCode);
2701 ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
2702 NULL, NULL, &errorCode);
2703 if(U_FAILURE(errorCode)) {
2704 log_data_err("unable to open %s converter - %s\n",
2705 converterNames[i], u_errorName(errorCode));
/* fill dest with the sentinel value 9 so that an overrun past expectedLength is detectable */
2710 uprv_memset(dest, 9, sizeof(dest));
2711 if(i==UPRV_LENGTHOF(converterNames)-1) {
2712 /* conversion to UTF-8 yields two U+FFFD directly */
2716 /* conversion to a non-Unicode charset yields two NCRs */
2723 pivotSource=pivotTarget=pivotBuffer;
2726 &target, dest+expectedLength,
2727 &src, bad_utf8+sizeof(bad_utf8),
2728 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+UPRV_LENGTHOF(pivotBuffer),
2729 TRUE, TRUE, &errorCode);
2730 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || src!=bad_utf8+2 ||
2731 target!=dest+expectedLength || 0!=uprv_memcmp(dest, expected, expectedLength) ||
2732 dest[expectedLength]!=9
2734 log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames[i]);
2738 ucnv_close(utf8Cnv);
/*
 * Tests the convenience functions ucnv_fromAlgorithmic() and
 * ucnv_toAlgorithmic() with a Shift-JIS converter and the algorithmic types
 * UCNV_UTF8, UCNV_UTF16 and UCNV_UTF32:
 *  - exact-capacity output (expects U_STRING_NOT_TERMINATED_WARNING),
 *  - NUL-terminated input and output,
 *  - empty and BOM-only input,
 *  - argument checking (pre-set error code, NULL source, illegal type).
 * Failures are reported through log_err()/log_data_err().
 */
2742 TestConvertAlgorithmic() {
2743 #if !UCONFIG_NO_LEGACY_CONVERSION
2744 static const uint8_t
2746 /* 4e00 30a1 ff61 0410 */
2747 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2750 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2754 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2755 * SUB, SUB, 0x40, SUB, SUB, 0x40
2757 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/
2760 0xfe, 0xff /* BOM only, no text */
2762 #if !UCONFIG_ONLY_HTML_CONVERSION
2763 static const uint8_t utf32[]={
2764 0xff, 0xfe, 0, 0 /* BOM only, no text */
2768 char target[100], utf8NUL[100], shiftJISNUL[100];
2771 UErrorCode errorCode;
2775 errorCode=U_ZERO_ERROR;
2776 cnv=ucnv_open("Shift-JIS", &errorCode);
2777 if(U_FAILURE(errorCode)) {
2778 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode));
/* NUL-terminated copies of both test strings for the length==-1 calls */
2783 memcpy(utf8NUL, utf8, sizeof(utf8));
2784 utf8NUL[sizeof(utf8)]=0;
2785 memcpy(shiftJISNUL, shiftJIS, sizeof(shiftJIS));
2786 shiftJISNUL[sizeof(shiftJIS)]=0;
2789 * The to/from algorithmic convenience functions share a common implementation,
2790 * so we need not test all permutations of them.
2793 /* length in, not terminated out */
2794 errorCode=U_ZERO_ERROR;
2795 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF8, target, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), &errorCode);
2796 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2797 length!=sizeof(shiftJIS) ||
2798 memcmp(target, shiftJIS, length)!=0
2800 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n",
2801 u_errorName(errorCode), length, (int)sizeof(shiftJIS));
2804 /* terminated in and out */
2805 memset(target, 0x55, sizeof(target));
2806 errorCode=U_STRING_NOT_TERMINATED_WARNING;
2807 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, -1, &errorCode);
2808 if( errorCode!=U_ZERO_ERROR ||
2809 length!=sizeof(utf8) ||
2810 memcmp(target, utf8, length)!=0
/* the expected length reported here is sizeof(utf8), matching the check above */
2812 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n",
2813 u_errorName(errorCode), length, (int)sizeof(utf8));
2816 /* empty string, some target buffer */
2817 errorCode=U_STRING_NOT_TERMINATED_WARNING;
2818 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, 0, &errorCode);
2819 if( errorCode!=U_ZERO_ERROR ||
2822 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n",
2823 u_errorName(errorCode), length);
2826 /* pseudo-empty string, no target buffer */
2827 errorCode=U_ZERO_ERROR;
2828 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode);
2829 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2832 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
2833 u_errorName(errorCode), length);
2836 #if !UCONFIG_ONLY_HTML_CONVERSION
2837 errorCode=U_ZERO_ERROR;
2838 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode);
2839 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2842 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
2843 u_errorName(errorCode), length);
/* an error code that is already set on entry must come back unchanged */
2848 errorCode=U_MESSAGE_PARSE_ERROR;
2849 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode);
2850 if(errorCode!=U_MESSAGE_PARSE_ERROR) {
2851 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode));
/* NULL source with a non-zero length: U_ILLEGAL_ARGUMENT_ERROR is expected */
2855 errorCode=U_ZERO_ERROR;
2856 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, NULL, 2, &errorCode);
2857 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2858 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode));
2861 /* illegal alg. type */
2862 errorCode=U_ZERO_ERROR;
2863 length=ucnv_fromAlgorithmic(cnv, (UConverterType)99, target, 0, (const char *)utf16, 2, &errorCode);
2864 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2865 log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode));
2871 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Table-driven check of ucnv_getMaxCharSize(): every table entry pairs an
 * expected maximum character size (maxSize) with a converter name (name).
 * Each converter is opened through cnv_open() and its reported maximum is
 * compared with the table value. Finally the macro
 * UCNV_GET_MAX_BYTES_FOR_STRING(1, 2) is required to be at least 10.
 */
2872 static void TestLMBCSMaxChar(void) {
2873 static const struct {
2877 /* some non-LMBCS converters - perfect test setup here */
2888 { 4, "IMAP-mailbox-name"},
2891 { 1, "windows-1256"},
2903 { 8, "ISO-2022-KR"},
2904 { 6, "ISO-2022-JP"},
2905 { 8, "ISO-2022-CN"},
2923 for (idx = 0; idx < UPRV_LENGTHOF(converter); idx++) {
2924 UErrorCode status = U_ZERO_ERROR;
2925 UConverter *cnv = cnv_open(converter[idx].name, &status);
2926 if (U_FAILURE(status)) {
2929 if (converter[idx].maxSize != ucnv_getMaxCharSize(cnv)) {
2930 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n",
2931 converter[idx].name, converter[idx].maxSize, ucnv_getMaxCharSize(cnv));
2936 /* mostly test that the macro compiles */
2937 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) {
2938 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n");
/*
 * Checks how ucnv_openU() and ucnv_open() react to over-long converter
 * names. A very long name is truncated (or patched with ',', ' ' or
 * "locale=") around UCNV_MAX_CONVERTER_NAME_LENGTH, and each open attempt
 * must fail with the error code named in the corresponding message:
 * U_ILLEGAL_ARGUMENT_ERROR or U_FILE_ACCESS_ERROR. A non-NULL converter is
 * always a failure.
 * NOTE(review): the lines that reset err between the steps are not visible
 * in this listing.
 */
2943 static void TestJ1968(void) {
2944 UErrorCode err = U_ZERO_ERROR;
2946 char myConvName[] = "My really really really really really really really really really really really"
2947 " really really really really really really really really really really really"
2948 " really really really really really really really really long converter name";
2949 UChar myConvNameU[sizeof(myConvName)];
2951 u_charsToUChars(myConvName, myConvNameU, sizeof(myConvName));
/* UChar name, terminated at MAX+1, MAX and MAX-1 characters */
2954 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH+1] = 0;
2955 cnv = ucnv_openU(myConvNameU, &err);
2956 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2957 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
2961 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0;
2962 cnv = ucnv_openU(myConvNameU, &err);
2963 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2964 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
2968 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0;
2969 cnv = ucnv_openU(myConvNameU, &err);
2970 if (cnv || err != U_FILE_ACCESS_ERROR) {
2971 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
/* char name: same checks, with ',' (option separator) placed at various positions */
2978 cnv = ucnv_open(myConvName, &err);
2979 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2980 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
2984 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = ',';
2985 cnv = ucnv_open(myConvName, &err);
2986 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2987 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
2991 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ',';
2992 cnv = ucnv_open(myConvName, &err);
2993 if (cnv || err != U_FILE_ACCESS_ERROR) {
2994 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
2998 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ',';
/* strncpy with n==7 copies "locale=" without a terminating NUL, so the rest of the name stays in place */
2999 strncpy(myConvName + UCNV_MAX_CONVERTER_NAME_LENGTH, "locale=", 7);
3000 cnv = ucnv_open(myConvName, &err);
3001 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3002 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3005 /* The comma isn't really a part of the converter name. */
3007 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0;
3008 cnv = ucnv_open(myConvName, &err);
3009 if (cnv || err != U_FILE_ACCESS_ERROR) {
3010 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
3014 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ' ';
3015 cnv = ucnv_open(myConvName, &err);
3016 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3017 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3021 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0;
3022 cnv = ucnv_open(myConvName, &err);
3023 if (cnv || err != U_FILE_ACCESS_ERROR) {
3024 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
3029 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Tests the LF/NL swapping option for one converter.
 *  name - converter name; the option string UCNV_SWAP_LFNL_OPTION_STRING is
 *         appended to it to open the swapping variant.
 *  swap - whether the converter is expected to honor the option: the name
 *         returned by ucnv_getName() for the swapping converter must
 *         contain the option string if and only if swap is true.
 * The fixed text[] is converted with both converters; the outputs must have
 * equal lengths and differ only by the LF/NL swap. The bytes from the normal
 * converter are then converted back to Unicode with both converters and
 * compared the same way.
 * NOTE(review): the 32-byte buffer 'swapped' first holds the converter name
 * plus option string (strcpy/strcat, no bounds check) and is later reused
 * as a conversion output buffer.
 */
3031 testSwap(const char *name, UBool swap) {
3033 * Test Unicode text.
3034 * Contains characters that are the highest for some of the
3035 * tested conversions, to make sure that the ucnvmbcs.c code that modifies the
3036 * tables copies the entire tables.
3038 static const UChar text[]={
3039 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a
3042 UChar uNormal[32], uSwapped[32];
3043 char normal[32], swapped[32];
3047 int32_t i, normalLength, swappedLength;
3051 const char *swappedName;
3052 UConverter *cnv, *swapCnv;
3053 UErrorCode errorCode;
3055 /* if the swap flag is FALSE, then the test encoding is not EBCDIC and must not swap */
3057 /* open both the normal and the LF/NL-swapping converters */
3058 strcpy(swapped, name);
3059 strcat(swapped, UCNV_SWAP_LFNL_OPTION_STRING);
3061 errorCode=U_ZERO_ERROR;
3062 swapCnv=ucnv_open(swapped, &errorCode);
3063 cnv=ucnv_open(name, &errorCode);
3064 if(U_FAILURE(errorCode)) {
3065 log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name, swapped, u_errorName(errorCode));
3069 /* the name must contain the swap option if and only if we expect the converter to swap */
3070 swappedName=ucnv_getName(swapCnv, &errorCode);
3071 if(U_FAILURE(errorCode)) {
3072 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name, u_errorName(errorCode));
3076 pc=strstr(swappedName, UCNV_SWAP_LFNL_OPTION_STRING);
3077 if(swap != (pc!=NULL)) {
3078 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name, swappedName, swap);
3082 /* convert to EBCDIC */
3085 ucnv_fromUnicode(cnv, &pc, normal+UPRV_LENGTHOF(normal), &pcu, text+UPRV_LENGTHOF(text), NULL, TRUE, &errorCode);
3086 normalLength=(int32_t)(pc-normal);
3090 ucnv_fromUnicode(swapCnv, &pc, swapped+UPRV_LENGTHOF(swapped), &pcu, text+UPRV_LENGTHOF(text), NULL, TRUE, &errorCode);
3091 swappedLength=(int32_t)(pc-swapped);
3093 if(U_FAILURE(errorCode)) {
3094 log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name, u_errorName(errorCode));
3098 /* compare EBCDIC output */
3099 if(normalLength!=swappedLength) {
3100 log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name, normalLength, swappedLength);
3103 for(i=0; i<normalLength; ++i) {
3104 /* swap EBCDIC LF/NL for comparison */
3109 } else if(c==0x25) {
3115 log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name, i, (uint8_t)swapped[i]);
3120 /* convert back to Unicode (may not roundtrip) */
3123 ucnv_toUnicode(cnv, &pu, uNormal+UPRV_LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode);
3124 normalLength=(int32_t)(pu-uNormal);
/* the source limit below is computed from normal[]; the two byte lengths were compared above */
3128 ucnv_toUnicode(swapCnv, &pu, uSwapped+UPRV_LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode);
3129 swappedLength=(int32_t)(pu-uSwapped);
3131 if(U_FAILURE(errorCode)) {
3132 log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name, u_errorName(errorCode));
3136 /* compare Unicode output */
3137 if(normalLength!=swappedLength) {
3138 log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name, normalLength, swappedLength);
3141 for(i=0; i<normalLength; ++i) {
3142 /* swap Unicode LF/NL for comparison */
3147 } else if(u==0x85) {
3152 if(u!=uSwapped[i]) {
3153 log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name, i, uSwapped[i]);
3161 ucnv_close(swapCnv);
/*
 * Runs testSwap() for every entry of tests[]. Each entry gives a converter
 * name and whether that converter is expected to swap LF/NL: TRUE for the
 * three ibm-* converters listed, FALSE for iso-8859-3.
 */
3165 TestEBCDICSwapLFNL() {
3166 static const struct {
3171 { "ibm-1047", TRUE },
3172 { "ibm-1140", TRUE },
3173 { "ibm-930", TRUE },
3174 { "iso-8859-3", FALSE }
3179 for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
3180 testSwap(tests[i].name, tests[i].swap);
/* second definition with an empty body; the preprocessor lines that select it are not visible here */
/* (presumably the branch for builds without legacy conversion) */
3185 TestEBCDICSwapLFNL() {
3186 /* test nothing... */
/*
 * Tests ucnv_fromUCountPending() with the "test3" converter from the test
 * data package.
 * Part 1: for each entry of fromUnicodeTests[] (fields input, len, exp) the
 * input is converted with flush==FALSE and the pending count must equal exp.
 * Part 2: three pieces (head, middle, tail) are fed in sequence into a
 * two-byte target; after each step the pending count is checked, and the
 * last conversion must end with U_BUFFER_OVERFLOW_ERROR.
 * Failures are reported through log_err(); a missing test converter is
 * reported through log_data_err().
 */
3190 static void TestFromUCountPending(){
3191 #if !UCONFIG_NO_LEGACY_CONVERSION
3192 UErrorCode status = U_ZERO_ERROR;
3193 /* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */
3194 static const struct {
3198 }fromUnicodeTests[] = {
3201 {{ 0xdbc4, 0xde34, 0xd84d},3,1},
3202 {{ 0xdbc4, 0xde34, 0xd900},3,3},
3205 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status);
3206 if(U_FAILURE(status)){
3207 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3210 for(i=0; i<UPRV_LENGTHOF(fromUnicodeTests); ++i) {
3213 char* targetLimit = target + 10;
3214 const UChar* source = fromUnicodeTests[i].input;
3215 const UChar* sourceLimit = source + fromUnicodeTests[i].len;
3218 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3219 len = ucnv_fromUCountPending(cnv, &status);
3220 if(U_FAILURE(status)){
3221 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3222 status = U_ZERO_ERROR;
3225 if(len != fromUnicodeTests[i].exp){
3226 log_err("Did not get the expected output for ucnv_fromUCountPending.\n");
3229 status = U_ZERO_ERROR;
3232 * The converter has to read the tail before it knows that
3233 * only head alone matches.
3234 * At the end, the output for head will overflow the target,
3235 * middle will be pending, and tail will not have been consumed.
3238 \U00101234 -> x (<U101234> \x07 |0)
3239 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0)
3240 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0)
3241 \U00060007 -> unassigned
3243 static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */
3244 static const UChar middle[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */
3245 static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */
3248 char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */
3249 const UChar* source = head;
3250 const UChar* sourceLimit = source + u_strlen(head);
3253 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3254 len = ucnv_fromUCountPending(cnv, &status);
3255 if(U_FAILURE(status)){
3256 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3257 status = U_ZERO_ERROR;
3260 log_err("ucnv_fromUCountPending did not return correct length for head\n");
3263 sourceLimit = source + u_strlen(middle);
3264 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3265 len = ucnv_fromUCountPending(cnv, &status);
3266 if(U_FAILURE(status)){
3267 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3268 status = U_ZERO_ERROR;
3271 log_err("ucnv_fromUCountPending did not return correct length for middle\n");
3274 sourceLimit = source + u_strlen(tail);
3275 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3276 if(status != U_BUFFER_OVERFLOW_ERROR){
3277 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3279 status = U_ZERO_ERROR;
3280 len = ucnv_fromUCountPending(cnv, &status);
3281 /* middle[1] is pending, tail has not been consumed */
3282 if(U_FAILURE(status)){
3283 log_err("ucnv_fromUCountPending call did not succeed. Error: %s\n", u_errorName(status));
3286 log_err("ucnv_fromUCountPending did not return correct length for tail\n");
/*
 * Tests ucnv_toUCountPending().
 * Part 1 uses the "test3" converter: for each entry of toUnicodeTests[]
 * (fields input, len, exp) the bytes are converted with flush==FALSE and
 * the pending count must equal exp.
 * Part 2 uses the "test4" converter: three byte sequences (head, mid, tail)
 * are fed in sequence into a one-UChar target; after each step the pending
 * count is checked, and the last conversion must end with
 * U_BUFFER_OVERFLOW_ERROR.
 * Both converters get the STOP to-Unicode callback. A missing test
 * converter is reported through log_data_err(), other failures through
 * log_err().
 */
3294 TestToUCountPending(){
3295 #if !UCONFIG_NO_LEGACY_CONVERSION
3296 UErrorCode status = U_ZERO_ERROR;
3297 static const struct {
3301 }toUnicodeTests[] = {
3303 {{0x05, 0x01, 0x02},3,3},
3305 {{0x07, 0x00, 0x01, 0x02},4,4},
/* stays NULL: it is passed as the oldAction argument, so no previous callback is requested */
3309 UConverterToUCallback *oldToUAction= NULL;
3310 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status);
3311 if(U_FAILURE(status)){
3312 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3315 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status);
3316 for(i=0; i<UPRV_LENGTHOF(toUnicodeTests); ++i) {
3318 UChar* target = tgt;
3319 UChar* targetLimit = target + 20;
3320 const char* source = toUnicodeTests[i].input;
3321 const char* sourceLimit = source + toUnicodeTests[i].len;
3324 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3325 len = ucnv_toUCountPending(cnv,&status);
3326 if(U_FAILURE(status)){
3327 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3328 status = U_ZERO_ERROR;
3331 if(len != toUnicodeTests[i].exp){
3332 log_err("Did not get the expected output for ucnv_toUCountPending.\n");
3335 status = U_ZERO_ERROR;
3340 * The converter has to read the tail before it knows that
3341 * only head alone matches.
3342 * At the end, the output for head will overflow the target,
3343 * mid will be pending, and tail will not have been consumed.
3345 char head[] = { 0x01, 0x02, 0x03, 0x0a , 0x00};
3346 char mid[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 };
3347 char tail[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 };
3349 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0)
3350 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0)
3351 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3)
3352 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar")
3355 UChar* target = tgt;
3356 UChar* targetLimit = target + 1; /* expect overflow from converting */
3357 const char* source = head;
3358 const char* sourceLimit = source + strlen(head);
3360 cnv = ucnv_openPackage(loadTestData(&status), "test4", &status);
3361 if(U_FAILURE(status)){
3362 log_data_err("Could not create converter for test4. Error: %s\n", u_errorName(status));
3365 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status);
3366 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3367 len = ucnv_toUCountPending(cnv,&status);
3368 if(U_FAILURE(status)){
3369 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3372 log_err("Did not get the expected len for head.\n");
3375 sourceLimit = source+strlen(mid);
3376 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3377 len = ucnv_toUCountPending(cnv,&status);
3378 if(U_FAILURE(status)){
3379 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3382 log_err("Did not get the expected len for mid.\n");
3386 sourceLimit = source+strlen(tail);
/* no target space is left for this call, so a buffer overflow is the expected outcome */
3387 targetLimit = target;
3388 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3389 if(status != U_BUFFER_OVERFLOW_ERROR){
3390 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3392 status = U_ZERO_ERROR;
3393 len = ucnv_toUCountPending(cnv,&status);
3394 /* mid[4] is pending, tail has not been consumed */
3395 if(U_FAILURE(status)){
3396 log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status));
3399 log_err("Did not get the expected len for tail.\n");
/*
 * Sets the default converter name to 'name' and verifies that
 *  - ucnv_getDefaultName() then returns 'expected', and
 *  - a converter opened with a NULL name reports 'expected' through
 *    ucnv_getName().
 * Successes are logged with log_verbose(), mismatches with log_err().
 */
3406 static void TestOneDefaultNameChange(const char *name, const char *expected) {
3407 UErrorCode status = U_ZERO_ERROR;
3409 ucnv_setDefaultName(name);
3410 if(strcmp(ucnv_getDefaultName(), expected)==0)
3411 log_verbose("setDefaultName of %s works.\n", name);
3413 log_err("setDefaultName of %s failed\n", name);
/* a NULL name opens the default converter */
3414 cnv=ucnv_open(NULL, &status);
3415 if (U_FAILURE(status) || cnv == NULL) {
3416 log_err("opening the default converter of %s failed\n", name);
3419 if(strcmp(ucnv_getName(cnv, &status), expected)==0)
3420 log_verbose("ucnv_getName of %s works.\n", name);
3422 log_err("ucnv_getName of %s failed\n", name);
/*
 * Exercises ucnv_getDefaultName() and ucnv_setDefaultName() through
 * TestOneDefaultNameChange(), then restores the default name that was in
 * effect when the test started.
 */
3426 static void TestDefaultName(void) {
3427 /* Tests ucnv_getDefaultName() and ucnv_setDefaultName(). */
/* Copy of the original default name so it can be restored at the end.
 * NOTE(review): strcpy() does no bounds check; this assumes the default name
 * never exceeds UCNV_MAX_CONVERTER_NAME_LENGTH characters - confirm. */
3428 static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1];
3429 strcpy(defaultName, ucnv_getDefaultName());
3431 log_verbose("getDefaultName returned %s\n", defaultName);
3433 /* Change the default name by setting it. */
3434 TestOneDefaultNameChange("UTF-8", "UTF-8");
/* With U_CHARSET_IS_UTF8 the expected default stays "UTF-8" no matter which
 * name is requested. */
3435 #if U_CHARSET_IS_UTF8
3436 TestOneDefaultNameChange("ISCII,version=1", "UTF-8");
3437 TestOneDefaultNameChange("ISCII,version=2", "UTF-8");
3438 TestOneDefaultNameChange("ISO-8859-1", "UTF-8");
/* In the remaining calls the requested name itself is expected back; the
 * ISCII cases are guarded by the legacy / HTML-only conversion switches. */
3440 # if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
3441 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1");
3442 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2");
3444 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1");
3447 /* Set the default name back to the value saved above. */
3448 ucnv_setDefaultName(defaultName);
3451 /* Test that ucnv_compareNames() matches names according to spec. ----------- */
/*
 * Checks ucnv_compareNames() against one table of converter names.
 *
 * names - NULL-terminated array of strings. The callers in this file put a
 *         relation marker ("=" or "<") first, followed by the names.
 *
 * Each name2 read from the table is compared with name1; an error is logged
 * when the sign of the result differs from rel.
 * NOTE(review): the assignments of relation, name1 and rel are not visible in
 * this excerpt; presumably rel is the expected sign for the relation marker
 * (0 for '=', negative for '<') - confirm against the full file.
 */
3465 compareNames(const char **names) {
3466 const char *relation, *name1, *name2;
/* Select the expected relation from the first character of the marker. */
3470 if(*relation=='=') {
3472 } else if(*relation=='<') {
/* Walk the rest of the table until the NULL sentinel. */
3482 while((name2=*names++)!=NULL) {
3483 result=ucnv_compareNames(name1, name2);
/* Only the sign of the result is checked, not its magnitude. */
3484 if(sign(result)!=rel) {
3485 log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1, name2, result, rel);
/*
 * Runs compareNames() over four fixed tables. Every table starts with a
 * relation marker ("=" or "<"), continues with converter-name spellings and
 * ends with a NULL sentinel.
 */
3492 TestCompareNames() {
/* "=" tables: spellings that differ in case, punctuation and leading zeros. */
3493 static const char *equalUTF8[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL };
3494 static const char *equalIBM[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL };
/* "<" tables: the names are checked for a less-than relation. */
3495 static const char *lessMac[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL };
3496 static const char *lessUTF080[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL };
3498 compareNames(equalUTF8);
3499 compareNames(equalIBM);
3500 compareNames(lessMac);
3501 compareNames(lessUTF080);
/*
 * Substitution tests (the function header for this body is not visible in
 * this excerpt). The visible statements cover four converters:
 *   - UTF-16 and UTF-32: converting the single code unit U+D900 must still
 *     produce output that ucnv_detectUnicodeSignature() recognizes as
 *     starting with a BOM.
 *   - ISO-8859-1: a string set with ucnv_setSubstString() must come back
 *     from ucnv_getSubstChars() as the same five bytes.
 *   - HZ (only with legacy conversion): after ucnv_setSubstString(),
 *     ucnv_getSubstChars() must report a length of 0.
 * NOTE(review): declarations of cnv, buffer, length and len8 are not visible
 * in this excerpt - confirm against the full file.
 */
/* Input for the BOM checks: one code unit, U+D900 per the messages below. */
3506 static const UChar surrogate[1]={ 0xd900 };
/* The same five values 0x61..0x65, once as UChars (the substitution string)
 * and once as chars (the bytes expected back from ISO-8859-1). */
3509 static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3510 static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3512 UErrorCode errorCode;
3516 /* UTF-16/32: test that the BOM is output before the sub character */
3517 errorCode=U_ZERO_ERROR;
3518 cnv=ucnv_open("UTF-16", &errorCode);
3519 if(U_FAILURE(errorCode)) {
3520 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode));
3523 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode);
/* Fails on a conversion error or when no Unicode signature is detected at
 * the start of the output. */
3525 if(U_FAILURE(errorCode) ||
3527 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode)
3529 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n");
/* Same check again for UTF-32. */
3532 errorCode=U_ZERO_ERROR;
3533 cnv=ucnv_open("UTF-32", &errorCode);
3534 if(U_FAILURE(errorCode)) {
3535 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode));
3538 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode);
3540 if(U_FAILURE(errorCode) ||
3542 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode)
3544 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n");
3547 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */
3548 errorCode=U_ZERO_ERROR;
3549 cnv=ucnv_open("ISO-8859-1", &errorCode);
3550 if(U_FAILURE(errorCode)) {
3551 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode));
3554 ucnv_setSubstString(cnv, sub, UPRV_LENGTHOF(sub), &errorCode);
3555 if(U_FAILURE(errorCode)) {
3556 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode));
/* len8 is in/out: buffer capacity going in, substitution length coming out
 * (it is compared with sizeof(subChars) right after the call). */
3558 len8 = sizeof(buffer);
3559 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode);
3560 /* Stateless converter, we expect the string converted to charset bytes. */
3561 if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) {
3562 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode));
/* The HZ variant is compiled only when legacy conversion is available. */
3567 #if !UCONFIG_NO_LEGACY_CONVERSION
3568 errorCode=U_ZERO_ERROR;
3569 cnv=ucnv_open("HZ", &errorCode);
3570 if(U_FAILURE(errorCode)) {
3571 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode));
3574 ucnv_setSubstString(cnv, sub, UPRV_LENGTHOF(sub), &errorCode);
3575 if(U_FAILURE(errorCode)) {
3576 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode));
3578 len8 = sizeof(buffer);
3579 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode);
3580 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */
3581 if(U_FAILURE(errorCode) || len8!=0) {
3582 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode));
3588 * Further testing of ucnv_setSubstString() is done via intltest convert.
3589 * We do not test edge cases of illegal arguments and similar because the
3590 * function implementation uses all of its parameters in calls to other
3591 * functions with UErrorCode parameters.
/*
 * Verifies that ucnv_fromUnicode() and ucnv_toUnicode() reject inconsistent
 * buffer pointers with U_ILLEGAL_ARGUMENT_ERROR and that none of the
 * rejected calls writes into the buffers.
 * NOTE(review): the declaration of cnv is not visible in this excerpt.
 */
3596 InvalidArguments() {
3598 UErrorCode errorCode;
/* Both buffers are filled with sentinel values (1 and 2); the final check
 * confirms that they are still intact. */
3599 char charBuffer[2] = {1, 1};
3600 char ucharAsCharBuffer[2] = {2, 2};
3601 char *charsPtr = charBuffer;
3602 UChar *ucharsPtr = (UChar *)ucharAsCharBuffer;
/* One byte past ucharsPtr: the range between the two pointers is the
 * "incomplete UChar" that the comments below refer to. */
3603 UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1);
3605 errorCode=U_ZERO_ERROR;
3606 cnv=ucnv_open("UTF-8", &errorCode);
3607 if(U_FAILURE(errorCode)) {
3608 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode));
3612 errorCode=U_ZERO_ERROR;
3613 /* This one should fail because an incomplete UChar is being passed in. */
3614 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, TRUE, &errorCode);
3615 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3616 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode));
3619 errorCode=U_ZERO_ERROR;
3620 /* This one should fail because the start pointer (ucharsBadPtr) is greater than the limit (ucharsPtr). */
3621 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, TRUE, &errorCode);
3622 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3623 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode));
3626 errorCode=U_ZERO_ERROR;
3627 /* This one should fail because an incomplete UChar is being passed in. */
3628 ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode);
3629 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3630 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode));
3633 errorCode=U_ZERO_ERROR;
3634 /* This one should fail because the start pointer (ucharsBadPtr) is greater than the limit (ucharsPtr). */
3635 ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode);
3636 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3637 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode));
/* Any changed sentinel means one of the rejected calls wrote output anyway. */
3640 if (charBuffer[0] != 1 || charBuffer[1] != 1
3641 || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2)
3643 log_err("Data was incorrectly written to buffers\n");
/*
 * Checks which name ucnv_getName() reports for converters opened through
 * alias names.
 */
3649 static void TestGetName() {
/* Pairs of strings: the name passed to ucnv_open(), followed by the name
 * that ucnv_getName() is expected to return for that converter. */
3650 static const char *const names[] = {
3651 "Unicode", "UTF-16",
3652 "UnicodeBigUnmarked", "UTF-16BE",
3653 "UnicodeBig", "UTF-16BE,version=1",
3654 "UnicodeLittleUnmarked", "UTF-16LE",
3655 "UnicodeLittle", "UTF-16LE,version=1",
3656 "x-UTF-16LE-BOM", "UTF-16LE,version=1"
/* Step by two so that i always indexes the first string of a pair. */
3659 for(i = 0; i < UPRV_LENGTHOF(names); i += 2) {
3660 UErrorCode errorCode = U_ZERO_ERROR;
3661 UConverter *cnv = ucnv_open(names[i], &errorCode);
/* The name is checked only when the converter opened successfully; no
 * message for a failed open is visible in this excerpt. */
3662 if(U_SUCCESS(errorCode)) {
3663 const char *name = ucnv_getName(cnv, &errorCode);
3664 if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) {
3665 log_err("ucnv_getName(%s) = %s != %s -- %s\n",
3666 names[i], name, names[i+1], u_errorName(errorCode));
3673 static void TestUTFBOM() {
3674 static const UChar a16[] = { 0x61 };
3675 static const char *const names[] = {
3683 static const uint8_t expected[][5] = {
3685 { 4, 0xfe, 0xff, 0, 0x61 },
3686 { 4, 0xfe, 0xff, 0, 0x61 },
3688 { 4, 0xff, 0xfe, 0x61, 0 },
3689 { 4, 0xff, 0xfe, 0x61, 0 },
3693 { 4, 0xfe, 0xff, 0, 0x61 },
3696 { 4, 0xff, 0xfe, 0x61, 0 }
3702 for(i = 0; i < UPRV_LENGTHOF(names); ++i) {
3703 UErrorCode errorCode = U_ZERO_ERROR;
3704 UConverter *cnv = ucnv_open(names[i], &errorCode);
3706 const uint8_t *exp = expected[i];
3707 if (U_FAILURE(errorCode)) {
3708 log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode));
3711 length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode);
3713 if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) {
3714 log_err("unexpected %s BOM writing behavior -- %s\n",
3715 names[i], u_errorName(errorCode));