1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 2002-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: cstrcase.c
12 * tab size: 8 (not used)
15 * created on: 2002feb21
16 * created by: Markus W. Scherer
18 * Test file for string casing C API functions.
22 #include "unicode/utypes.h"
23 #include "unicode/uchar.h"
24 #include "unicode/ustring.h"
25 #include "unicode/uloc.h"
26 #include "unicode/ubrk.h"
27 #include "unicode/ucasemap.h"
32 /* test string case mapping functions --------------------------------------- */
38 beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
39 lowerRoot[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
40 lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff };
46 /* lowercase with root locale and separate buffers */
48 errorCode=U_ZERO_ERROR;
49 length=u_strToLower(buffer, UPRV_LENGTHOF(buffer),
50 beforeLower, UPRV_LENGTHOF(beforeLower),
53 if( U_FAILURE(errorCode) ||
54 length!=(UPRV_LENGTHOF(lowerRoot)) ||
55 uprv_memcmp(lowerRoot, buffer, length*U_SIZEOF_UCHAR)!=0 ||
58 log_err("error in u_strToLower(root locale)=%ld error=%s string matches: %s\t\nlowerRoot=%s\t\nbuffer=%s\n",
60 u_errorName(errorCode),
61 uprv_memcmp(lowerRoot, buffer, length*U_SIZEOF_UCHAR)==0 &&
62 buffer[length]==0 ? "yes" : "no",
63 aescstrdup(lowerRoot,-1),
64 aescstrdup(buffer,-1));
67 /* lowercase with turkish locale and in the same buffer */
68 uprv_memcpy(buffer, beforeLower, sizeof(beforeLower));
69 buffer[UPRV_LENGTHOF(beforeLower)]=0;
70 errorCode=U_ZERO_ERROR;
71 length=u_strToLower(buffer, UPRV_LENGTHOF(buffer),
72 buffer, -1, /* implicit srcLength */
75 if( U_FAILURE(errorCode) ||
76 length!=(UPRV_LENGTHOF(lowerTurkish)) ||
77 uprv_memcmp(lowerTurkish, buffer, length*U_SIZEOF_UCHAR)!=0 ||
80 log_err("error in u_strToLower(turkish locale)=%ld error=%s string matches: %s\n",
82 u_errorName(errorCode),
83 uprv_memcmp(lowerTurkish, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
86 /* test preflighting */
87 buffer[0]=buffer[2]=0xabcd;
88 errorCode=U_ZERO_ERROR;
89 length=u_strToLower(buffer, 2, /* set destCapacity=2 */
90 beforeLower, UPRV_LENGTHOF(beforeLower),
93 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
94 length!=(UPRV_LENGTHOF(lowerRoot)) ||
95 uprv_memcmp(lowerRoot, buffer, 2*U_SIZEOF_UCHAR)!=0 ||
98 log_err("error in u_strToLower(root locale preflighting)=%ld error=%s string matches: %s\n",
100 u_errorName(errorCode),
101 uprv_memcmp(lowerRoot, buffer, 2*U_SIZEOF_UCHAR)==0 && buffer[2]==0xabcd ? "yes" : "no");
104 /* test error handling */
105 errorCode=U_ZERO_ERROR;
106 length=u_strToLower(NULL, UPRV_LENGTHOF(buffer),
107 beforeLower, UPRV_LENGTHOF(beforeLower),
110 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
111 log_err("error in u_strToLower(root locale dest=NULL)=%ld error=%s\n",
113 u_errorName(errorCode));
117 errorCode=U_ZERO_ERROR;
118 length=u_strToLower(buffer, -1,
119 beforeLower, UPRV_LENGTHOF(beforeLower),
122 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
125 log_err("error in u_strToLower(root locale destCapacity=-1)=%ld error=%s buffer[0]==0x%lx\n",
127 u_errorName(errorCode),
133 TestCaseUpper(void) {
136 beforeUpper[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xd93f, 0xdfff },
137 upperRoot[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
138 upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0xd93f, 0xdfff };
142 UErrorCode errorCode;
144 /* uppercase with root locale and in the same buffer */
145 uprv_memcpy(buffer, beforeUpper, sizeof(beforeUpper));
146 errorCode=U_ZERO_ERROR;
147 length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer),
148 buffer, UPRV_LENGTHOF(beforeUpper),
151 if( U_FAILURE(errorCode) ||
152 length!=(UPRV_LENGTHOF(upperRoot)) ||
153 uprv_memcmp(upperRoot, buffer, length*U_SIZEOF_UCHAR)!=0 ||
156 log_err("error in u_strToUpper(root locale)=%ld error=%s string matches: %s\n",
158 u_errorName(errorCode),
159 uprv_memcmp(upperRoot, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
162 /* uppercase with turkish locale and separate buffers */
164 errorCode=U_ZERO_ERROR;
165 length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer),
166 beforeUpper, UPRV_LENGTHOF(beforeUpper),
169 if( U_FAILURE(errorCode) ||
170 length!=(UPRV_LENGTHOF(upperTurkish)) ||
171 uprv_memcmp(upperTurkish, buffer, length*U_SIZEOF_UCHAR)!=0 ||
174 log_err("error in u_strToUpper(turkish locale)=%ld error=%s string matches: %s\n",
176 u_errorName(errorCode),
177 uprv_memcmp(upperTurkish, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
180 /* test preflighting */
181 errorCode=U_ZERO_ERROR;
182 length=u_strToUpper(NULL, 0,
183 beforeUpper, UPRV_LENGTHOF(beforeUpper),
186 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
187 length!=(UPRV_LENGTHOF(upperTurkish))
189 log_err("error in u_strToUpper(turkish locale pure preflighting)=%ld error=%s\n",
191 u_errorName(errorCode));
194 /* test error handling */
196 errorCode=U_ZERO_ERROR;
197 length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer),
198 NULL, UPRV_LENGTHOF(beforeUpper),
201 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
204 log_err("error in u_strToUpper(turkish locale src=NULL)=%ld error=%s buffer[0]==0x%lx\n",
206 u_errorName(errorCode),
211 errorCode=U_ZERO_ERROR;
212 length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer),
216 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
219 log_err("error in u_strToUpper(turkish locale srcLength=-2)=%ld error=%s buffer[0]==0x%lx\n",
221 u_errorName(errorCode),
226 #if !UCONFIG_NO_BREAK_ITERATION
229 TestCaseTitle(void) {
232 beforeTitle[]= { 0x61, 0x42, 0x20, 0x69, 0x3c2, 0x20, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xd93f, 0xdfff },
233 titleWord[]= { 0x41, 0x62, 0x20, 0x49, 0x3c2, 0x20, 0x53, 0x73, 0x3c3, 0x2f, 0x46, 0x66, 0x69, 0xd93f, 0xdfff },
234 titleChar[]= { 0x41, 0x42, 0x20, 0x49, 0x3a3, 0x20, 0x53, 0x73, 0x3a3, 0x2f, 0x46, 0x66, 0x69, 0xd93f, 0xdfff };
237 UBreakIterator *titleIterChars;
239 UErrorCode errorCode;
241 errorCode=U_ZERO_ERROR;
242 titleIterChars=ubrk_open(UBRK_CHARACTER, "", beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
243 if(U_FAILURE(errorCode)) {
244 log_err_status(errorCode, "error: ubrk_open(UBRK_CHARACTER)->%s\n", u_errorName(errorCode));
248 /* titlecase with standard break iterator and in the same buffer */
249 uprv_memcpy(buffer, beforeTitle, sizeof(beforeTitle));
250 errorCode=U_ZERO_ERROR;
251 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer),
252 buffer, UPRV_LENGTHOF(beforeTitle),
255 if( U_FAILURE(errorCode) ||
256 length!=(UPRV_LENGTHOF(titleWord)) ||
257 uprv_memcmp(titleWord, buffer, length*U_SIZEOF_UCHAR)!=0 ||
260 log_err("error in u_strToTitle(standard iterator)=%ld error=%s string matches: %s\n",
262 u_errorName(errorCode),
263 uprv_memcmp(titleWord, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
266 /* titlecase with UBRK_CHARACTERS and separate buffers */
268 errorCode=U_ZERO_ERROR;
269 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer),
270 beforeTitle, UPRV_LENGTHOF(beforeTitle),
273 if( U_FAILURE(errorCode) ||
274 length!=(UPRV_LENGTHOF(titleChar)) ||
275 uprv_memcmp(titleChar, buffer, length*U_SIZEOF_UCHAR)!=0 ||
278 log_err("error in u_strToTitle(UBRK_CHARACTERS)=%ld error=%s string matches: %s\n",
280 u_errorName(errorCode),
281 uprv_memcmp(titleChar, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
284 /* test preflighting */
285 errorCode=U_ZERO_ERROR;
286 length=u_strToTitle(NULL, 0,
287 beforeTitle, UPRV_LENGTHOF(beforeTitle),
290 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
291 length!=(UPRV_LENGTHOF(titleChar))
293 log_err("error in u_strToTitle(UBRK_CHARACTERS pure preflighting)=%ld error=%s\n",
295 u_errorName(errorCode));
298 /* test error handling */
300 errorCode=U_ZERO_ERROR;
301 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer),
302 NULL, UPRV_LENGTHOF(beforeTitle),
305 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
308 log_err("error in u_strToTitle(UBRK_CHARACTERS src=NULL)=%ld error=%s buffer[0]==0x%lx\n",
310 u_errorName(errorCode),
315 errorCode=U_ZERO_ERROR;
316 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer),
320 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
323 log_err("error in u_strToTitle(UBRK_CHARACTERS srcLength=-2)=%ld error=%s buffer[0]==0x%lx\n",
325 u_errorName(errorCode),
329 ubrk_close(titleIterChars);
333 TestCaseDutchTitle(void) {
336 beforeTitle[]= { 0x69, 0x6A, 0x73, 0x73, 0x45, 0x6c, 0x20, 0x69, 0x67, 0x6c, 0x4f, 0x6f , 0x20 , 0x49, 0x4A, 0x53, 0x53, 0x45, 0x4C },
337 titleRoot[]= { 0x49, 0x6A, 0x73, 0x73, 0x65, 0x6c, 0x20, 0x49, 0x67, 0x6c, 0x6f, 0x6f , 0x20 , 0x49, 0x6A, 0x73, 0x73, 0x65, 0x6C },
338 titleDutch[]= { 0x49, 0x4A, 0x73, 0x73, 0x65, 0x6c, 0x20, 0x49, 0x67, 0x6c, 0x6f, 0x6f , 0x20 , 0x49, 0x4A, 0x73, 0x73, 0x65, 0x6C };
341 UBreakIterator *titleIterWord;
343 UErrorCode errorCode;
345 errorCode=U_ZERO_ERROR;
346 titleIterWord=ubrk_open(UBRK_WORD, "", beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
347 if(U_FAILURE(errorCode)) {
348 log_err_status(errorCode, "error: ubrk_open(UBRK_WORD)->%s\n", u_errorName(errorCode));
352 /* titlecase with default locale */
354 errorCode=U_ZERO_ERROR;
355 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer),
356 beforeTitle, UPRV_LENGTHOF(beforeTitle),
359 if( U_FAILURE(errorCode) ||
360 length!=(UPRV_LENGTHOF(titleRoot)) ||
361 uprv_memcmp(titleRoot, buffer, length*U_SIZEOF_UCHAR)!=0 ||
365 u_UCharsToChars(buffer,charsOut,sizeof(charsOut));
366 log_err("error in u_strToTitle(UBRK_CHARACTERS)=%ld error=%s root locale string matches: %s\noutput buffer is {%s}\n",
368 u_errorName(errorCode),
369 uprv_memcmp(titleRoot, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no", charsOut);
371 /* titlecase with Dutch locale */
373 errorCode=U_ZERO_ERROR;
374 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer),
375 beforeTitle, UPRV_LENGTHOF(beforeTitle),
378 if( U_FAILURE(errorCode) ||
379 length!=(UPRV_LENGTHOF(titleDutch)) ||
380 uprv_memcmp(titleDutch, buffer, length*U_SIZEOF_UCHAR)!=0 ||
384 u_UCharsToChars(buffer,charsOut,sizeof(charsOut));
385 log_err("error in u_strToTitle(UBRK_CHARACTERS)=%ld error=%s dutch locale string matches: %s\noutput buffer is {%s}\n",
387 u_errorName(errorCode),
388 uprv_memcmp(titleDutch, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no", charsOut);
391 ubrk_close(titleIterWord);
396 /* test case folding and case-insensitive string compare -------------------- */
399 TestCaseFolding(void) {
401 * CaseFolding.txt says about i and its cousins:
402 * 0049; C; 0069; # LATIN CAPITAL LETTER I
403 * 0049; T; 0131; # LATIN CAPITAL LETTER I
405 * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
406 * 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
408 * See CaseFolding.txt and the Unicode Standard for how to apply the case foldings.
412 /* input, default, exclude special i */
418 0xfb03, 0xfb03, 0xfb03,
419 0x1040e,0x10436,0x10436,
420 0x5ffff,0x5ffff,0x5ffff
424 mixed[]= { 0x61, 0x42, 0x130, 0x49, 0x131, 0x3d0, 0xdf, 0xfb03, 0xd93f, 0xdfff },
425 foldedDefault[]= { 0x61, 0x62, 0x69, 0x307, 0x69, 0x131, 0x3b2, 0x73, 0x73, 0x66, 0x66, 0x69, 0xd93f, 0xdfff },
426 foldedExcludeSpecialI[]={ 0x61, 0x62, 0x69, 0x131, 0x131, 0x3b2, 0x73, 0x73, 0x66, 0x66, 0x69, 0xd93f, 0xdfff };
428 UVersionInfo unicodeVersion={ 0, 0, 17, 89 }, unicode_3_1={ 3, 1, 0, 0 };
435 UErrorCode errorCode;
438 /* if unicodeVersion()>=3.1 then test exclude-special-i cases as well */
439 u_getUnicodeVersion(unicodeVersion);
440 isUnicode_3_1= uprv_memcmp(unicodeVersion, unicode_3_1, 4)>=0;
442 /* test simple case folding */
444 for(i=0; i<sizeof(simple)/12; p+=3, ++i) { /* each row holds 3 code points (input, default, exclude special i); 12 is presumably 3*sizeof(UChar32) - confirm against the declaration of simple[] */
445 if(u_foldCase(p[0], U_FOLD_CASE_DEFAULT)!=p[1]) {
446 log_err("error: u_foldCase(0x%04lx, default)=0x%04lx instead of 0x%04lx\n", /* %lx needs unsigned long; code points are 32-bit, so cast each argument */
447 (unsigned long)p[0], (unsigned long)u_foldCase(p[0], U_FOLD_CASE_DEFAULT), (unsigned long)p[1]);
451 if(isUnicode_3_1 && u_foldCase(p[0], U_FOLD_CASE_EXCLUDE_SPECIAL_I)!=p[2]) {
452 log_err("error: u_foldCase(0x%04lx, exclude special i)=0x%04lx instead of 0x%04lx\n",
453 (unsigned long)p[0], (unsigned long)u_foldCase(p[0], U_FOLD_CASE_EXCLUDE_SPECIAL_I), (unsigned long)p[2]);
458 /* test full string case folding with default option and separate buffers */
460 errorCode=U_ZERO_ERROR;
461 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer),
462 mixed, UPRV_LENGTHOF(mixed),
465 if( U_FAILURE(errorCode) ||
466 length!=(UPRV_LENGTHOF(foldedDefault)) ||
467 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)!=0 ||
470 log_err("error in u_strFoldCase(default)=%ld error=%s string matches: %s\n",
472 u_errorName(errorCode),
473 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
476 /* exclude special i */
479 errorCode=U_ZERO_ERROR;
480 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer),
481 mixed, UPRV_LENGTHOF(mixed),
482 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
484 if( U_FAILURE(errorCode) ||
485 length!=(UPRV_LENGTHOF(foldedExcludeSpecialI)) ||
486 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)!=0 ||
489 log_err("error in u_strFoldCase(exclude special i)=%ld error=%s string matches: %s\n",
491 u_errorName(errorCode),
492 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
496 /* test full string case folding with default option and in the same buffer */
497 uprv_memcpy(buffer, mixed, sizeof(mixed));
498 buffer[UPRV_LENGTHOF(mixed)]=0;
499 errorCode=U_ZERO_ERROR;
500 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer),
501 buffer, -1, /* implicit srcLength */
504 if( U_FAILURE(errorCode) ||
505 length!=(UPRV_LENGTHOF(foldedDefault)) ||
506 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)!=0 ||
509 log_err("error in u_strFoldCase(default same buffer)=%ld error=%s string matches: %s\n",
511 u_errorName(errorCode),
512 uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
515 /* test full string case folding, exclude special i, in the same buffer */
517 uprv_memcpy(buffer, mixed, sizeof(mixed));
518 errorCode=U_ZERO_ERROR;
519 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer),
520 buffer, UPRV_LENGTHOF(mixed),
521 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
523 if( U_FAILURE(errorCode) ||
524 length!=UPRV_LENGTHOF(foldedExcludeSpecialI) ||
525 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)!=0 ||
528 log_err("error in u_strFoldCase(exclude special i same buffer)=%ld error=%s string matches: %s\n",
530 u_errorName(errorCode),
531 uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no");
535 /* test preflighting */
536 buffer[0]=buffer[2]=0xabcd;
537 errorCode=U_ZERO_ERROR;
538 length=u_strFoldCase(buffer, 2, /* set destCapacity=2 */
539 mixed, UPRV_LENGTHOF(mixed),
542 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
543 length!=UPRV_LENGTHOF(foldedDefault) ||
544 uprv_memcmp(foldedDefault, buffer, 2*U_SIZEOF_UCHAR)!=0 ||
547 log_err("error in u_strFoldCase(default preflighting)=%ld error=%s string matches: %s\n",
549 u_errorName(errorCode),
550 uprv_memcmp(foldedDefault, buffer, 2*U_SIZEOF_UCHAR)==0 && buffer[2]==0xabcd ? "yes" : "no");
553 errorCode=U_ZERO_ERROR;
554 length=u_strFoldCase(NULL, 0,
555 mixed, UPRV_LENGTHOF(mixed),
558 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
559 length!=UPRV_LENGTHOF(foldedDefault)
561 log_err("error in u_strFoldCase(default pure preflighting)=%ld error=%s\n",
563 u_errorName(errorCode));
566 /* test error handling */
567 errorCode=U_ZERO_ERROR;
568 length=u_strFoldCase(NULL, UPRV_LENGTHOF(buffer),
569 mixed, UPRV_LENGTHOF(mixed),
572 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
573 log_err("error in u_strFoldCase(default dest=NULL)=%ld error=%s\n",
575 u_errorName(errorCode));
579 errorCode=U_ZERO_ERROR;
580 length=u_strFoldCase(buffer, -1,
581 mixed, UPRV_LENGTHOF(mixed),
584 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
587 log_err("error in u_strFoldCase(default destCapacity=-1)=%ld error=%s buffer[0]==0x%lx\n",
589 u_errorName(errorCode),
594 errorCode=U_ZERO_ERROR;
595 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer),
596 NULL, UPRV_LENGTHOF(mixed),
597 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
599 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
602 log_err("error in u_strFoldCase(exclude special i src=NULL)=%ld error=%s buffer[0]==0x%lx\n",
604 u_errorName(errorCode),
609 errorCode=U_ZERO_ERROR;
610 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer),
612 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
614 if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR ||
617 log_err("error in u_strFoldCase(exclude special i srcLength=-2)=%ld error=%s buffer[0]==0x%lx\n",
619 u_errorName(errorCode),
625 TestCaseCompare(void) {
628 mixed[]= { 0x61, 0x42, 0x131, 0x3a3, 0xdf, 0xfb03, 0xd93f, 0xdfff, 0 },
629 otherDefault[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
630 otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
631 different[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
633 UVersionInfo unicodeVersion={ 0, 0, 17, 89 }, unicode_3_1={ 3, 1, 0, 0 };
635 int32_t result, lenMixed, lenOtherDefault, lenOtherExcludeSpecialI, lenDifferent;
636 UErrorCode errorCode;
639 errorCode=U_ZERO_ERROR;
641 lenMixed=u_strlen(mixed);
642 lenOtherDefault=u_strlen(otherDefault);
643 (void)lenOtherDefault; /* Suppress set but not used warning. */
644 lenOtherExcludeSpecialI=u_strlen(otherExcludeSpecialI);
645 lenDifferent=u_strlen(different);
647 /* if unicodeVersion()>=3.1 then test exclude-special-i cases as well */
648 u_getUnicodeVersion(unicodeVersion);
649 isUnicode_3_1= uprv_memcmp(unicodeVersion, unicode_3_1, 4)>=0;
650 (void)isUnicode_3_1; /* Suppress set but not used warning. */
652 /* test u_strcasecmp() */
653 result=u_strcasecmp(mixed, otherDefault, U_FOLD_CASE_DEFAULT);
655 log_err("error: u_strcasecmp(mixed, other, default)=%ld instead of 0\n", result);
657 result=u_strCaseCompare(mixed, -1, otherDefault, -1, U_FOLD_CASE_DEFAULT, &errorCode);
659 log_err("error: u_strCaseCompare(mixed, other, default)=%ld instead of 0\n", result);
662 /* test u_strcasecmp() - exclude special i */
663 result=u_strcasecmp(mixed, otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
665 log_err("error: u_strcasecmp(mixed, other, exclude special i)=%ld instead of 0\n", result);
667 result=u_strCaseCompare(mixed, lenMixed, otherExcludeSpecialI, lenOtherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
669 log_err("error: u_strCaseCompare(mixed, other, exclude special i)=%ld instead of 0\n", result);
672 /* test u_strcasecmp() */
673 result=u_strcasecmp(mixed, different, U_FOLD_CASE_DEFAULT);
675 log_err("error: u_strcasecmp(mixed, different, default)=%ld instead of positive\n", result);
677 result=u_strCaseCompare(mixed, -1, different, lenDifferent, U_FOLD_CASE_DEFAULT, &errorCode);
679 log_err("error: u_strCaseCompare(mixed, different, default)=%ld instead of positive\n", result);
682 /* test u_strncasecmp() - stop before the sharp s (U+00df) */
683 result=u_strncasecmp(mixed, different, 4, U_FOLD_CASE_DEFAULT);
685 log_err("error: u_strncasecmp(mixed, different, 4, default)=%ld instead of 0\n", result);
687 result=u_strCaseCompare(mixed, 4, different, 4, U_FOLD_CASE_DEFAULT, &errorCode);
689 log_err("error: u_strCaseCompare(mixed, 4, different, 4, default)=%ld instead of 0\n", result);
692 /* test u_strncasecmp() - stop in the middle of the sharp s (U+00df) */
693 result=u_strncasecmp(mixed, different, 5, U_FOLD_CASE_DEFAULT);
695 log_err("error: u_strncasecmp(mixed, different, 5, default)=%ld instead of positive\n", result);
697 result=u_strCaseCompare(mixed, 5, different, 5, U_FOLD_CASE_DEFAULT, &errorCode);
699 log_err("error: u_strCaseCompare(mixed, 5, different, 5, default)=%ld instead of positive\n", result);
702 /* test u_memcasecmp() - stop before the sharp s (U+00df) */
703 result=u_memcasecmp(mixed, different, 4, U_FOLD_CASE_DEFAULT);
705 log_err("error: u_memcasecmp(mixed, different, 4, default)=%ld instead of 0\n", result);
708 /* test u_memcasecmp() - stop in the middle of the sharp s (U+00df) */
709 result=u_memcasecmp(mixed, different, 5, U_FOLD_CASE_DEFAULT);
711 log_err("error: u_memcasecmp(mixed, different, 5, default)=%ld instead of positive\n", result);
715 /* test UCaseMap ------------------------------------------------------------ */
718 * API test for UCaseMap;
719 * test cases for actual case mappings using UCaseMap see
720 * intltest utility/UnicodeStringTest/StringCaseTest/TestCasing
725 aBc[] ={ 0x61, 0x42, 0x63, 0 },
726 abc[] ={ 0x61, 0x62, 0x63, 0 },
727 ABCg[]={ 0x41, 0x42, 0x43, 0x67, 0 },
728 defg[]={ 0x64, 0x65, 0x66, 0x67, 0 };
735 UErrorCode errorCode;
737 errorCode=U_ZERO_ERROR;
738 csm=ucasemap_open("tur", 0xa5, &errorCode);
739 if(U_FAILURE(errorCode)) {
740 log_err("ucasemap_open(\"tur\") failed - %s\n", u_errorName(errorCode));
743 locale=ucasemap_getLocale(csm);
744 if(0!=strcmp(locale, "tr")) {
745 log_err("ucasemap_getLocale(ucasemap_open(\"tur\"))==%s!=\"tr\"\n", locale);
747 /* overly long locale IDs get truncated to their language code to avoid unnecessary allocation */
748 ucasemap_setLocale(csm, "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog", &errorCode);
749 locale=ucasemap_getLocale(csm);
750 if(0!=strcmp(locale, "i-klingon")) {
751 log_err("ucasemap_getLocale(ucasemap_setLocale(\"I-kLInGOn-the-quick-br...\"))==%s!=\"i-klingon\"\n", locale);
754 errorCode=U_ZERO_ERROR;
755 options=ucasemap_getOptions(csm);
757 log_err("ucasemap_getOptions(ucasemap_open(0xa5))==0x%lx!=0xa5\n", (long)options);
759 ucasemap_setOptions(csm, 0x333333, &errorCode);
760 options=ucasemap_getOptions(csm);
761 if(options!=0x333333) {
762 log_err("ucasemap_getOptions(ucasemap_setOptions(0x333333))==0x%lx!=0x333333\n", (long)options);
765 /* test case mapping API; not all permutations necessary due to shared implementation code */
767 /* NUL terminated source */
768 errorCode=U_ZERO_ERROR;
769 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, -1, &errorCode);
770 if(U_FAILURE(errorCode) || length!=3 || 0!=strcmp(abc, utf8Out)) {
771 log_err("ucasemap_utf8ToLower(aBc\\0) failed\n");
774 /* incoming failure code */
775 errorCode=U_PARSE_ERROR;
776 strcpy(utf8Out, defg);
777 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, -1, &errorCode);
778 if(errorCode!=U_PARSE_ERROR || 0!=strcmp(defg, utf8Out)) {
779 log_err("ucasemap_utf8ToLower(failure) failed\n");
782 /* overlapping input & output */
783 errorCode=U_ZERO_ERROR;
784 strcpy(utf8Out, aBc);
785 length=ucasemap_utf8ToUpper(csm, utf8Out, 2, utf8Out+1, 2, &errorCode);
786 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(aBc, utf8Out)) {
787 log_err("ucasemap_utf8ToUpper(overlap 1) failed\n");
790 /* overlap in the other direction */
791 errorCode=U_ZERO_ERROR;
792 strcpy(utf8Out, aBc);
793 length=ucasemap_utf8ToUpper(csm, utf8Out+1, 2, utf8Out, 2, &errorCode);
794 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(aBc, utf8Out)) {
795 log_err("ucasemap_utf8ToUpper(overlap 2) failed\n");
798 /* NULL destination */
799 errorCode=U_ZERO_ERROR;
800 strcpy(utf8Out, defg);
801 length=ucasemap_utf8ToLower(csm, NULL, (int32_t)sizeof(utf8Out), aBc, -1, &errorCode);
802 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) {
803 log_err("ucasemap_utf8ToLower(dest=NULL) failed\n");
807 errorCode=U_ZERO_ERROR;
808 strcpy(utf8Out, defg);
809 length=ucasemap_utf8ToLower(csm, utf8Out, -2, aBc, -1, &errorCode);
810 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) {
811 log_err("ucasemap_utf8ToLower(destCapacity<0) failed\n");
815 errorCode=U_ZERO_ERROR;
816 strcpy(utf8Out, defg);
817 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), NULL, -1, &errorCode);
818 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) {
819 log_err("ucasemap_utf8ToLower(src=NULL) failed\n");
823 errorCode=U_ZERO_ERROR;
824 strcpy(utf8Out, defg);
825 length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, -2, &errorCode);
826 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) {
827 log_err("ucasemap_utf8ToLower(srcLength<-1) failed\n");
830 /* buffer overflow */
831 errorCode=U_ZERO_ERROR;
832 strcpy(utf8Out, defg);
833 length=ucasemap_utf8ToUpper(csm, utf8Out, 2, aBc, 3, &errorCode);
834 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3 || 0!=strcmp(defg+2, utf8Out+2)) {
835 log_err("ucasemap_utf8ToUpper(overflow) failed\n");
838 /* dest not terminated (leaves g from defg alone) */
839 errorCode=U_ZERO_ERROR;
840 strcpy(utf8Out, defg);
841 length=ucasemap_utf8ToUpper(csm, utf8Out, 3, aBc, 3, &errorCode);
842 if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=3 || 0!=strcmp(ABCg, utf8Out)) {
843 log_err("ucasemap_utf8ToUpper(not terminated) failed\n"); /* distinct text so this failure is not confused with the buffer-overflow check just before it */
846 /* C API coverage for case folding. More thorough test via C++ intltest's StringCaseTest::TestCasing(). */
847 errorCode=U_ZERO_ERROR;
849 length=ucasemap_utf8FoldCase(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, 3, &errorCode);
850 if(U_FAILURE(errorCode) || length!=3 || 0!=strcmp(abc, utf8Out)) {
851 log_err("ucasemap_utf8FoldCase(aBc) failed\n");
857 #if !UCONFIG_NO_BREAK_ITERATION
859 /* Try titlecasing with options. */
861 TestUCaseMapToTitle(void) {
862 /* "a 'CaT. A 'dOg! 'eTc." where '=U+02BB */
864 * Note: The sentence BreakIterator does not recognize a '.'
865 * as a sentence terminator if it is followed by lowercase.
866 * That is why the example has the '!'.
870 beforeTitle[]= { 0x61, 0x20, 0x2bb, 0x43, 0x61, 0x54, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x4f, 0x67, 0x21, 0x20, 0x2bb, 0x65, 0x54, 0x63, 0x2e },
871 titleWord[]= { 0x41, 0x20, 0x2bb, 0x43, 0x61, 0x74, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x44, 0x6f, 0x67, 0x21, 0x20, 0x2bb, 0x45, 0x74, 0x63, 0x2e },
872 titleWordNoAdjust[]={ 0x41, 0x20, 0x2bb, 0x63, 0x61, 0x74, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x6f, 0x67, 0x21, 0x20, 0x2bb, 0x65, 0x74, 0x63, 0x2e },
873 titleSentNoLower[]= { 0x41, 0x20, 0x2bb, 0x43, 0x61, 0x54, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x4f, 0x67, 0x21, 0x20, 0x2bb, 0x45, 0x54, 0x63, 0x2e };
877 UBreakIterator *sentenceIter;
878 const UBreakIterator *iter;
880 UErrorCode errorCode;
882 errorCode=U_ZERO_ERROR;
883 csm=ucasemap_open("", 0, &errorCode);
884 if(U_FAILURE(errorCode)) {
885 log_err("ucasemap_open(\"\") failed - %s\n", u_errorName(errorCode));
889 iter=ucasemap_getBreakIterator(csm);
891 log_err("ucasemap_getBreakIterator() returns %p!=NULL before setting any iterator or titlecasing\n", iter);
894 /* Use default UBreakIterator: Word breaks. */
895 length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
896 if( U_FAILURE(errorCode) ||
897 length!=UPRV_LENGTHOF(titleWord) ||
898 0!=u_memcmp(buffer, titleWord, length) ||
901 log_err_status(errorCode, "ucasemap_toTitle(default iterator)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
903 if (U_SUCCESS(errorCode)) {
904 iter=ucasemap_getBreakIterator(csm);
906 log_err("ucasemap_getBreakIterator() returns NULL after titlecasing\n");
910 /* Try U_TITLECASE_NO_BREAK_ADJUSTMENT. */
911 ucasemap_setOptions(csm, U_TITLECASE_NO_BREAK_ADJUSTMENT, &errorCode);
912 if(U_FAILURE(errorCode)) {
913 log_err_status(errorCode, "error: ucasemap_setOptions(U_TITLECASE_NO_BREAK_ADJUSTMENT) failed - %s\n", u_errorName(errorCode));
917 length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
918 if( U_FAILURE(errorCode) ||
919 length!=UPRV_LENGTHOF(titleWordNoAdjust) ||
920 0!=u_memcmp(buffer, titleWordNoAdjust, length) ||
923 log_err("ucasemap_toTitle(default iterator, no break adjustment)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
926 /* Set a sentence break iterator. */
927 errorCode=U_ZERO_ERROR;
928 sentenceIter=ubrk_open(UBRK_SENTENCE, "", NULL, 0, &errorCode);
929 if(U_FAILURE(errorCode)) {
930 log_err("error: ubrk_open(UBRK_SENTENCE) failed - %s\n", u_errorName(errorCode));
934 ucasemap_setBreakIterator(csm, sentenceIter, &errorCode);
935 if(U_FAILURE(errorCode)) {
936 log_err("error: ucasemap_setBreakIterator(sentence iterator) failed - %s\n", u_errorName(errorCode));
937 ubrk_close(sentenceIter);
941 iter=ucasemap_getBreakIterator(csm);
942 if(iter!=sentenceIter) {
943 log_err("ucasemap_getBreakIterator() returns %p!=%p after setting the iterator\n", iter, sentenceIter);
946 ucasemap_setOptions(csm, U_TITLECASE_NO_LOWERCASE, &errorCode);
947 if(U_FAILURE(errorCode)) {
948 log_err("error: ucasemap_setOptions(U_TITLECASE_NO_LOWERCASE) failed - %s\n", u_errorName(errorCode));
952 /* Use the sentence break iterator with the option. Preflight first. */
953 length=ucasemap_toTitle(csm, NULL, 0, beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
954 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
955 length!=UPRV_LENGTHOF(titleSentNoLower)
957 log_err("ucasemap_toTitle(preflight sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
960 errorCode=U_ZERO_ERROR;
962 length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
963 if( U_FAILURE(errorCode) ||
964 length!=UPRV_LENGTHOF(titleSentNoLower) ||
965 0!=u_memcmp(buffer, titleSentNoLower, length) ||
968 log_err("ucasemap_toTitle(sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
971 /* UTF-8 C API coverage. More thorough test via C++ intltest's StringCaseTest::TestCasing(). */
973 char utf8BeforeTitle[64], utf8TitleSentNoLower[64], utf8[64];
974 int32_t utf8BeforeTitleLength, utf8TitleSentNoLowerLength;
976 errorCode=U_ZERO_ERROR;
977 u_strToUTF8(utf8BeforeTitle, (int32_t)sizeof(utf8BeforeTitle), &utf8BeforeTitleLength, beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
978 u_strToUTF8(utf8TitleSentNoLower, (int32_t)sizeof(utf8TitleSentNoLower), &utf8TitleSentNoLowerLength, titleSentNoLower, UPRV_LENGTHOF(titleSentNoLower), &errorCode);
980 length=ucasemap_utf8ToTitle(csm, utf8, (int32_t)sizeof(utf8), utf8BeforeTitle, utf8BeforeTitleLength, &errorCode);
981 if( U_FAILURE(errorCode) ||
982 length!=utf8TitleSentNoLowerLength ||
983 0!=uprv_memcmp(utf8, utf8TitleSentNoLower, length) ||
986 log_err("ucasemap_utf8ToTitle(sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
995 /* Test case for internal API u_caseInsensitivePrefixMatch */
997 TestUCaseInsensitivePrefixMatch(void) {
1004 {"ABC", "ab", 2, 2},
1005 {"ABCD", "abcx", 3, 3},
1006 {"ABC", "xyz", 0, 0},
1007 /* U+00DF LATIN SMALL LETTER SHARP S */
1008 {"A\\u00dfBC", "Ass", 2, 3},
1009 {"Fust", "Fu\\u00dfball", 2, 2},
1010 {"\\u00dfsA", "s\\u00dfB", 2, 2},
1011 {"\\u00dfs", "s\\u00df", 2, 2},
1012 /* U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE */
1013 {"XYZ\\u0130i\\u0307xxx", "xyzi\\u0307\\u0130yyy", 6, 6},
1018 for (i = 0; testCases[i].s1 != 0; i++) {
1019 UErrorCode sts = U_ZERO_ERROR;
1020 UChar u1[64], u2[64];
1021 int32_t matchLen1, matchLen2;
1023 u_unescape(testCases[i].s1, u1, 64);
1024 u_unescape(testCases[i].s2, u2, 64);
1026 u_caseInsensitivePrefixMatch(u1, -1, u2, -1, 0, &matchLen1, &matchLen2, &sts);
1027 if (U_FAILURE(sts)) {
1028 log_err("error: %s, s1=%s, s2=%s\n", u_errorName(sts), testCases[i].s1, testCases[i].s2); /* newline-terminated like every other log_err message in this file */
1029 } else if (matchLen1 != testCases[i].r1 || matchLen2 != testCases[i].r2) {
1030 log_err("s1=%s, s2=%s / match len1=%d, len2=%d / expected len1=%d, len2=%d\n", /* s2 is a C string and must be printed with %s; the former "%2" was not a valid conversion and shifted every following argument */
1031 testCases[i].s1, testCases[i].s2,
1032 matchLen1, matchLen2,
1033 testCases[i].r1, testCases[i].r2);
1038 void addCaseTest(TestNode** root);
1040 void addCaseTest(TestNode** root) {
1041 /* cstrcase.c functions, declared in cucdtst.h */
1042 addTest(root, &TestCaseLower, "tsutil/cstrcase/TestCaseLower");
1043 addTest(root, &TestCaseUpper, "tsutil/cstrcase/TestCaseUpper");
1044 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO
1045 addTest(root, &TestCaseTitle, "tsutil/cstrcase/TestCaseTitle");
1046 addTest(root, &TestCaseDutchTitle, "tsutil/cstrcase/TestCaseDutchTitle");
1048 addTest(root, &TestCaseFolding, "tsutil/cstrcase/TestCaseFolding");
1049 addTest(root, &TestCaseCompare, "tsutil/cstrcase/TestCaseCompare");
1050 addTest(root, &TestUCaseMap, "tsutil/cstrcase/TestUCaseMap");
1051 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO
1052 addTest(root, &TestUCaseMapToTitle, "tsutil/cstrcase/TestUCaseMapToTitle");
1054 addTest(root, &TestUCaseInsensitivePrefixMatch, "tsutil/cstrcase/TestUCaseInsensitivePrefixMatch");