1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
12 * Modification History:
13 * Date Name Description
14 * Madhu Katragadda Ported for C API
15 * 02/19/01 synwee Modified test case for new collation iterator
16 *********************************************************************************/
18 * Collation Iterator tests.
19 * (Let me reiterate my position...)
22 #include "unicode/utypes.h"
24 #if !UCONFIG_NO_COLLATION
26 #include "unicode/ucol.h"
27 #include "unicode/ucoleitr.h"
28 #include "unicode/uloc.h"
29 #include "unicode/uchar.h"
30 #include "unicode/ustring.h"
31 #include "unicode/putil.h"
/* Declaration of a helper that is not defined in this file (presumably it
   lives in the collation library - confirm). Nothing in the visible part of
   this file calls it. */
43 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
/*
 * Registers the collation-iterator tests of this file with the test tree
 * rooted at `root`. Every test is added under the "tscoll/citertst/" path,
 * followed by the name of the test function.
 */
45 void addCollIterTest(TestNode** root)
47 addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
48 addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
49 addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
50 addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
51 addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
52 addTest(root, &TestNormalizedUnicodeChar,
53 "tscoll/citertst/TestNormalizedUnicodeChar");
54 addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
55 addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
56 addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
57 addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
58 addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
59 addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
62 /* The locales we support */
/* Exactly three entries: TestBug672 and TestBug672Normalize index this table
   with a loop counter that runs from 0 to 2. */
64 static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"};
/*
 * For each of the three entries of LOCALES: opens a collator, an iterator over
 * the pattern "resume" and an iterator over a longer text, then records three
 * offsets of the text iterator in result[i][0..2]:
 *   [0] after setting the offset to the pattern length,
 *   [1] after one ucol_previous() step,
 *   [2] after setting the offset to the current offset + 1.
 * Finally the three rows are compared; all locales must report the same
 * offsets.
 */
66 static void TestBug672() {
67 UErrorCode status = U_ZERO_ERROR;
73 u_uastrcpy(pattern, "resume");
74 u_uastrcpy(text, "Time to resume updating my resume.");
76 for (i = 0; i < 3; ++ i) {
77 UCollator *coll = ucol_open(LOCALES[i], &status);
78 UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
80 UCollationElements *titer = ucol_openElements(coll, text, -1,
82 if (U_FAILURE(status)) {
83 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
88 log_verbose("locale tested %s\n", LOCALES[i]);
90 while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
93 if (U_FAILURE(status)) {
94 log_err("ERROR: reversing collation iterator :%s\n",
100 ucol_setOffset(titer, u_strlen(pattern), &status);
101 if (U_FAILURE(status)) {
102 log_err("ERROR: setting offset in collator :%s\n",
103 myErrorName(status));
106 result[i][0] = ucol_getOffset(titer);
107 log_verbose("Text iterator set to offset %d\n", result[i][0]);
110 ucol_previous(titer, &status);
111 result[i][1] = ucol_getOffset(titer);
112 log_verbose("Current offset %d after previous\n", result[i][1]);
114 /* Add one to index */
115 log_verbose("Adding one to current offset...\n");
116 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
117 if (U_FAILURE(status)) {
118 log_err("ERROR: setting offset in collator :%s\n",
119 myErrorName(status));
122 result[i][2] = ucol_getOffset(titer);
123 log_verbose("Current offset in text = %d\n", result[i][2]);
124 ucol_closeElements(pitr);
125 ucol_closeElements(titer);
/* The comparison length is a byte count, so it has to cover all three stored
   offsets of a row; a literal 3 would compare only the first three bytes. */
129 if (uprv_memcmp(result[0], result[1], 3 * sizeof(result[0][0])) != 0 ||
130 uprv_memcmp(result[1], result[2], 3 * sizeof(result[0][0])) != 0) {
131 log_err("ERROR: Different locales have different offsets at the same character\n");
137 /* Running this test with normalization enabled showed up a bug in the incremental
138 normalization code. */
/*
 * Same scenario as TestBug672, but UCOL_NORMALIZATION_MODE is switched on for
 * each collator before the iterators are opened. For every locale the three
 * text-iterator offsets are stored in result[i][0..2] and the rows are
 * compared at the end.
 */
139 static void TestBug672Normalize() {
140 UErrorCode status = U_ZERO_ERROR;
146 u_uastrcpy(pattern, "resume");
147 u_uastrcpy(text, "Time to resume updating my resume.");
149 for (i = 0; i < 3; ++ i) {
150 UCollator *coll = ucol_open(LOCALES[i], &status);
151 UCollationElements *pitr = NULL;
152 UCollationElements *titer = NULL;
154 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
156 pitr = ucol_openElements(coll, pattern, -1, &status);
157 titer = ucol_openElements(coll, text, -1, &status);
158 if (U_FAILURE(status)) {
159 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
160 myErrorName(status));
164 log_verbose("locale tested %s\n", LOCALES[i]);
166 while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
169 if (U_FAILURE(status)) {
170 log_err("ERROR: reversing collation iterator :%s\n",
171 myErrorName(status));
176 ucol_setOffset(titer, u_strlen(pattern), &status);
177 if (U_FAILURE(status)) {
178 log_err("ERROR: setting offset in collator :%s\n",
179 myErrorName(status));
182 result[i][0] = ucol_getOffset(titer);
183 log_verbose("Text iterator set to offset %d\n", result[i][0]);
186 ucol_previous(titer, &status);
187 result[i][1] = ucol_getOffset(titer);
188 log_verbose("Current offset %d after previous\n", result[i][1]);
190 /* Add one to index */
191 log_verbose("Adding one to current offset...\n");
192 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
193 if (U_FAILURE(status)) {
194 log_err("ERROR: setting offset in collator :%s\n",
195 myErrorName(status));
198 result[i][2] = ucol_getOffset(titer);
199 log_verbose("Current offset in text = %d\n", result[i][2]);
200 ucol_closeElements(pitr);
201 ucol_closeElements(titer);
/* The comparison length is a byte count, so it has to cover all three stored
   offsets of a row; a literal 3 would compare only the first three bytes. */
205 if (uprv_memcmp(result[0], result[1], 3 * sizeof(result[0][0])) != 0 ||
206 uprv_memcmp(result[1], result[2], 3 * sizeof(result[0][0])) != 0) {
207 log_err("ERROR: Different locales have different offsets at the same character\n");
215 * Test for CollationElementIterator previous and next for the whole set of
216 * unicode characters.
218 static void TestUnicodeChar()
/*
 * Opens an "en_US" collator and walks the code points from 1 up to (not
 * including) 0xFFFE. Code points for which u_isdefined() is true are appended
 * to a test string through the `test` cursor; the inner loop runs until the
 * code point reaches a multiple of 0xFF. Each resulting `source` string is
 * iterated twice: once opened with an explicit length (u_strlen) and once
 * opened with length -1, the "null termination test".
 * NOTE(review): no release of `en_us` is visible here - confirm it is closed.
 */
222 UCollationElements *iter;
223 UErrorCode status = U_ZERO_ERROR;
227 en_us = ucol_open("en_US", &status);
228 if (U_FAILURE(status)){
229 log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
230 myErrorName(status));
234 for (codepoint = 1; codepoint < 0xFFFE;)
238 while (codepoint % 0xFF != 0)
240 if (u_isdefined(codepoint))
241 *(test ++) = codepoint;
245 if (u_isdefined(codepoint))
246 *(test ++) = codepoint;
248 if (codepoint != 0xFFFF)
252 iter=ucol_openElements(en_us, source, u_strlen(source), &status);
253 if(U_FAILURE(status)){
254 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
255 myErrorName(status));
259 /* A basic test to see if it's working at all */
260 log_verbose("codepoint testing %x\n", codepoint);
262 ucol_closeElements(iter);
264 /* null termination test */
265 iter=ucol_openElements(en_us, source, -1, &status);
266 if(U_FAILURE(status)){
267 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
268 myErrorName(status));
272 /* A basic test to see if it's working at all */
274 ucol_closeElements(iter);
281 * Test for CollationElementIterator previous and next for the whole set of
282 * unicode characters with normalization on.
284 static void TestNormalizedUnicodeChar()
/*
 * Same walk as TestUnicodeChar (code points 1 up to, not including, 0xFFFE,
 * keeping those for which u_isdefined() is true), but with a "th_TH"
 * collator. Each `source` string is iterated once with an explicit length
 * and once with length -1.
 * NOTE(review): no release of `th_th` is visible here - confirm it is closed.
 */
288 UCollationElements *iter;
289 UErrorCode status = U_ZERO_ERROR;
293 /* thai should have normalization on */
294 th_th = ucol_open("th_TH", &status);
295 if (U_FAILURE(status)){
296 log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
297 myErrorName(status));
301 for (codepoint = 1; codepoint < 0xFFFE;)
305 while (codepoint % 0xFF != 0)
307 if (u_isdefined(codepoint))
308 *(test ++) = codepoint;
312 if (u_isdefined(codepoint))
313 *(test ++) = codepoint;
315 if (codepoint != 0xFFFF)
319 iter=ucol_openElements(th_th, source, u_strlen(source), &status);
320 if(U_FAILURE(status)){
321 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
322 myErrorName(status));
328 ucol_closeElements(iter);
/* Second pass: same text, opened with length -1 instead of u_strlen(). */
330 iter=ucol_openElements(th_th, source, -1, &status);
331 if(U_FAILURE(status)){
332 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
333 myErrorName(status));
339 ucol_closeElements(iter);
346 * Test the incremental normalization
348 static void TestNormalization()
/*
 * Builds a rule-based collator from the escaped rule string below (unescaped
 * into `rule`, capacity argument 50) with normalization switched on, then
 * opens an element iterator over unescaped entries of `testdata`:
 * entries 0 and 1 individually, then testdata[count], each once with the
 * length returned by u_unescape() and once with length -1.
 */
350 UErrorCode status = U_ZERO_ERROR;
352 "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
355 int rulelen = u_unescape(str, rule, 50);
357 const char *testdata[] =
358 {"\\u1ED9", "o\\u0323\\u0302",
359 "\\u0300\\u0315", "\\u0315\\u0300",
360 "A\\u0300\\u0315B", "A\\u0315\\u0300B",
361 "A\\u0316\\u0315B", "A\\u0315\\u0316B",
362 "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
363 "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
364 "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
367 UCollationElements *iter;
/* Normalization is requested twice: as the third argument of ucol_openRules
   and again through ucol_setAttribute. */
369 coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
370 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
371 if (U_FAILURE(status)){
372 log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
373 myErrorName(status));
/* Every test string is unescaped into `source` with a capacity argument
   of 10. */
377 srclen = u_unescape(testdata[0], source, 10);
378 iter = ucol_openElements(coll, source, srclen, &status);
380 ucol_closeElements(iter);
382 srclen = u_unescape(testdata[1], source, 10);
383 iter = ucol_openElements(coll, source, srclen, &status);
385 ucol_closeElements(iter);
388 srclen = u_unescape(testdata[count], source, 10);
389 iter = ucol_openElements(coll, source, srclen, &status);
391 if (U_FAILURE(status)){
392 log_err("ERROR: in creation of collator element iterator\n %s\n",
393 myErrorName(status));
397 ucol_closeElements(iter);
/* Same text again, opened with length -1 instead of srclen. */
399 iter = ucol_openElements(coll, source, -1, &status);
401 if (U_FAILURE(status)){
402 log_err("ERROR: in creation of collator element iterator\n %s\n",
403 myErrorName(status));
407 ucol_closeElements(iter);
414 * Test for CollationElementIterator.previous()
416 * @bug 4108758 - Make sure it works with contracting characters
419 static void TestPrevious()
/*
 * Exercises element iterators over several collators in turn:
 *   - the "en_US" collator over test1,
 *   - c1: rules containing the contractions ch/cH/Ch/CH,
 *   - c2: rules containing the expansion c/abd,
 *   - c3: rules containing both an expansion (c/aba) and a contraction (ch),
 *   - the "th_TH" and "ja_JP" collators.
 * Each section opens an iterator, reports a failure to open it, and closes
 * the iterator again.
 * NOTE(review): `coll` is assigned three times (en_US, th_TH, ja_JP) and
 * `source` is allocated with malloc without a visible NULL check; no matching
 * ucol_close()/free() calls are visible here - confirm they exist.
 */
421 UCollator *coll=NULL;
424 UCollator *c1, *c2, *c3;
425 UCollationElements *iter;
426 UErrorCode status = U_ZERO_ERROR;
430 u_uastrcpy(test1, "What subset of all possible test cases?");
431 u_uastrcpy(test2, "has the highest probability of detecting");
432 coll = ucol_open("en_US", &status);
434 iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
435 log_verbose("English locale testing back and forth\n");
436 if(U_FAILURE(status)){
437 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
438 myErrorName(status));
442 /* A basic test to see if it's working at all */
444 ucol_closeElements(iter);
447 /* Test with a contracting character sequence */
448 u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
449 c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
451 log_verbose("Contraction rule testing back and forth with no normalization\n");
453 if (c1 == NULL || U_FAILURE(status))
455 log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
456 myErrorName(status));
459 source=(UChar*)malloc(sizeof(UChar) * 20);
460 u_uastrcpy(source, "abchdcba");
461 iter=ucol_openElements(c1, source, u_strlen(source), &status);
462 if(U_FAILURE(status)){
463 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
464 myErrorName(status));
468 ucol_closeElements(iter);
471 /* Test with an expanding character sequence */
472 u_uastrcpy(rule, "&a < b < c/abd < d");
473 c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
474 log_verbose("Expansion rule testing back and forth with no normalization\n");
475 if (c2 == NULL || U_FAILURE(status))
/* This collator uses an expansion rule, so the diagnostic names it as such. */
477 log_err("Couldn't create a RuleBasedCollator with an expanding sequence.\n %s\n",
478 myErrorName(status));
481 u_uastrcpy(source, "abcd");
482 iter=ucol_openElements(c2, source, u_strlen(source), &status);
483 if(U_FAILURE(status)){
484 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
485 myErrorName(status));
489 ucol_closeElements(iter);
492 u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
493 c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,NULL, &status);
494 log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
496 if (c3 == NULL || U_FAILURE(status))
/* This collator mixes an expansion and a contraction; say so on failure. */
498 log_err("Couldn't create a RuleBasedCollator with an expansion/contraction sequence.\n %s\n",
499 myErrorName(status));
502 u_uastrcpy(source, "abcdbchdc");
503 iter=ucol_openElements(c3, source, u_strlen(source), &status);
504 if(U_FAILURE(status)){
505 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
506 myErrorName(status));
510 ucol_closeElements(iter);
522 coll = ucol_open("th_TH", &status);
523 log_verbose("Thai locale testing back and forth with normalization\n");
524 iter=ucol_openElements(coll, source, u_strlen(source), &status);
525 if(U_FAILURE(status)){
526 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
527 myErrorName(status));
531 ucol_closeElements(iter);
541 coll = ucol_open("ja_JP", &status);
542 log_verbose("Japanese locale testing back and forth with normalization\n");
543 iter=ucol_openElements(coll, source, u_strlen(source), &status);
544 if(U_FAILURE(status)){
545 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
546 myErrorName(status));
550 ucol_closeElements(iter);
557 * Test for getOffset() and setOffset()
559 static void TestOffset()
/*
 * Checks ucol_getOffset()/ucol_setOffset() on an "en_US" iterator:
 *   - boundaries: at offset 0 a backward step must yield UCOL_NULLORDER, at
 *     offset u_strlen(test1) a forward step must yield UCOL_NULLORDER;
 *   - after running through the whole iterator (getOrders) the offset must
 *     equal the text length;
 *   - after setOffset(0) the iterator must produce the same elements as a
 *     freshly opened ("pristine") iterator;
 *   - with normalization switched on, the offsets reported while stepping
 *     forwards and backwards over a 4-unit text are checked against fixed
 *     values.
 */
561 UErrorCode status= U_ZERO_ERROR;
562 UCollator *en_us=NULL;
563 UCollationElements *iter, *pristine;
565 OrderAndOffset *orders;
566 int32_t orderLength=0;
571 u_uastrcpy(test1, "What subset of all possible test cases?");
572 u_uastrcpy(test2, "has the highest probability of detecting");
573 en_us = ucol_open("en_US", &status);
574 log_verbose("Testing getOffset and setOffset for collations\n");
575 iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
576 if(U_FAILURE(status)){
577 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
578 myErrorName(status));
583 /* testing boundaries */
584 ucol_setOffset(iter, 0, &status);
585 if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
586 log_err("Error: After setting offset to 0, we should be at the end "
587 "of the backwards iteration\n");
589 ucol_setOffset(iter, u_strlen(test1), &status);
/* This check steps forwards (ucol_next), so the message says so. */
590 if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
591 log_err("Error: After setting offset to end of the string, we should "
592 "be at the end of the forwards iteration\n");
595 /* Run all the way through the iterator, then get the offset */
597 orders = getOrders(iter, &orderLength);
599 offset = ucol_getOffset(iter);
601 if (offset != u_strlen(test1))
603 log_err("offset at end != length %d vs %d\n", offset,
607 /* Now set the offset back to the beginning and see if it works */
608 pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
609 if(U_FAILURE(status)){
610 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
611 myErrorName(status));
615 status = U_ZERO_ERROR;
617 ucol_setOffset(iter, 0, &status);
618 if (U_FAILURE(status))
620 log_err("setOffset failed. %s\n", myErrorName(status));
624 assertEqual(iter, pristine);
627 ucol_closeElements(pristine);
628 ucol_closeElements(iter);
631 /* testing offsets in normalization buffer */
637 ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
638 iter = ucol_openElements(en_us, test1, 4, &status);
639 if(U_FAILURE(status)){
640 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
641 myErrorName(status));
/* Forward pass: the offset is checked against 1, 4 and 3. */
647 while (ucol_next(iter, &status) != UCOL_NULLORDER &&
651 if (ucol_getOffset(iter) != 1) {
652 log_err("ERROR: Offset of iteration should be 1\n");
656 if (ucol_getOffset(iter) != 4) {
657 log_err("ERROR: Offset of iteration should be 4\n");
661 if (ucol_getOffset(iter) != 3) {
662 log_err("ERROR: Offset of iteration should be 3\n");
/* Backward pass: the offset is checked against 3, 1 and 0. */
670 while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
675 if (ucol_getOffset(iter) != 3) {
676 log_err("ERROR: Offset of iteration should be 3\n");
680 if (ucol_getOffset(iter) != 1) {
681 log_err("ERROR: Offset of iteration should be 1\n");
685 if (ucol_getOffset(iter) != 0) {
686 log_err("ERROR: Offset of iteration should be 0\n");
692 if(U_FAILURE(status)){
693 log_err("ERROR: in iterating collation elements %s\n",
694 myErrorName(status));
697 ucol_closeElements(iter);
/*
 * Checks ucol_setText(): opens one iterator over test1 and one over test2,
 * advances the second one a few elements, re-targets it at test1 with
 * ucol_setText() and then expects it to produce the same elements as the
 * first iterator (assertEqual). Finally verifies that passing a NULL text
 * with a non-zero length yields U_ILLEGAL_ARGUMENT_ERROR.
 */
704 static void TestSetText()
707 UErrorCode status = U_ZERO_ERROR;
708 UCollator *en_us=NULL;
709 UCollationElements *iter1, *iter2;
713 u_uastrcpy(test1, "What subset of all possible test cases?");
714 u_uastrcpy(test2, "has the highest probability of detecting");
715 en_us = ucol_open("en_US", &status);
716 log_verbose("testing setText for Collation elements\n");
717 iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
718 if(U_FAILURE(status)){
719 log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
720 myErrorName(status));
724 iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
725 if(U_FAILURE(status)){
726 log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
727 myErrorName(status));
732 /* Run through the second iterator just to exercise it */
733 c = ucol_next(iter2, &status);
/* Stops once i reaches 10 (pre-incremented) or the iterator is exhausted. */
736 while ( ++i < 10 && (c != UCOL_NULLORDER))
738 if (U_FAILURE(status))
/* On an iteration error both iterators are closed before leaving. */
740 log_err("iter2->next() returned an error. %s\n", myErrorName(status));
741 ucol_closeElements(iter2);
742 ucol_closeElements(iter1);
747 c = ucol_next(iter2, &status);
750 /* Now set it to point to the same string as the first iterator */
751 ucol_setText(iter2, test1, u_strlen(test1), &status);
752 if (U_FAILURE(status))
754 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
758 assertEqual(iter1, iter2);
761 /* Now set it to point to a null string with fake length*/
762 ucol_setText(iter2, NULL, 2, &status);
763 if (status != U_ILLEGAL_ARGUMENT_ERROR)
765 log_err("call to iter2->setText(null, 2) should yield an illegal-argument-error - %s\n",
766 myErrorName(status));
769 ucol_closeElements(iter2);
770 ucol_closeElements(iter1);
775 * Test for getMaxExpansion()
777 static void TestMaxExpansion()
/*
 * Checks ucol_getMaxExpansion() against a rule-based collator built from
 * "&a < ab < c/aba < d < z < ch":
 *   - while ch < 0xFFFF, the text is set to the single unit `ch`, the
 *     elements are walked backwards, and the reported maximum must not be
 *     smaller than the number of elements counted;
 *   - fixed expectations for individual characters (exactly 1, exactly 3,
 *     exactly 1, at most 3) and for the supplementary code point 0xEFFFD
 *     encoded as two units (exactly 2);
 *   - a second collator ("special jamo" rule) for which the maximum must
 *     be 6.
 * The failure messages name the same relation as the condition that
 * triggers them.
 */
779 UErrorCode status = U_ZERO_ERROR;
780 UCollator *coll ;/*= ucol_open("en_US", &status);*/
782 UChar32 unassigned = 0xEFFFD;
783 UChar supplementary[2];
784 uint32_t stringOffset = 0;
785 UBool isError = FALSE;
787 UCollationElements *iter ;/*= ucol_openElements(coll, &ch, 1, &status);*/
788 uint32_t temporder = 0;
791 u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
792 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
793 UCOL_DEFAULT_STRENGTH,NULL, &status);
794 if(U_SUCCESS(status) && coll) {
795 iter = ucol_openElements(coll, &ch, 1, &status);
797 while (ch < 0xFFFF && U_SUCCESS(status)) {
804 ucol_setText(iter, &ch, 1, &status);
805 order = ucol_previous(iter, &status);
807 /* thai management */
809 order = ucol_previous(iter, &status);
811 while (U_SUCCESS(status) &&
812 ucol_previous(iter, &status) != UCOL_NULLORDER) {
816 size = ucol_getMaxExpansion(iter, order);
817 if (U_FAILURE(status) || size < count) {
818 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
823 /* testing for exact max expansion */
828 ucol_setText(iter, &ch, 1, &status);
829 order = ucol_previous(iter, &status);
830 size = ucol_getMaxExpansion(iter, order);
831 if (U_FAILURE(status) || size != 1) {
832 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
839 ucol_setText(iter, &ch, 1, &status);
840 temporder = ucol_previous(iter, &status);
842 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
843 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
848 ucol_setText(iter, &ch, 1, &status);
849 temporder = ucol_previous(iter, &status);
851 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
852 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
/* Encode the supplementary code point into the two-unit buffer. */
856 U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
857 (void)isError; /* Suppress set but not used warning. */
858 ucol_setText(iter, supplementary, 2, &status);
859 sorder = ucol_previous(iter, &status);
861 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
862 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
869 ucol_setText(iter, &ch, 1, &status);
870 temporder = ucol_previous(iter, &status);
871 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
872 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
876 ucol_closeElements(iter);
879 /* testing special jamo &a<\u1160 */
891 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
892 UCOL_DEFAULT_STRENGTH,NULL, &status);
893 iter = ucol_openElements(coll, &ch, 1, &status);
895 temporder = ucol_previous(iter, &status);
896 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
897 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
901 ucol_closeElements(iter);
904 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
910 static void assertEqual(UCollationElements *i1, UCollationElements *i2)
914 UErrorCode status = U_ZERO_ERROR;
918 c1 = ucol_next(i1, &status);
919 c2 = ucol_next(i2, &status);
923 log_err("Error in iteration %d assetEqual between\n %d and %d, they are not equal\n", count, c1, c2);
929 while (c1 != UCOL_NULLORDER);
933 * Testing iterators with extremely small buffers
935 static void TestSmallBuffer()
/*
 * Fills `teststr` with 500 code units alternating 0x300 (even index) and
 * 0x31A (odd index), opens a "th_TH" iterator over it and a second iterator
 * over the two-unit string {0x300, 0x31A}, collects the elements of both
 * with getOrders() and compares the long sequence against the two reference
 * elements. Afterwards the long iterator is reset, advanced twice and closed.
 */
937 UErrorCode status = U_ZERO_ERROR;
939 UCollationElements *testiter,
942 OrderAndOffset *testorders,
946 UChar str[] = {0x300, 0x31A, 0};
948 creating a long string of decomposable characters,
949 since by default the writable buffer is of size 256
951 while (count < 500) {
952 if ((count & 1) == 0) {
953 teststr[count ++] = 0x300;
956 teststr[count ++] = 0x31A;
960 coll = ucol_open("th_TH", &status);
961 if(U_SUCCESS(status) && coll) {
962 testiter = ucol_openElements(coll, teststr, 500, &status);
963 iter = ucol_openElements(coll, str, 2, &status);
965 orders = getOrders(iter, &count);
967 log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
971 this will rearrange the string data to 250 characters of 0x300 first then
972 250 characters of 0x031A
974 testorders = getOrders(testiter, &count);
977 log_err("Error decomposition does not give the right sized collation elements\n");
981 /* UCA collation element for 0x0F76 */
/* NOTE(review): both operands of the || below pre-decrement `count`. When the
   first operand's comparison is evaluated and turns out false, the second
   operand is evaluated with the already decremented value, so `count` can be
   decremented twice in one evaluation (e.g. starting from 251) - confirm this
   is intended. */
982 if ((count > 250 && testorders[-- count].order != orders[1].order) ||
983 (count <= 250 && testorders[-- count].order != orders[0].order)) {
984 log_err("Error decomposition does not give the right collation element at %d count\n", count);
992 ucol_reset(testiter);
994 /* ensures closing of elements done properly to clear writable buffer */
995 ucol_next(testiter, &status);
996 ucol_next(testiter, &status);
997 ucol_closeElements(testiter);
998 ucol_closeElements(iter);
1001 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1006 * Testing the discontiguous contractions
/*
 * Builds a rule-based collator from `rulestr` and, for every index `count`
 * below `size`, compares two element streams:
 *   - `iter` runs over the unescaped src[count] as a whole;
 *   - `resultiter` runs over the unescaped tgt[count] one space-separated
 *     piece at a time (pieces are located with u_strchr(s, 0x20)).
 * Every element produced from the tgt pieces must equal the next element
 * produced from the src string; otherwise a mismatch is logged.
 */
1008 static void TestDiscontiguos() {
1009 const char *rulestr =
1010 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1012 int rulelen = u_unescape(rulestr, rule, 50);
/* Input strings; src[k] is paired with tgt[k] below. */
1013 const char *src[] = {
1014 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1015 /* base character blocked */
1016 "XD\\u0300", "XD\\u0300\\u0315",
1017 /* non blocking combining character */
1018 "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1019 /* blocking combining character */
1020 "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1021 /* contraction prefix */
1022 "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1023 "X\\u0300\\u031A\\u0315",
1024 /* ends not with a contraction character */
1025 "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1026 "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
/* Expected segmentation of each src entry; spaces separate the pieces that
   are fed to `resultiter` one after another. */
1028 const char *tgt[] = {
1029 /* non blocking combining character */
1030 "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1031 /* base character blocked */
1032 "X D \\u0300", "X D \\u0300\\u0315",
1033 /* non blocking combining character */
1034 "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1035 /* blocking combining character */
1036 "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1037 /* contraction prefix */
1038 "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1039 "X\\u0300 \\u031A \\u0315",
1040 /* ends not with a contraction character */
1041 "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1042 "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1046 UErrorCode status = U_ZERO_ERROR;
1048 UCollationElements *iter;
1049 UCollationElements *resultiter;
/* Both iterators are first opened over the first unit of `rule`; their text
   is replaced with ucol_setText() inside the loop. */
1051 coll = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1052 iter = ucol_openElements(coll, rule, 1, &status);
1053 resultiter = ucol_openElements(coll, rule, 1, &status);
1055 if (U_FAILURE(status)) {
1056 log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
1060 while (count < size) {
1063 int strLen = u_unescape(src[count], str, 20);
1066 ucol_setText(iter, str, strLen, &status);
1067 if (U_FAILURE(status)) {
1068 log_err("Error opening collation iterator\n");
1072 u_unescape(tgt[count], tstr, 20);
1075 log_verbose("count %d\n", count);
/* `e` marks the next space in the expected string, i.e. the end of the
   current piece that starts at `s`. */
1079 UChar *e = u_strchr(s, 0x20);
1083 ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1084 ce = ucol_next(resultiter, &status);
1085 if (U_FAILURE(status)) {
1086 log_err("Error manipulating collation iterator\n");
1089 while (ce != UCOL_NULLORDER) {
1090 if (ce != (uint32_t)ucol_next(iter, &status) ||
1091 U_FAILURE(status)) {
1092 log_err("Discontiguos contraction test mismatch\n");
1095 ce = ucol_next(resultiter, &status);
1096 if (U_FAILURE(status)) {
1097 log_err("Error getting next collation element\n");
1110 ucol_closeElements(resultiter);
1111 ucol_closeElements(iter);
1116 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
1117 * normalization on AND jamo tailoring, among other things.
1119 * Note: This test is sensitive to changes of the root collator,
1120 * for example whether the ae-ligature maps to three CEs (as in the DUCET)
1121 * or to two CEs (as in the CLDR 24 FractionalUCA.txt).
1122 * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
1123 * For example, the DUCET's artificial secondary CE in the ae-ligature
1124 * may map to two 32-bit iterator CEs (as it did until ICU 52).
1126 static const UChar tsceText[] = { /* Nothing in here should be ignorable */
1127 0x0020, 0xAC00, /* simple LV Hangul */
1128 0x0020, 0xAC01, /* simple LVT Hangul */
1129 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */
1130 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */
1131 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
1132 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
1133 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
1134 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
1135 0x0020, 0x00E6, /* small letter ae, expands */
1136 0x0020, 0x1E4D, /* small letter o with tilde and acute, decomposes */
1139 enum { kLen_tsceText = UPRV_LENGTHOF(tsceText) };
/* Expected ucol_getOffset() values for the "root" entry of tsceItems;
   TestSearchCollatorElements compares them one by one against the offsets it
   observes while iterating over tsceText. */
1141 static const int32_t rootStandardOffsets[] = {
1150 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
1155 enum { kLen_rootStandardOffsets = UPRV_LENGTHOF(rootStandardOffsets) };
/* Expected ucol_getOffset() values for the "root@collation=search" entry of
   tsceItems. */
1157 static const int32_t rootSearchOffsets[] = {
1165 20, 21,22,22,23,23,23,24,
1166 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
1171 enum { kLen_rootSearchOffsets = UPRV_LENGTHOF(rootSearchOffsets) };
1174 const char * locale;
1175 const int32_t * offsets;
/* One row per tested collator: locale ID, expected offsets table, and the
   length of that table. TestSearchCollatorElements walks the rows until it
   reaches one whose locale is NULL. */
1179 static const TSCEItem tsceItems[] = {
1180 { "root", rootStandardOffsets, kLen_rootStandardOffsets },
1181 { "root@collation=search", rootSearchOffsets, kLen_rootSearchOffsets },
/*
 * For every row of tsceItems (up to the row whose locale is NULL): opens the
 * collator and an element iterator over tsceText, then
 *   1. iterates forwards, comparing the offset read before each ucol_next()
 *      against the row's expected offsets, and reports element 0, offset
 *      mismatches, and too many or too few elements;
 *   2. repositions the iterator at the end of the text and iterates
 *      backwards with ucol_previous(), performing the same checks against
 *      the expected offsets taken from the end of the table.
 * A collator that cannot be opened is reported with log_data_err; an
 * iterator that cannot be opened is reported with log_err.
 */
1185 static void TestSearchCollatorElements(void)
1187 const TSCEItem * tsceItemPtr;
1188 for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
1189 UErrorCode status = U_ZERO_ERROR;
1190 UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
1191 if ( U_SUCCESS(status) ) {
1192 UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
1193 if ( U_SUCCESS(status) ) {
1194 int32_t offset, element;
1195 const int32_t * nextOffsetPtr;
1196 const int32_t * limitOffsetPtr;
1198 nextOffsetPtr = tsceItemPtr->offsets;
1199 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
1201 offset = ucol_getOffset(uce);
1202 element = ucol_next(uce, &status);
1203 log_verbose("(%s) offset=%2d ce=%08x\n", tsceItemPtr->locale, offset, element);
1204 if ( element == 0 ) {
1205 log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
1207 if ( nextOffsetPtr < limitOffsetPtr ) {
1208 if (offset != *nextOffsetPtr) {
1209 log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
1210 tsceItemPtr->locale, *nextOffsetPtr, offset );
/* After the first mismatch, skip the remaining expected offsets. */
1211 nextOffsetPtr = limitOffsetPtr;
1216 log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
1218 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
1219 if ( nextOffsetPtr < limitOffsetPtr ) {
1220 log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
/* Clear any error left by the forward pass BEFORE repositioning: ICU calls do
   nothing when handed a failure status, so resetting afterwards would leave
   the iterator where the forward pass stopped and would also discard a
   failure of ucol_setOffset itself. */
1223 status = U_ZERO_ERROR;
1224 ucol_setOffset(uce, kLen_tsceText, &status);
1225 nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
1226 limitOffsetPtr = tsceItemPtr->offsets;
1228 offset = ucol_getOffset(uce);
1229 element = ucol_previous(uce, &status);
1230 if ( element == 0 ) {
1231 log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
1233 if ( nextOffsetPtr > limitOffsetPtr ) {
1235 if (offset != *nextOffsetPtr) {
1236 log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
1237 tsceItemPtr->locale, *nextOffsetPtr, offset );
1238 nextOffsetPtr = limitOffsetPtr;
1242 log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
1244 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
1245 if ( nextOffsetPtr > limitOffsetPtr ) {
1246 log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
1249 ucol_closeElements(uce);
1251 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
1255 log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
1260 #endif /* #if !UCONFIG_NO_COLLATION */