1 /********************************************************************
3 * Copyright (c) 1997-2013, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 //===============================================================================
12 // Created by: Helena Shih
14 // Modification History:
16 // Date Name Description
17 // 2/5/97 aliu Added streamIn and streamOut methods. Added
18 // constructor which reads RuleBasedCollator object from
19 // a binary file. Added writeToFile method which streams
20 // RuleBasedCollator out to a binary file. The streamIn
21 // and streamOut methods use istream and ostream objects
23 // 6/30/97 helena Added tests for CollationElementIterator::setText, getOffset
24 // setOffset and DecompositionIterator::getOffset, setOffset.
25 // DecompositionIterator is made public so add class scope
27 // 02/10/98 damiba Added test for compare(UnicodeString&, UnicodeString&, int32_t)
28 //===============================================================================
30 #include "unicode/utypes.h"
32 #if !UCONFIG_NO_COLLATION
34 #include "unicode/localpointer.h"
35 #include "unicode/coll.h"
36 #include "unicode/tblcoll.h"
37 #include "unicode/coleitr.h"
38 #include "unicode/sortkey.h"
40 #include "unicode/chariter.h"
41 #include "unicode/schriter.h"
42 #include "unicode/ustring.h"
43 #include "unicode/ucol.h"
// Expands to sizeof(array)/sizeof((array)[0]) cast to int32_t, i.e. the
// element count when `array` is a true array rather than a pointer.
49 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
// Assertion helper used throughout this test suite.
//   condition - the value the caller asserts.
//   message   - text appended to "ERROR : " and passed to errln().
// NOTE(review): presumably the report is emitted only when `condition` is
// false; the guarding test is not visible in these lines - confirm.
52 CollationAPITest::doAssert(UBool condition, const char *message)
55 errln(UnicodeString("ERROR : ") + message);
// Everything below is compiled only when U_USE_COLLATION_OBSOLETE_2_6 is defined.
59 #ifdef U_USE_COLLATION_OBSOLETE_2_6
61 * Test Collator::createInstance(... version...) for some locale. Called by TestProperty().
// Creates a collator for `locale`, reads its version, requests a second
// collator for that exact version and reports through `test` when the second
// creation fails or when the two versions differ byte-wise.
64 TestOpenVersion(IntlTest &test, const Locale &locale) {
65 UVersionInfo version1, version2;
66 Collator *collator1, *collator2;
69 errorCode=U_ZERO_ERROR;
70 collator1=Collator::createInstance(locale, errorCode);
71 if(U_SUCCESS(errorCode)) {
72 /* get the current version */
73 collator1->getVersion(version1);
76 /* try to get that same version again */
77 collator2=Collator::createInstance(locale, version1, errorCode);
78 if(U_SUCCESS(errorCode)) {
79 collator2->getVersion(version2);
// Versions are compared as raw bytes over the whole UVersionInfo.
80 if(0!=uprv_memcmp(version1, version2, sizeof(UVersionInfo))) {
81 test.errln("error: Collator::createInstance(\"%s\", (%s collator)->getVersion()) returns a different collator\n", locale.getName(), locale.getName());
// Reported when the versioned createInstance() call itself fails.
85 test.errln("error: Collator::createInstance(\"%s\", (%s collator)->getVersion()) fails: %s\n", locale.getName(), locale.getName(), u_errorName(errorCode));
91 // Collator Class Properties
92 // ctor, dtor, createInstance, compare, getStrength/setStrength
93 // getDecomposition/setDecomposition, getDisplayName
// Walks through the basic Collator API on several collators (English, da_DK,
// French, the made-up locale ab_CD, the default locale and fr_CA): version,
// the compare() overloads, strength get/set, display names, rules and clone.
95 CollationAPITest::TestProperty(/* char* par */)
97 UErrorCode success = U_ZERO_ERROR;
100 * Expected version of the English collator.
101 * Currently, the major/minor version numbers change when the builder code
103 * number 2 is from the tailoring data version and
104 * number 3 is the UCA version.
105 * This changes with every UCA version change, and the expected value
106 * needs to be adjusted.
107 * Same in cintltst/capitst.c.
109 UVersionInfo currVersionArray = {0x31, 0xC0, 0x05, 0x2A}; // from ICU 4.4/UCA 5.2
110 UVersionInfo versionArray;
112 logln("The property tests begin : ");
113 logln("Test ctors : ");
// English collator: used for the version, compare and strength checks below.
114 col = Collator::createInstance(Locale::getEnglish(), success);
115 if (U_FAILURE(success)){
116 errcheckln(success, "Default Collator creation failed. - %s", u_errorName(success));
// Keyword-value enumeration for English; the lines shown here check only the
// returned error code.
120 StringEnumeration* kwEnum = col->getKeywordValuesForLocale("", Locale::getEnglish(),true,success);
121 if (U_FAILURE(success)){
122 errcheckln(success, "Get Keyword Values for Locale failed. - %s", u_errorName(success));
127 col->getVersion(versionArray);
128 // Check for a version greater than some value rather than equality
129 // so that we need not update the expected version each time.
130 if (uprv_memcmp(versionArray, currVersionArray, 4)<0) {
131 errln("Testing Collator::getVersion() - unexpected result: %02x.%02x.%02x.%02x",
132 versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
134 logln("Collator::getVersion() result: %02x.%02x.%02x.%02x",
135 versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
// String comparisons with the English collator as created above.
138 doAssert((col->compare("ab", "abc") == Collator::LESS), "ab < abc comparison failed");
139 doAssert((col->compare("ab", "AB") == Collator::LESS), "ab < AB comparison failed");
// NOTE(review): the failure texts of the next two checks do not match the
// relation actually tested (first: blackbird GREATER than black-bird;
// second: black bird LESS than black-bird).
140 doAssert((col->compare("blackbird", "black-bird") == Collator::GREATER), "black-bird > blackbird comparison failed");
141 doAssert((col->compare("black bird", "black-bird") == Collator::LESS), "black bird > black-bird comparison failed");
142 doAssert((col->compare("Hello", "hello") == Collator::GREATER), "Hello > hello comparison failed");
143 doAssert((col->compare("","",success) == UCOL_EQUAL), "Comparison between empty strings failed");
// The same a-umlaut vs. sharp-s comparison is run through the UTF-8 and the
// UCharIterator entry points; `success` is cleared after each.
145 doAssert((col->compareUTF8("\x61\x62\xc3\xa4", "\x61\x62\xc3\x9f", success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UTF-8 comparison failed");
146 success = U_ZERO_ERROR;
148 UnicodeString abau=UNICODE_STRING_SIMPLE("\\x61\\x62\\xe4").unescape();
149 UnicodeString abss=UNICODE_STRING_SIMPLE("\\x61\\x62\\xdf").unescape();
150 UCharIterator abauIter, abssIter;
151 uiter_setReplaceable(&abauIter, &abau);
152 uiter_setReplaceable(&abssIter, &abss);
153 doAssert((col->compare(abauIter, abssIter, success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UCharIterator comparison failed");
154 success = U_ZERO_ERROR;
// compare() overload that takes a length as its third argument.
157 /*start of update [Bertrand A. D. 02/10/98]*/
158 doAssert((col->compare("ab", "abc", 2) == Collator::EQUAL), "ab = abc with length 2 comparison failed");
159 doAssert((col->compare("ab", "AB", 2) == Collator::LESS), "ab < AB with length 2 comparison failed");
160 doAssert((col->compare("ab", "Aa", 1) == Collator::LESS), "ab < Aa with length 1 comparison failed");
161 doAssert((col->compare("ab", "Aa", 2) == Collator::GREATER), "ab > Aa with length 2 comparison failed");
162 doAssert((col->compare("black-bird", "blackbird", 5) == Collator::EQUAL), "black-bird = blackbird with length of 5 comparison failed");
163 doAssert((col->compare("black bird", "black-bird", 10) == Collator::LESS), "black bird < black-bird with length 10 comparison failed");
164 doAssert((col->compare("Hello", "hello", 5) == Collator::GREATER), "Hello > hello with length 5 comparison failed");
165 /*end of update [Bertrand A. D. 02/10/98]*/
168 logln("Test ctors ends.");
169 logln("testing Collator::getStrength() method ...");
// The freshly created English collator is expected to report TERTIARY strength.
170 doAssert((col->getStrength() == Collator::TERTIARY), "collation object has the wrong strength");
171 doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
174 logln("testing Collator::setStrength() method ...");
// Lower the strength to SECONDARY and confirm getStrength() reflects it.
175 col->setStrength(Collator::SECONDARY);
176 doAssert((col->getStrength() != Collator::TERTIARY), "collation object's strength is secondary difference");
177 doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
178 doAssert((col->getStrength() == Collator::SECONDARY), "collation object has the wrong strength");
// Display-name checks compare `name` against fixed German and English strings.
182 logln("Get display name for the US English collation in German : ");
183 logln(Collator::getDisplayName(Locale::getUS(), Locale::getGerman(), name));
184 doAssert((name == UnicodeString("Englisch (Vereinigte Staaten)")), "getDisplayName failed");
186 logln("Get display name for the US English collation in English : ");
187 logln(Collator::getDisplayName(Locale::getUS(), Locale::getEnglish(), name));
188 doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed");
190 // weiv : this test is bogus if we're running on any machine that has different default locale than English.
191 // Therefore, it is banned!
192 logln("Get display name for the US English in default locale language : ");
193 logln(Collator::getDisplayName(Locale::US, name));
194 doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed if this is an English machine");
// NOTE(review): the check below requires a non-zero rule length, yet its
// failure text reads "does not have length 0" - confirm the intended wording.
197 RuleBasedCollator *rcol = (RuleBasedCollator *)Collator::createInstance("da_DK",
199 doAssert(rcol->getRules().length() != 0, "da_DK rules does not have length 0");
// French collator: strength is set to PRIMARY and then back to TERTIARY.
202 col = Collator::createInstance(Locale::getFrench(), success);
203 if (U_FAILURE(success))
205 errln("Creating French collation failed.");
209 col->setStrength(Collator::PRIMARY);
210 logln("testing Collator::getStrength() method again ...");
211 doAssert((col->getStrength() != Collator::TERTIARY), "collation object has the wrong strength");
212 doAssert((col->getStrength() == Collator::PRIMARY), "collation object's strength is not primary difference");
214 logln("testing French Collator::setStrength() method ...");
215 col->setStrength(Collator::TERTIARY);
216 doAssert((col->getStrength() == Collator::TERTIARY), "collation object's strength is not tertiary difference");
217 doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
218 doAssert((col->getStrength() != Collator::SECONDARY), "collation object's strength is secondary difference");
// Locale ab_CD: creation is still expected to succeed; the error text states
// that the default should be returned.
220 logln("Create junk collation: ");
221 Locale abcd("ab", "CD", "");
222 success = U_ZERO_ERROR;
224 junk = Collator::createInstance(abcd, success);
226 if (U_FAILURE(success))
228 errln("Junk collation creation failed, should at least return default.");
234 col = Collator::createInstance(success);
235 if (U_FAILURE(success))
237 errln("Creating default collator failed.");
// The ab_CD collator and the default collator are expected to expose
// identical rule strings.
242 doAssert(((RuleBasedCollator *)col)->getRules() == ((RuleBasedCollator *)junk)->getRules(),
243 "The default collation should be returned.");
244 Collator *frCol = Collator::createInstance(Locale::getCanadaFrench(), success);
245 if (U_FAILURE(success))
247 errln("Creating fr_CA collator failed.");
253 // If the default locale isn't French, the French and non-French collators
254 // should be different
255 if (frCol->getLocale(ULOC_ACTUAL_LOCALE, success) != Locale::getCanadaFrench()) {
256 doAssert((*frCol != *junk), "The junk is the same as the fr_CA collator.");
// A clone must compare equal to the collator it was cloned from.
258 Collator *aFrCol = frCol->clone();
259 doAssert((*frCol == *aFrCol), "The cloning of a fr_CA collator failed.");
260 logln("Collator property test ended.");
// Versioned createInstance() checks, compiled only when
// U_USE_COLLATION_OBSOLETE_2_6 is defined.
267 #ifdef U_USE_COLLATION_OBSOLETE_2_6
268 /* test Collator::createInstance(...version...) */
269 TestOpenVersion(*this, "");
270 TestOpenVersion(*this, "da");
271 TestOpenVersion(*this, "fr");
272 TestOpenVersion(*this, "ja");
274 /* try some bogus version */
// Overwrites bytes of the version read earlier; a successful creation with
// this altered version is reported as an error.
276 versionArray[1]=0x99;
277 versionArray[2]=0xc7;
278 versionArray[3]=0xfe;
279 col=Collator::createInstance(Locale(), versionArray, success);
280 if(U_SUCCESS(success)) {
281 errln("error: ucol_openVersion(bogus version) succeeded");
// Builds RuleBasedCollators from two rule strings, from locale aa_AA and from
// the default locale, then checks that their getRules() results differ, that a
// collator rebuilt from col2's rules reports the same rules, and that
// cloneRuleData() succeeds.
288 CollationAPITest::TestRuleBasedColl()
290 RuleBasedCollator *col1, *col2, *col3, *col4;
291 UErrorCode status = U_ZERO_ERROR;
// ruleset1 differs from ruleset2 by the extra "; ch, cH, Ch, CH" group.
293 UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
294 UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E");
296 col1 = new RuleBasedCollator(ruleset1, status);
297 if (U_FAILURE(status)) {
298 errcheckln(status, "RuleBased Collator creation failed. - %s", u_errorName(status));
302 logln("PASS: RuleBased Collator creation passed\n");
305 status = U_ZERO_ERROR;
306 col2 = new RuleBasedCollator(ruleset2, status);
307 if (U_FAILURE(status)) {
308 errln("RuleBased Collator creation failed.\n");
312 logln("PASS: RuleBased Collator creation passed\n");
// Locale aa_AA: the log/error texts describe this as the fallback case.
315 status = U_ZERO_ERROR;
316 Locale locale("aa", "AA");
317 col3 = (RuleBasedCollator *)Collator::createInstance(locale, status);
318 if (U_FAILURE(status)) {
// NOTE(review): the format string contains %s but no argument is passed.
319 errln("Fallback Collator creation failed.: %s\n");
323 logln("PASS: Fallback Collator creation passed\n");
// NOTE(review): col3 is assigned a second time here; whether the first
// instance is released beforehand is not visible in these lines - confirm.
327 status = U_ZERO_ERROR;
328 col3 = (RuleBasedCollator *)Collator::createInstance(status);
329 if (U_FAILURE(status)) {
// NOTE(review): same as above - %s without a matching argument.
330 errln("Default Collator creation failed.: %s\n");
334 logln("PASS: Default Collator creation passed\n");
337 UnicodeString rule1 = col1->getRules();
338 UnicodeString rule2 = col2->getRules();
339 UnicodeString rule3 = col3->getRules();
// NOTE(review): all three checks share one failure text, so a failure does
// not identify which pair of rule strings compared equal.
341 doAssert(rule1 != rule2, "Default collator getRules failed");
342 doAssert(rule2 != rule3, "Default collator getRules failed");
343 doAssert(rule1 != rule3, "Default collator getRules failed");
// Round trip: a collator built from rule2 must report rule2 again.
345 col4 = new RuleBasedCollator(rule2, status);
346 if (U_FAILURE(status)) {
347 errln("RuleBased Collator creation failed.\n");
351 UnicodeString rule4 = col4->getRules();
352 doAssert(rule2 == rule4, "Default collator getRules failed");
// The buffer returned by cloneRuleData() is released with uprv_free() below.
354 uint8_t *clonedrule4 = col4->cloneRuleData(length4, status);
355 if (U_FAILURE(status)) {
356 errln("Cloned rule data failed.\n");
360 // free(clonedrule4); BAD API!!!!
361 uprv_free(clonedrule4);
// Checks getRules() with UCOL_TAILORING_ONLY and UCOL_FULL_RULES on the
// English collator: the tailoring-only rule string must be empty.
371 CollationAPITest::TestRules()
373 RuleBasedCollator *coll;
374 UErrorCode status = U_ZERO_ERROR;
377 coll = (RuleBasedCollator *)Collator::createInstance(Locale::getEnglish(), status);
378 if (U_FAILURE(status)) {
379 errcheckln(status, "English Collator creation failed. - %s", u_errorName(status));
383 logln("PASS: RuleBased Collator creation passed\n");
386 coll->getRules(UCOL_TAILORING_ONLY, rules);
387 if (rules.length() != 0x00) {
388 errln("English tailored rules failed - length is 0x%x expected 0x%x", rules.length(), 0x00);
391 coll->getRules(UCOL_FULL_RULES, rules);
// NOTE(review): this condition only holds for a negative length, so the
// error below is presumably unreachable - confirm whether an emptiness
// check was intended.
392 if (rules.length() < 0) {
393 errln("English full rules failed");
// Checks the UCOL_NORMALIZATION_MODE attribute of the en_US, el_GR and vi_VN
// collators: UCOL_ON is expected for vi_VN and el_GR, UCOL_OFF for en_US.
399 CollationAPITest::TestDecomposition() {
400 UErrorCode status = U_ZERO_ERROR;
// `status` is shared by all three creations and inspected once afterwards.
401 Collator *en_US = Collator::createInstance("en_US", status),
402 *el_GR = Collator::createInstance("el_GR", status),
403 *vi_VN = Collator::createInstance("vi_VN", status);
405 if (U_FAILURE(status)) {
406 errcheckln(status, "ERROR: collation creation failed. - %s", u_errorName(status));
410 /* there is no reason to have canonical decomposition in en_US OR default locale */
411 if (vi_VN->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON)
413 errln("ERROR: vi_VN collation did not have canonical decomposition for normalization!\n");
416 if (el_GR->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON)
418 errln("ERROR: el_GR collation did not have canonical decomposition for normalization!\n");
421 if (en_US->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
423 errln("ERROR: en_US collation had canonical decomposition for normalization!\n");
// For each of three collators (en_US, ko, ja_JP): clones it with safeClone(),
// gives the clone and the original different strengths, and checks that the
// two then compare "abCda" and "abcda" differently.
432 CollationAPITest::TestSafeClone() {
433 static const int CLONETEST_COLLATOR_COUNT = 3;
434 Collator *someCollators [CLONETEST_COLLATOR_COUNT];
436 UErrorCode err = U_ZERO_ERROR;
// The two test strings differ only in the case of their third letter.
439 UnicodeString test1("abCda");
440 UnicodeString test2("abcda");
442 /* one default collator & two complex ones */
443 someCollators[0] = Collator::createInstance("en_US", err);
444 someCollators[1] = Collator::createInstance("ko", err);
445 someCollators[2] = Collator::createInstance("ja_JP", err);
// NOTE(review): presumably a failure-only path (report, then release all
// three collators); the guarding condition is not visible here - confirm.
447 errcheckln(err, "Couldn't instantiate collators. Error: %s", u_errorName(err));
448 delete someCollators[0];
449 delete someCollators[1];
450 delete someCollators[2];
454 /* change orig & clone & make sure they are independent */
456 for (index = 0; index < CLONETEST_COLLATOR_COUNT; index++)
458 col = someCollators[index]->safeClone();
460 errln("SafeClone of collator should not return null\n");
// Clone gets TERTIARY strength, original gets PRIMARY.
463 col->setStrength(Collator::TERTIARY);
464 someCollators[index]->setStrength(Collator::PRIMARY);
// NOTE(review): UCOL_CASE_LEVEL is set to UCOL_OFF on both objects, so only
// the strength differs between clone and original.
465 col->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err);
466 someCollators[index]->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err);
468 doAssert(col->greater(test1, test2), "Result should be \"abCda\" >>> \"abcda\" ");
469 doAssert(someCollators[index]->equals(test1, test2), "Result should be \"abcda\" == \"abCda\"");
// Release the original collator once it has been checked.
471 delete someCollators[index];
// Compares hashCode() results: two English collators must hash alike and
// differ from a Danish one; afterwards the same kind of check is applied to
// CollationKeys for "Abcda" and "abcda".
476 CollationAPITest::TestHashCode(/* char* par */)
478 logln("hashCode tests begin.");
479 UErrorCode success = U_ZERO_ERROR;
481 col1 = Collator::createInstance(Locale::getEnglish(), success);
482 if (U_FAILURE(success))
484 errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
489 Locale dk("da", "DK", "");
490 col2 = Collator::createInstance(dk, success);
491 if (U_FAILURE(success))
493 errln("Danish collation creation failed.");
// col3 is a second English collator, used as the "equal" counterpart of col1.
498 col3 = Collator::createInstance(Locale::getEnglish(), success);
499 if (U_FAILURE(success))
501 errln("2nd default collation creation failed.");
505 logln("Collator::hashCode() testing ...");
// The first two checks express the same inequality in two different forms.
507 doAssert(col1->hashCode() != col2->hashCode(), "Hash test1 result incorrect" );
508 doAssert(!(col1->hashCode() == col2->hashCode()), "Hash test2 result incorrect" );
509 doAssert(col1->hashCode() == col3->hashCode(), "Hash result not equal" );
511 logln("hashCode tests end.");
// Sort-key hashing: keys are produced by col3; sortk2 and sortk3 come from
// the same string, sortk1 from a string that differs in case.
515 UnicodeString test1("Abcda");
516 UnicodeString test2("abcda");
518 CollationKey sortk1, sortk2, sortk3;
519 UErrorCode status = U_ZERO_ERROR;
521 col3->getCollationKey(test1, sortk1, status);
522 col3->getCollationKey(test2, sortk2, status);
523 col3->getCollationKey(test2, sortk3, status);
525 doAssert(sortk1.hashCode() != sortk2.hashCode(), "Hash test1 result incorrect");
526 doAssert(sortk2.hashCode() == sortk3.hashCode(), "Hash result not equal" );
531 //----------------------------------------------------------------------------
532 // CollationKey -- Tests the CollationKey methods
// Uses an English collator to exercise getCollationKey(), CollationKey
// comparison (both compareTo() forms), equality, hashCode(), getByteArray()
// and construction of keys from raw byte arrays.
535 CollationAPITest::TestCollationKey(/* char* par */)
537 logln("testing CollationKey begins...");
539 UErrorCode success=U_ZERO_ERROR;
540 col = Collator::createInstance(Locale::getEnglish(), success);
541 if (U_FAILURE(success))
543 errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
546 col->setStrength(Collator::TERTIARY);
548 CollationKey sortk1, sortk2;
549 UnicodeString test1("Abcda"), test2("abcda");
550 UErrorCode key1Status = U_ZERO_ERROR, key2Status = U_ZERO_ERROR;
// NULL source with length 0 and a clean status: the key is expected to be
// non-bogus and empty.
552 logln("Testing weird arguments");
553 col->getCollationKey(NULL, 0, sortk1, key1Status);
554 // key gets reset here
556 sortk1.getByteArray(length);
557 doAssert(sortk1.isBogus() == FALSE && length == 0,
558 "Empty string should return an empty collation key");
// Same call with the status already set to a failure code: the key is
// expected to be bogus with zero length.
559 // bogus key returned here
560 key1Status = U_ILLEGAL_ARGUMENT_ERROR;
561 col->getCollationKey(NULL, 0, sortk1, key1Status);
// The comma expression calls getByteArray() to refresh `length` and then
// yields `length` for the comparison with 0.
562 doAssert(sortk1.isBogus() && (sortk1.getByteArray(length), length) == 0,
563 "Error code should return bogus collation key");
565 key1Status = U_ZERO_ERROR;
566 logln("Use tertiary comparison level testing ....");
568 col->getCollationKey(test1, sortk1, key1Status);
569 doAssert((sortk1.compareTo(col->getCollationKey(test2, sortk2, key2Status)))
570 == Collator::GREATER,
571 "Result should be \"Abcda\" >>> \"abcda\"");
// sortk3 is copy-constructed from sortk2; sortkNew and sortkEmpty start out
// default-constructed.
573 CollationKey sortk3(sortk2), sortkNew, sortkEmpty;
577 doAssert((sortk1 != sortk2), "The sort keys should be different");
578 doAssert((sortk1.hashCode() != sortk2.hashCode()), "sort key hashCode() failed");
579 doAssert((sortk2 == sortk3), "The sort keys should be the same");
580 doAssert((sortk1 == sortkNew), "The sort keys assignment failed");
581 doAssert((sortk1.hashCode() == sortkNew.hashCode()), "sort key hashCode() failed");
582 doAssert((sortkNew != sortk3), "The sort keys should be different");
// compareTo() without an error code, checked against Collator::* results.
583 doAssert(sortk1.compareTo(sortk3) == Collator::GREATER, "Result should be \"Abcda\" >>> \"abcda\"");
584 doAssert(sortk2.compareTo(sortk3) == Collator::EQUAL, "Result should be \"abcda\" == \"abcda\"");
585 doAssert(sortkEmpty.compareTo(sortk1) == Collator::LESS, "Result should be (empty key) <<< \"Abcda\"");
586 doAssert(sortk1.compareTo(sortkEmpty) == Collator::GREATER, "Result should be \"Abcda\" >>> (empty key)");
587 doAssert(sortkEmpty.compareTo(sortkEmpty) == Collator::EQUAL, "Result should be (empty key) == (empty key)");
// The same five comparisons through the overload taking a UErrorCode,
// checked against UCOL_* results.
588 doAssert(sortk1.compareTo(sortk3, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> \"abcda\"");
589 doAssert(sortk2.compareTo(sortk3, success) == UCOL_EQUAL, "Result should be \"abcda\" == \"abcda\"");
590 doAssert(sortkEmpty.compareTo(sortk1, success) == UCOL_LESS, "Result should be (empty key) <<< \"Abcda\"");
591 doAssert(sortk1.compareTo(sortkEmpty, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> (empty key)");
592 doAssert(sortkEmpty.compareTo(sortkEmpty, success) == UCOL_EQUAL, "Result should be (empty key) == (empty key)");
594 int32_t cnt1, cnt2, cnt3, cnt4;
// Fetch the raw bytes of sortk1 and sortk2 twice each and rebuild keys
// (sortk4..sortk7) from those bytes.
596 const uint8_t* byteArray1 = sortk1.getByteArray(cnt1);
597 const uint8_t* byteArray2 = sortk2.getByteArray(cnt2);
599 const uint8_t* byteArray3 = 0;
600 byteArray3 = sortk1.getByteArray(cnt3);
602 const uint8_t* byteArray4 = 0;
603 byteArray4 = sortk2.getByteArray(cnt4);
605 CollationKey sortk4(byteArray1, cnt1), sortk5(byteArray2, cnt2);
606 CollationKey sortk6(byteArray3, cnt3), sortk7(byteArray4, cnt4);
// Keys rebuilt from bytes must compare like the keys they were taken from.
608 doAssert(sortk1.compareTo(sortk4) == Collator::EQUAL, "CollationKey::toByteArray(sortk1) Failed.");
609 doAssert(sortk2.compareTo(sortk5) == Collator::EQUAL, "CollationKey::toByteArray(sortk2) Failed.");
610 doAssert(sortk4.compareTo(sortk5) == Collator::GREATER, "sortk4 >>> sortk5 Failed");
611 doAssert(sortk1.compareTo(sortk6) == Collator::EQUAL, "CollationKey::getByteArray(sortk1) Failed.");
612 doAssert(sortk2.compareTo(sortk7) == Collator::EQUAL, "CollationKey::getByteArray(sortk2) Failed.");
613 doAssert(sortk6.compareTo(sortk7) == Collator::GREATER, "sortk6 >>> sortk7 Failed");
615 logln("Equality tests : ");
616 doAssert(sortk1 == sortk4, "sortk1 == sortk4 Failed.");
617 doAssert(sortk2 == sortk5, "sortk2 == sortk5 Failed.");
618 doAssert(sortk1 != sortk5, "sortk1 != sortk5 Failed.");
619 doAssert(sortk1 == sortk6, "sortk1 == sortk6 Failed.");
620 doAssert(sortk2 == sortk7, "sortk2 == sortk7 Failed.");
621 doAssert(sortk1 != sortk7, "sortk1 != sortk7 Failed.");
627 doAssert(sortk1 == sortk3, "sortk1 = sortk3 assignment Failed.");
628 doAssert(sortk2 != sortk3, "sortk2 != sortk3 Failed.");
629 logln("testing sortkey ends...");
// At SECONDARY strength the failure text expects the keys for "Abcda" and
// "abcda" to compare as equal.
631 col->setStrength(Collator::SECONDARY);
632 doAssert(col->getCollationKey(test1, sortk1, key1Status).compareTo(
633 col->getCollationKey(test2, sortk2, key2Status))
635 "Result should be \"Abcda\" == \"abcda\"");
639 //----------------------------------------------------------------------------
640 // Tests the CollationElementIterator class.
641 // ctor, RuleBasedCollator::createCollationElementIterator(), operator==, operator!=
// Creates several CollationElementIterators over two test strings with an
// English collator and checks offsets, equality/inequality, stepping with
// next(), reset(), the primary/secondary/tertiary order helpers, and the
// behaviour of calls made with a failing error code.
644 CollationAPITest::TestElemIter(/* char* par */)
// NOTE(review): the log text says "sortkey" although this is the element
// iterator test (the closing log line names CollationElementIterator).
646 logln("testing sortkey begins...");
648 UErrorCode success = U_ZERO_ERROR;
649 col = Collator::createInstance(Locale::getEnglish(), success);
650 if (U_FAILURE(success))
652 errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
656 UnicodeString testString1("XFILE What subset of all possible test cases has the highest probability of detecting the most errors?");
657 UnicodeString testString2("Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?");
658 logln("Constructors and comparison testing....");
// iterator1, iterator2 and coliter all iterate over testString1 (coliter via
// a CharacterIterator); iterator3 iterates over testString2.
659 CollationElementIterator *iterator1 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1);
661 CharacterIterator *chariter=new StringCharacterIterator(testString1);
662 CollationElementIterator *coliter=((RuleBasedCollator*)col)->createCollationElementIterator(*chariter);
665 CollationElementIterator *iterator2 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1);
666 CollationElementIterator *iterator3 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString2);
668 int32_t offset = iterator1->getOffset();
670 errln("Error in getOffset for collation element iterator\n");
// Move iterator1 to offset 6 and back to 0 before comparing iterators.
673 iterator1->setOffset(6, success);
674 if (U_FAILURE(success)) {
675 errln("Error in setOffset for collation element iterator\n");
678 iterator1->setOffset(0, success);
679 int32_t order1, order2, order3;
680 doAssert((*iterator1 == *iterator2), "The two iterators should be the same");
681 doAssert((*iterator1 != *iterator3), "The two iterators should be different");
683 doAssert((*coliter == *iterator1), "The two iterators should be the same");
684 doAssert((*coliter == *iterator2), "The two iterators should be the same");
685 doAssert((*coliter != *iterator3), "The two iterators should be different");
// Advancing only iterator1 must make it unequal to iterator2.
687 order1 = iterator1->next(success);
688 if (U_FAILURE(success))
690 errln("Somehow ran out of memory stepping through the iterator.");
694 doAssert((*iterator1 != *iterator2), "The first iterator advance failed");
// order2 temporarily holds iterator2's offset here, not a collation element.
695 order2 = iterator2->getOffset();
696 doAssert((order1 != order2), "The order result should not be the same");
697 order2 = iterator2->next(success);
698 if (U_FAILURE(success))
700 errln("Somehow ran out of memory stepping through the iterator.");
704 doAssert((*iterator1 == *iterator2), "The second iterator advance failed");
705 doAssert((order1 == order2), "The order result should be the same");
// First elements of the two strings: all three weights are expected to match.
706 order3 = iterator3->next(success);
707 if (U_FAILURE(success))
709 errln("Somehow ran out of memory stepping through the iterator.");
713 doAssert((CollationElementIterator::primaryOrder(order1) ==
714 CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same");
715 doAssert((CollationElementIterator::secondaryOrder(order1) ==
716 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same");
717 doAssert((CollationElementIterator::tertiaryOrder(order1) ==
718 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same");
// Second elements: same primary weight, different tertiary weight expected.
720 order1 = iterator1->next(success); order3 = iterator3->next(success);
721 if (U_FAILURE(success))
723 errln("Somehow ran out of memory stepping through the iterator.");
727 doAssert((CollationElementIterator::primaryOrder(order1) ==
728 CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical");
729 doAssert((CollationElementIterator::tertiaryOrder(order1) !=
730 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different");
732 order1 = iterator1->next(success);
733 order3 = iterator3->next(success);
734 /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */
736 doAssert((CollationElementIterator::secondaryOrder(order1) !=
737 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same");
739 doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached");
// Reset all three iterators and repeat the stepping checks from the start.
741 iterator1->reset(); iterator2->reset(); iterator3->reset();
742 order1 = iterator1->next(success);
743 if (U_FAILURE(success))
745 errln("Somehow ran out of memory stepping through the iterator.");
749 doAssert((*iterator1 != *iterator2), "The first iterator advance failed");
751 order2 = iterator2->next(success);
752 if (U_FAILURE(success))
754 errln("Somehow ran out of memory stepping through the iterator.");
758 doAssert((*iterator1 == *iterator2), "The second iterator advance failed");
759 doAssert((order1 == order2), "The order result should be the same");
761 order3 = iterator3->next(success);
762 if (U_FAILURE(success))
764 errln("Somehow ran out of memory stepping through the iterator.");
768 doAssert((CollationElementIterator::primaryOrder(order1) ==
769 CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same");
770 doAssert((CollationElementIterator::secondaryOrder(order1) ==
771 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same");
772 doAssert((CollationElementIterator::tertiaryOrder(order1) ==
773 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same");
775 order1 = iterator1->next(success); order2 = iterator2->next(success); order3 = iterator3->next(success);
776 if (U_FAILURE(success))
778 errln("Somehow ran out of memory stepping through the iterator.");
782 doAssert((CollationElementIterator::primaryOrder(order1) ==
783 CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical");
784 doAssert((CollationElementIterator::tertiaryOrder(order1) !=
785 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different");
787 order1 = iterator1->next(success); order3 = iterator3->next(success);
788 if (U_FAILURE(success))
790 errln("Somehow ran out of memory stepping through the iterator.");
794 /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */
796 doAssert((CollationElementIterator::secondaryOrder(order1) !=
797 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same");
799 doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached");
800 doAssert((*iterator2 != *iterator3), "The iterators should be different");
// From here on `success` holds a failure code on entry to every call: each
// call is expected to leave it failing and produce no object / NULLORDER.
804 success=U_UNSUPPORTED_ERROR;
805 Collator *colerror=NULL;
806 colerror=Collator::createInstance(Locale::getEnglish(), success);
807 if (colerror != 0 || success == U_ZERO_ERROR){
808 errln("Error: createInstance(UErrorCode != U_ZERO_ERROR) should just return and not create an instance\n");
810 int32_t position=coliter->previous(success);
811 if(position != CollationElementIterator::NULLORDER){
812 errln((UnicodeString)"Expected NULLORDER got" + position);
// NOTE(review): the two error texts below are spelled "Expeceted"; they are
// runtime strings and are left untouched here.
815 coliter->setText(*chariter, success);
816 if(!U_FAILURE(success)){
817 errln("Expeceted error");
819 iterator1->setText((UnicodeString)"hello there", success);
820 if(!U_FAILURE(success)){
821 errln("Expeceted error");
833 logln("testing CollationElementIterator ends...");
836 // Test RuleBasedCollator ctor, dtor, operator==, operator!=, clone, copy, and getRules
// Builds rule-based collators from two rule strings and an English collator,
// then checks operator== / operator!= between originals and clones, a
// getRules() round trip, and the constructor overloads taking a strength
// and/or a normalization setting.
838 CollationAPITest::TestOperators(/* char* par */)
840 UErrorCode success = U_ZERO_ERROR;
// ruleset1 differs from ruleset2 by the extra "; ch, cH, Ch, CH" group.
841 UnicodeString ruleset1("< a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
842 UnicodeString ruleset2("< a, A < b, B < c, C < d, D, e, E");
843 RuleBasedCollator *col1 = new RuleBasedCollator(ruleset1, success);
844 if (U_FAILURE(success)) {
845 errcheckln(success, "RuleBasedCollator creation failed. - %s", u_errorName(success));
848 success = U_ZERO_ERROR;
849 RuleBasedCollator *col2 = new RuleBasedCollator(ruleset2, success);
850 if (U_FAILURE(success)) {
851 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set.");
854 logln("The operator tests begin : ");
855 logln("testing operator==, operator!=, clone methods ...");
// The second check's failure text refers to an assignment (operator=) between
// the two collators; that statement is not visible in these lines.
856 doAssert((*col1 != *col2), "The two different table collations compared equal");
858 doAssert((*col1 == *col2), "Collator objects not equal after assignment (operator=)");
860 success = U_ZERO_ERROR;
861 Collator *col3 = Collator::createInstance(Locale::getEnglish(), success);
862 if (U_FAILURE(success)) {
863 errln("Default collation creation failed.");
866 doAssert((*col1 != *col3), "The two different table collations compared equal");
// col4 clones the rule-based col1, col5 clones the English col3.
867 Collator* col4 = col1->clone();
868 Collator* col5 = col3->clone();
869 doAssert((*col1 == *col4), "Cloned collation objects not equal");
870 doAssert((*col3 != *col4), "Two different table collations compared equal");
871 doAssert((*col3 == *col5), "Cloned collation objects not equal");
872 doAssert((*col4 != *col5), "Two cloned collations compared equal");
// Round trip: a collator built from col3's rules must report the same rules.
874 const UnicodeString& defRules = ((RuleBasedCollator*)col3)->getRules();
875 RuleBasedCollator* col6 = new RuleBasedCollator(defRules, success);
876 if (U_FAILURE(success)) {
877 errln("Creating default collation with rules failed.");
880 doAssert((((RuleBasedCollator*)col3)->getRules() == col6->getRules()), "Default collator getRules failed");
// Constructor overloads on ruleset2: strength only (col7), normalization
// setting only (col8), and both (col9).
882 success = U_ZERO_ERROR;
883 RuleBasedCollator *col7 = new RuleBasedCollator(ruleset2, Collator::TERTIARY, success);
884 if (U_FAILURE(success)) {
885 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength.");
888 success = U_ZERO_ERROR;
889 RuleBasedCollator *col8 = new RuleBasedCollator(ruleset2, UCOL_OFF, success);
890 if (U_FAILURE(success)) {
891 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with Normalizer::NO_OP.");
894 success = U_ZERO_ERROR;
// NOTE(review): col9 is built with Collator::PRIMARY and UCOL_ON, while the
// failure text below speaks of tertiary strength and Normalizer::NO_OP.
895 RuleBasedCollator *col9 = new RuleBasedCollator(ruleset2, Collator::PRIMARY, UCOL_ON, success);
896 if (U_FAILURE(success)) {
897 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength and Normalizer::NO_OP.");
900 // doAssert((*col7 == *col8), "The two equal table collations compared different");
901 doAssert((*col7 != *col9), "The two different table collations compared equal");
902 doAssert((*col8 != *col9), "The two different table collations compared equal");
904 logln("operator tests ended.");
916 // test clone and copy
// Checks that a cloned collator (col2) and a rule-based collator overwritten by
// assignment (col3) both behave like the English collator (col1) they were taken from:
// they must compare equal to it, sort "a" before "b", and report the same rules.
918 CollationAPITest::TestDuplicate(/* char* par */)
920 UErrorCode status = U_ZERO_ERROR;
921 Collator *col1 = Collator::createInstance(Locale::getEnglish(), status);
922 if (U_FAILURE(status)) {
923 logln("Default collator creation failed.");
// Clone: must be equal to the source collator.
926 Collator *col2 = col1->clone();
927 doAssert((*col1 == *col2), "Cloned object is not equal to the orginal");
// col3 starts out as a collator built from an unrelated rule string, so it must differ from col1 ...
928 UnicodeString *ruleset = new UnicodeString("< a, A < b, B < c, C < d, D, e, E");
929 RuleBasedCollator *col3 = new RuleBasedCollator(*ruleset, status);
930 doAssert((*col1 != *col3), "Cloned object is equal to some dummy");
// ... until col1 is assigned over it; col1 is cast to RuleBasedCollator for the assignment.
931 *col3 = *((RuleBasedCollator*)col1);
932 doAssert((*col1 == *col3), "Copied object is not equal to the orginal");
// NOTE(review): status is only inspected here, after col3 has already been used above.
934 if (U_FAILURE(status)) {
935 logln("Collation tailoring failed.");
// first = "a" (U+0061), second = "b" (U+0062); the English rules are copied into a
// separate UnicodeString so they can be compared against col2/col3 below.
939 UCollationResult res;
940 UnicodeString first((UChar)0x0061);
941 UnicodeString second((UChar)0x0062);
942 UnicodeString copiedEnglishRules(((RuleBasedCollator*)col1)->getRules());
947 // Try using the cloned collators after deleting the original data
948 res = col2->compare(first, second, status);
949 if(res != UCOL_LESS) {
950 errln("a should be less then b after tailoring");
952 if (((RuleBasedCollator*)col2)->getRules() != copiedEnglishRules) {
953 errln(UnicodeString("English rule difference. ")
954 + copiedEnglishRules + UnicodeString("\ngetRules=") + ((RuleBasedCollator*)col2)->getRules());
// Same two checks for the assigned-to collator.
956 res = col3->compare(first, second, status);
957 if(res != UCOL_LESS) {
958 errln("a should be less then b after tailoring");
960 if (col3->getRules() != copiedEnglishRules) {
961 errln(UnicodeString("English rule difference. ")
962 + copiedEnglishRules + UnicodeString("\ngetRules=") + col3->getRules());
// Compares "Abcda" with "abcda" using an English collator at several strengths and
// through every compare()/equals()/greater()/greaterOrEqual() form called below.
// The two strings differ only in the case of the first letter.
970 CollationAPITest::TestCompare(/* char* par */)
972 logln("The compare tests begin : ");
974 UErrorCode success = U_ZERO_ERROR;
975 col = Collator::createInstance(Locale::getEnglish(), success);
976 if (U_FAILURE(success)) {
977 errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
// No strength is set before this section, so it runs at the collator's initial strength
// (the log message labels it tertiary). The strings are expected to differ here.
980 UnicodeString test1("Abcda"), test2("abcda");
981 logln("Use tertiary comparison level testing ....");
983 doAssert((!col->equals(test1, test2) ), "Result should be \"Abcda\" != \"abcda\"");
984 doAssert((col->greater(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\"");
985 doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\"");
// At secondary strength the strings are expected to compare equal.
987 col->setStrength(Collator::SECONDARY);
988 logln("Use secondary comparison level testing ....");
990 doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
991 doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
992 doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
// Same expectation at primary strength.
994 col->setStrength(Collator::PRIMARY);
995 logln("Use primary comparison level testing ....");
997 doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
998 doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
999 doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
1001 // Test different APIs
// Raw UChar buffers and lengths of the same two strings, for the pointer-based overloads.
1002 const UChar* t1 = test1.getBuffer();
1003 int32_t t1Len = test1.length();
1004 const UChar* t2 = test2.getBuffer();
1005 int32_t t2Len = test2.length();
// Still at primary strength: every overload must report equality. Calls without a
// UErrorCode are checked against Collator::EQUAL, calls with one against UCOL_EQUAL.
1007 doAssert((col->compare(test1, test2) == Collator::EQUAL), "Problem");
1008 doAssert((col->compare(test1, test2, success) == UCOL_EQUAL), "Problem");
1009 doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::EQUAL), "Problem");
1010 doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_EQUAL), "Problem");
1011 doAssert((col->compare(test1, test2, t1Len) == Collator::EQUAL), "Problem");
1012 doAssert((col->compare(test1, test2, t1Len, success) == UCOL_EQUAL), "Problem");
// Switch to tertiary strength through setAttribute(); the same overloads must now report GREATER.
1014 col->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, success);
1015 doAssert((col->compare(test1, test2) == Collator::GREATER), "Problem");
1016 doAssert((col->compare(test1, test2, success) == UCOL_GREATER), "Problem");
1017 doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::GREATER), "Problem");
1018 doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_GREATER), "Problem");
1019 doAssert((col->compare(test1, test2, t1Len) == Collator::GREATER), "Problem");
1020 doAssert((col->compare(test1, test2, t1Len, success) == UCOL_GREATER), "Problem");
1024 logln("The compare tests end.");
// Enumerates the available collation locales three ways - the array form of
// getAvailableLocales(count), the StringEnumeration form, and a clone of that
// enumeration - and checks that the counts agree.
1029 CollationAPITest::TestGetAll(/* char* par */)
1031 int32_t count1, count2;
1032 UErrorCode status = U_ZERO_ERROR;
1034 logln("Trying Collator::getAvailableLocales(int&)");
// Array form: count1 receives the number of entries; each locale is logged with its display name.
1036 const Locale* list = Collator::getAvailableLocales(count1);
1037 for (int32_t i = 0; i < count1; ++i) {
1038 UnicodeString dispName;
1039 logln(UnicodeString("Locale name: ")
1040 + UnicodeString(list[i].getName())
1041 + UnicodeString(" , the display name is : ")
1042 + UnicodeString(list[i].getDisplayName(dispName)));
// NOTE(review): this empty/NULL check runs after the loop above has already indexed list.
1045 if (count1 == 0 || list == NULL) {
1046 dataerrln("getAvailableLocales(int&) returned an empty list");
// Enumeration form: iterated with snext(), which yields UnicodeString pointers.
1049 logln("Trying Collator::getAvailableLocales()");
1050 StringEnumeration* localeEnum = Collator::getAvailableLocales();
1051 const UnicodeString* locStr;
1052 const char *locCStr;
1055 if (localeEnum == NULL) {
1056 dataerrln("getAvailableLocales() returned NULL");
1060 while ((locStr = localeEnum->snext(status)) != NULL)
1062 logln(UnicodeString("Locale name is: ") + *locStr);
// NOTE(review): count2 is presumably the number of items seen by the loop above - confirm.
1065 if (count1 != count2) {
1066 errln("getAvailableLocales(int&) returned %d and getAvailableLocales() returned %d", count1, count2);
// Cloned enumeration: reset, then iterated with next(), which yields char* strings.
1069 logln("Trying Collator::getAvailableLocales() clone");
1071 StringEnumeration* localeEnum2 = localeEnum->clone();
1072 localeEnum2->reset(status);
1073 while ((locCStr = localeEnum2->next(NULL, status)) != NULL)
1075 logln(UnicodeString("Locale name is: ") + UnicodeString(locCStr));
1078 if (count1 != count2) {
1079 errln("getAvailableLocales(3rd time) returned %d and getAvailableLocales(2nd time) returned %d", count1, count2);
// The enumeration's own count() must also match the array count.
1081 if (localeEnum->count(status) != count1) {
1082 errln("localeEnum->count() returned %d and getAvailableLocales() returned %d", localeEnum->count(status), count1);
// Checks that CollationKey objects (getCollationKey) and raw sort keys (getSortKey)
// agree byte-for-byte for "Abcda"/"abcda"/"abcda", first at identical strength and
// then at secondary strength, using each getSortKey() calling form exercised below.
1088 void CollationAPITest::TestSortKey()
1090 UErrorCode status = U_ZERO_ERROR;
1092 this is supposed to open default date format, but later on it treats
1093 it like it is "en_US"
1094 - very bad if you try to run the tests on machine where default
1095 locale is NOT "en_US"
1097 Collator *col = Collator::createInstance(Locale::getEnglish(), status);
1098 if (U_FAILURE(status)) {
1099 errcheckln(status, "ERROR: Default collation creation failed.: %s\n", u_errorName(status));
// A freshly created English collator is expected to report tertiary strength.
1103 if (col->getStrength() != Collator::TERTIARY)
1105 errln("ERROR: default collation did not have UCOL_DEFAULT_STRENGTH !\n");
1108 /* Need to use identical strength */
1109 col->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, status);
// test1 = "Abcda"; test2 and test3 are both "abcda". All three are NUL-terminated.
1111 UChar test1[6] = {0x41, 0x62, 0x63, 0x64, 0x61, 0},
1112 test2[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0},
1113 test3[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0};
// Output buffers for getSortKey(); every call below passes 64 as the capacity.
1115 uint8_t sortkey1[64];
1116 uint8_t sortkey2[64];
1117 uint8_t sortkey3[64];
// NOTE(review): the message says "tertiary", but the strength was just set to UCOL_IDENTICAL.
1119 logln("Use tertiary comparison level testing ....\n");
1122 col->getCollationKey(test1, u_strlen(test1), key1, status);
1125 col->getCollationKey(test2, u_strlen(test2), key2, status);
1128 col->getCollationKey(test3, u_strlen(test3), key3, status);
// Expected ordering of the collation keys: "Abcda" > "abcda", and the two "abcda" keys equal.
1130 doAssert(key1.compareTo(key2) == Collator::GREATER,
1131 "Result should be \"Abcda\" > \"abcda\"");
1132 doAssert(key2.compareTo(key1) == Collator::LESS,
1133 "Result should be \"abcda\" < \"Abcda\"");
1134 doAssert(key2.compareTo(key3) == Collator::EQUAL,
1135 "Result should be \"abcda\" == \"abcda\"");
1137 // Clone the key2 sortkey for later.
1138 int32_t keylength = 0;
1139 const uint8_t *key2primary_alias = key2.getByteArray(keylength);
1140 LocalArray<uint8_t> key2primary(new uint8_t[keylength]);
1141 memcpy(key2primary.getAlias(), key2primary_alias, keylength);
// Form 1: getSortKey() called with the UChar arrays and no explicit length.
1143 col->getSortKey(test1, sortkey1, 64);
1144 col->getSortKey(test2, sortkey2, 64);
1145 col->getSortKey(test3, sortkey3, 64);
// Each getByteArray() call overwrites keylength with that key's length, which then
// bounds the memcmp against the corresponding raw sort key.
1147 const uint8_t *tempkey = key1.getByteArray(keylength);
1148 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1149 "Test1 string should have the same collation key and sort key");
1150 tempkey = key2.getByteArray(keylength);
1151 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1152 "Test2 string should have the same collation key and sort key");
1153 tempkey = key3.getByteArray(keylength);
1154 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1155 "Test3 string should have the same collation key and sort key");
// Form 2: getSortKey() called with the UChar arrays and an explicit length of 5.
1157 col->getSortKey(test1, 5, sortkey1, 64);
1158 col->getSortKey(test2, 5, sortkey2, 64);
1159 col->getSortKey(test3, 5, sortkey3, 64);
1161 tempkey = key1.getByteArray(keylength);
1162 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1163 "Test1 string should have the same collation key and sort key");
1164 tempkey = key2.getByteArray(keylength);
1165 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1166 "Test2 string should have the same collation key and sort key");
1167 tempkey = key3.getByteArray(keylength);
1168 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1169 "Test3 string should have the same collation key and sort key");
// Form 3: getSortKey() called with explicitly constructed UnicodeString objects.
1171 UnicodeString strtest1(test1);
1172 col->getSortKey(strtest1, sortkey1, 64);
1173 UnicodeString strtest2(test2);
1174 col->getSortKey(strtest2, sortkey2, 64);
1175 UnicodeString strtest3(test3);
1176 col->getSortKey(strtest3, sortkey3, 64);
1178 tempkey = key1.getByteArray(keylength);
1179 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1180 "Test1 string should have the same collation key and sort key");
1181 tempkey = key2.getByteArray(keylength);
1182 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1183 "Test2 string should have the same collation key and sort key");
1184 tempkey = key3.getByteArray(keylength);
1185 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1186 "Test3 string should have the same collation key and sort key");
// Second pass at secondary strength: the collation keys are regenerated and the
// case-only difference between test1 and test2 is expected to disappear.
1188 logln("Use secondary comparision level testing ...\n");
1189 col->setStrength(Collator::SECONDARY);
1191 col->getCollationKey(test1, u_strlen(test1), key1, status);
1192 col->getCollationKey(test2, u_strlen(test2), key2, status);
1193 col->getCollationKey(test3, u_strlen(test3), key3, status);
1195 doAssert(key1.compareTo(key2) == Collator::EQUAL,
1196 "Result should be \"Abcda\" == \"abcda\"");
1197 doAssert(key2.compareTo(key3) == Collator::EQUAL,
1198 "Result should be \"abcda\" == \"abcda\"");
// Compares the new key2 with the copy saved earlier at identical strength, over the
// first (keylength - 1) bytes, where keylength is the length of the new (secondary) key.
1200 tempkey = key2.getByteArray(keylength);
1201 doAssert(memcmp(tempkey, key2primary.getAlias(), keylength - 1) == 0,
1202 "Binary format for 'abcda' sortkey different for secondary strength!");
// Repeat the three getSortKey() forms at secondary strength.
1204 col->getSortKey(test1, sortkey1, 64);
1205 col->getSortKey(test2, sortkey2, 64);
1206 col->getSortKey(test3, sortkey3, 64);
1208 tempkey = key1.getByteArray(keylength);
1209 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1210 "Test1 string should have the same collation key and sort key");
1211 tempkey = key2.getByteArray(keylength);
1212 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1213 "Test2 string should have the same collation key and sort key");
1214 tempkey = key3.getByteArray(keylength);
1215 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1216 "Test3 string should have the same collation key and sort key");
1218 col->getSortKey(test1, 5, sortkey1, 64);
1219 col->getSortKey(test2, 5, sortkey2, 64);
1220 col->getSortKey(test3, 5, sortkey3, 64);
1222 tempkey = key1.getByteArray(keylength);
1223 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1224 "Test1 string should have the same collation key and sort key");
1225 tempkey = key2.getByteArray(keylength);
1226 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1227 "Test2 string should have the same collation key and sort key");
1228 tempkey = key3.getByteArray(keylength);
1229 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1230 "Test3 string should have the same collation key and sort key");
1232 col->getSortKey(strtest1, sortkey1, 64);
1233 col->getSortKey(strtest2, sortkey2, 64);
1234 col->getSortKey(strtest3, sortkey3, 64);
1236 tempkey = key1.getByteArray(keylength);
1237 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1238 "Test1 string should have the same collation key and sort key");
1239 tempkey = key2.getByteArray(keylength);
1240 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1241 "Test2 string should have the same collation key and sort key");
1242 tempkey = key3.getByteArray(keylength);
1243 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1244 "Test3 string should have the same collation key and sort key");
1246 logln("testing sortkey ends...");
// Checks sort-key generation when the output buffer is too small: getSortKey() must
// still return the full key length, write a correct prefix, and leave bytes beyond the
// given capacity untouched. It then checks that getCollationKey() matches getSortKey()
// for inputs of steadily increasing length.
1250 void CollationAPITest::TestSortKeyOverflow() {
1251 IcuTestErrorCode errorCode(*this, "TestSortKeyOverflow()");
1252 LocalPointer<Collator> col(Collator::createInstance(Locale::getEnglish(), errorCode));
1253 if (errorCode.logDataIfFailureAndReset("Collator::createInstance(English) failed")) {
1256 col->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode);
1257 UChar i_and_phi[] = { 0x438, 0x3c6 }; // Cyrillic small i & Greek small phi.
1258 // The sort key should be 6 bytes:
1259 // 2 bytes for the Cyrillic i, 1 byte for the primary-compression terminator,
1260 // 2 bytes for the Greek phi, and 1 byte for the NUL terminator.
// Reference key, produced with ample capacity; length is the full key length.
1261 uint8_t sortKey[12];
1262 int32_t length = col->getSortKey(i_and_phi, 2, sortKey, LENGTHOF(sortKey));
1263 uint8_t sortKey2[12];
// Try every capacity that is too small. sortKey2 is pre-filled with the value 2 so that
// any write past the requested capacity can be detected afterwards.
1264 for (int32_t capacity = 0; capacity < length; ++capacity) {
1265 uprv_memset(sortKey2, 2, LENGTHOF(sortKey2));
1266 int32_t length2 = col->getSortKey(i_and_phi, 2, sortKey2, capacity);
1267 if (length2 != length || 0 != uprv_memcmp(sortKey, sortKey2, capacity)) {
1268 errln("getSortKey(i_and_phi, capacity=%d) failed to write proper prefix", capacity);
1269 } else if (sortKey2[capacity] != 2 || sortKey2[capacity + 1] != 2) {
1270 errln("getSortKey(i_and_phi, capacity=%d) wrote beyond capacity", capacity);
1274 // Now try to break getCollationKey().
1275 // Internally, it always starts with a large stack buffer.
1276 // Since we cannot control the initial capacity, we throw an increasing number
1277 // of characters at it, with the problematic part at the end.
1278 const int32_t longCapacity = 2000;
1279 // Each 'a' in the prefix should result in one primary sort key byte.
1280 // For i_and_phi we expect 6 bytes, then the NUL terminator.
// NOTE(review): the earlier comment counts the NUL terminator inside the 6 bytes, while
// this one (and the "- 6 - 1" below) counts it separately - confirm which is intended.
1281 const int32_t maxPrefixLength = longCapacity - 6 - 1;
1282 LocalArray<uint8_t> longSortKey(new uint8_t[longCapacity]);
1283 UnicodeString s(FALSE, i_and_phi, 2);
// Each iteration compares getSortKey() into the 2000-byte buffer against the bytes of a
// fresh CollationKey for the same string, then lengthens the string by one 'a'.
1284 for (int32_t prefixLength = 0; prefixLength < maxPrefixLength; ++prefixLength) {
1285 length = col->getSortKey(s, longSortKey.getAlias(), longCapacity);
1286 CollationKey collKey;
1287 col->getCollationKey(s, collKey, errorCode);
1288 int32_t collKeyLength;
1289 const uint8_t *collSortKey = collKey.getByteArray(collKeyLength);
1290 if (collKeyLength != length || 0 != uprv_memcmp(longSortKey.getAlias(), collSortKey, length)) {
1291 errln("getCollationKey(prefix[%d]+i_and_phi) failed to write proper sort key", prefixLength);
1294 // Insert an 'a' to match ++prefixLength.
1295 s.insert(prefixLength, (UChar)0x61);
// Exercises getMaxExpansion(): collation elements are fetched with
// CollationElementIterator::previous() and the reported maximum expansion is checked
// against an expected value. Uses a collator built from a small tailoring rule, and a
// second collator built from a rule containing the jamo U+1165.
1299 void CollationAPITest::TestMaxExpansion()
1301 UErrorCode status = U_ZERO_ERROR;
// Supplementary-plane code point used for the supplementary check further down.
1303 UChar32 unassigned = 0xEFFFD;
1304 uint32_t sorder = 0;
1305 uint32_t temporder = 0;
// The rule contains contractions ("ab", "ch") and an expansion ("c/aba").
1307 UnicodeString rule("&a < ab < c/aba < d < z < ch");
1308 RuleBasedCollator coll(rule, status);
1309 if(U_FAILURE(status)) {
1310 errcheckln(status, "Collator creation failed with error %s", u_errorName(status));
// A single iterator is created once and re-pointed at new text with setText() below.
1313 UnicodeString str(ch);
1314 CollationElementIterator *iter =
1315 coll.createCollationElementIterator(str);
// Sweep over code units below 0xFFFF, stopping early if status turns into a failure.
1317 while (ch < 0xFFFF && U_SUCCESS(status)) {
1324 str.setCharAt(0, ch);
1325 iter->setText(str, status);
1326 order = iter->previous(status);
1328 /* thai management */
1330 order = iter->previous(status);
// Drain the remaining elements.
// NOTE(review): count presumably tallies the elements returned by previous() - confirm.
1332 while (U_SUCCESS(status) && iter->previous(status) != UCOL_NULLORDER) {
// The reported maximum expansion for this order must not be smaller than count.
1336 size = coll.getMaxExpansion(order);
1337 if (U_FAILURE(status) || size < count) {
1338 errln("Failure at codepoint U+%04X, maximum expansion count %d < %d",
1343 /* testing for exact max expansion */
// Here the expected maximum expansion is exactly 1.
1348 str.setCharAt(0, ch);
1349 iter->setText(str, status);
1350 order = iter->previous(status);
1351 size = coll.getMaxExpansion(order);
1352 if (U_FAILURE(status) || size != 1) {
1353 errln("Failure at codepoint U+%04X, maximum expansion count %d < %d",
// Expected maximum expansion of exactly 3 for the text set just before this point.
1361 iter->setText(str, status);
1362 temporder = iter->previous(status);
1363 size = coll.getMaxExpansion(temporder);
1364 if (U_FAILURE(status) || size != 3) {
1365 errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d",
1366 ch, temporder, size, 3);
// Expected maximum expansion of exactly 1.
1371 iter->setText(str, status);
1372 temporder = iter->previous(status);
1373 size = coll.getMaxExpansion(temporder);
1374 if (U_FAILURE(status) || size != 1) {
1375 errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d",
1376 ch, temporder, size, 1);
// Supplementary code point: the expected maximum expansion is exactly 2.
1379 str.setTo(unassigned);
1380 iter->setText(str, status);
1381 sorder = iter->previous(status);
1382 size = coll.getMaxExpansion(sorder);
1383 if (U_FAILURE(status) || size != 2) {
1384 errln("Failure at supplementary codepoints, maximum expansion count %d < %d",
// Upper-bound check: the maximum expansion must not exceed 3.
1391 iter->setText(str, status);
1392 temporder = iter->previous(status);
1393 size = coll.getMaxExpansion(temporder);
1394 if (U_FAILURE(status) || size > 3) {
1395 errln("Failure at codepoint U+%04X, maximum expansion count %d > %d",
1401 /* testing special jamo &a<\u1160 */
// Unescaped, the rule reads: & q < U+1165 / q q q q
1402 rule = CharsToUnicodeString("\\u0026\\u0071\\u003c\\u1165\\u002f\\u0071\\u0071\\u0071\\u0071");
// iter is re-assigned to an iterator from the jamo collator; this check queries
// getMaxExpansion() on the iterator rather than on the collator, and expects exactly 6.
1404 RuleBasedCollator jamocoll(rule, status);
1405 iter = jamocoll.createCollationElementIterator(str);
1406 temporder = iter->previous(status);
1407 size = iter->getMaxExpansion(temporder);
1408 if (U_FAILURE(status) || size != 6) {
1409 errln("Failure at codepoint U+%04X, maximum expansion count %d > %d",
// Checks that the display name obtained through the collator's getDisplayName() for a
// locale matches the one produced by that Locale's own getDisplayName().
1416 void CollationAPITest::TestDisplayName()
1418 UErrorCode error = U_ZERO_ERROR;
1419 Collator *coll = Collator::createInstance("en_US", error);
1420 if (U_FAILURE(error)) {
1421 errcheckln(error, "Failure creating english collator - %s", u_errorName(error));
// First locale checked: Canadian French.
// NOTE(review): the failure message below mentions en_US, not the locale being queried.
1425 UnicodeString result;
1426 coll->getDisplayName(Locale::getCanadaFrench(), result);
1427 Locale::getCanadaFrench().getDisplayName(name);
1428 if (result.compare(name)) {
1429 errln("Failure getting the correct name for locale en_US");
// Second locale checked: Simplified Chinese.
// NOTE(review): the failure message below mentions zh_SG - confirm that is intended.
1432 coll->getDisplayName(Locale::getSimplifiedChinese(), result);
1433 Locale::getSimplifiedChinese().getDisplayName(name);
1434 if (result.compare(name)) {
1435 errln("Failure getting the correct name for locale zh_SG");
// Round-trips collation attributes on a default collator: each setAttribute() call is
// followed by a getAttribute() call that must return the value just set.
1440 void CollationAPITest::TestAttribute()
1442 UErrorCode error = U_ZERO_ERROR;
1443 Collator *coll = Collator::createInstance(error);
1445 if (U_FAILURE(error)) {
1446 errcheckln(error, "Creation of default collator failed - %s", u_errorName(error));
// UCOL_FRENCH_COLLATION: off, then on.
1450 coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_OFF, error);
1451 if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_OFF ||
1453 errln("Setting and retrieving of the french collation failed");
1456 coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_ON, error);
1457 if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_ON ||
1459 errln("Setting and retrieving of the french collation failed");
// UCOL_ALTERNATE_HANDLING: shifted, then non-ignorable.
1462 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, error);
1463 if (coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_SHIFTED ||
1465 errln("Setting and retrieving of the alternate handling failed");
1468 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, error);
1469 if (coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_NON_IGNORABLE ||
1471 errln("Setting and retrieving of the alternate handling failed");
// UCOL_CASE_FIRST: lower first, then upper first.
1474 coll->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, error);
1475 if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_LOWER_FIRST ||
1477 errln("Setting and retrieving of the case first attribute failed");
1480 coll->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, error);
1481 if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_UPPER_FIRST ||
1483 errln("Setting and retrieving of the case first attribute failed");
// UCOL_CASE_LEVEL: on, then off.
1486 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, error);
1487 if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_ON ||
1489 errln("Setting and retrieving of the case level attribute failed");
1492 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, error);
1493 if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_OFF ||
1495 errln("Setting and retrieving of the case level attribute failed");
// UCOL_NORMALIZATION_MODE: on, then off.
1498 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, error);
1499 if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_ON ||
1501 errln("Setting and retrieving of the normalization on/off attribute failed");
1504 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, error);
1505 if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_OFF ||
1507 errln("Setting and retrieving of the normalization on/off attribute failed");
// UCOL_STRENGTH: primary, secondary, tertiary, quaternary, identical.
1510 coll->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, error);
1511 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_PRIMARY ||
1513 errln("Setting and retrieving of the collation strength failed");
1516 coll->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, error);
1517 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_SECONDARY ||
1519 errln("Setting and retrieving of the collation strength failed");
1522 coll->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, error);
1523 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_TERTIARY ||
1525 errln("Setting and retrieving of the collation strength failed");
1528 coll->setAttribute(UCOL_STRENGTH, UCOL_QUATERNARY, error);
1529 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_QUATERNARY ||
1531 errln("Setting and retrieving of the collation strength failed");
1534 coll->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, error);
1535 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_IDENTICAL ||
1537 errln("Setting and retrieving of the collation strength failed");
// Exercises the three setVariableTop() forms called below on a default collator: from a
// UChar buffer, from a previously read 32-bit value, and from a UnicodeString. Only the
// upper 16 bits of the variable-top values are compared.
1543 void CollationAPITest::TestVariableTopSetting() {
1544 UErrorCode status = U_ZERO_ERROR;
// Buffer for the variable-top character; only its first code unit is passed on below.
1546 UChar vt[256] = { 0 };
1548 Collator *coll = Collator::createInstance(status);
1549 if(U_FAILURE(status)) {
1551 errcheckln(status, "Collator creation failed with error %s", u_errorName(status));
// Remember the current variable top so it can be put back later.
1555 uint32_t oldVarTop = coll->getVariableTop(status);
// Set from the buffer; the returned value must agree with what getVariableTop() reports.
1559 uint32_t newVarTop = coll->setVariableTop(vt, 1, status);
1561 if((newVarTop & 0xFFFF0000) != (coll->getVariableTop(status) & 0xFFFF0000)) {
1562 errln("Didn't set vartop properly\n");
// Put the previous value back using the numeric form.
1565 coll->setVariableTop(oldVarTop, status);
// Setting from a UnicodeString holding the same code unit must give the same result.
1567 uint32_t newerVarTop = coll->setVariableTop(UnicodeString(vt, 1), status);
1569 if((newVarTop & 0xFFFF0000) != (newerVarTop & 0xFFFF0000)) {
1570 errln("Didn't set vartop properly from UnicodeString!\n");
// Checks Collator::getLocale() for the requested, valid and actual locale types in
// three situations: collators opened for the locales in testStruct, a collator opened
// for a locale name with no data ("blahaha"), and a collator built directly from rules.
// Mismatches in this function are reported through log().
1577 void CollationAPITest::TestGetLocale() {
1578 UErrorCode status = U_ZERO_ERROR;
// Rule string and the UChar buffer it is unescaped into before building a collator.
1579 const char *rules = "&a<x<y<z";
1580 UChar rlz[256] = {0};
1582 Collator *coll = NULL;
// One row per case: the locale asked for, and the valid/actual locales expected back.
1587 static const struct {
1588 const char* requestedLocale;
1589 const char* validLocale;
1590 const char* actualLocale;
1592 { "sr_YU", "sr_YU", "root" },
1593 { "sh_YU", "sh_YU", "sh" },
1594 { "en_US_CALIFORNIA", "en_US", "root" },
1595 { "fr_FR_NONEXISTANT", "fr_FR", "fr" }
1598 u_unescape(rules, rlz, 256);
1600 /* test opening collators for different locales */
1601 for(i = 0; i<(int32_t)(sizeof(testStruct)/sizeof(testStruct[0])); i++) {
1602 status = U_ZERO_ERROR;
1603 coll = Collator::createInstance(testStruct[i].requestedLocale, status);
1604 if(U_FAILURE(status)) {
1605 log("Failed to open collator for %s with %s\n", testStruct[i].requestedLocale, u_errorName(status));
// Compare each of the three locale types against the table row.
1609 locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status);
1610 if(locale != testStruct[i].requestedLocale) {
1611 log("[Coll %s]: Error in requested locale, expected %s, got %s\n", testStruct[i].requestedLocale, testStruct[i].requestedLocale, locale.getName());
1613 locale = coll->getLocale(ULOC_VALID_LOCALE, status);
1614 if(locale != testStruct[i].validLocale) {
1615 log("[Coll %s]: Error in valid locale, expected %s, got %s\n", testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName());
1617 locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1618 if(locale != testStruct[i].actualLocale) {
1619 log("[Coll %s]: Error in actual locale, expected %s, got %s\n", testStruct[i].requestedLocale, testStruct[i].actualLocale, locale.getName());
1624 /* completely non-existent locale for collator should get a default collator */
// defaultColl is the reference collator that the "blahaha" collator is compared against.
1626 Collator *defaultColl = Collator::createInstance((const Locale)NULL, status);
1627 coll = Collator::createInstance("blahaha", status);
1628 if(U_FAILURE(status)) {
1629 log("Failed to open collator with %s\n", u_errorName(status));
// The requested locale must still be the name asked for; the valid and actual locales
// must match those of the default collator.
1634 if(coll->getLocale(ULOC_REQUESTED_LOCALE, status) != "blahaha") {
1635 log("Nonexisting locale didn't preserve the requested locale\n");
1637 if(coll->getLocale(ULOC_VALID_LOCALE, status) !=
1638 defaultColl->getLocale(ULOC_VALID_LOCALE, status)) {
1639 log("Valid locale for nonexisting locale locale collator differs "
1640 "from valid locale for default collator\n");
1642 if(coll->getLocale(ULOC_ACTUAL_LOCALE, status) !=
1643 defaultColl->getLocale(ULOC_ACTUAL_LOCALE, status)) {
1644 log("Actual locale for nonexisting locale locale collator differs "
1645 "from actual locale for default collator\n");
1653 /* collator instantiated from rules should have all three locales NULL */
// In this code the expectation is expressed as Locale::isBogus() being true for each type.
1654 coll = new RuleBasedCollator(rlz, status);
1655 locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status);
1656 if(!locale.isBogus()) {
1657 log("For collator instantiated from rules, requested locale %s is not bogus\n", locale.getName());
1659 locale = coll->getLocale(ULOC_VALID_LOCALE, status);
1660 if(!locale.isBogus()) {
1661 log("For collator instantiated from rules, valid locale %s is not bogus\n", locale.getName());
1663 locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1664 if(!locale.isBogus()) {
1665 log("For collator instantiated from rules, actual locale %s is not bogus\n", locale.getName());
// Source text of a test entry, written with \uXXXX escapes; TestBounds passes it through
// u_unescape() before computing the entry's sort key.
1671 const char *original;
// qsort()-style comparator for teststruct entries: orders two entries by their key
// bytes using strcmp(), i.e. the keys are treated as NUL-terminated byte strings.
// Both arguments are pointers to struct teststruct passed as const void *.
1678 static int U_CALLCONV
1679 compare_teststruct(const void *string1, const void *string2) {
1680 return(strcmp((const char *)((struct teststruct *)string1)->key, (const char *)((struct teststruct *)string2)->key));
// Exercises sort-key bounds with an "sh" collator. Part one uses the C++
// Collator::getBound() on a sorted table of names; part two uses the C function
// ucol_getBound() on a short list of "John Smith" variants. In both parts a lower bound
// must not sort after the keys it covers, and an upper bound must sort after them.
1685 void CollationAPITest::TestBounds(void) {
1686 UErrorCode status = U_ZERO_ERROR;
1688 Collator *coll = Collator::createInstance(Locale("sh"), status);
1689 if(U_FAILURE(status)) {
1691 errcheckln(status, "Collator creation failed with %s", u_errorName(status));
// Scratch buffers: one sort key plus the generated lower and upper bounds.
1695 uint8_t sortkey[512], lower[512], upper[512];
// Strings for part two (ucol_getBound); written with \u escapes and unescaped before use.
1698 static const char * const test[] = {
1702 "j\\u00F6hn sm\\u00EFth",
1703 "J\\u00F6hn Sm\\u00EFth",
1704 "J\\u00D6HN SM\\u00CFTH",
// Entries for part one: each holds the escaped source string and a zero-initialised key
// buffer that is filled in by the first loop below.
1709 struct teststruct tests[] = {
1710 {"\\u010CAKI MIHALJ", {0}},
1711 {"\\u010CAKI MIHALJ", {0}},
1712 {"\\u010CAKI PIRO\\u0160KA", {0}},
1713 {"\\u010CABAI ANDRIJA", {0}},
1714 {"\\u010CABAI LAJO\\u0160", {0}},
1715 {"\\u010CABAI MARIJA", {0}},
1716 {"\\u010CABAI STEVAN", {0}},
1717 {"\\u010CABAI STEVAN", {0}},
1718 {"\\u010CABARKAPA BRANKO", {0}},
1719 {"\\u010CABARKAPA MILENKO", {0}},
1720 {"\\u010CABARKAPA MIROSLAV", {0}},
1721 {"\\u010CABARKAPA SIMO", {0}},
1722 {"\\u010CABARKAPA STANKO", {0}},
1723 {"\\u010CABARKAPA TAMARA", {0}},
1724 {"\\u010CABARKAPA TOMA\\u0160", {0}},
1725 {"\\u010CABDARI\\u0106 NIKOLA", {0}},
1726 {"\\u010CABDARI\\u0106 ZORICA", {0}},
1727 {"\\u010CABI NANDOR", {0}},
1728 {"\\u010CABOVI\\u0106 MILAN", {0}},
1729 {"\\u010CABRADI AGNEZIJA", {0}},
1730 {"\\u010CABRADI IVAN", {0}},
1731 {"\\u010CABRADI JELENA", {0}},
1732 {"\\u010CABRADI LJUBICA", {0}},
1733 {"\\u010CABRADI STEVAN", {0}},
1734 {"\\u010CABRDA MARTIN", {0}},
1735 {"\\u010CABRILO BOGDAN", {0}},
1736 {"\\u010CABRILO BRANISLAV", {0}},
1737 {"\\u010CABRILO LAZAR", {0}},
1738 {"\\u010CABRILO LJUBICA", {0}},
1739 {"\\u010CABRILO SPASOJA", {0}},
1740 {"\\u010CADE\\u0160 ZDENKA", {0}},
1741 {"\\u010CADESKI BLAGOJE", {0}},
1742 {"\\u010CADOVSKI VLADIMIR", {0}},
1743 {"\\u010CAGLJEVI\\u0106 TOMA", {0}},
1744 {"\\u010CAGOROVI\\u0106 VLADIMIR", {0}},
1745 {"\\u010CAJA VANKA", {0}},
1746 {"\\u010CAJI\\u0106 BOGOLJUB", {0}},
1747 {"\\u010CAJI\\u0106 BORISLAV", {0}},
1748 {"\\u010CAJI\\u0106 RADOSLAV", {0}},
1749 {"\\u010CAK\\u0160IRAN MILADIN", {0}},
1750 {"\\u010CAKAN EUGEN", {0}},
1751 {"\\u010CAKAN EVGENIJE", {0}},
1752 {"\\u010CAKAN IVAN", {0}},
1753 {"\\u010CAKAN JULIJAN", {0}},
1754 {"\\u010CAKAN MIHAJLO", {0}},
1755 {"\\u010CAKAN STEVAN", {0}},
1756 {"\\u010CAKAN VLADIMIR", {0}},
1757 {"\\u010CAKAN VLADIMIR", {0}},
1758 {"\\u010CAKAN VLADIMIR", {0}},
1759 {"\\u010CAKARA ANA", {0}},
1760 {"\\u010CAKAREVI\\u0106 MOMIR", {0}},
1761 {"\\u010CAKAREVI\\u0106 NEDELJKO", {0}},
1762 {"\\u010CAKI \\u0160ANDOR", {0}},
1763 {"\\u010CAKI AMALIJA", {0}},
1764 {"\\u010CAKI ANDRA\\u0160", {0}},
1765 {"\\u010CAKI LADISLAV", {0}},
1766 {"\\u010CAKI LAJO\\u0160", {0}},
1767 {"\\u010CAKI LASLO", {0}}
1772 int32_t i = 0, j = 0, k = 0, buffSize = 0, skSize = 0, lowerSize = 0, upperSize = 0;
1773 int32_t arraySize = sizeof(tests)/sizeof(tests[0]);
1775 (void)lowerSize; // Suppress unused variable warnings.
// Part one, step 1: unescape every entry and store its sort key in the entry itself.
1778 for(i = 0; i<arraySize; i++) {
1779 buffSize = u_unescape(tests[i].original, buffer, 512);
1780 skSize = coll->getSortKey(buffer, buffSize, tests[i].key, 512);
// Step 2: sort the entries by sort key.
1783 qsort(tests, arraySize, sizeof(struct teststruct), compare_teststruct);
// Step 3: for every pair i < j, build a lower bound from entry i and an upper bound from
// entry j, then check each entry k in [i, j] against both. The key length is passed as -1.
1785 for(i = 0; i < arraySize-1; i++) {
1786 for(j = i+1; j < arraySize; j++) {
1787 lowerSize = coll->getBound(tests[i].key, -1, UCOL_BOUND_LOWER, 1, lower, 512, status);
1788 upperSize = coll->getBound(tests[j].key, -1, UCOL_BOUND_UPPER, 1, upper, 512, status);
1789 for(k = i; k <= j; k++) {
// NOTE(review): the messages below print k under the label "j".
1790 if(strcmp((const char *)lower, (const char *)tests[k].key) > 0) {
1791 errln("Problem with lower! j = %i (%s vs %s)", k, tests[k].original, tests[i].original);
1793 if(strcmp((const char *)upper, (const char *)tests[k].key) <= 0) {
1794 errln("Problem with upper! j = %i (%s vs %s)", k, tests[k].original, tests[j].original);
// Part two: for each string in test[], build bounds from its sort key with the C API
// (UCOL_BOUND_UPPER_LONG for the upper bound), then check that the sort key of every
// later string in the list lies between them.
1801 for(i = 0; i<(int32_t)(sizeof(test)/sizeof(test[0])); i++) {
1802 buffSize = u_unescape(test[i], buffer, 512);
1803 skSize = coll->getSortKey(buffer, buffSize, sortkey, 512);
1804 lowerSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_LOWER, 1, lower, 512, &status);
1805 upperSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_UPPER_LONG, 1, upper, 512, &status);
// sortkey is reused here for test[j]; the bounds above still refer to test[i].
1806 for(j = i+1; j<(int32_t)(sizeof(test)/sizeof(test[0])); j++) {
1807 buffSize = u_unescape(test[j], buffer, 512);
1808 skSize = coll->getSortKey(buffer, buffSize, sortkey, 512);
1809 if(strcmp((const char *)lower, (const char *)sortkey) > 0) {
1810 errln("Problem with lower! i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]);
1812 if(strcmp((const char *)upper, (const char *)sortkey) <= 0) {
1813 errln("Problem with upper! i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]);
// Checks RuleBasedCollator::getTailoredSet(): for each rule string in setTest, the
// returned set must have the expected size and contain every expected string.
1821 void CollationAPITest::TestGetTailoredSet()
// Each setTest row holds a rule string, the strings expected in the tailored set
// (up to 20), and how many of them there are.
1825 const char *tests[20];
1828 { "&a < \\u212b", { "\\u212b", "A\\u030a", "\\u00c5" }, 3},
1829 { "& S < \\u0161 <<< \\u0160", { "\\u0161", "s\\u030C", "\\u0160", "S\\u030C" }, 4}
1832 uint32_t i = 0, j = 0;
1833 UErrorCode status = U_ZERO_ERROR;
1835 RuleBasedCollator *coll = NULL;
1837 UnicodeSet *set = NULL;
1839 for(i = 0; i < sizeof(setTest)/sizeof(setTest[0]); i++) {
// The rule text is unescaped before the collator is built from it.
1840 buff = UnicodeString(setTest[i].rules, "").unescape();
1841 coll = new RuleBasedCollator(buff, status);
1842 if(U_SUCCESS(status)) {
1843 set = coll->getTailoredSet(status);
1844 if(set->size() != setTest[i].testsize) {
1845 errln("Tailored set size different (%d) than expected (%d)", set->size(), setTest[i].testsize);
// Every expected string, once unescaped, must be a member of the set.
1847 for(j = 0; j < (uint32_t)setTest[i].testsize; j++) {
1848 buff = UnicodeString(setTest[i].tests[j], "").unescape();
1849 if(!set->contains(buff)) {
1850 errln("Tailored set doesn't contain %s... It should", setTest[i].tests[j]);
1855 errcheckln(status, "Couldn't open collator with rules %s - %s", setTest[i].rules, u_errorName(status));
1861 void CollationAPITest::TestUClassID()
1863 char id = *((char *)RuleBasedCollator::getStaticClassID());
1865 errln("Static class id for RuleBasedCollator should be 0");
1867 UErrorCode status = U_ZERO_ERROR;
1868 RuleBasedCollator *coll
1869 = (RuleBasedCollator *)Collator::createInstance(status);
1870 if(U_FAILURE(status)) {
1872 errcheckln(status, "Collator creation failed with %s", u_errorName(status));
1875 id = *((char *)coll->getDynamicClassID());
1877 errln("Dynamic class id for RuleBasedCollator should be 0");
1879 id = *((char *)CollationKey::getStaticClassID());
1881 errln("Static class id for CollationKey should be 0");
1883 CollationKey *key = new CollationKey();
1884 id = *((char *)key->getDynamicClassID());
1886 errln("Dynamic class id for CollationKey should be 0");
1888 id = *((char *)CollationElementIterator::getStaticClassID());
1890 errln("Static class id for CollationElementIterator should be 0");
1892 UnicodeString str("testing");
1893 CollationElementIterator *iter = coll->createCollationElementIterator(str);
1894 id = *((char *)iter->getDynamicClassID());
1896 errln("Dynamic class id for CollationElementIterator should be 0");
1903 class TestCollator : public Collator
1906 virtual Collator* clone(void) const;
1908 using Collator::compare;
1910 virtual UCollationResult compare(const UnicodeString& source,
1911 const UnicodeString& target,
1912 UErrorCode& status) const;
1913 virtual UCollationResult compare(const UnicodeString& source,
1914 const UnicodeString& target,
1916 UErrorCode& status) const;
1917 virtual UCollationResult compare(const UChar* source,
1918 int32_t sourceLength,
1919 const UChar* target,
1920 int32_t targetLength,
1921 UErrorCode& status) const;
1922 virtual CollationKey& getCollationKey(const UnicodeString& source,
1924 UErrorCode& status) const;
1925 virtual CollationKey& getCollationKey(const UChar*source,
1926 int32_t sourceLength,
1928 UErrorCode& status) const;
1929 virtual int32_t hashCode(void) const;
1930 virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
1931 virtual ECollationStrength getStrength(void) const;
1932 virtual void setStrength(ECollationStrength newStrength);
1933 virtual UClassID getDynamicClassID(void) const;
1934 virtual void getVersion(UVersionInfo info) const;
1935 virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
1936 UErrorCode &status);
1937 virtual UColAttributeValue getAttribute(UColAttribute attr,
1938 UErrorCode &status) const;
1939 virtual uint32_t setVariableTop(const UChar *varTop, int32_t len,
1940 UErrorCode &status);
1941 virtual uint32_t setVariableTop(const UnicodeString &varTop,
1942 UErrorCode &status);
1943 virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
1944 virtual uint32_t getVariableTop(UErrorCode &status) const;
1945 virtual int32_t getSortKey(const UnicodeString& source,
1947 int32_t resultLength) const;
1948 virtual int32_t getSortKey(const UChar*source, int32_t sourceLength,
1949 uint8_t*result, int32_t resultLength) const;
1950 virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
1951 virtual UBool operator==(const Collator& other) const;
1952 // Collator::operator!= calls !Collator::operator== which works for all subclasses.
1953 virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
1954 TestCollator() : Collator() {};
1955 TestCollator(UCollationStrength collationStrength,
1956 UNormalizationMode decompositionMode) : Collator(collationStrength, decompositionMode) {};
1959 inline UBool TestCollator::operator==(const Collator& other) const {
1960 // TestCollator has no fields, so we test for identity.
1961 return this == &other;
1963 // Normally, subclasses should do something like the following:
1964 // if (this == &other) { return TRUE; }
1965 // if (!Collator::operator==(other)) { return FALSE; } // not the same class
1967 // const TestCollator &o = (const TestCollator&)other;
1968 // (compare this vs. o's subclass fields)
1971 Collator* TestCollator::clone() const
1973 return new TestCollator();
/**
 * Compares two strings using UnicodeString::compare() instead of real
 * collation data.
 *
 * @param source  left-hand string
 * @param target  right-hand string
 * @param status  in/out error code; the comparison only runs when it holds
 *                a success value on entry
 * @return on success, the result of source.compare(target) converted to
 *         UCollationResult.
 * NOTE(review): the value returned when status is already a failure is on
 * lines not visible in this listing.
 */
1976 UCollationResult TestCollator::compare(const UnicodeString& source,
1977 const UnicodeString& target,
1978 UErrorCode& status) const
// Skip the comparison entirely if an earlier call already failed.
1980 if(U_SUCCESS(status)) {
1981 return UCollationResult(source.compare(target));
/**
 * Length-limited variant of compare().
 *
 * @param source  left-hand string; only its first `length` code units take
 *                part in the comparison
 * @param target  right-hand string
 * @param status  in/out error code; the comparison only runs when it holds
 *                a success value on entry
 * @return on success, source.compare(0, length, target) converted to
 *         UCollationResult.
 * NOTE(review): only `source` is limited to `length` here; `target` is
 * passed whole. Confirm this matches the intended contract.
 * NOTE(review): the value returned on a failed status is on lines not
 * visible in this listing.
 */
1987 UCollationResult TestCollator::compare(const UnicodeString& source,
1988 const UnicodeString& target,
1990 UErrorCode& status) const
// Skip the comparison entirely if an earlier call already failed.
1992 if(U_SUCCESS(status)) {
1993 return UCollationResult(source.compare(0, length, target));
1999 UCollationResult TestCollator::compare(const UChar* source,
2000 int32_t sourceLength,
2001 const UChar* target,
2002 int32_t targetLength,
2003 UErrorCode& status) const
2005 UnicodeString s(source, sourceLength);
2006 UnicodeString t(target, targetLength);
2007 return compare(s, t, status);
/**
 * Builds a collation key whose bytes are the chars extracted from `source`
 * (converter argument NULL), i.e. without using any collation data.
 *
 * @param source  string to build the key from
 * @param status  in/out error code, passed to UnicodeString::extract()
 * NOTE(review): the declarations of `temp` and `length`, and the statements
 * that store the result and return, are on lines not visible in this listing.
 */
2010 CollationKey& TestCollator::getCollationKey(const UnicodeString& source,
2012 UErrorCode& status) const
// NOTE(review): `length` is reused for extract()'s return value; confirm that
// value cannot exceed the capacity of `temp` before it is used as the key
// length below.
2016 length = source.extract(temp, length, NULL, status);
// Temporary key built from the extracted bytes.
2018 CollationKey tempkey((uint8_t*)temp, length);
2023 CollationKey& TestCollator::getCollationKey(const UChar*source,
2024 int32_t sourceLength,
2026 UErrorCode& status) const
2028 //s tack allocation used since collationkey does not keep the unicodestring
2029 UnicodeString str(source, sourceLength);
2030 return getCollationKey(str, key, status);
/**
 * Writes a "sort key" for `source` by extracting its chars straight into the
 * caller's buffer (converter argument NULL); no collation data is involved.
 *
 * @param source        string to convert
 * @param result        destination buffer, reinterpreted as char*
 * @param resultLength  capacity handed to UnicodeString::extract()
 * NOTE(review): the end of the extract() call and the return statement are
 * on lines not visible in this listing.
 */
2033 int32_t TestCollator::getSortKey(const UnicodeString& source, uint8_t* result,
2034 int32_t resultLength) const
// Local error code: this API has no status parameter to report through.
2036 UErrorCode status = U_ZERO_ERROR;
2037 int32_t length = source.extract((char *)result, resultLength, NULL,
2043 int32_t TestCollator::getSortKey(const UChar*source, int32_t sourceLength,
2044 uint8_t*result, int32_t resultLength) const
2046 UnicodeString str(source, sourceLength);
2047 return getSortKey(str, result, resultLength);
/**
 * Hash code of this test collator.
 * TestSubclass() reports an error when two TestCollator instances return
 * different values from this function.
 * NOTE(review): the body is on lines not visible in this listing.
 */
2050 int32_t TestCollator::hashCode() const
/**
 * Stub override of Collator::getLocale(); per the comment below, this API is
 * not used by the tests.
 *
 * @param type    which locale is being requested
 * @param status  in/out error code, inspected on entry
 * NOTE(review): the statements inside and after the failure check are on
 * lines not visible in this listing.
 */
2055 Locale TestCollator::getLocale(ULocDataLocaleType type, UErrorCode& status) const
2057 // api not used, this is to make the compiler happy
2058 if (U_FAILURE(status)) {
/**
 * Stub override of Collator::getStrength().
 * NOTE(review): the body (and therefore the strength reported) is on lines
 * not visible in this listing.
 */
2064 Collator::ECollationStrength TestCollator::getStrength() const
/**
 * Stub override of Collator::setStrength(); per the comment below, this API
 * is not used by the tests.
 *
 * @param newStrength  requested strength
 * NOTE(review): the statements of the body are on lines not visible in this
 * listing.
 */
2069 void TestCollator::setStrength(Collator::ECollationStrength newStrength)
2071 // api not used, this is to make the compiler happy
/**
 * Stub override of getDynamicClassID() for the test subclass.
 * NOTE(review): the body (and therefore the ID returned) is on lines not
 * visible in this listing.
 */
2075 UClassID TestCollator::getDynamicClassID(void) const
2080 void TestCollator::getVersion(UVersionInfo info) const
2082 // api not used, this is to make the compiler happy
2083 memset(info, 0, U_MAX_VERSION_LENGTH);
/**
 * Stub override of Collator::setAttribute().
 * All three parameter names are commented out in the signature, so none of
 * the arguments can be referenced by the body.
 */
2086 void TestCollator::setAttribute(UColAttribute /*attr*/, UColAttributeValue /*value*/,
2087 UErrorCode & /*status*/)
/**
 * Stub override of Collator::getAttribute(); per the comment below, this API
 * is not used by the tests.
 *
 * @param attr    attribute being queried
 * @param status  in/out error code, inspected on entry
 * @return UCOL_DEFAULT unless status is a failure or attr equals
 *         UCOL_ATTRIBUTE_COUNT.
 * NOTE(review): the value returned in that special case is on a line not
 * visible in this listing.
 */
2091 UColAttributeValue TestCollator::getAttribute(UColAttribute attr,
2092 UErrorCode &status) const
2094 // api not used, this is to make the compiler happy
2095 if (U_FAILURE(status) || attr == UCOL_ATTRIBUTE_COUNT) {
2098 return UCOL_DEFAULT;
/**
 * Stub override of Collator::setVariableTop() for a UChar buffer; per the
 * comment below, this API is not used by the tests.
 *
 * Sets status to U_ILLEGAL_ARGUMENT_ERROR when it was a success on entry and
 * varTop is null or len is below -1.
 *
 * @param varTop  text designating the variable top
 * @param len     length of varTop
 * NOTE(review): the status parameter line and the return statement are on
 * lines not visible in this listing.
 */
2101 uint32_t TestCollator::setVariableTop(const UChar *varTop, int32_t len,
2104 // api not used, this is to make the compiler happy
2105 if (U_SUCCESS(status) && (varTop == 0 || len < -1)) {
2106 status = U_ILLEGAL_ARGUMENT_ERROR;
/**
 * Stub override of Collator::setVariableTop() for a UnicodeString; per the
 * comment below, this API is not used by the tests.
 *
 * Sets status to U_ILLEGAL_ARGUMENT_ERROR when it was a success on entry and
 * varTop is empty.
 *
 * @param varTop  text designating the variable top
 * NOTE(review): the status parameter line and the return statement are on
 * lines not visible in this listing.
 */
2111 uint32_t TestCollator::setVariableTop(const UnicodeString &varTop,
2114 // api not used, this is to make the compiler happy
2115 if (U_SUCCESS(status) && varTop.length() == 0) {
2116 status = U_ILLEGAL_ARGUMENT_ERROR;
2121 void TestCollator::setVariableTop(uint32_t varTop, UErrorCode &status)
2123 // api not used, this is to make the compiler happy
2124 if (U_SUCCESS(status) && varTop == 0) {
2125 status = U_ILLEGAL_ARGUMENT_ERROR;
/**
 * Stub override of Collator::getVariableTop(); per the comment below, this
 * API is not used by the tests.
 *
 * @param status  in/out error code, inspected on entry
 * @return 0xFFFFFFFF when the success branch does not return first.
 * NOTE(review): the statement inside the success branch is on a line not
 * visible in this listing.
 */
2129 uint32_t TestCollator::getVariableTop(UErrorCode &status) const
2132 // api not used, this is to make the compiler happy
2133 if (U_SUCCESS(status)) {
2136 return (uint32_t)(0xFFFFFFFFu);
2139 UnicodeSet * TestCollator::getTailoredSet(UErrorCode &status) const
2141 return Collator::getTailoredSet(status);
2144 void TestCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale)
2146 Collator::setLocales(requestedLocale, validLocale, actualLocale);
2150 void CollationAPITest::TestSubclass()
2154 doAssert(col1 != col2, "2 instances of TestCollator should be different");
2155 if (col1.hashCode() != col2.hashCode()) {
2156 errln("Every TestCollator has the same hashcode");
2158 UnicodeString abc("abc", 3);
2159 UnicodeString bcd("bcd", 3);
2160 if (col1.compare(abc, bcd) != abc.compare(bcd)) {
2161 errln("TestCollator compare should be the same as the default "
2162 "string comparison");
2165 UErrorCode status = U_ZERO_ERROR;
2166 col1.getCollationKey(abc, key, status);
2168 const char* bytes = (const char *)key.getByteArray(length);
2169 UnicodeString keyarray(bytes, length, NULL, status);
2170 if (abc != keyarray) {
2171 errln("TestCollator collationkey API is returning wrong values");
2174 UnicodeSet expectedset(0, 0x10FFFF);
2175 UnicodeSet *defaultset = col1.getTailoredSet(status);
2176 if (!defaultset->containsAll(expectedset)
2177 || !expectedset.containsAll(*defaultset)) {
2178 errln("Error: expected default tailoring to be 0 to 0x10ffff");
2182 // use base class implementation
2183 Locale loc1 = Locale::getGermany();
2184 Locale loc2 = Locale::getFrance();
2185 col1.setLocales(loc1, loc2, loc2); // default implementation has no effect
2187 UnicodeString displayName;
2188 col1.getDisplayName(loc1, loc2, displayName); // de_DE collator in fr_FR locale
2190 TestCollator col3(UCOL_TERTIARY, UNORM_NONE);
2191 UnicodeString a("a");
2192 UnicodeString b("b");
2193 Collator::EComparisonResult result = Collator::EComparisonResult(a.compare(b));
2194 if(col1.compare(a, b) != result) {
2195 errln("Collator doesn't give default result");
2197 if(col1.compare(a, b, 1) != result) {
2198 errln("Collator doesn't give default result");
2200 if(col1.compare(a.getBuffer(), a.length(), b.getBuffer(), b.length()) != result) {
2201 errln("Collator doesn't give default result");
2205 void CollationAPITest::TestNULLCharTailoring()
2207 UErrorCode status = U_ZERO_ERROR;
2208 UChar buf[256] = {0};
2209 int32_t len = u_unescape("&a < '\\u0000'", buf, 256);
2210 UnicodeString first((UChar)0x0061);
2211 UnicodeString second((UChar)0);
2212 RuleBasedCollator *coll = new RuleBasedCollator(UnicodeString(buf, len), status);
2213 if(U_FAILURE(status)) {
2215 errcheckln(status, "Failed to open collator - %s", u_errorName(status));
2218 UCollationResult res = coll->compare(first, second, status);
2219 if(res != UCOL_LESS) {
2220 errln("a should be less then NULL after tailoring");
2225 void CollationAPITest::TestClone() {
2227 UErrorCode status = U_ZERO_ERROR;
2228 RuleBasedCollator* c0 = (RuleBasedCollator*)Collator::createInstance(status);
2230 if (U_FAILURE(status)) {
2231 errcheckln(status, "Collator::CreateInstance(status) failed with %s", u_errorName(status));
2235 c0->setStrength(Collator::TERTIARY);
2236 dump("c0", c0, status);
2239 RuleBasedCollator* c1 = (RuleBasedCollator*)Collator::createInstance(status);
2240 c1->setStrength(Collator::TERTIARY);
2241 UColAttributeValue val = c1->getAttribute(UCOL_CASE_FIRST, status);
2242 if(val == UCOL_LOWER_FIRST){
2243 c1->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
2245 c1->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
2247 dump("c0", c0, status);
2248 dump("c1", c1, status);
2251 RuleBasedCollator* c2 = (RuleBasedCollator*)c1->clone();
2252 val = c2->getAttribute(UCOL_CASE_FIRST, status);
2253 if(val == UCOL_LOWER_FIRST){
2254 c2->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
2256 c2->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
2258 if(U_FAILURE(status)){
2259 errln("set and get attributes of collator failed. %s\n", u_errorName(status));
2262 dump("c0", c0, status);
2263 dump("c1", c1, status);
2264 dump("c2", c2, status);
2266 errln("The cloned objects refer to same data");
2273 void CollationAPITest::TestIterNumeric() {
2274 // Regression test for ticket #9915.
2275 // The collation code sometimes masked the continuation marker away
2276 // but later tested the result for isContinuation().
2277 // This test case failed because the third bytes of the computed numeric-collation primaries
2278 // were permutated with the script reordering table.
2279 // It should have been possible to reproduce this with the root collator
2280 // and characters with appropriate 3-byte primary weights.
2281 // The effectiveness of this test depends completely on the collation elements
2282 // and on the implementation code.
2283 IcuTestErrorCode errorCode(*this, "TestIterNumeric");
2284 RuleBasedCollator coll(UnicodeString("[reorder Hang Hani]"), errorCode);
2285 if(errorCode.logDataIfFailureAndReset("RuleBasedCollator constructor")) {
2288 coll.setAttribute(UCOL_NUMERIC_COLLATION, UCOL_ON, errorCode);
2289 UCharIterator iter40, iter72;
2290 uiter_setUTF8(&iter40, "\x34\x30", 2);
2291 uiter_setUTF8(&iter72, "\x37\x32", 2);
2292 UCollationResult result = coll.compare(iter40, iter72, errorCode);
2293 assertEquals("40<72", (int32_t)UCOL_LESS, (int32_t)result);
2296 void CollationAPITest::dump(UnicodeString msg, RuleBasedCollator* c, UErrorCode& status) {
2297 const char* bigone = "One";
2298 const char* littleone = "one";
2300 logln(msg + " " + c->compare(bigone, littleone) +
2301 " s: " + c->getStrength() +
2302 " u: " + c->getAttribute(UCOL_CASE_FIRST, status));
/**
 * Test-suite dispatcher for CollationAPITest.
 *
 * @param index  index of the test case being addressed
 * @param exec   when true, the suite banner is logged
 * @param name   in/out test-case name
 * The fourth parameter is unnamed and unused.
 *
 * The test cases are registered through the TESTCASE_AUTO macros below.
 * NOTE(review): presumably the order of the TESTCASE_AUTO lines determines
 * each test's index, so new tests should be appended — confirm against the
 * macro definitions. The lines closing the table are not visible in this
 * listing.
 */
2304 void CollationAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */)
2306 if (exec) logln("TestSuite CollationAPITest: ");
2307 TESTCASE_AUTO_BEGIN;
2308 TESTCASE_AUTO(TestProperty);
2309 TESTCASE_AUTO(TestOperators);
2310 TESTCASE_AUTO(TestDuplicate);
2311 TESTCASE_AUTO(TestCompare);
2312 TESTCASE_AUTO(TestHashCode);
2313 TESTCASE_AUTO(TestCollationKey);
2314 TESTCASE_AUTO(TestElemIter);
2315 TESTCASE_AUTO(TestGetAll);
2316 TESTCASE_AUTO(TestRuleBasedColl);
2317 TESTCASE_AUTO(TestDecomposition);
2318 TESTCASE_AUTO(TestSafeClone);
2319 TESTCASE_AUTO(TestSortKey);
2320 TESTCASE_AUTO(TestSortKeyOverflow);
2321 TESTCASE_AUTO(TestMaxExpansion);
2322 TESTCASE_AUTO(TestDisplayName);
2323 TESTCASE_AUTO(TestAttribute);
2324 TESTCASE_AUTO(TestVariableTopSetting);
2325 TESTCASE_AUTO(TestRules);
2326 TESTCASE_AUTO(TestGetLocale);
2327 TESTCASE_AUTO(TestBounds);
2328 TESTCASE_AUTO(TestGetTailoredSet);
2329 TESTCASE_AUTO(TestUClassID);
2330 TESTCASE_AUTO(TestSubclass);
2331 TESTCASE_AUTO(TestNULLCharTailoring);
2332 TESTCASE_AUTO(TestClone);
2333 TESTCASE_AUTO(TestIterNumeric);
2337 #endif /* #if !UCONFIG_NO_COLLATION */