source/test/cintltst/cmsccoll.c

   1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /********************************************************************
   4  * COPYRIGHT:
   5  * Copyright (c) 2001-2016, International Business Machines Corporation and
   6  * others. All Rights Reserved.
   7  ********************************************************************/
   8 /*******************************************************************************
   9 *
  10 * File cmsccoll.C
  11 *
  12 *******************************************************************************/
  13 /**
  14  * These are the tests specific to ICU 1.8 and above, that I didn't know where
  15  * to fit.
  16  */
  17
  18 #include <stdio.h>
  19
  20 #include "unicode/utypes.h"
  21
  22 #if !UCONFIG_NO_COLLATION
  23
  24 #include "unicode/ucol.h"
  25 #include "unicode/ucoleitr.h"
  26 #include "unicode/uloc.h"
  27 #include "cintltst.h"
  28 #include "ccolltst.h"
  29 #include "callcoll.h"
  30 #include "unicode/ustring.h"
  31 #include "string.h"
  32 #include "ucol_imp.h"
  33 #include "cmemory.h"
  34 #include "cstring.h"
  35 #include "uassert.h"
  36 #include "unicode/parseerr.h"
  37 #include "unicode/ucnv.h"
  38 #include "unicode/ures.h"
  39 #include "unicode/uscript.h"
  40 #include "unicode/utf16.h"
  41 #include "uparse.h"
  42 #include "putilimp.h"
  43
  44
  45 #define MAX_TOKEN_LEN 16
  46
  47 typedef UCollationResult tst_strcoll(void *collator, const int object,
  48                         const UChar *source, const int sLen,
  49                         const UChar *target, const int tLen);
  50
  51
  52
  53 const static char cnt1[][10] = {
  54
  55   "AA",
  56   "AC",
  57   "AZ",
  58   "AQ",
  59   "AB",
  60   "ABZ",
  61   "ABQ",
  62   "Z",
  63   "ABC",
  64   "Q",
  65   "B"
  66 };
  67
  68 const static char cnt2[][10] = {
  69   "DA",
  70   "DAD",
  71   "DAZ",
  72   "MAR",
  73   "Z",
  74   "DAVIS",
  75   "MARK",
  76   "DAV",
  77   "DAVI"
  78 };
  79
  80 static void IncompleteCntTest(void)
  81 {
  82   UErrorCode status = U_ZERO_ERROR;
  83   UChar temp[90];
  84   UChar t1[90];
  85   UChar t2[90];
  86
  87   UCollator *coll =  NULL;
  88   uint32_t i = 0, j = 0;
  89   uint32_t size = 0;
  90
  91   u_uastrcpy(temp, " & Z < ABC < Q < B");
  92
  93   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
  94
  95   if(U_SUCCESS(status)) {
  96     size = UPRV_LENGTHOF(cnt1);
  97     for(i = 0; i < size-1; i++) {
  98       for(j = i+1; j < size; j++) {
  99         UCollationElements *iter;
 100         u_uastrcpy(t1, cnt1[i]);
 101         u_uastrcpy(t2, cnt1[j]);
 102         doTest(coll, t1, t2, UCOL_LESS);
 103         /* synwee : added collation element iterator test */
 104         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
 105         if (U_FAILURE(status)) {
 106           log_err("Creation of iterator failed\n");
 107           break;
 108         }
 109         backAndForth(iter);
 110         ucol_closeElements(iter);
 111       }
 112     }
 113   }
 114
 115   ucol_close(coll);
 116
 117
 118   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
 119   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
 120
 121   if(U_SUCCESS(status)) {
 122     size = UPRV_LENGTHOF(cnt2);
 123     for(i = 0; i < size-1; i++) {
 124       for(j = i+1; j < size; j++) {
 125         UCollationElements *iter;
 126         u_uastrcpy(t1, cnt2[i]);
 127         u_uastrcpy(t2, cnt2[j]);
 128         doTest(coll, t1, t2, UCOL_LESS);
 129
 130         /* synwee : added collation element iterator test */
 131         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
 132         if (U_FAILURE(status)) {
 133           log_err("Creation of iterator failed\n");
 134           break;
 135         }
 136         backAndForth(iter);
 137         ucol_closeElements(iter);
 138       }
 139     }
 140   }
 141
 142   ucol_close(coll);
 143
 144
 145 }
 146
 147 const static char shifted[][20] = {
 148   "black bird",
 149   "black-bird",
 150   "blackbird",
 151   "black Bird",
 152   "black-Bird",
 153   "blackBird",
 154   "black birds",
 155   "black-birds",
 156   "blackbirds"
 157 };
 158
 159 const static UCollationResult shiftedTert[] = {
 160   UCOL_EQUAL,
 161   UCOL_EQUAL,
 162   UCOL_EQUAL,
 163   UCOL_LESS,
 164   UCOL_EQUAL,
 165   UCOL_EQUAL,
 166   UCOL_LESS,
 167   UCOL_EQUAL,
 168   UCOL_EQUAL
 169 };
 170
 171 const static char nonignorable[][20] = {
 172   "black bird",
 173   "black Bird",
 174   "black birds",
 175   "black-bird",
 176   "black-Bird",
 177   "black-birds",
 178   "blackbird",
 179   "blackBird",
 180   "blackbirds"
 181 };
 182
 183 static void BlackBirdTest(void) {
 184   UErrorCode status = U_ZERO_ERROR;
 185   UChar t1[90];
 186   UChar t2[90];
 187
 188   uint32_t i = 0, j = 0;
 189   uint32_t size = 0;
 190   UCollator *coll = ucol_open("en_US", &status);
 191
 192   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
 193   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
 194
 195   if(U_SUCCESS(status)) {
 196     size = UPRV_LENGTHOF(nonignorable);
 197     for(i = 0; i < size-1; i++) {
 198       for(j = i+1; j < size; j++) {
 199         u_uastrcpy(t1, nonignorable[i]);
 200         u_uastrcpy(t2, nonignorable[j]);
 201         doTest(coll, t1, t2, UCOL_LESS);
 202       }
 203     }
 204   }
 205
 206   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
 207   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
 208
 209   if(U_SUCCESS(status)) {
 210     size = UPRV_LENGTHOF(shifted);
 211     for(i = 0; i < size-1; i++) {
 212       for(j = i+1; j < size; j++) {
 213         u_uastrcpy(t1, shifted[i]);
 214         u_uastrcpy(t2, shifted[j]);
 215         doTest(coll, t1, t2, UCOL_LESS);
 216       }
 217     }
 218   }
 219
 220   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
 221   if(U_SUCCESS(status)) {
 222     size = UPRV_LENGTHOF(shifted);
 223     for(i = 1; i < size; i++) {
 224       u_uastrcpy(t1, shifted[i-1]);
 225       u_uastrcpy(t2, shifted[i]);
 226       doTest(coll, t1, t2, shiftedTert[i]);
 227     }
 228   }
 229
 230   ucol_close(coll);
 231 }
 232
 233 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
 234     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
 235     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
 236     {0x0041/*'A'*/, 0x0300, 0x0000},
 237     {0x00C0, 0x0301, 0x0000},
 238     /* this would work with forced normalization */
 239     {0x00C0, 0x0316, 0x0000}
 240 };
 241
 242 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
 243     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
 244     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
 245     {0x00C0, 0},
 246     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
 247     /* this would work with forced normalization */
 248     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
 249 };
 250
 251 const static UCollationResult results[] = {
 252     UCOL_GREATER,
 253     UCOL_EQUAL,
 254     UCOL_EQUAL,
 255     UCOL_GREATER,
 256     UCOL_EQUAL
 257 };
 258
 259 static void FunkyATest(void)
 260 {
 261
 262     int32_t i;
 263     UErrorCode status = U_ZERO_ERROR;
 264     UCollator  *myCollation;
 265     myCollation = ucol_open("en_US", &status);
 266     if(U_FAILURE(status)){
 267         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
 268         return;
 269     }
 270     log_verbose("Testing some A letters, for some reason\n");
 271     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
 272     ucol_setStrength(myCollation, UCOL_TERTIARY);
 273     for (i = 0; i < 4 ; i++)
 274     {
 275         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
 276     }
 277     ucol_close(myCollation);
 278 }
 279
 280 UColAttributeValue caseFirst[] = {
 281     UCOL_OFF,
 282     UCOL_LOWER_FIRST,
 283     UCOL_UPPER_FIRST
 284 };
 285
 286
 287 UColAttributeValue alternateHandling[] = {
 288     UCOL_NON_IGNORABLE,
 289     UCOL_SHIFTED
 290 };
 291
 292 UColAttributeValue caseLevel[] = {
 293     UCOL_OFF,
 294     UCOL_ON
 295 };
 296
 297 UColAttributeValue strengths[] = {
 298     UCOL_PRIMARY,
 299     UCOL_SECONDARY,
 300     UCOL_TERTIARY,
 301     UCOL_QUATERNARY,
 302     UCOL_IDENTICAL
 303 };
 304
 305 #if 0
 306 static const char * strengthsC[] = {
 307     "UCOL_PRIMARY",
 308     "UCOL_SECONDARY",
 309     "UCOL_TERTIARY",
 310     "UCOL_QUATERNARY",
 311     "UCOL_IDENTICAL"
 312 };
 313
 314 static const char * caseFirstC[] = {
 315     "UCOL_OFF",
 316     "UCOL_LOWER_FIRST",
 317     "UCOL_UPPER_FIRST"
 318 };
 319
 320
 321 static const char * alternateHandlingC[] = {
 322     "UCOL_NON_IGNORABLE",
 323     "UCOL_SHIFTED"
 324 };
 325
 326 static const char * caseLevelC[] = {
 327     "UCOL_OFF",
 328     "UCOL_ON"
 329 };
 330
 331 /* not used currently - does not test only prints */
 332 static void PrintMarkDavis(void)
 333 {
 334   UErrorCode status = U_ZERO_ERROR;
 335   UChar m[256];
 336   uint8_t sortkey[256];
 337   UCollator *coll = ucol_open("en_US", &status);
 338   uint32_t h,i,j,k, sortkeysize;
 339   uint32_t sizem = 0;
 340   char buffer[512];
 341   uint32_t len = 512;
 342
 343   log_verbose("PrintMarkDavis");
 344
 345   u_uastrcpy(m, "Mark Davis");
 346   sizem = u_strlen(m);
 347
 348
 349   m[1] = 0xe4;
 350
 351   for(i = 0; i<sizem; i++) {
 352     fprintf(stderr, "\\u%04X ", m[i]);
 353   }
 354   fprintf(stderr, "\n");
 355
 356   for(h = 0; h<UPRV_LENGTHOF(caseFirst); h++) {
 357     ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
 358     fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
 359
 360     for(i = 0; i<UPRV_LENGTHOF(alternateHandling); i++) {
 361       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
 362       fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
 363
 364       for(j = 0; j<UPRV_LENGTHOF(caseLevel); j++) {
 365         ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
 366         fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
 367
 368         for(k = 0; k<UPRV_LENGTHOF(strengths); k++) {
 369           ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
 370           sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
 371           fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
 372           fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
 373         }
 374
 375       }
 376
 377     }
 378
 379   }
 380 }
 381 #endif
 382
 383 static void BillFairmanTest(void) {
 384 /*
 385 ** check for actual locale via ICU resource bundles
 386 **
 387 ** lp points to the original locale ("fr_FR_....")
 388 */
 389
 390     UResourceBundle *lr,*cr;
 391     UErrorCode              lec = U_ZERO_ERROR;
 392     const char *lp = "fr_FR_you_ll_never_find_this_locale";
 393
 394     log_verbose("BillFairmanTest\n");
 395
 396     lr = ures_open(NULL,lp,&lec);
 397     if (lr) {
 398         cr = ures_getByKey(lr,"collations",0,&lec);
 399         if (cr) {
 400             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
 401             if (lp) {
 402                 if (U_SUCCESS(lec)) {
 403                     if(strcmp(lp, "fr") != 0) {
 404                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
 405                     }
 406                 }
 407             }
 408             ures_close(cr);
 409         }
 410         ures_close(lr);
 411     }
 412 }
 413
 414 const static char chTest[][20] = {
 415   "c",
 416   "C",
 417   "ca", "cb", "cx", "cy", "CZ",
 418   "c\\u030C", "C\\u030C",
 419   "h",
 420   "H",
 421   "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
 422   "ch", "cH", "Ch", "CH",
 423   "cha", "charly", "che", "chh", "chch", "chr",
 424   "i", "I", "iarly",
 425   "r", "R",
 426   "r\\u030C", "R\\u030C",
 427   "s",
 428   "S",
 429   "s\\u030C", "S\\u030C",
 430   "z", "Z",
 431   "z\\u030C", "Z\\u030C"
 432 };
 433
 434 static void TestChMove(void) {
 435     UChar t1[256] = {0};
 436     UChar t2[256] = {0};
 437
 438     uint32_t i = 0, j = 0;
 439     uint32_t size = 0;
 440     UErrorCode status = U_ZERO_ERROR;
 441
 442     UCollator *coll = ucol_open("cs", &status);
 443
 444     if(U_SUCCESS(status)) {
 445         size = UPRV_LENGTHOF(chTest);
 446         for(i = 0; i < size-1; i++) {
 447             for(j = i+1; j < size; j++) {
 448                 u_unescape(chTest[i], t1, 256);
 449                 u_unescape(chTest[j], t2, 256);
 450                 doTest(coll, t1, t2, UCOL_LESS);
 451             }
 452         }
 453     }
 454     else {
 455         log_data_err("Can't open collator");
 456     }
 457     ucol_close(coll);
 458 }
 459
 460
 461
 462
 463 /*
 464 const static char impTest[][20] = {
 465   "\\u4e00",
 466     "a",
 467     "A",
 468     "b",
 469     "B",
 470     "\\u4e01"
 471 };
 472 */
 473
 474
 475 static void TestImplicitTailoring(void) {
 476   static const struct {
 477     const char *rules;
 478     const char *data[10];
 479     const uint32_t len;
 480   } tests[] = {
 481       {
 482         /* Tailor b and c before U+4E00. */
 483         "&[before 1]\\u4e00 < b < c "
 484         /* Now, before U+4E00 is c; put d and e after that. */
 485         "&[before 1]\\u4e00 < d < e",
 486         { "b", "c", "d", "e", "\\u4e00"}, 5 },
 487       { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
 488       { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
 489       { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
 490   };
 491
 492   int32_t i = 0;
 493
 494   for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
 495       genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
 496   }
 497
 498 /*
 499   UChar t1[256] = {0};
 500   UChar t2[256] = {0};
 501
 502   const char *rule = "&\\u4e00 < a <<< A < b <<< B";
 503
 504   uint32_t i = 0, j = 0;
 505   uint32_t size = 0;
 506   uint32_t ruleLen = 0;
 507   UErrorCode status = U_ZERO_ERROR;
 508   UCollator *coll = NULL;
 509   ruleLen = u_unescape(rule, t1, 256);
 510
 511   coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
 512
 513   if(U_SUCCESS(status)) {
 514     size = UPRV_LENGTHOF(impTest);
 515     for(i = 0; i < size-1; i++) {
 516       for(j = i+1; j < size; j++) {
 517         u_unescape(impTest[i], t1, 256);
 518         u_unescape(impTest[j], t2, 256);
 519         doTest(coll, t1, t2, UCOL_LESS);
 520       }
 521     }
 522   }
 523   else {
 524     log_err("Can't open collator");
 525   }
 526   ucol_close(coll);
 527   */
 528 }
 529
 530 static void TestFCDProblem(void) {
 531   UChar t1[256] = {0};
 532   UChar t2[256] = {0};
 533
 534   const char *s1 = "\\u0430\\u0306\\u0325";
 535   const char *s2 = "\\u04D1\\u0325";
 536
 537   UErrorCode status = U_ZERO_ERROR;
 538   UCollator *coll = ucol_open("", &status);
 539   u_unescape(s1, t1, 256);
 540   u_unescape(s2, t2, 256);
 541
 542   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
 543   doTest(coll, t1, t2, UCOL_EQUAL);
 544
 545   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
 546   doTest(coll, t1, t2, UCOL_EQUAL);
 547
 548   ucol_close(coll);
 549 }
 550
 551 /*
 552 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
 553 We're only using NFC/NFD in this test.
 554 */
 555 #define NORM_BUFFER_TEST_LEN 18
 556 typedef struct {
 557   UChar32 u;
 558   UChar NFC[NORM_BUFFER_TEST_LEN];
 559   UChar NFD[NORM_BUFFER_TEST_LEN];
 560 } tester;
 561
 562 static void TestComposeDecompose(void) {
 563     /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
 564     static const UChar UNICODESET_STR[] = {
 565         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
 566         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
 567         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
 568     };
 569     int32_t noOfLoc;
 570     int32_t i = 0, j = 0;
 571
 572     UErrorCode status = U_ZERO_ERROR;
 573     const char *locName = NULL;
 574     uint32_t nfcSize;
 575     uint32_t nfdSize;
 576     tester **t;
 577     uint32_t noCases = 0;
 578     UCollator *coll = NULL;
 579     UChar32 u = 0;
 580     UChar comp[NORM_BUFFER_TEST_LEN];
 581     uint32_t len = 0;
 582     UCollationElements *iter;
 583     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
 584     int32_t charsToTestSize;
 585
 586     noOfLoc = uloc_countAvailable();
 587
 588     coll = ucol_open("", &status);
 589     if (U_FAILURE(status)) {
 590         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
 591         return;
 592     }
 593     charsToTestSize = uset_size(charsToTest);
 594     if (charsToTestSize <= 0) {
 595         log_err("Set was zero. Missing data?\n");
 596         return;
 597     }
 598     t = (tester **)malloc(charsToTestSize * sizeof(tester *));
 599     t[0] = (tester *)malloc(sizeof(tester));
 600     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
 601
 602     for(u = 0; u < charsToTestSize; u++) {
 603         UChar32 ch = uset_charAt(charsToTest, u);
 604         len = 0;
 605         U16_APPEND_UNSAFE(comp, len, ch);
 606         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
 607         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
 608
 609         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
 610           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
 611             t[noCases]->u = ch;
 612             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
 613                 u_strncpy(t[noCases]->NFC, comp, len);
 614                 t[noCases]->NFC[len] = 0;
 615             }
 616             noCases++;
 617             t[noCases] = (tester *)malloc(sizeof(tester));
 618             uprv_memset(t[noCases], 0, sizeof(tester));
 619         }
 620     }
 621     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
 622     uset_close(charsToTest);
 623     charsToTest = NULL;
 624
 625     for(u=0; u<(UChar32)noCases; u++) {
 626         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
 627             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
 628             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
 629         }
 630     }
 631     /*
 632     for(u = 0; u < charsToTestSize; u++) {
 633       if(!(u&0xFFFF)) {
 634         log_verbose("%08X ", u);
 635       }
 636       uprv_memset(t[noCases], 0, sizeof(tester));
 637       t[noCases]->u = u;
 638       len = 0;
 639       U16_APPEND_UNSAFE(comp, len, u);
 640       comp[len] = 0;
 641       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
 642       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
 643       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
 644       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
 645     }
 646     */
 647
 648     ucol_close(coll);
 649
 650     log_verbose("Testing locales, number of cases = %i\n", noCases);
 651     for(i = 0; i<noOfLoc; i++) {
 652         status = U_ZERO_ERROR;
 653         locName = uloc_getAvailable(i);
 654         if(hasCollationElements(locName)) {
 655             char cName[256];
 656             UChar name[256];
 657             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
 658
 659             for(j = 0; j<nameSize; j++) {
 660                 cName[j] = (char)name[j];
 661             }
 662             cName[nameSize] = 0;
 663             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
 664
 665             coll = ucol_open(locName, &status);
 666             ucol_setStrength(coll, UCOL_IDENTICAL);
 667             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
 668
 669             for(u=0; u<(UChar32)noCases; u++) {
 670                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
 671                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
 672                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
 673                     log_verbose("Testing NFC\n");
 674                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
 675                     backAndForth(iter);
 676                     log_verbose("Testing NFD\n");
 677                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
 678                     backAndForth(iter);
 679                 }
 680             }
 681             ucol_closeElements(iter);
 682             ucol_close(coll);
 683         }
 684     }
 685     for(u = 0; u <= (UChar32)noCases; u++) {
 686         free(t[u]);
 687     }
 688     free(t);
 689 }
 690
 691 static void TestEmptyRule(void) {
 692   UErrorCode status = U_ZERO_ERROR;
 693   UChar rulez[] = { 0 };
 694   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
 695
 696   ucol_close(coll);
 697 }
 698
 699 static void TestUCARules(void) {
 700   UErrorCode status = U_ZERO_ERROR;
 701   UChar b[256];
 702   UChar *rules = b;
 703   uint32_t ruleLen = 0;
 704   UCollator *UCAfromRules = NULL;
 705   UCollator *coll = ucol_open("", &status);
 706   if(status == U_FILE_ACCESS_ERROR) {
 707     log_data_err("Is your data around?\n");
 708     return;
 709   } else if(U_FAILURE(status)) {
 710     log_err("Error opening collator\n");
 711     return;
 712   }
 713   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
 714
 715   log_verbose("TestUCARules\n");
 716   if(ruleLen > 256) {
 717     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
 718     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
 719   }
 720   log_verbose("Rules length is %d\n", ruleLen);
 721   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
 722   if(U_SUCCESS(status)) {
 723     ucol_close(UCAfromRules);
 724   } else {
 725     log_verbose("Unable to create a collator from UCARules!\n");
 726   }
 727 /*
 728   u_unescape(blah, b, 256);
 729   ucol_getSortKey(coll, b, 1, res, 256);
 730 */
 731   ucol_close(coll);
 732   if(rules != b) {
 733     free(rules);
 734   }
 735 }
 736
 737
 738 /* Pinyin tonal order */
 739 /*
 740     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
 741           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
 742     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
 743     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
 744     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
 745     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
 746       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
 747 .. (\u00fc)
 748
 749 However, in testing we got the following order:
 750     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
 751           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
 752     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
 753 .. (\u0113)
 754     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
 755     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
 756     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
 757 .. (\u01d8)
 758       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
 759 */
 760
 761 static void TestBefore(void) {
 762   const static char *data[] = {
 763       "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
 764       "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
 765       "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
 766       "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
 767       "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
 768       "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
 769   };
 770   genericRulesStarter(
 771     "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
 772     "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
 773     "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
 774     "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
 775     "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
 776     "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc",
 777     data, UPRV_LENGTHOF(data));
 778 }
 779
 780 #if 0
 781 /* superceded by TestBeforePinyin */
 782 static void TestJ784(void) {
 783   const static char *data[] = {
 784       "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
 785       "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
 786       "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
 787       "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
 788       "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
 789       "\\u00fc",
 790            "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
 791   };
 792   genericLocaleStarter("zh", data, UPRV_LENGTHOF(data));
 793 }
 794 #endif
 795
 796 #if 0
 797 /* superceded by the changes to the lv locale */
 798 static void TestJ831(void) {
 799   const static char *data[] = {
 800     "I",
 801       "i",
 802       "Y",
 803       "y"
 804   };
 805   genericLocaleStarter("lv", data, UPRV_LENGTHOF(data));
 806 }
 807 #endif
 808
 809 static void TestJ815(void) {
 810   const static char *data[] = {
 811     "aa",
 812       "Aa",
 813       "ab",
 814       "Ab",
 815       "ad",
 816       "Ad",
 817       "ae",
 818       "Ae",
 819       "\\u00e6",
 820       "\\u00c6",
 821       "af",
 822       "Af",
 823       "b",
 824       "B"
 825   };
 826   genericLocaleStarter("fr", data, UPRV_LENGTHOF(data));
 827   genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, UPRV_LENGTHOF(data));
 828 }
 829
 830
 831 static void TestCase(void)
 832 {
 833     const static UChar gRules[MAX_TOKEN_LEN] =
 834     /*" & 0 < 1,\u2461<a,A"*/
 835     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
 836
 837     const static UChar testCase[][MAX_TOKEN_LEN] =
 838     {
 839         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
 840         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
 841         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
 842         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
 843     };
 844
 845     const static UCollationResult caseTestResults[][9] =
 846     {
 847         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
 848         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
 849         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
 850         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
 851     };
 852
 853     const static UColAttributeValue caseTestAttributes[][2] =
 854     {
 855         { UCOL_LOWER_FIRST, UCOL_OFF},
 856         { UCOL_UPPER_FIRST, UCOL_OFF},
 857         { UCOL_LOWER_FIRST, UCOL_ON},
 858         { UCOL_UPPER_FIRST, UCOL_ON}
 859     };
 860     int32_t i,j,k;
 861     UErrorCode status = U_ZERO_ERROR;
 862     UCollationElements *iter;
 863     UCollator  *myCollation;
 864     myCollation = ucol_open("en_US", &status);
 865
 866     if(U_FAILURE(status)){
 867         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
 868         return;
 869     }
 870     log_verbose("Testing different case settings\n");
 871     ucol_setStrength(myCollation, UCOL_TERTIARY);
 872
 873     for(k = 0; k<4; k++) {
 874       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
 875       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
 876       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
 877       for (i = 0; i < 3 ; i++) {
 878         for(j = i+1; j<4; j++) {
 879           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
 880         }
 881       }
 882     }
 883     ucol_close(myCollation);
 884
 885     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
 886     if(U_FAILURE(status)){
 887         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
 888         return;
 889     }
 890     log_verbose("Testing different case settings with custom rules\n");
 891     ucol_setStrength(myCollation, UCOL_TERTIARY);
 892
 893     for(k = 0; k<4; k++) {
 894       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
 895       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
 896       for (i = 0; i < 3 ; i++) {
 897         for(j = i+1; j<4; j++) {
 898           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
 899           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
 900           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
 901           backAndForth(iter);
 902           ucol_closeElements(iter);
 903           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
 904           backAndForth(iter);
 905           ucol_closeElements(iter);
 906         }
 907       }
 908     }
 909     ucol_close(myCollation);
 910     {
 911       const static char *lowerFirst[] = {
 912         "h",
 913         "H",
 914         "ch",
 915         "Ch",
 916         "CH",
 917         "cha",
 918         "chA",
 919         "Cha",
 920         "ChA",
 921         "CHa",
 922         "CHA",
 923         "i",
 924         "I"
 925       };
 926
 927       const static char *upperFirst[] = {
 928         "H",
 929         "h",
 930         "CH",
 931         "Ch",
 932         "ch",
 933         "CHA",
 934         "CHa",
 935         "ChA",
 936         "Cha",
 937         "chA",
 938         "cha",
 939         "I",
 940         "i"
 941       };
 942       log_verbose("mixed case test\n");
 943       log_verbose("lower first, case level off\n");
 944       genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
 945       log_verbose("upper first, case level off\n");
 946       genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
 947       log_verbose("lower first, case level on\n");
 948       genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
 949       log_verbose("upper first, case level on\n");
 950       genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
 951     }
 952
 953 }
 954
 955 static void TestIncrementalNormalize(void) {
 956
 957     /*UChar baseA     =0x61;*/
 958     UChar baseA     =0x41;
 959 /*    UChar baseB     = 0x42;*/
 960     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
 961     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
 962     /*
 963         0x316 is combining grave accent below, cc=220
 964         0x321 is combining palatalized hook below, cc=202
 965         0x300 is combining grave accent, cc=230
 966     */
 967
 968 #define MAXSLEN 2000
 969     /*int          maxSLen   = 64000;*/
 970     int          sLen;
 971     int          i;
 972
 973     UCollator        *coll;
 974     UErrorCode       status = U_ZERO_ERROR;
 975     UCollationResult result;
 976
 977     int32_t myQ = getTestOption(QUICK_OPTION);
 978
 979     if(getTestOption(QUICK_OPTION) < 0) {
 980         setTestOption(QUICK_OPTION, 1);
 981     }
 982
 983     {
 984         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
 985         /*          most buffers along the way.*/
 986         UChar            strA[MAXSLEN+1];
 987         UChar            strB[MAXSLEN+1];
 988
 989         coll = ucol_open("en_US", &status);
 990         if(status == U_FILE_ACCESS_ERROR) {
 991           log_data_err("Is your data around?\n");
 992           return;
 993         } else if(U_FAILURE(status)) {
 994           log_err("Error opening collator\n");
 995           return;
 996         }
 997         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
 998
 999         /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
1000         /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
1001         /*for (sLen = 1000; sLen<1001; sLen++) {*/
1002         for (sLen = 500; sLen<501; sLen++) {
1003         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
1004             strA[0] = baseA;
1005             strB[0] = baseA;
1006             for (i=1; i<=sLen-1; i++) {
1007                 strA[i] = ccMix[i % 3];
1008                 strB[sLen-i] = ccMix[i % 3];
1009             }
1010             strA[sLen]   = 0;
1011             strB[sLen]   = 0;
1012
1013             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
1014             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
1015             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
1016             doTest(coll, strA, strB, UCOL_EQUAL);
1017         }
1018     }
1019
1020     setTestOption(QUICK_OPTION, myQ);
1021
1022
1023     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
1024     /*         of the string.  Checks a couple of edge cases.*/
1025
1026     {
1027         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
1028         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
1029         ucol_setStrength(coll, UCOL_TERTIARY);
1030         doTest(coll, strA, strB, UCOL_EQUAL);
1031     }
1032
1033     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
1034
1035     {
1036       /* New UCA  3.1.1.
1037        * test below used a code point from Desseret, which sorts differently
1038        * than d800 dc00
1039        */
1040         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
1041         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
1042         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
1043         ucol_setStrength(coll, UCOL_TERTIARY);
1044         doTest(coll, strA, strB, UCOL_GREATER);
1045     }
1046
1047     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
1048
1049     {
1050         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
1051         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
1052         char  sortKeyA[50];
1053         char  sortKeyAz[50];
1054         char  sortKeyB[50];
1055         char  sortKeyBz[50];
1056         int   r;
1057
1058         /* there used to be -3 here. Hmmmm.... */
1059         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
1060         result = ucol_strcoll(coll, strA, 3, strB, 3);
1061         if (result != UCOL_GREATER) {
1062             log_err("ERROR 1 in test 4\n");
1063         }
1064         result = ucol_strcoll(coll, strA, -1, strB, -1);
1065         if (result != UCOL_EQUAL) {
1066             log_err("ERROR 2 in test 4\n");
1067         }
1068
1069         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1070         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1071         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1072         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1073
1074         r = strcmp(sortKeyA, sortKeyAz);
1075         if (r <= 0) {
1076             log_err("Error 3 in test 4\n");
1077         }
1078         r = strcmp(sortKeyA, sortKeyB);
1079         if (r <= 0) {
1080             log_err("Error 4 in test 4\n");
1081         }
1082         r = strcmp(sortKeyAz, sortKeyBz);
1083         if (r != 0) {
1084             log_err("Error 5 in test 4\n");
1085         }
1086
1087         ucol_setStrength(coll, UCOL_IDENTICAL);
1088         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1089         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1090         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1091         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1092
1093         r = strcmp(sortKeyA, sortKeyAz);
1094         if (r <= 0) {
1095             log_err("Error 6 in test 4\n");
1096         }
1097         r = strcmp(sortKeyA, sortKeyB);
1098         if (r <= 0) {
1099             log_err("Error 7 in test 4\n");
1100         }
1101         r = strcmp(sortKeyAz, sortKeyBz);
1102         if (r != 0) {
1103             log_err("Error 8 in test 4\n");
1104         }
1105         ucol_setStrength(coll, UCOL_TERTIARY);
1106     }
1107
1108
1109     /*  Test 5:  Null characters in non-normal source strings.*/
1110
1111     {
1112         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
1113         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
1114         char  sortKeyA[50];
1115         char  sortKeyAz[50];
1116         char  sortKeyB[50];
1117         char  sortKeyBz[50];
1118         int   r;
1119
1120         result = ucol_strcoll(coll, strA, 6, strB, 6);
1121         if (result != UCOL_GREATER) {
1122             log_err("ERROR 1 in test 5\n");
1123         }
1124         result = ucol_strcoll(coll, strA, -1, strB, -1);
1125         if (result != UCOL_EQUAL) {
1126             log_err("ERROR 2 in test 5\n");
1127         }
1128
1129         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1130         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1131         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1132         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1133
1134         r = strcmp(sortKeyA, sortKeyAz);
1135         if (r <= 0) {
1136             log_err("Error 3 in test 5\n");
1137         }
1138         r = strcmp(sortKeyA, sortKeyB);
1139         if (r <= 0) {
1140             log_err("Error 4 in test 5\n");
1141         }
1142         r = strcmp(sortKeyAz, sortKeyBz);
1143         if (r != 0) {
1144             log_err("Error 5 in test 5\n");
1145         }
1146
1147         ucol_setStrength(coll, UCOL_IDENTICAL);
1148         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1149         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1150         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1151         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1152
1153         r = strcmp(sortKeyA, sortKeyAz);
1154         if (r <= 0) {
1155             log_err("Error 6 in test 5\n");
1156         }
1157         r = strcmp(sortKeyA, sortKeyB);
1158         if (r <= 0) {
1159             log_err("Error 7 in test 5\n");
1160         }
1161         r = strcmp(sortKeyAz, sortKeyBz);
1162         if (r != 0) {
1163             log_err("Error 8 in test 5\n");
1164         }
1165         ucol_setStrength(coll, UCOL_TERTIARY);
1166     }
1167
1168
1169     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
1170
1171     {
1172         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
1173         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
1174
1175         result = ucol_strcoll(coll, strA, 5, strB, 5);
1176         if (result != UCOL_LESS) {
1177             log_err("Error 1 in test 6\n");
1178         }
1179         result = ucol_strcoll(coll, strA, -1, strB, -1);
1180         if (result != UCOL_EQUAL) {
1181             log_err("Error 2 in test 6\n");
1182         }
1183     }
1184
1185     ucol_close(coll);
1186 }
1187
1188
1189
#if 0
/*
 * Disabled test: checks the case bits computed by ucol_uprv_getCaseBits()
 * for a handful of unescaped strings against the expected values in results[].
 */
static void TestGetCaseBit(void) {
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
      "\\uFF9E", "\\u0009"
  };

  /* Expected case bits, one entry per string in caseBitData. */
  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
      UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  uint32_t i, blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  /* Without a collator there is nothing to test. */
  if(U_FAILURE(status)) {
    log_err_status(status, "Unable to open the root collator -> %s\n", u_errorName(status));
    return;
  }

  for(i = 0; i<UPRV_LENGTHOF(results); i++) {
    blen = u_unescape(caseBitData[i], b, 256);
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }

  /* Release the collator; the original version leaked it. */
  ucol_close(UCA);
}
#endif
1217
1218 static void TestHangulTailoring(void) {
1219     static const char *koreanData[] = {
1220         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
1221             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
1222             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
1223             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
1224             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
1225             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
1226     };
1227
1228     const char *rules =
1229         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
1230         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
1231         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
1232         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
1233         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
1234         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
1235
1236
1237   UErrorCode status = U_ZERO_ERROR;
1238   UChar rlz[2048] = { 0 };
1239   uint32_t rlen = u_unescape(rules, rlz, 2048);
1240
1241   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
1242   if(status == U_FILE_ACCESS_ERROR) {
1243     log_data_err("Is your data around?\n");
1244     return;
1245   } else if(U_FAILURE(status)) {
1246     log_err("Error opening collator\n");
1247     return;
1248   }
1249
1250   log_verbose("Using start of korean rules\n");
1251
1252   if(U_SUCCESS(status)) {
1253     genericOrderingTest(coll, koreanData, UPRV_LENGTHOF(koreanData));
1254   } else {
1255     log_err("Unable to open collator with rules %s\n", rules);
1256   }
1257
1258   ucol_close(coll);
1259
1260   log_verbose("Using ko__LOTUS locale\n");
1261   genericLocaleStarter("ko__LOTUS", koreanData, UPRV_LENGTHOF(koreanData));
1262 }
1263
/*
 * Middle bytes of the secondary and tertiary compression ranges,
 * as produced by the current implementation.
 * These values follow the sort key compression scheme and are
 * subject to change whenever that scheme changes.
 * See class CollationKeys.
 */
enum {
    /* secondary level, range 05..45 */
    SEC_COMMON_MIDDLE = 0x25,

    /* tertiary level, range 05..C5 */
    TER_ONLY_COMMON_MIDDLE = 0x65
};
1274
1275 static void TestCompressOverlap(void) {
1276     UChar       secstr[150];
1277     UChar       tertstr[150];
1278     UErrorCode  status = U_ZERO_ERROR;
1279     UCollator  *coll;
1280     uint8_t     result[500];
1281     uint32_t    resultlen;
1282     int         count = 0;
1283     uint8_t    *tempptr;
1284
1285     coll = ucol_open("", &status);
1286
1287     if (U_FAILURE(status)) {
1288         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
1289         return;
1290     }
1291     while (count < 149) {
1292         secstr[count] = 0x0020; /* [06, 05, 05] */
1293         tertstr[count] = 0x0020;
1294         count ++;
1295     }
1296
1297     /* top down compression ----------------------------------- */
1298     secstr[count] = 0x0332; /* [, 87, 05] */
1299     tertstr[count] = 0x3000; /* [06, 05, 07] */
1300
1301     /* no compression secstr should have 150 secondary bytes, tertstr should
1302     have 150 tertiary bytes.
1303     with correct compression, secstr should have 6 secondary
1304     bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
1305     resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
1306     (void)resultlen;    /* Suppress set but not used warning. */
1307     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1308     while (*(tempptr + 1) != 1) {
1309         /* the last secondary collation element is not checked since it is not
1310         part of the compression */
1311         if (*tempptr < SEC_COMMON_MIDDLE) {
1312             log_err("Secondary top down compression overlapped\n");
1313         }
1314         tempptr ++;
1315     }
1316
1317     /* tertiary top/bottom/common for en_US is similar to the secondary
1318     top/bottom/common */
1319     resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
1320     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1321     while (*(tempptr + 1) != 0) {
1322         /* the last secondary collation element is not checked since it is not
1323         part of the compression */
1324         if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
1325             log_err("Tertiary top down compression overlapped\n");
1326         }
1327         tempptr ++;
1328     }
1329
1330     /* bottom up compression ------------------------------------- */
1331     secstr[count] = 0;
1332     tertstr[count] = 0;
1333     resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
1334     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1335     while (*(tempptr + 1) != 1) {
1336         /* the last secondary collation element is not checked since it is not
1337         part of the compression */
1338         if (*tempptr > SEC_COMMON_MIDDLE) {
1339             log_err("Secondary bottom up compression overlapped\n");
1340         }
1341         tempptr ++;
1342     }
1343
1344     /* tertiary top/bottom/common for en_US is similar to the secondary
1345     top/bottom/common */
1346     resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
1347     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1348     while (*(tempptr + 1) != 0) {
1349         /* the last secondary collation element is not checked since it is not
1350         part of the compression */
1351         if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
1352             log_err("Tertiary bottom up compression overlapped\n");
1353         }
1354         tempptr ++;
1355     }
1356
1357     ucol_close(coll);
1358 }
1359
/*
 * Runs the three-string Cyrillic ordering below against a set of tailoring
 * rules through genericRulesStarter().
 */
static void TestCyrillicTailoring(void) {
    static const char *cyrillicOrder[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };

    /*
     * Only the rules that are still expected to pass are listed here.
     *
     * Russian overrides contractions, so the "ru" locale test
     * (genericLocaleStarter("ru", ...)) is not valid anymore.
     *
     * UCA 8.0 drops most of the Cyrillic contractions from the default order;
     * see CLDR ticket #7246 "root collation: remove Cyrillic contractions".
     * For that reason the following variants are no longer run:
     *   genericLocaleStarter("root", ...)
     *   "&\\u0410 = \\u0410"
     *   "&Z < \\u0410"
     *   "&\\u0410 = \\u0410 < \\u0410\\u0301"
     *   "&Z < \\u0410 < \\u0410\\u0301"
     */
    static const char *activeRules[] = {
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0"
    };
    int32_t ruleIndex;

    for (ruleIndex = 0; ruleIndex < UPRV_LENGTHOF(activeRules); ++ruleIndex) {
        genericRulesStarter(activeRules[ruleIndex], cyrillicOrder, UPRV_LENGTHOF(cyrillicOrder));
    }
}
1382
/*
 * Runs two three-string orderings through genericRulesStarter() with a rule
 * that suppresses contractions for the range U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
    static const char *suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    /* Cyrillic letter preceded by Latin letters. */
    static const char *latinPrefixed[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    /* Cyrillic letter with combining marks, followed by Latin letters. */
    static const char *latinSuffixed[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    genericRulesStarter(suppressRule, latinPrefixed, UPRV_LENGTHOF(latinPrefixed));
    genericRulesStarter(suppressRule, latinSuffixed, UPRV_LENGTHOF(latinSuffixed));
}
1399
1400 static void TestContraction(void) {
1401     const static char *testrules[] = {
1402         "&A = AB / B",
1403         "&A = A\\u0306/\\u0306",
1404         "&c = ch / h"
1405     };
1406     const static UChar testdata[][2] = {
1407         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
1408         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
1409         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
1410     };
1411     const static UChar testdata2[][2] = {
1412         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
1413         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
1414         {0x0063 /* 'c' */, 0x006C /* 'l' */}
1415     };
1416 #if 0
1417     /*
1418      * These pairs of rule strings are not guaranteed to yield the very same mappings.
1419      * In fact, LDML 24 recommends an improved way of creating mappings
1420      * which always yields different mappings for such pairs. See
1421      * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
1422      */
1423     const static char *testrules3[] = {
1424         "&z < xyz &xyzw << B",
1425         "&z < xyz &xyz << B / w",
1426         "&z < ch &achm << B",
1427         "&z < ch &a << B / chm",
1428         "&\\ud800\\udc00w << B",
1429         "&\\ud800\\udc00 << B / w",
1430         "&a\\ud800\\udc00m << B",
1431         "&a << B / \\ud800\\udc00m",
1432     };
1433 #endif
1434
1435     UErrorCode  status   = U_ZERO_ERROR;
1436     UCollator  *coll;
1437     UChar       rule[256] = {0};
1438     uint32_t    rlen     = 0;
1439     int         i;
1440
1441     for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
1442         UCollationElements *iter1;
1443         int j = 0;
1444         log_verbose("Rule %s for testing\n", testrules[i]);
1445         rlen = u_unescape(testrules[i], rule, 32);
1446         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1447         if (U_FAILURE(status)) {
1448             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1449             return;
1450         }
1451         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
1452         if (U_FAILURE(status)) {
1453             log_err("Collation iterator creation failed\n");
1454             return;
1455         }
1456         while (j < 2) {
1457             UCollationElements *iter2 = ucol_openElements(coll,
1458                                                          &(testdata[i][j]),
1459                                                          1, &status);
1460             uint32_t ce;
1461             if (U_FAILURE(status)) {
1462                 log_err("Collation iterator creation failed\n");
1463                 return;
1464             }
1465             ce = ucol_next(iter2, &status);
1466             while (ce != UCOL_NULLORDER) {
1467                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
1468                     log_err("Collation elements in contraction split does not match\n");
1469                     return;
1470                 }
1471                 ce = ucol_next(iter2, &status);
1472             }
1473             j ++;
1474             ucol_closeElements(iter2);
1475         }
1476         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
1477             log_err("Collation elements not exhausted\n");
1478             return;
1479         }
1480         ucol_closeElements(iter1);
1481         ucol_close(coll);
1482     }
1483
1484     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
1485     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1486     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
1487         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1488                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
1489                 testdata2[1][1]);
1490         return;
1491     }
1492     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
1493         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1494                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
1495                 testdata2[2][1]);
1496         return;
1497     }
1498     ucol_close(coll);
1499 #if 0  /* see above */
1500     for (i = 0; i < UPRV_LENGTHOF(testrules3); i += 2) {
1501         log_verbose("testrules3 i==%d  \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
1502         UCollator          *coll1,
1503                            *coll2;
1504         UCollationElements *iter1,
1505                            *iter2;
1506         UChar               ch = 0x0042 /* 'B' */;
1507         uint32_t            ce;
1508         rlen = u_unescape(testrules3[i], rule, 32);
1509         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1510         rlen = u_unescape(testrules3[i + 1], rule, 32);
1511         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1512         if (U_FAILURE(status)) {
1513             log_err("Collator creation failed %s\n", testrules[i]);
1514             return;
1515         }
1516         iter1 = ucol_openElements(coll1, &ch, 1, &status);
1517         iter2 = ucol_openElements(coll2, &ch, 1, &status);
1518         if (U_FAILURE(status)) {
1519             log_err("Collation iterator creation failed\n");
1520             return;
1521         }
1522         ce = ucol_next(iter1, &status);
1523         if (U_FAILURE(status)) {
1524             log_err("Retrieving ces failed\n");
1525             return;
1526         }
1527         while (ce != UCOL_NULLORDER) {
1528             uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
1529             if (ce == ce2) {
1530                 log_verbose("CEs match: %08x\n", ce);
1531             } else {
1532                 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
1533                 return;
1534             }
1535             ce = ucol_next(iter1, &status);
1536             if (U_FAILURE(status)) {
1537                 log_err("Retrieving ces failed\n");
1538                 return;
1539             }
1540         }
1541         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
1542             log_err("CEs not exhausted\n");
1543             return;
1544         }
1545         ucol_closeElements(iter1);
1546         ucol_closeElements(iter2);
1547         ucol_close(coll1);
1548         ucol_close(coll2);
1549     }
1550 #endif
1551 }
1552
1553 static void TestExpansion(void) {
1554     const static char *testrules[] = {
1555 #if 0
1556         /*
1557          * This seems to have tested that M was not mapped to an expansion.
1558          * I believe the old builder just did that because it computed the extension CEs
1559          * at the very end, which was a bug.
1560          * Among other problems, it violated the core tailoring principle
1561          * by making an earlier rule depend on a later one.
1562          * And, of course, if M did not get an expansion, then it was primary different from K,
1563          * unlike what the rule &K<<M says.
1564          */
1565         "&J << K / B & K << M",
1566 #endif
1567         "&J << K / B << M"
1568     };
1569     const static UChar testdata[][3] = {
1570         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
1571         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
1572         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
1573         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
1574         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
1575         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
1576     };
1577
1578     UErrorCode  status   = U_ZERO_ERROR;
1579     UCollator  *coll;
1580     UChar       rule[256] = {0};
1581     uint32_t    rlen     = 0;
1582     int         i;
1583
1584     for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
1585         int j = 0;
1586         log_verbose("Rule %s for testing\n", testrules[i]);
1587         rlen = u_unescape(testrules[i], rule, 32);
1588         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1589         if (U_FAILURE(status)) {
1590             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1591             return;
1592         }
1593
1594         for (j = 0; j < 5; j ++) {
1595             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
1596         }
1597         ucol_close(coll);
1598     }
1599 }
1600
#if 0
/*
 * This test exercises the current limitations of the collation engine.
 * It always fails, so it is disabled by default (compiled out by the
 * surrounding #if 0).
 *
 * Each braced section below sets up one rule string plus expected orderings
 * and runs them through genericRulesStarter() or
 * genericRulesStarterWithOptions().
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rule = "&a=b/c&d=c/e";
    static const char *tlimit01[] = {"add","b","adf"};
    static const char *tlimit02[] = {"aa","b","af"};
    log_verbose("recursive expansions\n");
    genericRulesStarter(rule, tlimit01, UPRV_LENGTHOF(tlimit01));
    genericRulesStarter(rule, tlimit02, UPRV_LENGTHOF(tlimit02));
  }
  /* contractions spanning expansions */
  {
    static const char *rule = "&a<<<c/e&g<<<eh";
    static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
    static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rule, tlimit01, UPRV_LENGTHOF(tlimit01));
    genericRulesStarter(rule, tlimit02, UPRV_LENGTHOF(tlimit02));
  }
  /* normalization: nulls in contractions */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    /* Both orderings are run with decomposition mode on and then off. */
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* normalization: contractions spanning normalization */
  /* NOTE(review): rule and data are identical to the preceding section;
     possibly a copy-paste placeholder - confirm before re-enabling. */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* variable top:  */
  {
    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
    static const char *rule = "&\\u2010<x<[variable top]=z";
    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
    static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    /* valOn: shifted alternate handling at quaternary strength;
       valOff: non-ignorable alternate handling at tertiary strength. */
    static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(rule, tlimit03, UPRV_LENGTHOF(tlimit03), att, valOn, UPRV_LENGTHOF(att));
    genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOn, UPRV_LENGTHOF(att));
    genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOn, UPRV_LENGTHOF(att));
    genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));
    genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));

  }
  /* case level */
  {
    static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
    static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
    /* Only the upper-first setting of UCOL_CASE_FIRST is exercised; the
       "off" variant below is commented out. */
    static const UColAttribute att[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
    log_verbose("case level\n");
    genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOn, UPRV_LENGTHOF(att));
    genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOn, UPRV_LENGTHOF(att));
    /*genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));*/
    /*genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));*/
  }

}
#endif
1692
1693 static void TestBocsuCoverage(void) {
1694   UErrorCode status = U_ZERO_ERROR;
1695   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
1696   UChar       test[256] = {0};
1697   uint32_t    tlen     = u_unescape(testString, test, 32);
1698   uint8_t key[256]     = {0};
1699   uint32_t klen         = 0;
1700
1701   UCollator *coll = ucol_open("", &status);
1702   if(U_SUCCESS(status)) {
1703   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
1704
1705   klen = ucol_getSortKey(coll, test, tlen, key, 256);
1706   (void)klen;    /* Suppress set but not used warning. */
1707
1708   ucol_close(coll);
1709   } else {
1710     log_data_err("Couldn't open UCA\n");
1711   }
1712 }
1713
1714 static void TestVariableTopSetting(void) {
1715   UErrorCode status = U_ZERO_ERROR;
1716   uint32_t varTopOriginal = 0, varTop1, varTop2;
1717   UCollator *coll = ucol_open("", &status);
1718   if(U_SUCCESS(status)) {
1719
1720   static const UChar nul = 0;
1721   static const UChar space = 0x20;
1722   static const UChar dot = 0x2e;  /* punctuation */
1723   static const UChar degree = 0xb0;  /* symbol */
1724   static const UChar dollar = 0x24;  /* currency symbol */
1725   static const UChar zero = 0x30;  /* digit */
1726
1727   varTopOriginal = ucol_getVariableTop(coll, &status);
1728   log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
1729   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1730
1731   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1732   varTop2 = ucol_getVariableTop(coll, &status);
1733   log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
1734   if(U_FAILURE(status) || varTop1 != varTop2 ||
1735       !ucol_equal(coll, &nul, 0, &space, 1) ||
1736       ucol_equal(coll, &nul, 0, &dot, 1) ||
1737       ucol_equal(coll, &nul, 0, &degree, 1) ||
1738       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1739       ucol_equal(coll, &nul, 0, &zero, 1) ||
1740       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1741     log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
1742   }
1743
1744   varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
1745   varTop2 = ucol_getVariableTop(coll, &status);
1746   log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
1747   if(U_FAILURE(status) || varTop1 != varTop2 ||
1748       !ucol_equal(coll, &nul, 0, &space, 1) ||
1749       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1750       ucol_equal(coll, &nul, 0, &degree, 1) ||
1751       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1752       ucol_equal(coll, &nul, 0, &zero, 1) ||
1753       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1754     log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
1755   }
1756
1757   varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
1758   varTop2 = ucol_getVariableTop(coll, &status);
1759   log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
1760   if(U_FAILURE(status) || varTop1 != varTop2 ||
1761       !ucol_equal(coll, &nul, 0, &space, 1) ||
1762       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1763       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1764       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1765       ucol_equal(coll, &nul, 0, &zero, 1) ||
1766       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1767     log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
1768   }
1769
1770   varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
1771   varTop2 = ucol_getVariableTop(coll, &status);
1772   log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
1773   if(U_FAILURE(status) || varTop1 != varTop2 ||
1774       !ucol_equal(coll, &nul, 0, &space, 1) ||
1775       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1776       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1777       !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1778       ucol_equal(coll, &nul, 0, &zero, 1) ||
1779       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1780     log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
1781   }
1782
1783   log_verbose("Testing setting variable top to contractions\n");
1784   {
1785     UChar first[4] = { 0 };
1786     first[0] = 0x0040;
1787     first[1] = 0x0050;
1788     first[2] = 0x0000;
1789
1790     status = U_ZERO_ERROR;
1791     ucol_setVariableTop(coll, first, -1, &status);
1792
1793     if(U_SUCCESS(status)) {
1794       log_err("Invalid contraction succeded in setting variable top!\n");
1795     }
1796
1797   }
1798
1799   log_verbose("Test restoring variable top\n");
1800
1801   status = U_ZERO_ERROR;
1802   ucol_restoreVariableTop(coll, varTopOriginal, &status);
1803   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
1804     log_err("Couldn't restore old variable top\n");
1805   }
1806
1807   log_verbose("Testing calling with error set\n");
1808
1809   status = U_INTERNAL_PROGRAM_ERROR;
1810   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1811   varTop2 = ucol_getVariableTop(coll, &status);
1812   ucol_restoreVariableTop(coll, varTop2, &status);
1813   varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
1814   varTop2 = ucol_getVariableTop(NULL, &status);
1815   ucol_restoreVariableTop(NULL, varTop2, &status);
1816   if(status != U_INTERNAL_PROGRAM_ERROR) {
1817     log_err("Bad reaction to passed error!\n");
1818   }
1819   ucol_close(coll);
1820   } else {
1821     log_data_err("Couldn't open UCA collator\n");
1822   }
1823 }
1824
1825 static void TestMaxVariable() {
1826   UErrorCode status = U_ZERO_ERROR;
1827   UColReorderCode oldMax, max;
1828   UCollator *coll;
1829
1830   static const UChar nul = 0;
1831   static const UChar space = 0x20;
1832   static const UChar dot = 0x2e;  /* punctuation */
1833   static const UChar degree = 0xb0;  /* symbol */
1834   static const UChar dollar = 0x24;  /* currency symbol */
1835   static const UChar zero = 0x30;  /* digit */
1836
1837   coll = ucol_open("", &status);
1838   if(U_FAILURE(status)) {
1839     log_data_err("Couldn't open root collator\n");
1840     return;
1841   }
1842
1843   oldMax = ucol_getMaxVariable(coll);
1844   log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
1845   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1846
1847   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1848   max = ucol_getMaxVariable(coll);
1849   log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
1850   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
1851       !ucol_equal(coll, &nul, 0, &space, 1) ||
1852       ucol_equal(coll, &nul, 0, &dot, 1) ||
1853       ucol_equal(coll, &nul, 0, &degree, 1) ||
1854       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1855       ucol_equal(coll, &nul, 0, &zero, 1) ||
1856       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1857     log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
1858   }
1859
1860   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
1861   max = ucol_getMaxVariable(coll);
1862   log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
1863   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
1864       !ucol_equal(coll, &nul, 0, &space, 1) ||
1865       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1866       ucol_equal(coll, &nul, 0, &degree, 1) ||
1867       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1868       ucol_equal(coll, &nul, 0, &zero, 1) ||
1869       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1870     log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
1871   }
1872
1873   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
1874   max = ucol_getMaxVariable(coll);
1875   log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
1876   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
1877       !ucol_equal(coll, &nul, 0, &space, 1) ||
1878       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1879       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1880       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1881       ucol_equal(coll, &nul, 0, &zero, 1) ||
1882       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1883     log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
1884   }
1885
1886   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
1887   max = ucol_getMaxVariable(coll);
1888   log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
1889   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
1890       !ucol_equal(coll, &nul, 0, &space, 1) ||
1891       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1892       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1893       !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1894       ucol_equal(coll, &nul, 0, &zero, 1) ||
1895       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1896     log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
1897   }
1898
1899   log_verbose("Test restoring maxVariable\n");
1900   status = U_ZERO_ERROR;
1901   ucol_setMaxVariable(coll, oldMax, &status);
1902   if(oldMax != ucol_getMaxVariable(coll)) {
1903     log_err("Couldn't restore old maxVariable\n");
1904   }
1905
1906   log_verbose("Testing calling with error set\n");
1907   status = U_INTERNAL_PROGRAM_ERROR;
1908   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1909   max = ucol_getMaxVariable(coll);
1910   if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
1911     log_err("Bad reaction to passed error!\n");
1912   }
1913   ucol_close(coll);
1914 }
1915
1916 static void TestNonChars(void) {
1917   static const char *test[] = {
1918       "\\u0000",  /* ignorable */
1919       "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
1920       "\\uFDD0", "\\uFDEF",
1921       "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
1922       "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
1923       "\\U0003FFFE", "\\U0003FFFF",
1924       "\\U0004FFFE", "\\U0004FFFF",
1925       "\\U0005FFFE", "\\U0005FFFF",
1926       "\\U0006FFFE", "\\U0006FFFF",
1927       "\\U0007FFFE", "\\U0007FFFF",
1928       "\\U0008FFFE", "\\U0008FFFF",
1929       "\\U0009FFFE", "\\U0009FFFF",
1930       "\\U000AFFFE", "\\U000AFFFF",
1931       "\\U000BFFFE", "\\U000BFFFF",
1932       "\\U000CFFFE", "\\U000CFFFF",
1933       "\\U000DFFFE", "\\U000DFFFF",
1934       "\\U000EFFFE", "\\U000EFFFF",
1935       "\\U000FFFFE", "\\U000FFFFF",
1936       "\\U0010FFFE", "\\U0010FFFF",
1937       "\\uFFFF"  /* special character with maximum primary weight */
1938   };
1939   UErrorCode status = U_ZERO_ERROR;
1940   UCollator *coll = ucol_open("en_US", &status);
1941
1942   log_verbose("Test non characters\n");
1943
1944   if(U_SUCCESS(status)) {
1945     genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
1946   } else {
1947     log_err_status(status, "Unable to open collator\n");
1948   }
1949
1950   ucol_close(coll);
1951 }
1952
1953 static void TestExtremeCompression(void) {
1954   static char *test[4];
1955   int32_t j = 0, i = 0;
1956
1957   for(i = 0; i<4; i++) {
1958     test[i] = (char *)malloc(2048*sizeof(char));
1959   }
1960
1961   for(j = 20; j < 500; j++) {
1962     for(i = 0; i<4; i++) {
1963       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
1964       test[i][j-1] = (char)('a'+i);
1965       test[i][j] = 0;
1966     }
1967     genericLocaleStarter("en_US", (const char **)test, 4);
1968   }
1969
1970
1971   for(i = 0; i<4; i++) {
1972     free(test[i]);
1973   }
1974 }
1975
1976 #if 0
1977 static void TestExtremeCompression(void) {
1978   static char *test[4];
1979   int32_t j = 0, i = 0;
1980   UErrorCode status = U_ZERO_ERROR;
1981   UCollator *coll = ucol_open("en_US", status);
1982   for(i = 0; i<4; i++) {
1983     test[i] = (char *)malloc(2048*sizeof(char));
1984   }
1985   for(j = 10; j < 2048; j++) {
1986     for(i = 0; i<4; i++) {
1987       uprv_memset(test[i], 'a', (j-2)*sizeof(char));
1988       test[i][j-1] = (char)('a'+i);
1989       test[i][j] = 0;
1990     }
1991   }
1992   genericLocaleStarter("en_US", (const char **)test, 4);
1993
1994   for(j = 10; j < 2048; j++) {
1995     for(i = 0; i<1; i++) {
1996       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
1997       test[i][j] = 0;
1998     }
1999   }
2000   for(i = 0; i<4; i++) {
2001     free(test[i]);
2002   }
2003 }
2004 #endif
2005
/*
 * Tailors a chain of strings containing supplementary characters (written as
 * surrogate pairs) after "z" and hands the expected order to
 * genericRulesStarter().
 */
static void TestSurrogates(void) {
  static const char *rule =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
       "< \\ud800\\udc00y  < \\ud800\\udc00r"
       "< \\ud800\\udc00f  << \\ud800\\udc00"
       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
       "< \\ud800\\udc00a  < c < b" ;

  /* Expected order; all 14 entries are passed on. */
  static const char *test[] = {
    "z",
    "\\ud900\\udc25",
    "\\ud805\\udc50",
    "\\ud800\\udc00y",
    "\\ud800\\udc00r",
    "\\ud800\\udc00f",
    "\\ud800\\udc00",
    "\\ud800\\udc00c",
    "\\ud800\\udc00b",
    "\\ud800\\udc00fa",
    "\\ud800\\udc00fb",
    "\\ud800\\udc00a",
    "c",
    "b"
  };

  genericRulesStarter(rule, test, UPRV_LENGTHOF(test));
}
2026
2027 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
2028 static void TestPrefix(void) {
2029   uint32_t i;
2030
2031   static const struct {
2032     const char *rules;
2033     const char *data[50];
2034     const uint32_t len;
2035   } tests[] = {
2036     { "&z <<< z|a",
2037       {"zz", "za"}, 2 },
2038
2039     { "&z <<< z|   a",
2040       {"zz", "za"}, 2 },
2041     { "[strength I]"
2042       "&a=\\ud900\\udc25"
2043       "&z<<<\\ud900\\udc25|a",
2044       {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
2045   };
2046
2047
2048   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2049     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2050   }
2051 }
2052
2053 /* This test uses data suplied by Masashiko Maedera to test the implementation */
2054 /* JIS X 4061 collation order implementation                                   */
2055 static void TestNewJapanese(void) {
2056
2057   static const char * const test1[] = {
2058       "\\u30b7\\u30e3\\u30fc\\u30ec",
2059       "\\u30b7\\u30e3\\u30a4",
2060       "\\u30b7\\u30e4\\u30a3",
2061       "\\u30b7\\u30e3\\u30ec",
2062       "\\u3061\\u3087\\u3053",
2063       "\\u3061\\u3088\\u3053",
2064       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
2065       "\\u3066\\u30fc\\u305f",
2066       "\\u30c6\\u30fc\\u30bf",
2067       "\\u30c6\\u30a7\\u30bf",
2068       "\\u3066\\u3048\\u305f",
2069       "\\u3067\\u30fc\\u305f",
2070       "\\u30c7\\u30fc\\u30bf",
2071       "\\u30c7\\u30a7\\u30bf",
2072       "\\u3067\\u3048\\u305f",
2073       "\\u3066\\u30fc\\u305f\\u30fc",
2074       "\\u30c6\\u30fc\\u30bf\\u30a1",
2075       "\\u30c6\\u30a7\\u30bf\\u30fc",
2076       "\\u3066\\u3047\\u305f\\u3041",
2077       "\\u3066\\u3048\\u305f\\u30fc",
2078       "\\u3067\\u30fc\\u305f\\u30fc",
2079       "\\u30c7\\u30fc\\u30bf\\u30a1",
2080       "\\u3067\\u30a7\\u305f\\u30a1",
2081       "\\u30c7\\u3047\\u30bf\\u3041",
2082       "\\u30c7\\u30a8\\u30bf\\u30a2",
2083       "\\u3072\\u3086",
2084       "\\u3073\\u3085\\u3042",
2085       "\\u3074\\u3085\\u3042",
2086       "\\u3073\\u3085\\u3042\\u30fc",
2087       "\\u30d3\\u30e5\\u30a2\\u30fc",
2088       "\\u3074\\u3085\\u3042\\u30fc",
2089       "\\u30d4\\u30e5\\u30a2\\u30fc",
2090       "\\u30d2\\u30e5\\u30a6",
2091       "\\u30d2\\u30e6\\u30a6",
2092       "\\u30d4\\u30e5\\u30a6\\u30a2",
2093       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
2094       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
2095       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
2096       "\\u3072\\u3085\\u3093",
2097       "\\u3074\\u3085\\u3093",
2098       "\\u3075\\u30fc\\u308a",
2099       "\\u30d5\\u30fc\\u30ea",
2100       "\\u3075\\u3045\\u308a",
2101       "\\u3075\\u30a5\\u308a",
2102       "\\u3075\\u30a5\\u30ea",
2103       "\\u30d5\\u30a6\\u30ea",
2104       "\\u3076\\u30fc\\u308a",
2105       "\\u30d6\\u30fc\\u30ea",
2106       "\\u3076\\u3045\\u308a",
2107       "\\u30d6\\u30a5\\u308a",
2108       "\\u3077\\u3046\\u308a",
2109       "\\u30d7\\u30a6\\u30ea",
2110       "\\u3075\\u30fc\\u308a\\u30fc",
2111       "\\u30d5\\u30a5\\u30ea\\u30fc",
2112       "\\u3075\\u30a5\\u308a\\u30a3",
2113       "\\u30d5\\u3045\\u308a\\u3043",
2114       "\\u30d5\\u30a6\\u30ea\\u30fc",
2115       "\\u3075\\u3046\\u308a\\u3043",
2116       "\\u30d6\\u30a6\\u30ea\\u30a4",
2117       "\\u3077\\u30fc\\u308a\\u30fc",
2118       "\\u3077\\u30a5\\u308a\\u30a4",
2119       "\\u3077\\u3046\\u308a\\u30fc",
2120       "\\u30d7\\u30a6\\u30ea\\u30a4",
2121       "\\u30d5\\u30fd",
2122       "\\u3075\\u309e",
2123       "\\u3076\\u309d",
2124       "\\u3076\\u3075",
2125       "\\u3076\\u30d5",
2126       "\\u30d6\\u3075",
2127       "\\u30d6\\u30d5",
2128       "\\u3076\\u309e",
2129       "\\u3076\\u3077",
2130       "\\u30d6\\u3077",
2131       "\\u3077\\u309d",
2132       "\\u30d7\\u30fd",
2133       "\\u3077\\u3075",
2134 };
2135
2136   static const char *test2[] = {
2137     "\\u306f\\u309d", /* H\\u309d */
2138     "\\u30cf\\u30fd", /* K\\u30fd */
2139     "\\u306f\\u306f", /* HH */
2140     "\\u306f\\u30cf", /* HK */
2141     "\\u30cf\\u30cf", /* KK */
2142     "\\u306f\\u309e", /* H\\u309e */
2143     "\\u30cf\\u30fe", /* K\\u30fe */
2144     "\\u306f\\u3070", /* HH\\u309b */
2145     "\\u30cf\\u30d0", /* KK\\u309b */
2146     "\\u306f\\u3071", /* HH\\u309c */
2147     "\\u30cf\\u3071", /* KH\\u309c */
2148     "\\u30cf\\u30d1", /* KK\\u309c */
2149     "\\u3070\\u309d", /* H\\u309b\\u309d */
2150     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
2151     "\\u3070\\u306f", /* H\\u309bH */
2152     "\\u30d0\\u30cf", /* K\\u309bK */
2153     "\\u3070\\u309e", /* H\\u309b\\u309e */
2154     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
2155     "\\u3070\\u3070", /* H\\u309bH\\u309b */
2156     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
2157     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
2158     "\\u3070\\u3071", /* H\\u309bH\\u309c */
2159     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
2160     "\\u3071\\u309d", /* H\\u309c\\u309d */
2161     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
2162     "\\u3071\\u306f", /* H\\u309cH */
2163     "\\u30d1\\u30cf", /* K\\u309cK */
2164     "\\u3071\\u3070", /* H\\u309cH\\u309b */
2165     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
2166     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
2167     "\\u3071\\u3071", /* H\\u309cH\\u309c */
2168     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
2169   };
2170   /*
2171   static const char *test3[] = {
2172     "\\u221er\\u221e",
2173     "\\u221eR#",
2174     "\\u221et\\u221e",
2175     "#r\\u221e",
2176     "#R#",
2177     "#t%",
2178     "#T%",
2179     "8t\\u221e",
2180     "8T\\u221e",
2181     "8t#",
2182     "8T#",
2183     "8t%",
2184     "8T%",
2185     "8t8",
2186     "8T8",
2187     "\\u03c9r\\u221e",
2188     "\\u03a9R%",
2189     "rr\\u221e",
2190     "rR\\u221e",
2191     "Rr\\u221e",
2192     "RR\\u221e",
2193     "RT%",
2194     "rt8",
2195     "tr\\u221e",
2196     "tr8",
2197     "TR8",
2198     "tt8",
2199     "\\u30b7\\u30e3\\u30fc\\u30ec",
2200   };
2201   */
2202   static const UColAttribute att[] = { UCOL_STRENGTH };
2203   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
2204
2205   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
2206   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
2207
2208   genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), att, val, 1);
2209   genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), att, val, 1);
2210   /*genericLocaleStarter("ja", test3, UPRV_LENGTHOF(test3));*/
2211   genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), attShifted, valShifted, 2);
2212   genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), attShifted, valShifted, 2);
2213 }
2214
2215 static void TestStrCollIdenticalPrefix(void) {
2216   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
2217   const char* test[] = {
2218     "ab\\ud9b0\\udc70",
2219     "ab\\ud9b0\\udc71"
2220   };
2221   genericRulesStarterWithResult(rule, test, UPRV_LENGTHOF(test), UCOL_EQUAL);
2222 }
2223 /* Contractions should have all their canonically equivalent */
2224 /* strings included */
2225 static void TestContractionClosure(void) {
2226   static const struct {
2227     const char *rules;
2228     const char *data[10];
2229     const uint32_t len;
2230   } tests[] = {
2231     {   "&b=\\u00e4\\u00e4",
2232       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
2233     {   "&b=\\u00C5",
2234       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
2235   };
2236   uint32_t i;
2237
2238
2239   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2240     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
2241   }
2242 }
2243
2244 /* This tests also fails*/
2245 static void TestBeforePrefixFailure(void) {
2246   static const struct {
2247     const char *rules;
2248     const char *data[10];
2249     const uint32_t len;
2250   } tests[] = {
2251     { "&g <<< a"
2252       "&[before 3]\\uff41 <<< x",
2253       {"x", "\\uff41"}, 2 },
2254     {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2255         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2256         "&[before 3]\\u30a7<<<\\u30a9",
2257       {"\\u30a9", "\\u30a7"}, 2 },
2258     {   "&[before 3]\\u30a7<<<\\u30a9"
2259         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2260         "&\\u30A8=\\u30A8=\\u3048=\\uff74",
2261       {"\\u30a9", "\\u30a7"}, 2 },
2262   };
2263   uint32_t i;
2264
2265
2266   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2267     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2268   }
2269
2270 #if 0
2271   const char* rule1 =
2272         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2273         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2274         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
2275   const char* rule2 =
2276         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
2277         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2278         "&\\u30A8=\\u30A8=\\u3048=\\uff74";
2279   const char* test[] = {
2280       "\\u30c6\\u30fc\\u30bf",
2281       "\\u30c6\\u30a7\\u30bf",
2282   };
2283   genericRulesStarter(rule1, test, UPRV_LENGTHOF(test));
2284   genericRulesStarter(rule2, test, UPRV_LENGTHOF(test));
2285 /* this piece of code should be in some sort of verbose mode     */
2286 /* it gets the collation elements for elements and prints them   */
2287 /* This is useful when trying to see whether the problem is      */
2288   {
2289     UErrorCode status = U_ZERO_ERROR;
2290     uint32_t i = 0;
2291     UCollationElements *it = NULL;
2292     uint32_t CE;
2293     UChar string[256];
2294     uint32_t uStringLen;
2295     UCollator *coll = NULL;
2296
2297     uStringLen = u_unescape(rule1, string, 256);
2298
2299     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2300
2301     /*coll = ucol_open("ja_JP_JIS", &status);*/
2302     it = ucol_openElements(coll, string, 0, &status);
2303
2304     for(i = 0; i < UPRV_LENGTHOF(test); i++) {
2305       log_verbose("%s\n", test[i]);
2306       uStringLen = u_unescape(test[i], string, 256);
2307       ucol_setText(it, string, uStringLen, &status);
2308
2309       while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
2310         log_verbose("%08X\n", CE);
2311       }
2312       log_verbose("\n");
2313
2314     }
2315
2316     ucol_closeElements(it);
2317     ucol_close(coll);
2318   }
2319 #endif
2320 }
2321
2322 static void TestPrefixCompose(void) {
2323   const char* rule1 =
2324         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
2325   /*
2326   const char* test[] = {
2327       "\\u30c6\\u30fc\\u30bf",
2328       "\\u30c6\\u30a7\\u30bf",
2329   };
2330   */
2331   {
2332     UErrorCode status = U_ZERO_ERROR;
2333     /*uint32_t i = 0;*/
2334     /*UCollationElements *it = NULL;*/
2335 /*    uint32_t CE;*/
2336     UChar string[256];
2337     uint32_t uStringLen;
2338     UCollator *coll = NULL;
2339
2340     uStringLen = u_unescape(rule1, string, 256);
2341
2342     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2343     ucol_close(coll);
2344   }
2345
2346
2347 }
2348
2349 /*
2350 [last variable] last variable value
2351 [last primary ignorable] largest CE for primary ignorable
2352 [last secondary ignorable] largest CE for secondary ignorable
2353 [last tertiary ignorable] largest CE for tertiary ignorable
2354 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
2355 */
2356
2357 static void TestRuleOptions(void) {
2358   /* values here are hardcoded and are correct for the current UCA
2359    * when the UCA changes, one might be forced to change these
2360    * values.
2361    */
2362
2363   /*
2364    * These strings contain the last character before [variable top]
2365    * and the first and second characters (by primary weights) after it.
2366    * See FractionalUCA.txt. For example:
2367       [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
2368       [variable top = 0C FE]
2369       [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
2370      and
2371       00B4; [0D 0C, 05, 05]
2372    *
2373    * Note: Starting with UCA 6.0, the [variable top] collation element
2374    * is not the weight of any character or string,
2375    * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
2376    */
2377 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
2378 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
2379 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
2380
2381   /*
2382    * This string has to match the character that has the [last regular] weight
2383    * which changes with each UCA version.
2384    * See the bottom of FractionalUCA.txt which says something like
2385       [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
2386    *
2387    * Note: Starting with UCA 6.0, the [last regular] collation element
2388    * is not the weight of any character or string,
2389    * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
2390    */
2391 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
2392
2393   static const struct {
2394     const char *rules;
2395     const char *data[10];
2396     const uint32_t len;
2397   } tests[] = {
2398 #if 0
2399     /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
2400     /* - all befores here amount to zero */
2401     { "&[before 3][first tertiary ignorable]<<<a",
2402         { "\\u0000", "a"}, 2
2403     }, /* you cannot go before first tertiary ignorable */
2404
2405     { "&[before 3][last tertiary ignorable]<<<a",
2406         { "\\u0000", "a"}, 2
2407     }, /* you cannot go before last tertiary ignorable */
2408 #endif
2409     /*
2410      * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
2411      * and it *is* possible to "go before" that.
2412      */
2413     { "&[before 3][first secondary ignorable]<<<a",
2414         { "\\u0000", "a"}, 2
2415     },
2416
2417     { "&[before 3][last secondary ignorable]<<<a",
2418         { "\\u0000", "a"}, 2
2419     },
2420
2421     /* 'normal' befores */
2422
2423     /*
2424      * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
2425      * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
2426      * because there is no tailoring space before that boundary.
2427      * Made the tests work by tailoring to a space instead.
2428      */
2429     { "&[before 3][first primary ignorable]<<<c<<<b &' '<a",  /* was &[first primary ignorable]<a */
2430         {  "c", "b", "\\u0332", "a" }, 4
2431     },
2432
2433     /* we don't have a code point that corresponds to
2434      * the last primary ignorable
2435      */
2436     { "&[before 3][last primary ignorable]<<<c<<<b &' '<a",  /* was &[last primary ignorable]<a */
2437         {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
2438     },
2439
2440     { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
2441         {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
2442     },
2443
2444     { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
2445         { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
2446     },
2447
2448     { "&[first regular]<a"
2449       "&[before 1][first regular]<b",
2450       { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
2451     },
2452
2453     { "&[before 1][last regular]<b"
2454       "&[last regular]<a",
2455         { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
2456     },
2457
2458     { "&[before 1][first implicit]<b"
2459       "&[first implicit]<a",
2460         { "b", "\\u4e00", "a", "\\u4e01"}, 4
2461     },
2462 #if 0  /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
2463     { "&[before 1][last implicit]<b"
2464       "&[last implicit]<a",
2465         { "b", "\\U0010FFFD", "a" }, 3
2466     },
2467 #endif
2468     { "&[last variable]<z"
2469       "&' '<x"  /* was &[last primary ignorable]<x, see above */
2470       "&[last secondary ignorable]<<y"
2471       "&[last tertiary ignorable]<<<w"
2472       "&[top]<u",
2473       {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
2474     }
2475
2476   };
2477   uint32_t i;
2478
2479   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2480     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2481   }
2482 }
2483
2484
2485 static void TestOptimize(void) {
2486   /* this is not really a test - just trying out
2487    * whether copying of UCA contents will fail
2488    * Cannot really test, since the functionality
2489    * remains the same.
2490    */
2491   static const struct {
2492     const char *rules;
2493     const char *data[10];
2494     const uint32_t len;
2495   } tests[] = {
2496     /* - all befores here amount to zero */
2497     { "[optimize [\\uAC00-\\uD7FF]]",
2498     { "a", "b"}, 2}
2499   };
2500   uint32_t i;
2501
2502   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2503     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2504   }
2505 }
2506
2507 /*
2508 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
2509 weiv    ucol_strcollIter?
2510 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
2511 weiv    these are the input strings?
2512 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
2513 weiv    will check - could be a problem with utf-8 iterator
2514 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
2515 weiv    hmmm
2516 cycheng@ca.ibm.c... note that we have a standalone high surrogate
2517 weiv    that doesn't sound right
2518 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
2519 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
2520 cycheng@ca.ibm.c... yes
2521 weiv    and then do the comparison
2522 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
2523 weiv    utf-16 strings look like a little endian ones in the example you sent me
2524 weiv    It could be a bug - let me try to test it out
2525 cycheng@ca.ibm.c... ok
2526 cycheng@ca.ibm.c... we can wait till the conf. call
2527 cycheng@ca.ibm.c... next weke
2528 weiv    that would be great
2529 weiv    hmmm
2530 weiv    I might be wrong
2531 weiv    let me play with it some more
2532 cycheng@ca.ibm.c... ok
2533 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
2534 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
2535 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
2536 weiv    ok
2537 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
2538 weiv    thanks
2539 cycheng@ca.ibm.c... the 4 strings we sent are just samples
2540 */
#if 0
/*
 * Disabled experiment: compares the same pair of strings through UTF-16BE
 * and through UTF-8 character iterators with normalization switched on, and
 * reports an error when the two comparison results differ.
 */
static void Alexis(void) {
  /* Same two strings, once as UTF-16BE bytes and once as UTF-8 bytes. */
  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };
  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator iterU161, iterU162;
  UCharIterator iterU81, iterU82;
  UCollationResult resU16, resU8;

  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);

  uiter_setUTF16BE(&iterU161, utf16be[0], 4);
  uiter_setUTF16BE(&iterU162, utf16be[1], 4);
  uiter_setUTF8(&iterU81, utf8[0], 4);
  uiter_setUTF8(&iterU82, utf8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
  resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);

  if(resU16 != resU8) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
2581 /* Alexis2: collates each pair via UTF-16 ucol_strcoll() and via UTF-16BE and UTF-8 UCharIterators; disagreements are logged at verbose level only. */
2582 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
2583 static void Alexis2(void) {
2584   UErrorCode status = U_ZERO_ERROR;
2585   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2586   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2587   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2588   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
2589
2590   UConverter *conv = NULL;
2591
2592   UCharIterator U16BEItS, U16BEItT;
2593   UCharIterator U8ItS, U8ItT;
2594
2595   UCollationResult resU16, resU16BE, resU8;
2596   /* Each row is a { source, target } pair written with \uXXXX escapes for u_unescape(). */
2597   static const char* const pairs[][2] = {
2598     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
2599     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
2600     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
2601     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
2602     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
2603     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
2604     { "\\u0020", "\\u0020\\u0000"}
2605 /*
2606 5F20 (my result here)
2607 5F204E008E3F
2608 5F20 (your result here)
2609 */
2610   };
2611
2612   int32_t i = 0;
2613
2614   UCollator *coll = ucol_open("", &status);
2615   if(status == U_FILE_ACCESS_ERROR) {
2616     log_data_err("Is your data around?\n");
2617     return;
2618   } else if(U_FAILURE(status)) {
2619     log_err("Error opening collator\n");
2620     return;
2621   }
2622   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2623   conv = ucnv_open("UTF16BE", &status); /* NOTE(review): the result of ucnv_open() is not checked here. */
2624   for(i = 0; i < UPRV_LENGTHOF(pairs); i++) {
2625     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2626     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2627     /* Reference result: plain UTF-16 comparison with explicit lengths. */
2628     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
2629
2630     log_verbose("Result of strcoll is %i\n", resU16);
2631
2632     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
2633     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
2634     /* uiter_setUTF16BE() expects the string length in bytes (an even number),
2635        which is what ucnv_fromUChars() returned above; passing the UChar count
2636        would cover only half of the string, or none of it when the count is odd. */
2637
2638     uiter_setUTF16BE(&U16BEItS, U16BESource, U16BELenS);
2639     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16BELenT);
2640
2641     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
2642
2643     log_verbose("Result of U16BE is %i\n", resU16BE);
2644
2645     if(resU16 != resU16BE) {
2646       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
2647     }
2648     /* Same comparison again through UTF-8 iterators; u_strToUTF8() reports the byte lengths. */
2649     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
2650     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
2651
2652     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
2653     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
2654
2655     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
2656
2657     if(resU16 != resU8) {
2658       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
2659     }
2660
2661   }
2662
2663   ucol_close(coll);
2664   ucnv_close(conv);
2665 }
2666 /* TestHebrewUCA: parses three hex-digit UTF-8 strings and checks with doTest() that every earlier one is UCOL_LESS than every later one. */
2667 static void TestHebrewUCA(void) {
2668   UErrorCode status = U_ZERO_ERROR;
2669   static const char *first[] = {
2670     "d790d6b8d79cd795d6bcd7a9",
2671     "d790d79cd79ed7a7d799d799d7a1",
2672     "d790d6b4d79ed795d6bcd7a9",
2673   };
2674
2675   char utf8String[3][256];
2676   UChar utf16String[3][256];
2677
2678   int32_t i = 0, j = 0;
2679   int32_t sizeUTF8[3];
2680   int32_t sizeUTF16[3];
2681
2682   UCollator *coll = ucol_open("", &status);
2683   if (U_FAILURE(status)) {
2684       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
2685       return;
2686   }
2687   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
2688   /* Hex digits -> UTF-8 bytes -> UTF-16, dumping each converted string in verbose mode. */
2689   for(i = 0; i < UPRV_LENGTHOF(first); i++) {
2690     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
2691     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
2692     log_verbose("%i: ", i); /* the index was missing: "%i" had no matching argument */
2693     for(j = 0; j < sizeUTF16[i]; j++) {
2694       /*log_verbose("\\u%04X", utf16String[i][j]);*/
2695       log_verbose("%04X", utf16String[i][j]);
2696     }
2697     log_verbose("\n");
2698   }
2699   for(i = 0; i < UPRV_LENGTHOF(first)-1; i++) {
2700     for(j = i + 1; j < UPRV_LENGTHOF(first); j++) {
2701       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
2702     }
2703   }
2704
2705   ucol_close(coll);
2706
2707 }
2708 /* TestPartialSortKeyTermination: calls ucol_nextSortKeyPart() on strings holding unpaired surrogates; only completion is observed, the key is not inspected. */
2709 static void TestPartialSortKeyTermination(void) {
2710   static const char* cases[] = {
2711     "\\u1234\\u1234\\udc00",
2712     "\\udc00\\ud800\\ud800"
2713   };
2714
2715   int32_t i;
2716
2717   UErrorCode status = U_ZERO_ERROR;
2718
2719   UCollator *coll = ucol_open("", &status); /* NOTE(review): status is not checked after opening. */
2720
2721   UCharIterator iter;
2722
2723   UChar currCase[256];
2724   int32_t length = 0;
2725   int32_t pKeyLen = 0;
2726
2727   uint8_t key[256];
2728
2729   for(i = 0; i < UPRV_LENGTHOF(cases); i++) {
2730     uint32_t state[2] = {0, 0}; /* fresh iteration state for every case */
2731     length = u_unescape(cases[i], currCase, 256);
2732     uiter_setString(&iter, currCase, length);
2733     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
2734     (void)pKeyLen;   /* Suppress set but not used warning. */
2735
2736     log_verbose("Done\n");
2737
2738   }
2739   ucol_close(coll);
2740 }
2741 /* TestSettings: at primary strength with the case level off, "apple" and "Apple" must compare equal for both the "" and "en" collators. */
2742 static void TestSettings(void) {
2743   static const char* cases[] = {
2744     "apple",
2745       "Apple"
2746   };
2747
2748   static const char* locales[] = {
2749     "",
2750       "en"
2751   };
2752
2753   UErrorCode status = U_ZERO_ERROR;
2754
2755   int32_t i = 0, j = 0;
2756
2757   UChar source[256], target[256];
2758   int32_t sLen = 0, tLen = 0;
2759
2760   UCollator *collateObject = NULL;
2761   for(i = 0; i < UPRV_LENGTHOF(locales); i++) {
2762     collateObject = ucol_open(locales[i], &status);
2763     ucol_setStrength(collateObject, UCOL_PRIMARY);
2764     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
2765     for(j = 1; j < UPRV_LENGTHOF(cases); j++) { /* compare each case with its predecessor */
2766       sLen = u_unescape(cases[j-1], source, 256);
2767       source[sLen] = 0;
2768       tLen = u_unescape(cases[j], target, 256);
2769       target[tLen] = 0; /* terminate target; this used to write into source by mistake */
2770       doTest(collateObject, source, target, UCOL_EQUAL);
2771     }
2772     ucol_close(collateObject);
2773   }
2774 }
2775 /* TestEqualsForCollator: runs ucol_equals() checks on source; CLOSES the target passed in (callers must not reuse or close it); returns the number of errors logged. */
2776 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
2777     UErrorCode status = U_ZERO_ERROR;
2778     int32_t errorNo = 0;
2779     const UChar *sourceRules = NULL;
2780     int32_t sourceRulesLen = 0;
2781     UParseError parseError;
2782     UColAttributeValue french = UCOL_OFF;
2783
2784     if(!ucol_equals(source, target)) {
2785         log_err("Same collators, different address not equal\n");
2786         errorNo++;
2787     }
2788     ucol_close(target); /* the caller's target is released here; the variable is reused below */
2789     if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) { /* further checks only when locName is the actual locale */
2790         target = ucol_safeClone(source, NULL, NULL, &status);
2791         if(U_FAILURE(status)) {
2792             log_err("Error creating clone\n");
2793             errorNo++;
2794             return errorNo;
2795         }
2796         if(!ucol_equals(source, target)) {
2797             log_err("Collator different from it's clone\n");
2798             errorNo++;
2799         }
2800         french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
2801         if(french == UCOL_ON) { /* flip the clone's setting so that it differs from source */
2802             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
2803         } else {
2804             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
2805         }
2806         if(U_FAILURE(status)) {
2807             log_err("Error setting attributes\n");
2808             errorNo++;
2809             return errorNo; /* NOTE(review): the clone in target is not closed on this path */
2810         }
2811         if(ucol_equals(source, target)) {
2812             log_err("Collators same even when options changed\n");
2813             errorNo++;
2814         }
2815         ucol_close(target);
2816         /* Rebuild a collator from source's rule string and expect it to equal source. */
2817         sourceRules = ucol_getRules(source, &sourceRulesLen);
2818         target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2819         if(U_FAILURE(status)) {
2820             log_err("Error instantiating target from rules - %s\n", u_errorName(status));
2821             errorNo++;
2822             return errorNo;
2823         }
2824         /* Note: The tailoring rule string is an optional data item. */
2825         if(!ucol_equals(source, target) && sourceRulesLen != 0) {
2826             log_err("Collator different from collator that was created from the same rules\n");
2827             errorNo++;
2828         }
2829         ucol_close(target);
2830     }
2831     return errorNo;
2832 }
2833
2834 /* TestEquals: exercises ucol_equals() on rule-built collators, on root, and on every locale from uloc_getAvailable(). */
2835 static void TestEquals(void) {
2836     /* ucol_equals is not currently a public API. There is a chance that it will become
2837     * something like this.
2838     */
2839     /* test whether the two collators instantiated from the same locale are equal */
2840     UErrorCode status = U_ZERO_ERROR;
2841     UParseError parseError;
2842     int32_t noOfLoc = uloc_countAvailable();
2843     const char *locName = NULL;
2844     UCollator *source = NULL, *target = NULL;
2845     int32_t i = 0;
2846
2847     const char* rules[] = {
2848         "&l < lj <<< Lj <<< LJ",
2849         "&n < nj <<< Nj <<< NJ",
2850         "&ae <<< \\u00e4",
2851         "&AE <<< \\u00c4"
2852     };
2853     /*
2854     const char* badRules[] = {
2855     "&l <<< Lj",
2856     "&n < nj <<< nJ <<< NJ",
2857     "&a <<< \\u00e4",
2858     "&AE <<< \\u00c4 <<< x"
2859     };
2860     */
2861
2862     UChar sourceRules[1024], targetRules[1024];
2863     int32_t sourceRulesSize = 0, targetRulesSize = 0;
2864     int32_t rulesSize = UPRV_LENGTHOF(rules);
2865     /* sourceRules concatenates the rules in order, targetRules in reverse order. */
2866     for(i = 0; i < rulesSize; i++) {
2867         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
2868         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
2869     }
2870
2871     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2872     if(status == U_FILE_ACCESS_ERROR) {
2873         log_data_err("Is your data around?\n");
2874         return;
2875     } else if(U_FAILURE(status)) {
2876         log_err("Error opening collator\n");
2877         return;
2878     }
2879     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); /* NOTE(review): status is not checked after this call */
2880     if(!ucol_equals(source, target)) {
2881         log_err("Equivalent collators not equal!\n");
2882     }
2883     ucol_close(source);
2884     ucol_close(target);
2885
2886     source = ucol_open("root", &status);
2887     target = ucol_open("root", &status);
2888     log_verbose("Testing root\n");
2889     if(!ucol_equals(source, source)) {
2890         log_err("Same collator not equal\n");
2891     }
2892     if(TestEqualsForCollator("root", source, target)) { /* the helper closes target */
2893         log_err("Errors for root\n");
2894     }
2895     ucol_close(source);
2896
2897     for(i = 0; i<noOfLoc; i++) {
2898         status = U_ZERO_ERROR;
2899         locName = uloc_getAvailable(i);
2900         /*if(hasCollationElements(locName)) {*/
2901         log_verbose("Testing equality for locale %s\n", locName);
2902         source = ucol_open(locName, &status);
2903         target = ucol_open(locName, &status);
2904         if (U_FAILURE(status)) {
2905             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
2906             continue; /* NOTE(review): source/target are not closed on this path */
2907         }
2908         if(TestEqualsForCollator(locName, source, target)) { /* the helper closes target */
2909             log_err("Errors for locale %s\n", locName);
2910         }
2911         ucol_close(source);
2912         /*}*/
2913     }
2914 }
2915 /* TestJ2726: with UCOL_SHIFTED alternate handling at primary strength ("en"), "a", "a " and " a" are expected to compare equal in every order. */
2916 static void TestJ2726(void) {
2917     UChar a[2] = { 0x61, 0x00 }; /*"a"*/
2918     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
2919     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
2920     UErrorCode status = U_ZERO_ERROR;
2921     UCollator *coll = ucol_open("en", &status); /* NOTE(review): status is never checked in this test */
2922     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2923     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
2924     doTest(coll, a, aSpace, UCOL_EQUAL);
2925     doTest(coll, aSpace, a, UCOL_EQUAL);
2926     doTest(coll, a, spaceA, UCOL_EQUAL);
2927     doTest(coll, spaceA, a, UCOL_EQUAL);
2928     doTest(coll, spaceA, aSpace, UCOL_EQUAL);
2929     doTest(coll, aSpace, spaceA, UCOL_EQUAL);
2930     ucol_close(coll);
2931 }
2932 /* NullRule: a rule string consisting of one U+0000 must be rejected by ucol_openRules(), while a zero-length rule string must be accepted. */
2933 static void NullRule(void) {
2934     UChar r[3] = {0};
2935     UErrorCode status = U_ZERO_ERROR;
2936     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); /* length 1: the rule is a single NUL */
2937     if(U_SUCCESS(status)) {
2938         log_err("This should have been an error!\n");
2939         ucol_close(coll);
2940     } else {
2941         status = U_ZERO_ERROR; /* expected failure; clear it for the next call */
2942     }
2943     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); /* length 0: empty rules */
2944     if(U_FAILURE(status)) {
2945         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
2946     } else {
2947         ucol_close(coll);
2948     }
2949 }
2950
2951 /**
2952  * Test for numeric collation (UCOL_NUMERIC_COLLATION): runs the ordered lists of
2953  * digit strings with the attribute on, and expects leading zeroes to be ignored.
2954  */
2955 static void TestNumericCollation(void)
2956 {
2957     UErrorCode status = U_ZERO_ERROR;
2958
2959     const static char *basicTestStrings[]={
2960     "hello1",
2961     "hello2",
2962     "hello2002",
2963     "hello2003",
2964     "hello123456",
2965     "hello1234567",
2966     "hello10000000",
2967     "hello100000000",
2968     "hello1000000000",
2969     "hello10000000000",
2970     };
2971
2972     const static char *preZeroTestStrings[]={
2973     "avery10000",
2974     "avery010000",
2975     "avery0010000",
2976     "avery00010000",
2977     "avery000010000",
2978     "avery0000010000",
2979     "avery00000010000",
2980     "avery000000010000",
2981     };
2982
2983     const static char *thirtyTwoBitNumericStrings[]={
2984     "avery42949672960",
2985     "avery42949672961",
2986     "avery42949672962",
2987     "avery429496729610"
2988     };
2989
2990      const static char *longNumericStrings[]={
2991      /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
2992         In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
2993         are treated as multiple collation elements. */
2994     "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
2995     "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
2996     "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
2997     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
2998     "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
2999     "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
3000     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
3001     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
3002     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
3003     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
3004     };
3005
3006     const static char *supplementaryDigits[] = {
3007       "\\uD835\\uDFCE", /* 0 */
3008       "\\uD835\\uDFCF", /* 1 */
3009       "\\uD835\\uDFD0", /* 2 */
3010       "\\uD835\\uDFD1", /* 3 */
3011       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
3012       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
3013       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
3014       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
3015       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
3016       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
3017     };
3018
3019     const static char *foreignDigits[] = {
3020       "\\u0661",
3021         "\\u0662",
3022         "\\u0663",
3023       "\\u0661\\u0660",
3024       "\\u0661\\u0662",
3025       "\\u0661\\u0663",
3026       "\\u0662\\u0660",
3027       "\\u0662\\u0662",
3028       "\\u0662\\u0663",
3029       "\\u0663\\u0660",
3030       "\\u0663\\u0662",
3031       "\\u0663\\u0663"
3032     };
3033
3034     const static char *evenZeroes[] = {
3035       "2000",
3036       "2001",
3037         "2002",
3038         "2003"
3039     };
3040
3041     UColAttribute att = UCOL_NUMERIC_COLLATION;
3042     UColAttributeValue val = UCOL_ON;
3043
3044     /* Open our collator. */
3045     UCollator* coll = ucol_open("root", &status);
3046     if (U_FAILURE(status)){
3047         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
3048               myErrorName(status));
3049         return;
3050     }
3051     genericLocaleStarterWithOptions("root", basicTestStrings, UPRV_LENGTHOF(basicTestStrings), &att, &val, 1);
3052     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, UPRV_LENGTHOF(thirtyTwoBitNumericStrings), &att, &val, 1);
3053     genericLocaleStarterWithOptions("root", longNumericStrings, UPRV_LENGTHOF(longNumericStrings), &att, &val, 1);
3054     genericLocaleStarterWithOptions("en_US", foreignDigits, UPRV_LENGTHOF(foreignDigits), &att, &val, 1);
3055     genericLocaleStarterWithOptions("root", supplementaryDigits, UPRV_LENGTHOF(supplementaryDigits), &att, &val, 1);
3056     genericLocaleStarterWithOptions("root", evenZeroes, UPRV_LENGTHOF(evenZeroes), &att, &val, 1);
3057
3058     /* Setting up our collator to do digits. */
3059     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
3060     if (U_FAILURE(status)){
3061         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
3062               myErrorName(status));
3063         return;
3064     }
3065
3066     /*
3067        Testing that prepended zeroes still yield the correct collation behavior.
3068        We expect that every element in our strings array will be equal.
3069     */
3070     genericOrderingTestWithResult(coll, preZeroTestStrings, UPRV_LENGTHOF(preZeroTestStrings), UCOL_EQUAL);
3071
3072     ucol_close(coll);
3073 }
3074 /* TestTibetanConformance: two sequences that differ only by an extra U+0591 are expected to compare equal with normalization on. */
3075 static void TestTibetanConformance(void)
3076 {
3077     const char* test[] = {
3078         "\\u0FB2\\u0591\\u0F71\\u0061",
3079         "\\u0FB2\\u0F71\\u0061"
3080     };
3081
3082     UErrorCode status = U_ZERO_ERROR;
3083     UCollator *coll = ucol_open("", &status);
3084     UChar source[100];
3085     UChar target[100];
3086     int result;
3087     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3088     if (U_SUCCESS(status)) { /* covers both opening the collator and setting the attribute */
3089         u_unescape(test[0], source, 100);
3090         u_unescape(test[1], target, 100);
3091         doTest(coll, source, target, UCOL_EQUAL);
3092         result = ucol_strcoll(coll, source, -1,   target, -1); /* -1: NUL-terminated strings */
3093         log_verbose("result %d\n", result);
3094         if (UCOL_EQUAL != result) {
3095             log_err("Tibetan comparison error\n");
3096         }
3097     }
3098     ucol_close(coll);
3099     /* Runs regardless of the status above; presumably repeats the check through the generic harness. */
3100     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
3101 }
3102 /* TestPinyinProblem: hands two Han strings to genericLocaleStarter() for the "zh__PINYIN" locale (presumably expecting them in the listed order). */
3103 static void TestPinyinProblem(void) {
3104     static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" };
3105     genericLocaleStarter("zh__PINYIN", test, UPRV_LENGTHOF(test));
3106 }
3107
3108 /**
3109  * Iterate through the given iterator, checking to see that all the strings
3110  * in the expected array are present; each one not seen is reported via log_err().
3111  * @param expected array of strings we expect to see, or NULL
3112  * @param expectedCount number of elements of expected, or 0 (asserted to be below 31)
3113  * @return the value from uenum_count(), or -1 when assertSuccess() rejects that call */
3114 static int32_t checkUEnumeration(const char* msg, /* label used in the verbose log */
3115                                  UEnumeration* iter, /* enumeration to walk */
3116                                  const char** expected,
3117                                  int32_t expectedCount) {
3118     UErrorCode ec = U_ZERO_ERROR;
3119     int32_t i = 0, n, j, bit;
3120     int32_t seenMask = 0; /* bit j is set once expected[j] has been matched */
3121
3122     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
3123     n = uenum_count(iter, &ec);
3124     if (!assertSuccess("count", &ec)) return -1;
3125     log_verbose("%s = [", msg);
3126     for (;; ++i) { /* i counts the strings actually returned by the enumeration */
3127         const char* s = uenum_next(iter, NULL, &ec);
3128         if (!assertSuccess("snext", &ec) || s == NULL) break;
3129         if (i != 0) log_verbose(",");
3130         log_verbose("%s", s);
3131         /* check expected list */
3132         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3133             if ((seenMask&bit) == 0 && /* skip entries that were already matched */
3134                 uprv_strcmp(s, expected[j]) == 0) {
3135                 seenMask |= bit;
3136                 break;
3137             }
3138         }
3139     }
3140     log_verbose("] (%d)\n", i);
3141     assertTrue("count verified", i==n); /* number iterated must equal the reported count */
3142     /* did we see all expected strings? */
3143     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3144         if ((seenMask&bit)!=0) {
3145             log_verbose("Ok: \"%s\" seen\n", expected[j]);
3146         } else {
3147             log_err("FAIL: \"%s\" not seen\n", expected[j]);
3148         }
3149     }
3150     return n;
3151 }
3152
3153 /**
3154  * Test new API added for separate collation tree: ucol_openAvailableLocales(), ucol_getKeywords(), ucol_getKeywordValues() and ucol_getFunctionalEquivalent().
3155  */
3156 static void TestSeparateTrees(void) {
3157     UErrorCode ec = U_ZERO_ERROR;
3158     UEnumeration *e = NULL;
3159     int32_t n = -1;
3160     UBool isAvailable;
3161     char loc[256];
3162     /* Strings that checkUEnumeration() must find in the respective enumerations below. */
3163     static const char* AVAIL[] = { "en", "de" };
3164
3165     static const char* KW[] = { "collation" };
3166
3167     static const char* KWVAL[] = { "phonebook", "stroke" };
3168
3169 #if !UCONFIG_NO_SERVICE
3170     e = ucol_openAvailableLocales(&ec);
3171     if (e != NULL) {
3172         assertSuccess("ucol_openAvailableLocales", &ec);
3173         assertTrue("ucol_openAvailableLocales!=0", e!=0);
3174         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, UPRV_LENGTHOF(AVAIL));
3175         (void)n;    /* Suppress set but not used warnings. */
3176         /* Don't need to check n because we check list */
3177         uenum_close(e);
3178     } else {
3179         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
3180     }
3181 #endif
3182     /* NOTE(review): ec is not reset to U_ZERO_ERROR between the sections of this function. */
3183     e = ucol_getKeywords(&ec);
3184     if (e != NULL) {
3185         assertSuccess("ucol_getKeywords", &ec);
3186         assertTrue("ucol_getKeywords!=0", e!=0);
3187         n = checkUEnumeration("ucol_getKeywords", e, KW, UPRV_LENGTHOF(KW));
3188         /* Don't need to check n because we check list */
3189         uenum_close(e);
3190     } else {
3191         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
3192     }
3193
3194     e = ucol_getKeywordValues(KW[0], &ec);
3195     if (e != NULL) {
3196         assertSuccess("ucol_getKeywordValues", &ec);
3197         assertTrue("ucol_getKeywordValues!=0", e!=0);
3198         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, UPRV_LENGTHOF(KWVAL));
3199         /* Don't need to check n because we check list */
3200         uenum_close(e);
3201     } else {
3202         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
3203     }
3204
3205     /* Try setting a warning before calling ucol_getKeywordValues */
3206     ec = U_USING_FALLBACK_WARNING;
3207     e = ucol_getKeywordValues(KW[0], &ec);
3208     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
3209         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
3210         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, UPRV_LENGTHOF(KWVAL));
3211         /* Don't need to check n because we check list */
3212         uenum_close(e);
3213     }
3214
3215     /*
3216 U_DRAFT int32_t U_EXPORT2
3217 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
3218                              const char* locale, UBool* isAvailable,
3219                              UErrorCode* status);
3220 }
3221 */
3222     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de", /* expected: "root", available */
3223                                      &isAvailable, &ec);
3224     if (assertSuccess("getFunctionalEquivalent", &ec)) {
3225         assertEquals("getFunctionalEquivalent(de)", "root", loc);
3226         assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
3227                    isAvailable == TRUE);
3228     }
3229
3230     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE", /* expected: "root", not available */
3231                                      &isAvailable, &ec);
3232     if (assertSuccess("getFunctionalEquivalent", &ec)) {
3233         assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
3234         assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
3235                    isAvailable == FALSE);
3236     }
3237 }
3238
3239 /* supersedes TestJ784; feeds two string lists to both a "[before 2]" rule set for tone-marked vowels and the "zh" locale */
3240 static void TestBeforePinyin(void) {
3241     const static char rules[] = {
3242         "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
3243         "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
3244         "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
3245         "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
3246         "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
3247         "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
3248     };
3249     /* First list: syllables starting with "l", with and without a macron on the vowel. */
3250     const static char *test[] = {
3251         "l\\u0101",
3252         "la",
3253         "l\\u0101n",
3254         "lan ",
3255         "l\\u0113",
3256         "le",
3257         "l\\u0113n",
3258         "len"
3259     };
3260     /* Second list: tone-marked and plain "a" in both cases after x/X, then with a trailing x. */
3261     const static char *test2[] = {
3262         "x\\u0101",
3263         "x\\u0100",
3264         "X\\u0101",
3265         "X\\u0100",
3266         "x\\u00E1",
3267         "x\\u00C1",
3268         "X\\u00E1",
3269         "X\\u00C1",
3270         "x\\u01CE",
3271         "x\\u01CD",
3272         "X\\u01CE",
3273         "X\\u01CD",
3274         "x\\u00E0",
3275         "x\\u00C0",
3276         "X\\u00E0",
3277         "X\\u00C0",
3278         "xa",
3279         "xA",
3280         "Xa",
3281         "XA",
3282         "x\\u0101x",
3283         "x\\u0100x",
3284         "x\\u00E1x",
3285         "x\\u00C1x",
3286         "x\\u01CEx",
3287         "x\\u01CDx",
3288         "x\\u00E0x",
3289         "x\\u00C0x",
3290         "xax",
3291         "xAx"
3292     };
3293     /* Each list goes through the explicit rules and through the "zh" locale. */
3294     genericRulesStarter(rules, test, UPRV_LENGTHOF(test));
3295     genericLocaleStarter("zh", test, UPRV_LENGTHOF(test));
3296     genericRulesStarter(rules, test2, UPRV_LENGTHOF(test2));
3297     genericLocaleStarter("zh", test2, UPRV_LENGTHOF(test2));
3298 }
3299 /* TestBeforeTightening: "[before N]" must be followed by a relation of exactly strength N; every other combination must fail with U_INVALID_FORMAT_ERROR. */
3300 static void TestBeforeTightening(void) {
3301     static const struct {
3302         const char *rules;
3303         UErrorCode expectedStatus;
3304     } tests[] = {
3305         { "&[before 1]a<x", U_ZERO_ERROR },
3306         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
3307         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
3308         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
3309         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
3310         { "&[before 2]a<<x",U_ZERO_ERROR },
3311         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
3312         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
3313         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
3314         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
3315         { "&[before 3]a<<<x",U_ZERO_ERROR },
3316         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
3317         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
3318     };
3319
3320     int32_t i = 0;
3321
3322     UErrorCode status = U_ZERO_ERROR;
3323     UChar rlz[RULE_BUFFER_LEN] = { 0 };
3324     int32_t rlen = 0; /* signed, matching what u_unescape() returns and ucol_openRules() takes */
3325
3326     UCollator *coll = NULL;
3327
3328
3329     for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
3330         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
3331         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3332         if(status != tests[i].expectedStatus) {
3333             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
3334                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
3335         }
3336         ucol_close(coll);
3337         status = U_ZERO_ERROR; /* start every rule string from a clean status */
3338     }
3339
3340 }
3341
3342 /*
3343 &m < a
3344 &[before 1] a < x <<< X << q <<< Q < z
3345 assert: m <<< M < x <<< X << q <<< Q < z < a < n
3346
3347 &m < a
3348 &[before 2] a << x <<< X << q <<< Q < z
3349 assert: m <<< M < x <<< X << q <<< Q << a < z < n
3350
3351 &m < a
3352 &[before 3] a <<< x <<< X << q <<< Q < z
3353 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
3354
3355
3356 &m << a
3357 &[before 1] a < x <<< X << q <<< Q < z
3358 assert: x <<< X << q <<< Q < z < m <<< M << a < n
3359
3360 &m << a
3361 &[before 2] a << x <<< X << q <<< Q < z
3362 assert: m <<< M << x <<< X << q <<< Q << a < z < n
3363
3364 &m << a
3365 &[before 3] a <<< x <<< X << q <<< Q < z
3366 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
3367
3368
3369 &m <<< a
3370 &[before 1] a < x <<< X << q <<< Q < z
3371 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
3372
3373 &m <<< a
3374 &[before 2] a << x <<< X << q <<< Q < z
3375 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
3376
3377 &m <<< a
3378 &[before 3] a <<< x <<< X << q <<< Q < z
3379 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
3380
3381
3382 &[before 1] s < x <<< X << q <<< Q < z
3383 assert: r <<< R < x <<< X << q <<< Q < z < s < n
3384
3385 &[before 2] s << x <<< X << q <<< Q < z
3386 assert: r <<< R < x <<< X << q <<< Q << s < z < n
3387
3388 &[before 3] s <<< x <<< X << q <<< Q < z
3389 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
3390
3391
3392 &[before 1] \u24DC < x <<< X << q <<< Q < z
3393 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
3394
3395 &[before 2] \u24DC << x <<< X << q <<< Q < z
3396 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
3397
3398 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
3399 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
3400 */
3401
3402
#if 0
/* requires features not yet supported */
/*
 * Disabled test: each entry pairs a tailoring that combines a plain reset
 * with a "[before N]" reset and the order in which the listed strings are
 * expected to sort under that tailoring. The cases mirror the "assert:"
 * lines of the comment preceding this block.
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } tests[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
          { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","x","X","a","M","q","Q","z","n" }, 9},
        { "&[before 1] s < x <<< X << q <<< Q < z",
          { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
          { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
          { "r","R","x","X","s","q","Q","z","n" }, 9},
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
          { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
          { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t caseIndex;

    /* Run the generic ordering check once per tailoring. */
    for(caseIndex = 0; caseIndex < UPRV_LENGTHOF(tests); ++caseIndex) {
        genericRulesStarter(tests[caseIndex].rules, tests[caseIndex].order, tests[caseIndex].size);
    }
}
#endif
3450
3451 static void TestTailorNULL( void ) {
3452     const static char* rule = "&a <<< '\\u0000'";
3453     UErrorCode status = U_ZERO_ERROR;
3454     UChar rlz[RULE_BUFFER_LEN] = { 0 };
3455     uint32_t rlen = 0;
3456     UChar a = 1, null = 0;
3457     UCollationResult res = UCOL_EQUAL;
3458
3459     UCollator *coll = NULL;
3460
3461
3462     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
3463     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3464
3465     if(U_FAILURE(status)) {
3466         log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
3467     } else {
3468         res = ucol_strcoll(coll, &a, 1, &null, 1);
3469
3470         if(res != UCOL_LESS) {
3471             log_err("NULL was not tailored properly!\n");
3472         }
3473     }
3474
3475     ucol_close(coll);
3476 }
3477
3478 static void
3479 TestUpperFirstQuaternary(void)
3480 {
3481   const char* tests[] = { "B", "b", "Bb", "bB" };
3482   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
3483   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
3484   genericLocaleStarterWithOptions("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att));
3485 }
3486
3487 static void
3488 TestJ4960(void)
3489 {
3490   const char* tests[] = { "\\u00e2T", "aT" };
3491   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
3492   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
3493   const char* tests2[] = { "a", "A" };
3494   const char* rule = "&[first tertiary ignorable]=A=a";
3495   UColAttribute att2[] = { UCOL_CASE_LEVEL };
3496   UColAttributeValue attVals2[] = { UCOL_ON };
3497   /* Test whether we correctly ignore primary ignorables on case level when */
3498   /* we have only primary & case level */
3499   genericLocaleStarterWithOptionsAndResult("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att), UCOL_EQUAL);
3500   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
3501   /* and case level */
3502   genericLocaleStarterWithOptions("root", tests2, UPRV_LENGTHOF(tests2), att, attVals, UPRV_LENGTHOF(att));
3503   /* Test whether completely ignorable letters have case level info (they shouldn't) */
3504   genericRulesStarterWithOptionsAndResult(rule, tests2, UPRV_LENGTHOF(tests2), att2, attVals2, UPRV_LENGTHOF(att2), UCOL_EQUAL);
3505 }
3506
3507 static void
3508 TestJ5223(void)
3509 {
3510   static const char *test = "this is a test string";
3511   UChar ustr[256];
3512   int32_t ustr_length = u_unescape(test, ustr, 256);
3513   unsigned char sortkey[256];
3514   int32_t sortkey_length;
3515   UErrorCode status = U_ZERO_ERROR;
3516   static UCollator *coll = NULL;
3517   coll = ucol_open("root", &status);
3518   if(U_FAILURE(status)) {
3519     log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
3520     return;
3521   }
3522   ucol_setStrength(coll, UCOL_PRIMARY);
3523   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
3524   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3525   if (U_FAILURE(status)) {
3526     log_err("Failed setting atributes\n");
3527     return;
3528   }
3529   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
3530   if (sortkey_length > 256) return;
3531
3532   /* we mark the position where the null byte should be written in advance */
3533   sortkey[sortkey_length-1] = 0xAA;
3534
3535   /* we set the buffer size one byte higher than needed */
3536   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3537     sortkey_length+1);
3538
3539   /* no error occurs (for me) */
3540   if (sortkey[sortkey_length-1] == 0xAA) {
3541     log_err("Hit bug at first try\n");
3542   }
3543
3544   /* we mark the position where the null byte should be written again */
3545   sortkey[sortkey_length-1] = 0xAA;
3546
3547   /* this time we set the buffer size to the exact amount needed */
3548   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3549     sortkey_length);
3550
3551   /* now the trailing null byte is not written */
3552   if (sortkey[sortkey_length-1] == 0xAA) {
3553     log_err("Hit bug at second try\n");
3554   }
3555
3556   ucol_close(coll);
3557 }
3558
/*
 * Regression test for Thai partial sort key problem.
 * Runs the generic ordering check on the "th" locale for two strings that
 * differ only in their second-to-last code unit (U+0E47 vs. U+0E48).
 */
static void
TestJ5232(void)
{
    const static char *thaiPair[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", thaiPair, UPRV_LENGTHOF(thaiPair));
}
3570
/*
 * Runs the generic ordering check for "a" before "y" under a tailoring that
 * resets on the contraction "Ny" and places "a" right after the first
 * secondary ignorable.
 */
static void
TestJ5367(void)
{
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";
    const static char *ordered[] = { "a", "y" };

    genericRulesStarter(tailoring, ordered, UPRV_LENGTHOF(ordered));
}
3578
3579 static void
3580 TestVI5913(void)
3581 {
3582     UErrorCode status = U_ZERO_ERROR;
3583     int32_t i, j;
3584     UCollator *coll =NULL;
3585     uint8_t  resColl[100], expColl[100];
3586     int32_t  rLen, tLen, ruleLen, sLen, kLen;
3587     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &b<0x1FF3-omega with Ypogegrammeni*/
3588     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
3589     /*
3590      * Note: Just tailoring &z<ae^ does not work as expected:
3591      * The UCA spec requires for discontiguous contractions that they
3592      * extend an *existing match* by one combining mark at a time.
3593      * Therefore, ae must be a contraction so that the builder finds
3594      * discontiguous contractions for ae^, for example with an intervening underdot.
3595      * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
3596      */
3597     UChar rule3[256]={
3598         0x26, 0x78, 0x3c, 0x61, 0x65,      /* &x<ae */
3599         0x26, 0x7a, 0x3c, 0x0061, 0x00ea,  /* &z<a+e with circumflex.*/
3600         0};
3601     static const UChar tData[][20]={
3602         {0x1EAC, 0},
3603         {0x0041, 0x0323, 0x0302, 0},
3604         {0x1EA0, 0x0302, 0},
3605         {0x00C2, 0x0323, 0},
3606         {0x1ED8, 0},  /* O with dot and circumflex */
3607         {0x1ECC, 0x0302, 0},
3608         {0x1EB7, 0},
3609         {0x1EA1, 0x0306, 0},
3610     };
3611     static const UChar tailorData[][20]={
3612         {0x1FA2, 0},  /* Omega with 3 combining marks */
3613         {0x03C9, 0x0313, 0x0300, 0x0345, 0},
3614         {0x1FF3, 0x0313, 0x0300, 0},
3615         {0x1F60, 0x0300, 0x0345, 0},
3616         {0x1F62, 0x0345, 0},
3617         {0x1FA0, 0x0300, 0},
3618     };
3619     static const UChar tailorData2[][20]={
3620         {0x1E63, 0x030C, 0},  /* s with dot below + caron */
3621         {0x0073, 0x0323, 0x030C, 0},
3622         {0x0073, 0x030C, 0x0323, 0},
3623     };
3624     static const UChar tailorData3[][20]={
3625         {0x007a, 0},  /*  z */
3626         {0x0061, 0x0065, 0},  /*  a + e */
3627         {0x0061, 0x00ea, 0}, /* a + e with circumflex */
3628         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
3629         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
3630         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
3631         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
3632         {0x00EA, 0},  /* e with circumflex  */
3633     };
3634
3635     /* Test Vietnamese sort. */
3636     coll = ucol_open("vi", &status);
3637     if(U_FAILURE(status)) {
3638         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
3639         return;
3640     }
3641     log_verbose("\n\nVI collation:");
3642     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
3643         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3644     }
3645     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
3646         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3647     }
3648     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
3649         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
3650     }
3651     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
3652         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3653     }
3654
3655     for (j=0; j<8; j++) {
3656         tLen = u_strlen(tData[j]);
3657         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3658         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3659         for(i = 0; i<rLen; i++) {
3660             log_verbose(" %02X", resColl[i]);
3661         }
3662     }
3663
3664     ucol_close(coll);
3665
3666     /* Test Romanian sort. */
3667     coll = ucol_open("ro", &status);
3668     log_verbose("\n\nRO collation:");
3669     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
3670         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3671     }
3672     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
3673         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3674     }
3675     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
3676         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3677     }
3678
3679     for (j=4; j<8; j++) {
3680         tLen = u_strlen(tData[j]);
3681         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3682         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3683         for(i = 0; i<rLen; i++) {
3684             log_verbose(" %02X", resColl[i]);
3685         }
3686     }
3687     ucol_close(coll);
3688
3689     /* Test the precomposed Greek character with 3 combining marks. */
3690     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
3691     ruleLen = u_strlen(rule);
3692     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3693     if (U_FAILURE(status)) {
3694         log_err("ucol_openRules failed with %s\n", u_errorName(status));
3695         return;
3696     }
3697     sLen = u_strlen(tailorData[0]);
3698     for (j=1; j<6; j++) {
3699         tLen = u_strlen(tailorData[j]);
3700         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
3701             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
3702         }
3703     }
3704     /* Test getSortKey. */
3705     tLen = u_strlen(tailorData[0]);
3706     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
3707     for (j=0; j<6; j++) {
3708         tLen = u_strlen(tailorData[j]);
3709         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
3710         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3711             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3712             for(i = 0; i<rLen; i++) {
3713                 log_err(" %02X", resColl[i]);
3714             }
3715         }
3716     }
3717     ucol_close(coll);
3718
3719     log_verbose("\n\nTailoring test for s with caron:");
3720     ruleLen = u_strlen(rule2);
3721     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3722     tLen = u_strlen(tailorData2[0]);
3723     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
3724     for (j=1; j<3; j++) {
3725         tLen = u_strlen(tailorData2[j]);
3726         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
3727         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3728             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3729             for(i = 0; i<rLen; i++) {
3730                 log_err(" %02X", resColl[i]);
3731             }
3732         }
3733     }
3734     ucol_close(coll);
3735
3736     log_verbose("\n\nTailoring test for &z< ae with circumflex:");
3737     ruleLen = u_strlen(rule3);
3738     coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3739     tLen = u_strlen(tailorData3[3]);
3740     kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
3741     log_verbose("\n Test Data[3] :%s  \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
3742     for(i = 0; i<kLen; i++) {
3743         log_verbose(" %02X", expColl[i]);
3744     }
3745     for (j=4; j<6; j++) {
3746         tLen = u_strlen(tailorData3[j]);
3747         rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
3748
3749         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3750             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3751             for(i = 0; i<rLen; i++) {
3752                 log_err(" %02X", resColl[i]);
3753             }
3754         }
3755
3756         log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3757          for(i = 0; i<rLen; i++) {
3758              log_verbose(" %02X", resColl[i]);
3759          }
3760     }
3761     ucol_close(coll);
3762 }
3763
3764 static void
3765 TestTailor6179(void)
3766 {
3767     UErrorCode status = U_ZERO_ERROR;
3768     int32_t i;
3769     UCollator *coll =NULL;
3770     uint8_t  resColl[100];
3771     int32_t  rLen, tLen, ruleLen;
3772     /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
3773     static const UChar rule1[]={
3774             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
3775             0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
3776             0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
3777             0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
3778     /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
3779     static const UChar rule2[]={
3780             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
3781             0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
3782             0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
3783             0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
3784             0x3C,0x3C,0x20,0x62,0};
3785
3786     static const UChar tData1[][4]={
3787         {0x61, 0},
3788         {0x62, 0},
3789         { 0xFDD0,0x009E, 0}
3790     };
3791     static const UChar tData2[][4]={
3792         {0x61, 0},
3793         {0x62, 0},
3794         { 0xFDD0,0x009E, 0}
3795      };
3796
3797     /*
3798      * These values from FractionalUCA.txt will change,
3799      * and need to be updated here.
3800      * TODO: Make this not check for particular sort keys.
3801      * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
3802      */
3803     static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
3804     static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
3805     static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
3806     static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
3807
3808     UParseError parseError;
3809
3810     /* Test [Last Primary ignorable] */
3811
3812     log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
3813     ruleLen = u_strlen(rule1);
3814     coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3815     if (U_FAILURE(status)) {
3816         log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
3817         return;
3818     }
3819     tLen = u_strlen(tData1[0]);
3820     rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
3821     if (rLen != UPRV_LENGTHOF(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
3822         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
3823         for(i = 0; i<rLen; i++) {
3824             log_err(" %02X", resColl[i]);
3825         }
3826         log_err("\n");
3827     }
3828     tLen = u_strlen(tData1[1]);
3829     rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
3830     if (rLen != UPRV_LENGTHOF(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
3831         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
3832         for(i = 0; i<rLen; i++) {
3833             log_err(" %02X", resColl[i]);
3834         }
3835         log_err("\n");
3836     }
3837     ucol_close(coll);
3838
3839
3840     /* Test [Last Secondary ignorable] */
3841     log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
3842     ruleLen = u_strlen(rule2);
3843     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
3844     if (U_FAILURE(status)) {
3845         log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
3846         log_info("  offset=%d  \"%s\" | \"%s\"\n",
3847                  parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
3848         return;
3849     }
3850     tLen = u_strlen(tData2[0]);
3851     rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
3852     if (rLen != UPRV_LENGTHOF(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
3853         log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
3854         for(i = 0; i<rLen; i++) {
3855             log_err(" %02X", resColl[i]);
3856         }
3857         log_err("\n");
3858     }
3859     tLen = u_strlen(tData2[1]);
3860     rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
3861     if (rLen != UPRV_LENGTHOF(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
3862       log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
3863       for(i = 0; i<rLen; i++) {
3864         log_err(" %02X", resColl[i]);
3865       }
3866       log_err("\n");
3867     }
3868     ucol_close(coll);
3869 }
3870
3871 static void
3872 TestUCAPrecontext(void)
3873 {
3874     UErrorCode status = U_ZERO_ERROR;
3875     int32_t i, j;
3876     UCollator *coll =NULL;
3877     uint8_t  resColl[100], prevColl[100];
3878     int32_t  rLen, tLen, ruleLen;
3879     UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
3880     UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
3881     /* & l middle-dot << a  a is an expansion. */
3882
3883     UChar tData1[][20]={
3884             { 0xb7, 0},  /* standalone middle dot(0xb7) */
3885             { 0x387, 0}, /* standalone middle dot(0x387) */
3886             { 0x61, 0},  /* a */
3887             { 0x6C, 0},  /* l */
3888             { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
3889             { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
3890             { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
3891             { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
3892             { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
3893             { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
3894             { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
3895      };
3896
3897     log_verbose("\n\nEN collation:");
3898     coll = ucol_open("en", &status);
3899     if (U_FAILURE(status)) {
3900         log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
3901         return;
3902     }
3903     for (j=0; j<11; j++) {
3904         tLen = u_strlen(tData1[j]);
3905         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3906         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3907             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3908                     j, tData1[j]);
3909         }
3910         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3911         for(i = 0; i<rLen; i++) {
3912             log_verbose(" %02X", resColl[i]);
3913         }
3914         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3915      }
3916      ucol_close(coll);
3917
3918
3919      log_verbose("\n\nJA collation:");
3920      coll = ucol_open("ja", &status);
3921      if (U_FAILURE(status)) {
3922          log_err("Tailoring test: &z <<a|- failed!");
3923          return;
3924      }
3925      for (j=0; j<11; j++) {
3926          tLen = u_strlen(tData1[j]);
3927          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3928          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3929              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3930                      j, tData1[j]);
3931          }
3932          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3933          for(i = 0; i<rLen; i++) {
3934              log_verbose(" %02X", resColl[i]);
3935          }
3936          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3937       }
3938       ucol_close(coll);
3939
3940
3941       log_verbose("\n\nTailoring test: & middle dot < a ");
3942       ruleLen = u_strlen(rule1);
3943       coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3944       if (U_FAILURE(status)) {
3945           log_err("Tailoring test: & middle dot < a failed!");
3946           return;
3947       }
3948       for (j=0; j<11; j++) {
3949           tLen = u_strlen(tData1[j]);
3950           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3951           if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3952               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3953                       j, tData1[j]);
3954           }
3955           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3956           for(i = 0; i<rLen; i++) {
3957               log_verbose(" %02X", resColl[i]);
3958           }
3959           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3960        }
3961        ucol_close(coll);
3962
3963
3964        log_verbose("\n\nTailoring test: & l middle-dot << a ");
3965        ruleLen = u_strlen(rule2);
3966        coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3967        if (U_FAILURE(status)) {
3968            log_err("Tailoring test: & l middle-dot << a failed!");
3969            return;
3970        }
3971        for (j=0; j<11; j++) {
3972            tLen = u_strlen(tData1[j]);
3973            rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3974            if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3975                log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3976                        j, tData1[j]);
3977            }
3978            if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
3979                log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
3980                        j, tData1[j]);
3981            }
3982            log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3983            for(i = 0; i<rLen; i++) {
3984                log_verbose(" %02X", resColl[i]);
3985            }
3986            uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3987         }
3988         ucol_close(coll);
3989 }
3990
3991 static void
3992 TestOutOfBuffer5468(void)
3993 {
3994     static const char *test = "\\u4e00";
3995     UChar ustr[256];
3996     int32_t ustr_length = u_unescape(test, ustr, 256);
3997     unsigned char shortKeyBuf[1];
3998     int32_t sortkey_length;
3999     UErrorCode status = U_ZERO_ERROR;
4000     static UCollator *coll = NULL;
4001
4002     coll = ucol_open("root", &status);
4003     if(U_FAILURE(status)) {
4004       log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4005       return;
4006     }
4007     ucol_setStrength(coll, UCOL_PRIMARY);
4008     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4009     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4010     if (U_FAILURE(status)) {
4011       log_err("Failed setting atributes\n");
4012       return;
4013     }
4014
4015     sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
4016     if (sortkey_length != 4) {
4017         log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
4018     }
4019     log_verbose("length of sortKey is %d", sortkey_length);
4020     ucol_close(coll);
4021 }
4022
4023 #define TSKC_DATA_SIZE 5
4024 #define TSKC_BUF_SIZE  50
4025 static void
4026 TestSortKeyConsistency(void)
4027 {
4028     UErrorCode icuRC = U_ZERO_ERROR;
4029     UCollator* ucol;
4030     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
4031
4032     uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4033     uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4034     int32_t i, j, i2;
4035
4036     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
4037     if (U_FAILURE(icuRC))
4038     {
4039         log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
4040         return;
4041     }
4042
4043     for (i = 0; i < TSKC_DATA_SIZE; i++)
4044     {
4045         UCharIterator uiter;
4046         uint32_t state[2] = { 0, 0 };
4047         int32_t dataLen = i+1;
4048         for (j=0; j<TSKC_BUF_SIZE; j++)
4049             bufFull[i][j] = bufPart[i][j] = 0;
4050
4051         /* Full sort key */
4052         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
4053
4054         /* Partial sort key */
4055         uiter_setString(&uiter, data, dataLen);
4056         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
4057         if (U_FAILURE(icuRC))
4058         {
4059             log_err("ucol_nextSortKeyPart failed\n");
4060             ucol_close(ucol);
4061             return;
4062         }
4063
4064         for (i2=0; i2<i; i2++)
4065         {
4066             UBool fullMatch = TRUE;
4067             UBool partMatch = TRUE;
4068             for (j=0; j<TSKC_BUF_SIZE; j++)
4069             {
4070                 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
4071                 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
4072             }
4073             if (fullMatch != partMatch) {
4074                 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
4075                                   : "partial key was consistent, but full key changed\n");
4076                 ucol_close(ucol);
4077                 return;
4078             }
4079         }
4080     }
4081
4082     /*=============================================*/
4083    ucol_close(ucol);
4084 }
4085
4086 /* ticket: 6101 */
4087 static void TestCroatianSortKey(void) {
4088     const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
4089     UErrorCode status = U_ZERO_ERROR;
4090     UCollator *ucol;
4091     UCharIterator iter;
4092
4093     static const UChar text[] = { 0x0044, 0xD81A };
4094
4095     size_t length = UPRV_LENGTHOF(text);
4096
4097     uint8_t textSortKey[32];
4098     size_t lenSortKey = 32;
4099     size_t actualSortKeyLen;
4100     uint32_t uStateInfo[2] = { 0, 0 };
4101
4102     ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
4103     if (U_FAILURE(status)) {
4104         log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
4105         return;
4106     }
4107
4108     uiter_setString(&iter, text, length);
4109
4110     actualSortKeyLen = ucol_nextSortKeyPart(
4111         ucol, &iter, (uint32_t*)uStateInfo,
4112         textSortKey, lenSortKey, &status
4113         );
4114
4115     if (actualSortKeyLen == lenSortKey) {
4116         log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
4117     }
4118
4119     ucol_close(ucol);
4120 }
4121
4122 /* ticket: 6140 */
4123 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
4124  * they are both Hiragana and Katakana
4125  */
4126 #define SORTKEYLEN 50
4127 static void TestHiragana(void) {
4128     UErrorCode status = U_ZERO_ERROR;
4129     UCollator* ucol;
4130     UCollationResult strcollresult;
4131     UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
4132     UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
4133     int32_t data1Len = UPRV_LENGTHOF(data1);
4134     int32_t data2Len = UPRV_LENGTHOF(data2);
4135     int32_t i, j;
4136     uint8_t sortKey1[SORTKEYLEN];
4137     uint8_t sortKey2[SORTKEYLEN];
4138
4139     UCharIterator uiter1;
4140     UCharIterator uiter2;
4141     uint32_t state1[2] = { 0, 0 };
4142     uint32_t state2[2] = { 0, 0 };
4143     int32_t keySize1;
4144     int32_t keySize2;
4145
4146     ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
4147             &status);
4148     if (U_FAILURE(status)) {
4149         log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
4150         return;
4151     }
4152
4153     /* Start of full sort keys */
4154     /* Full sort key1 */
4155     keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
4156     /* Full sort key2 */
4157     keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
4158     if (keySize1 == keySize2) {
4159         for (i = 0; i < keySize1; i++) {
4160             if (sortKey1[i] != sortKey2[i]) {
4161                 log_err("Full sort keys are different. Should be equal.");
4162             }
4163         }
4164     } else {
4165         log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
4166     }
4167     /* End of full sort keys */
4168
4169     /* Start of partial sort keys */
4170     /* Partial sort key1 */
4171     uiter_setString(&uiter1, data1, data1Len);
4172     keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
4173     /* Partial sort key2 */
4174     uiter_setString(&uiter2, data2, data2Len);
4175     keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
4176     if (U_SUCCESS(status) && keySize1 == keySize2) {
4177         for (j = 0; j < keySize1; j++) {
4178             if (sortKey1[j] != sortKey2[j]) {
4179                 log_err("Partial sort keys are different. Should be equal");
4180             }
4181         }
4182     } else {
4183         log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
4184     }
4185     /* End of partial sort keys */
4186
4187     /* Start of strcoll */
4188     /* Use ucol_strcoll() to determine ordering */
4189     strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
4190     if (strcollresult != UCOL_EQUAL) {
4191         log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
4192     }
4193
4194     ucol_close(ucol);
4195 }
4196
/*
 * Convenient struct for running collation tests: one pair of strings and the
 * result expected when the left string is compared with the right one.
 * Each string is a fixed-size array of MAX_TOKEN_LEN UChars; elements not
 * listed in an initializer are zero-filled by the compiler.
 * NOTE(review): doTestOneTestCase() hands source/target to doTest() without a
 * length, so initializers presumably must leave room for a terminating zero;
 * confirm against doTest().
 */
typedef struct {
  const UChar source[MAX_TOKEN_LEN];  /* String on left */
  const UChar target[MAX_TOKEN_LEN];  /* String on right */
  UCollationResult result;            /* -1, 0 or +1, depending on collation */
} OneTestCase;
4203
4204 /*
4205  * Utility function to test one collation test case.
4206  * @param testcases Array of test cases.
4207  * @param n_testcases Size of the array testcases.
4208  * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
4209  * @param n_rules Size of the array str_rules.
4210  */
4211 static void doTestOneTestCase(const OneTestCase testcases[],
4212                               int n_testcases,
4213                               const char* str_rules[],
4214                               int n_rules)
4215 {
4216   int rule_no, testcase_no;
4217   UChar rule[500];
4218   int32_t length = 0;
4219   UErrorCode status = U_ZERO_ERROR;
4220   UParseError parse_error;
4221   UCollator  *myCollation;
4222
4223   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4224
4225     length = u_unescape(str_rules[rule_no], rule, 500);
4226     if (length == 0) {
4227         log_err("ERROR: The rule cannot be unescaped: %s\n");
4228         return;
4229     }
4230     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4231     if(U_FAILURE(status)){
4232         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4233         log_info("  offset=%d  \"%s\" | \"%s\"\n",
4234                  parse_error.offset,
4235                  aescstrdup(parse_error.preContext, -1),
4236                  aescstrdup(parse_error.postContext, -1));
4237         return;
4238     }
4239     log_verbose("Testing the <<* syntax\n");
4240     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4241     ucol_setStrength(myCollation, UCOL_TERTIARY);
4242     for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
4243       doTest(myCollation,
4244              testcases[testcase_no].source,
4245              testcases[testcase_no].target,
4246              testcases[testcase_no].result
4247              );
4248     }
4249     ucol_close(myCollation);
4250   }
4251 }
4252
/*
 * Expected orderings shared by the TestSameStrengthList* tests that tailor the
 * letters a-z and the digits 1-3. The inline comments spell out the strings
 * encoded by the code points on the same line.
 */
const static OneTestCase rangeTestcases[] = {
  { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
  { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
  { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */

  { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
  { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
  { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
  { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
  { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */

  { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "m" */
  { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
  { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "a" < "h" */
  { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "a" < "e" */

  { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
  { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
  { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
  { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
  { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "kb" */
  { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
  { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
  { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azb" < "2ym" */
};

/* Number of entries in rangeTestcases. */
static int nRangeTestcases = UPRV_LENGTHOF(rangeTestcases);
4280
/*
 * Expected orderings for the tests that tailor U+4E00, U+FFFB and the
 * supplementary code points U+10000..U+10002 (written here as surrogate pairs).
 */
const static OneTestCase rangeTestcasesSupplemental[] = {
  { {0x4e00},                            {0xfffb},                          UCOL_LESS }, /* U+4E00 < U+FFFB */
  { {0xfffb},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFB < U+10000 */
  { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
  { {0x4e00},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+4E00 < U+10001 */
  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10001 < U+10002 */
  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10001 < U+10002 (same case as the previous line) */
  { {0x4e00},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+4E00 < U+10002 */
};

/* Number of entries in rangeTestcasesSupplemental. */
static int nRangeTestcasesSupplemental = UPRV_LENGTHOF(rangeTestcasesSupplemental);
4292
/*
 * Expected orderings for the tests that tailor letters in keyboard ("qwerty")
 * order. Every entry expects UCOL_LESS. The "<" and "<<" symbols in the inline
 * comments are informal and are not checked by the test; only the
 * UCollationResult value is.
 */
const static OneTestCase rangeTestcasesQwerty[] = {
  { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
  { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */

  { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
  { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */

  { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
  { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */

  { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
  { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */

  { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
    {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
  { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
    {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
};

/* Number of entries in rangeTestcasesQwerty. */
static int nRangeTestcasesQwerty = UPRV_LENGTHOF(rangeTestcasesQwerty);
4313
4314 static void TestSameStrengthList(void)
4315 {
4316   const char* strRules[] = {
4317     /* Normal */
4318     "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
4319
4320     /* Lists */
4321     "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
4322   };
4323   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4324 }
4325
4326 static void TestSameStrengthListQuoted(void)
4327 {
4328   const char* strRules[] = {
4329     /* Lists with quoted characters */
4330     "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
4331     "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
4332
4333     "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
4334     "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
4335
4336     "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
4337     "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
4338   };
4339   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4340 }
4341
4342 static void TestSameStrengthListSupplemental(void)
4343 {
4344   const char* strRules[] = {
4345     "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
4346     "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
4347     "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
4348     "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
4349   };
4350   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
4351 }
4352
4353 static void TestSameStrengthListQwerty(void)
4354 {
4355   const char* strRules[] = {
4356     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4357     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4358     "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
4359     "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
4360     "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
4361
4362     /* Quoted characters also will work if two quoted characters are not consecutive.  */
4363     "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
4364
4365     /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
4366     /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
4367
4368  };
4369   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
4370 }
4371
4372 static void TestSameStrengthListQuotedQwerty(void)
4373 {
4374   const char* strRules[] = {
4375     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4376     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4377     "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
4378
4379     /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
4380     /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
4381    };
4382   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
4383 }
4384
4385 static void TestSameStrengthListRanges(void)
4386 {
4387   const char* strRules[] = {
4388     "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
4389   };
4390   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4391 }
4392
4393 static void TestSameStrengthListSupplementalRanges(void)
4394 {
4395   const char* strRules[] = {
4396     /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
4397     "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
4398   };
4399   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
4400 }
4401
4402 static void TestSpecialCharacters(void)
4403 {
4404   const char* strRules[] = {
4405     /* Normal */
4406     "&';'<'+'<','<'-'<'&'<'*'",
4407
4408     /* List */
4409     "&';'<*'+,-&*'",
4410
4411     /* Range */
4412     "&';'<*'+'-'-&*'",
4413   };
4414
4415   const static OneTestCase specialCharacterStrings[] = {
4416     { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
4417     { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
4418     { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
4419     { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
4420   };
4421   doTestOneTestCase(specialCharacterStrings, UPRV_LENGTHOF(specialCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4422 }
4423
4424 static void TestPrivateUseCharacters(void)
4425 {
4426   const char* strRules[] = {
4427     /* Normal */
4428     "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
4429     "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
4430   };
4431
4432   const static OneTestCase privateUseCharacterStrings[] = {
4433     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4434     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4435     { {0xe2d9}, {0xe2da}, UCOL_LESS },
4436     { {0xe2da}, {0xe2db}, UCOL_LESS },
4437     { {0xe2db}, {0xe2dc}, UCOL_LESS },
4438     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4439   };
4440   doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4441 }
4442
4443 static void TestPrivateUseCharactersInList(void)
4444 {
4445   const char* strRules[] = {
4446     /* List */
4447     "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
4448     /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
4449     "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
4450   };
4451
4452   const static OneTestCase privateUseCharacterStrings[] = {
4453     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4454     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4455     { {0xe2d9}, {0xe2da}, UCOL_LESS },
4456     { {0xe2da}, {0xe2db}, UCOL_LESS },
4457     { {0xe2db}, {0xe2dc}, UCOL_LESS },
4458     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4459   };
4460   doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4461 }
4462
4463 static void TestPrivateUseCharactersInRange(void)
4464 {
4465   const char* strRules[] = {
4466     /* Range */
4467     "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
4468     "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
4469     /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
4470   };
4471
4472   const static OneTestCase privateUseCharacterStrings[] = {
4473     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4474     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4475     { {0xe2d9}, {0xe2da}, UCOL_LESS },
4476     { {0xe2da}, {0xe2db}, UCOL_LESS },
4477     { {0xe2db}, {0xe2dc}, UCOL_LESS },
4478     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4479   };
4480   doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4481 }
4482
4483 static void TestInvalidListsAndRanges(void)
4484 {
4485   const char* invalidRules[] = {
4486     /* Range not in starred expression */
4487     "&\\ufffe<\\uffff-\\U00010002",
4488
4489     /* Range without start */
4490     "&a<*-c",
4491
4492     /* Range without end */
4493     "&a<*b-",
4494
4495     /* More than one hyphen */
4496     "&a<*b-g-l",
4497
4498     /* Range in the wrong order */
4499     "&a<*k-b",
4500
4501   };
4502
4503   UChar rule[500];
4504   UErrorCode status = U_ZERO_ERROR;
4505   UParseError parse_error;
4506   int n_rules = UPRV_LENGTHOF(invalidRules);
4507   int rule_no;
4508   int length;
4509   UCollator  *myCollation;
4510
4511   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4512
4513     length = u_unescape(invalidRules[rule_no], rule, 500);
4514     if (length == 0) {
4515         log_err("ERROR: The rule cannot be unescaped: %s\n");
4516         return;
4517     }
4518     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4519     (void)myCollation;      /* Suppress set but not used warning. */
4520     if(!U_FAILURE(status)){
4521       log_err("ERROR: Could not cause a failure as expected: \n");
4522     }
4523     status = U_ZERO_ERROR;
4524   }
4525 }
4526
4527 /*
4528  * This test ensures that characters placed before a character in a different script have the same lead byte
4529  * in their collation key before and after script reordering.
4530  */
4531 static void TestBeforeRuleWithScriptReordering(void)
4532 {
4533     UParseError error;
4534     UErrorCode status = U_ZERO_ERROR;
4535     UCollator  *myCollation;
4536     char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
4537     UChar rules[500];
4538     uint32_t rulesLength = 0;
4539     int32_t reorderCodes[1] = {USCRIPT_GREEK};
4540     UCollationResult collResult;
4541
4542     uint8_t baseKey[256];
4543     uint32_t baseKeyLength;
4544     uint8_t beforeKey[256];
4545     uint32_t beforeKeyLength;
4546
4547     UChar base[] = { 0x03b1 }; /* base */
4548     int32_t baseLen = UPRV_LENGTHOF(base);
4549
4550     UChar before[] = { 0x0e01 }; /* ko kai */
4551     int32_t beforeLen = UPRV_LENGTHOF(before);
4552
4553     /*UChar *data[] = { before, base };
4554     genericRulesStarter(srules, data, 2);*/
4555
4556     log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
4557
4558     (void)beforeKeyLength;   /* Suppress set but not used warnings. */
4559     (void)baseKeyLength;
4560
4561     /* build collator */
4562     log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
4563
4564     rulesLength = u_unescape(srules, rules, UPRV_LENGTHOF(rules));
4565     myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
4566     if(U_FAILURE(status)) {
4567         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4568         return;
4569     }
4570
4571     /* check collation results - before rule applied but not script reordering */
4572     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4573     if (collResult != UCOL_GREATER) {
4574         log_err("Collation result not correct before script reordering = %d\n", collResult);
4575     }
4576
4577     /* check the lead byte of the collation keys before script reordering */
4578     baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4579     beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4580     if (baseKey[0] != beforeKey[0]) {
4581       log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4582    }
4583
4584     /* reorder the scripts */
4585     ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
4586     if(U_FAILURE(status)) {
4587         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
4588         return;
4589     }
4590
4591     /* check collation results - before rule applied and after script reordering */
4592     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4593     if (collResult != UCOL_GREATER) {
4594         log_err("Collation result not correct after script reordering = %d\n", collResult);
4595     }
4596
4597     /* check the lead byte of the collation keys after script reordering */
4598     ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4599     ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4600     if (baseKey[0] != beforeKey[0]) {
4601         log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4602     }
4603
4604     ucol_close(myCollation);
4605 }
4606
4607 /*
4608  * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
4609  */
4610 static void TestNonLeadBytesDuringCollationReordering(void)
4611 {
4612     UErrorCode status = U_ZERO_ERROR;
4613     UCollator  *myCollation;
4614     int32_t reorderCodes[1] = {USCRIPT_GREEK};
4615
4616     uint8_t baseKey[256];
4617     uint32_t baseKeyLength;
4618     uint8_t reorderKey[256];
4619     uint32_t reorderKeyLength;
4620
4621     UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
4622
4623     uint32_t i;
4624
4625
4626     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4627
4628     /* build collator tertiary */
4629     myCollation = ucol_open("", &status);
4630     ucol_setStrength(myCollation, UCOL_TERTIARY);
4631     if(U_FAILURE(status)) {
4632         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4633         return;
4634     }
4635     baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
4636
4637     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4638     if(U_FAILURE(status)) {
4639         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4640         return;
4641     }
4642     reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
4643
4644     if (baseKeyLength != reorderKeyLength) {
4645         log_err("Key lengths not the same during reordering.\n");
4646         return;
4647     }
4648
4649     for (i = 1; i < baseKeyLength; i++) {
4650         if (baseKey[i] != reorderKey[i]) {
4651             log_err("Collation key bytes not the same at position %d.\n", i);
4652             return;
4653         }
4654     }
4655     ucol_close(myCollation);
4656
4657     /* build collator quaternary */
4658     myCollation = ucol_open("", &status);
4659     ucol_setStrength(myCollation, UCOL_QUATERNARY);
4660     if(U_FAILURE(status)) {
4661         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4662         return;
4663     }
4664     baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
4665
4666     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4667     if(U_FAILURE(status)) {
4668         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4669         return;
4670     }
4671     reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
4672
4673     if (baseKeyLength != reorderKeyLength) {
4674         log_err("Key lengths not the same during reordering.\n");
4675         return;
4676     }
4677
4678     for (i = 1; i < baseKeyLength; i++) {
4679         if (baseKey[i] != reorderKey[i]) {
4680             log_err("Collation key bytes not the same at position %d.\n", i);
4681             return;
4682         }
4683     }
4684     ucol_close(myCollation);
4685 }
4686
4687 /*
4688  * Test reordering API.
4689  */
4690 static void TestReorderingAPI(void)
4691 {
4692     UErrorCode status = U_ZERO_ERROR;
4693     UCollator  *myCollation;
4694     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4695     int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_KATAKANA};
4696     int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4697     int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;
4698     UCollationResult collResult;
4699     int32_t retrievedReorderCodesLength;
4700     int32_t retrievedReorderCodes[10];
4701     UChar greekString[] = { 0x03b1 };
4702     UChar punctuationString[] = { 0x203e };
4703     int loopIndex;
4704
4705     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4706
4707     /* build collator tertiary */
4708     myCollation = ucol_open("", &status);
4709     ucol_setStrength(myCollation, UCOL_TERTIARY);
4710     if(U_FAILURE(status)) {
4711         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4712         return;
4713     }
4714
4715     /* set the reorderding */
4716     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4717     if (U_FAILURE(status)) {
4718         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4719         return;
4720     }
4721
4722     /* get the reordering */
4723     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4724     if (status != U_BUFFER_OVERFLOW_ERROR) {
4725         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4726         return;
4727     }
4728     status = U_ZERO_ERROR;
4729     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4730         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4731         return;
4732     }
4733     /* now let's really get it */
4734     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4735     if (U_FAILURE(status)) {
4736         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4737         return;
4738     }
4739     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4740         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4741         return;
4742     }
4743     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4744         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4745             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4746             return;
4747         }
4748     }
4749     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4750     if (collResult != UCOL_LESS) {
4751         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4752         return;
4753     }
4754
4755     /* clear the reordering */
4756     ucol_setReorderCodes(myCollation, NULL, 0, &status);
4757     if (U_FAILURE(status)) {
4758         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4759         return;
4760     }
4761
4762     /* get the reordering again */
4763     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4764     if (retrievedReorderCodesLength != 0) {
4765         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4766         return;
4767     }
4768
4769     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4770     if (collResult != UCOL_GREATER) {
4771         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4772         return;
4773     }
4774
4775     /* clear the reordering using [NONE] */
4776     ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);
4777     if (U_FAILURE(status)) {
4778         log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status));
4779         return;
4780     }
4781
4782     /* get the reordering again */
4783     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4784     if (retrievedReorderCodesLength != 0) {
4785         log_err_status(status,
4786                        "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
4787                        retrievedReorderCodesLength);
4788         return;
4789     }
4790
4791     /* test for error condition on duplicate reorder codes */
4792     ucol_setReorderCodes(myCollation, duplicateReorderCodes, UPRV_LENGTHOF(duplicateReorderCodes), &status);
4793     if (!U_FAILURE(status)) {
4794         log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
4795         return;
4796     }
4797
4798     status = U_ZERO_ERROR;
4799     /* test for reorder codes after a reset code */
4800     ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, UPRV_LENGTHOF(reorderCodesStartingWithDefault), &status);
4801     if (!U_FAILURE(status)) {
4802         log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
4803         return;
4804     }
4805
4806     ucol_close(myCollation);
4807 }
4808
4809 /*
4810  * Test reordering API.
4811  */
4812 static void TestReorderingAPIWithRuleCreatedCollator(void)
4813 {
4814     UErrorCode status = U_ZERO_ERROR;
4815     UCollator  *myCollation;
4816     UChar rules[90];
4817     static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
4818     static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4819     static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
4820     UCollationResult collResult;
4821     int32_t retrievedReorderCodesLength;
4822     int32_t retrievedReorderCodes[10];
4823     static const UChar greekString[] = { 0x03b1 };
4824     static const UChar punctuationString[] = { 0x203e };
4825     static const UChar hanString[] = { 0x65E5, 0x672C };
4826     int loopIndex;
4827
4828     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4829
4830     /* build collator from rules */
4831     u_uastrcpy(rules, "[reorder Hani Grek]");
4832     myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
4833     if(U_FAILURE(status)) {
4834         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4835         return;
4836     }
4837
4838     /* get the reordering */
4839     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4840     if (U_FAILURE(status)) {
4841         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4842         return;
4843     }
4844     if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4845         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4846         return;
4847     }
4848     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4849         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4850             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4851             return;
4852         }
4853     }
4854     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), hanString, UPRV_LENGTHOF(hanString));
4855     if (collResult != UCOL_GREATER) {
4856         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4857         return;
4858     }
4859
4860     /* set the reordering */
4861     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4862     if (U_FAILURE(status)) {
4863         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4864         return;
4865     }
4866
4867     /* get the reordering */
4868     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4869     if (status != U_BUFFER_OVERFLOW_ERROR) {
4870         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4871         return;
4872     }
4873     status = U_ZERO_ERROR;
4874     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4875         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4876         return;
4877     }
4878     /* now let's really get it */
4879     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4880     if (U_FAILURE(status)) {
4881         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4882         return;
4883     }
4884     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4885         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4886         return;
4887     }
4888     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4889         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4890             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4891             return;
4892         }
4893     }
4894     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4895     if (collResult != UCOL_LESS) {
4896         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4897         return;
4898     }
4899
4900     /* clear the reordering */
4901     ucol_setReorderCodes(myCollation, NULL, 0, &status);
4902     if (U_FAILURE(status)) {
4903         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4904         return;
4905     }
4906
4907     /* get the reordering again */
4908     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4909     if (retrievedReorderCodesLength != 0) {
4910         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4911         return;
4912     }
4913
4914     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4915     if (collResult != UCOL_GREATER) {
4916         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4917         return;
4918     }
4919
4920     /* reset the reordering */
4921     ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
4922     if (U_FAILURE(status)) {
4923         log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
4924         return;
4925     }
4926     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4927     if (U_FAILURE(status)) {
4928         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4929         return;
4930     }
4931     if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4932         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4933         return;
4934     }
4935     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4936         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4937             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4938             return;
4939         }
4940     }
4941
4942     ucol_close(myCollation);
4943 }
4944
4945 static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) {
4946     int32_t i;
4947     for (i = 0; i < length; ++i) {
4948         if (expectedScript == scripts[i]) { return TRUE; }
4949     }
4950     return FALSE;
4951 }
4952
4953 static void TestEquivalentReorderingScripts(void) {
4954     // Beginning with ICU 55, collation reordering moves single scripts
4955     // rather than groups of scripts,
4956     // except where scripts share a range and sort primary-equal.
4957     UErrorCode status = U_ZERO_ERROR;
4958     int32_t equivalentScripts[100];
4959     int32_t length;
4960     int i;
4961     int32_t prevScript;
4962     /* These scripts are expected to be equivalent. */
4963     static const int32_t expectedScripts[] = {
4964         USCRIPT_HIRAGANA,
4965         USCRIPT_KATAKANA,
4966         USCRIPT_KATAKANA_OR_HIRAGANA
4967     };
4968
4969     equivalentScripts[0] = 0;
4970     length = ucol_getEquivalentReorderCodes(
4971             USCRIPT_GOTHIC, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4972     if (U_FAILURE(status)) {
4973         log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4974         return;
4975     }
4976     if (length != 1 || equivalentScripts[0] != USCRIPT_GOTHIC) {
4977         log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "
4978                 "length expected 1, was = %d; expected [%d] was [%d]\n",
4979                 length, USCRIPT_GOTHIC, equivalentScripts[0]);
4980     }
4981
4982     length = ucol_getEquivalentReorderCodes(
4983             USCRIPT_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4984     if (U_FAILURE(status)) {
4985         log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4986         return;
4987     }
4988     if (length != UPRV_LENGTHOF(expectedScripts)) {
4989         log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "
4990                 "expected %d, was = %d\n",
4991                 UPRV_LENGTHOF(expectedScripts), length);
4992     }
4993     prevScript = -1;
4994     for (i = 0; i < length; ++i) {
4995         int32_t script = equivalentScripts[i];
4996         if (script <= prevScript) {
4997             log_err("ERROR/Hiragana: equivalent scripts out of order at index %d\n", i);
4998         }
4999         prevScript = script;
5000     }
5001     for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
5002         if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5003             log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",
5004                     expectedScripts[i]);
5005         }
5006     }
5007
5008     length = ucol_getEquivalentReorderCodes(
5009             USCRIPT_KATAKANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5010     if (U_FAILURE(status)) {
5011         log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
5012         return;
5013     }
5014     if (length != UPRV_LENGTHOF(expectedScripts)) {
5015         log_err("ERROR/Katakana: retrieved equivalent script length wrong: "
5016                 "expected %d, was = %d\n",
5017                 UPRV_LENGTHOF(expectedScripts), length);
5018     }
5019     for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
5020         if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5021             log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",
5022                     expectedScripts[i]);
5023         }
5024     }
5025
5026     length = ucol_getEquivalentReorderCodes(
5027             USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5028     if (U_FAILURE(status) || length != UPRV_LENGTHOF(expectedScripts)) {
5029         log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "
5030                 "expected %d, was = %d\n",
5031                 UPRV_LENGTHOF(expectedScripts), length);
5032     }
5033
5034     length = ucol_getEquivalentReorderCodes(
5035             USCRIPT_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5036     if (U_FAILURE(status) || length != 3) {
5037         log_err("ERROR/Hani: retrieved equivalent script length wrong: "
5038                 "expected 3, was = %d\n", length);
5039     }
5040     length = ucol_getEquivalentReorderCodes(
5041             USCRIPT_SIMPLIFIED_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5042     if (U_FAILURE(status) || length != 3) {
5043         log_err("ERROR/Hans: retrieved equivalent script length wrong: "
5044                 "expected 3, was = %d\n", length);
5045     }
5046     length = ucol_getEquivalentReorderCodes(
5047             USCRIPT_TRADITIONAL_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5048     if (U_FAILURE(status) || length != 3) {
5049         log_err("ERROR/Hant: retrieved equivalent script length wrong: "
5050                 "expected 3, was = %d\n", length);
5051     }
5052
5053     length = ucol_getEquivalentReorderCodes(
5054             USCRIPT_MEROITIC_CURSIVE, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5055     if (U_FAILURE(status) || length != 2) {
5056         log_err("ERROR/Merc: retrieved equivalent script length wrong: "
5057                 "expected 2, was = %d\n", length);
5058     }
5059     length = ucol_getEquivalentReorderCodes(
5060             USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5061     if (U_FAILURE(status) || length != 2) {
5062         log_err("ERROR/Mero: retrieved equivalent script length wrong: "
5063                 "expected 2, was = %d\n", length);
5064     }
5065 }
5066
5067 static void TestReorderingAcrossCloning(void)
5068 {
5069     UErrorCode status = U_ZERO_ERROR;
5070     UCollator  *myCollation;
5071     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5072     UCollator *clonedCollation;
5073     int32_t retrievedReorderCodesLength;
5074     int32_t retrievedReorderCodes[10];
5075     int loopIndex;
5076
5077     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5078
5079     /* build collator tertiary */
5080     myCollation = ucol_open("", &status);
5081     ucol_setStrength(myCollation, UCOL_TERTIARY);
5082     if(U_FAILURE(status)) {
5083         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5084         return;
5085     }
5086
5087     /* set the reorderding */
5088     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
5089     if (U_FAILURE(status)) {
5090         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5091         return;
5092     }
5093
5094     /* clone the collator */
5095     clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
5096     if (U_FAILURE(status)) {
5097         log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
5098         return;
5099     }
5100
5101     /* get the reordering */
5102     retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
5103     if (U_FAILURE(status)) {
5104         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
5105         return;
5106     }
5107     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
5108         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
5109         return;
5110     }
5111     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
5112         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
5113             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
5114             return;
5115         }
5116     }
5117
5118     /*uprv_free(buffer);*/
5119     ucol_close(myCollation);
5120     ucol_close(clonedCollation);
5121 }
5122
5123 /*
5124  * Utility function to test one collation reordering test case set.
5125  * @param testcases Array of test cases.
5126  * @param n_testcases Size of the array testcases.
5127  * @param reorderTokens Array of reordering codes.
5128  * @param reorderTokensLen Size of the array reorderTokens.
5129  */
5130 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
5131 {
5132     uint32_t testCaseNum;
5133     UErrorCode status = U_ZERO_ERROR;
5134     UCollator  *myCollation;
5135
5136     myCollation = ucol_open("", &status);
5137     if (U_FAILURE(status)) {
5138         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5139         return;
5140     }
5141     ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
5142     if(U_FAILURE(status)) {
5143         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5144         return;
5145     }
5146
5147     for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
5148         doTest(myCollation,
5149             testCases[testCaseNum].source,
5150             testCases[testCaseNum].target,
5151             testCases[testCaseNum].result
5152         );
5153     }
5154     ucol_close(myCollation);
5155 }
5156
5157 static void TestGreekFirstReorder(void)
5158 {
5159     const char* strRules[] = {
5160         "[reorder Grek]"
5161     };
5162
5163     const int32_t apiRules[] = {
5164         USCRIPT_GREEK
5165     };
5166
5167     const static OneTestCase privateUseCharacterStrings[] = {
5168         { {0x0391}, {0x0391}, UCOL_EQUAL },
5169         { {0x0041}, {0x0391}, UCOL_GREATER },
5170         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
5171         { {0x0060}, {0x0391}, UCOL_LESS },
5172         { {0x0391}, {0xe2dc}, UCOL_LESS },
5173         { {0x0391}, {0x0060}, UCOL_GREATER },
5174     };
5175
5176     /* Test rules creation */
5177     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5178
5179     /* Test collation reordering API */
5180     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5181 }
5182
5183 static void TestGreekLastReorder(void)
5184 {
5185     const char* strRules[] = {
5186         "[reorder Zzzz Grek]"
5187     };
5188
5189     const int32_t apiRules[] = {
5190         USCRIPT_UNKNOWN, USCRIPT_GREEK
5191     };
5192
5193     const static OneTestCase privateUseCharacterStrings[] = {
5194         { {0x0391}, {0x0391}, UCOL_EQUAL },
5195         { {0x0041}, {0x0391}, UCOL_LESS },
5196         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
5197         { {0x0060}, {0x0391}, UCOL_LESS },
5198         { {0x0391}, {0xe2dc}, UCOL_GREATER },
5199     };
5200
5201     /* Test rules creation */
5202     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5203
5204     /* Test collation reordering API */
5205     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5206 }
5207
5208 static void TestNonScriptReorder(void)
5209 {
5210     const char* strRules[] = {
5211         "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
5212     };
5213
5214     const int32_t apiRules[] = {
5215         USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
5216         UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
5217         UCOL_REORDER_CODE_CURRENCY
5218     };
5219
5220     const static OneTestCase privateUseCharacterStrings[] = {
5221         { {0x0391}, {0x0041}, UCOL_LESS },
5222         { {0x0041}, {0x0391}, UCOL_GREATER },
5223         { {0x0060}, {0x0041}, UCOL_LESS },
5224         { {0x0060}, {0x0391}, UCOL_GREATER },
5225         { {0x0024}, {0x0041}, UCOL_GREATER },
5226     };
5227
5228     /* Test rules creation */
5229     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5230
5231     /* Test collation reordering API */
5232     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5233 }
5234
5235 static void TestHaniReorder(void)
5236 {
5237     const char* strRules[] = {
5238         "[reorder Hani]"
5239     };
5240     const int32_t apiRules[] = {
5241         USCRIPT_HAN
5242     };
5243
5244     const static OneTestCase privateUseCharacterStrings[] = {
5245         { {0x4e00}, {0x0041}, UCOL_LESS },
5246         { {0x4e00}, {0x0060}, UCOL_GREATER },
5247         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5248         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5249         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5250         { {0xfa27}, {0x0041}, UCOL_LESS },
5251         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5252     };
5253
5254     /* Test rules creation */
5255     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5256
5257     /* Test collation reordering API */
5258     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5259 }
5260
5261 static void TestHaniReorderWithOtherRules(void)
5262 {
5263     const char* strRules[] = {
5264         "[reorder Hani] &b<a"
5265     };
5266     /*const int32_t apiRules[] = {
5267         USCRIPT_HAN
5268     };*/
5269
5270     const static OneTestCase privateUseCharacterStrings[] = {
5271         { {0x4e00}, {0x0041}, UCOL_LESS },
5272         { {0x4e00}, {0x0060}, UCOL_GREATER },
5273         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5274         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5275         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5276         { {0xfa27}, {0x0041}, UCOL_LESS },
5277         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5278         { {0x0062}, {0x0061}, UCOL_LESS },
5279     };
5280
5281     /* Test rules creation */
5282     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5283 }
5284
5285 static void TestMultipleReorder(void)
5286 {
5287     const char* strRules[] = {
5288         "[reorder Grek Zzzz DIGIT Latn Hani]"
5289     };
5290
5291     const int32_t apiRules[] = {
5292         USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
5293     };
5294
5295     const static OneTestCase collationTestCases[] = {
5296         { {0x0391}, {0x0041}, UCOL_LESS},
5297         { {0x0031}, {0x0041}, UCOL_LESS},
5298         { {0x0041}, {0x4e00}, UCOL_LESS},
5299     };
5300
5301     /* Test rules creation */
5302     doTestOneTestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), strRules, UPRV_LENGTHOF(strRules));
5303
5304     /* Test collation reordering API */
5305     doTestOneReorderingAPITestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), apiRules, UPRV_LENGTHOF(apiRules));
5306 }
5307
5308 /*
5309  * Test that covers issue reported in ticket 8814
5310  */
5311 static void TestReorderWithNumericCollation(void)
5312 {
5313     UErrorCode status = U_ZERO_ERROR;
5314     UCollator  *myCollation;
5315     UCollator  *myReorderCollation;
5316     int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
5317     /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
5318     UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
5319     UChar fortyS[] = { 0x0053 };
5320     UChar fortyThreeP[] = { 0x0050 };
5321     uint8_t fortyS_sortKey[128];
5322     int32_t fortyS_sortKey_Length;
5323     uint8_t fortyThreeP_sortKey[128];
5324     int32_t fortyThreeP_sortKey_Length;
5325     uint8_t fortyS_sortKey_reorder[128];
5326     int32_t fortyS_sortKey_reorder_Length;
5327     uint8_t fortyThreeP_sortKey_reorder[128];
5328     int32_t fortyThreeP_sortKey_reorder_Length;
5329     UCollationResult collResult;
5330     UCollationResult collResultReorder;
5331
5332     log_verbose("Testing reordering with and without numeric collation\n");
5333
5334     /* build collator tertiary with numeric */
5335     myCollation = ucol_open("", &status);
5336     /*
5337     ucol_setStrength(myCollation, UCOL_TERTIARY);
5338     */
5339     ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5340     if(U_FAILURE(status)) {
5341         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5342         return;
5343     }
5344
5345     /* build collator tertiary with numeric and reordering */
5346     myReorderCollation = ucol_open("", &status);
5347     /*
5348     ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
5349     */
5350     ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5351     ucol_setReorderCodes(myReorderCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
5352     if(U_FAILURE(status)) {
5353         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5354         return;
5355     }
5356
5357     fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey, 128);
5358     fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey, 128);
5359     fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey_reorder, 128);
5360     fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
5361
5362     if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
5363         log_err_status(status, "ERROR: couldn't generate sort keys\n");
5364         return;
5365     }
5366     collResult = ucol_strcoll(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5367     collResultReorder = ucol_strcoll(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5368     /*
5369     fprintf(stderr, "\tcollResult = %x\n", collResult);
5370     fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
5371     fprintf(stderr, "\nfortyS\n");
5372     for (i = 0; i < fortyS_sortKey_Length; i++) {
5373         fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
5374     }
5375     fprintf(stderr, "\nfortyThreeP\n");
5376     for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
5377         fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
5378     }
5379     */
5380     if (collResult != collResultReorder) {
5381         log_err_status(status, "ERROR: collation results should have been the same.\n");
5382         return;
5383     }
5384
5385     ucol_close(myCollation);
5386     ucol_close(myReorderCollation);
5387 }
5388
/*
 * Compares two zero-terminated byte sequences.
 * Returns 0 when both hold the same bytes up to and including the
 * terminating zero, -1 when the first differing byte of a is smaller than
 * the one in b, and 1 otherwise. Bytes are compared as unsigned values.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
  for (;;) {
    const uint8_t lhs = *a++;
    const uint8_t rhs = *b++;

    if (lhs != rhs) {
      return (lhs < rhs) ? -1 : 1;
    }
    if (lhs == 0) {
      /* both sequences ended at the same position */
      return 0;
    }
  }
}
5398
5399 static void TestImportRulesDeWithPhonebook(void)
5400 {
5401   const char* normalRules[] = {
5402     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
5403     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
5404     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
5405   };
5406   const OneTestCase normalTests[] = {
5407     { {0x00e6}, {0x00c6}, UCOL_LESS},
5408     { {0x00fc}, {0x00dc}, UCOL_GREATER},
5409   };
5410
5411   const char* importRules[] = {
5412     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
5413     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5414     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5415   };
5416   const OneTestCase importTests[] = {
5417     { {0x00e6}, {0x00c6}, UCOL_LESS},
5418     { {0x00fc}, {0x00dc}, UCOL_LESS},
5419   };
5420
5421   doTestOneTestCase(normalTests, UPRV_LENGTHOF(normalTests), normalRules, UPRV_LENGTHOF(normalRules));
5422   doTestOneTestCase(importTests, UPRV_LENGTHOF(importTests), importRules, UPRV_LENGTHOF(importRules));
5423 }
5424
#if 0
/*
 * Disabled test: compares a dummy tailoring, the imported root "eor" rules,
 * the imported Finnish standard rules, and the two imports combined. The
 * run against "[import fi-u-co-eor]" is still commented out; see the TODO
 * for ICU ticket #8962 at the end of the function.
 */
static void TestImportRulesFiWithEor(void)
{
  /* DUCET. */
  const char* ducetRules[] = {
    "&a<b",                                    /* Dummy rule. */
  };
  const OneTestCase ducetCases[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* European Ordering rules: ignore currency characters. */
  const char* europeanRules[] = {
    "[import root-u-co-eor]",
  };
  const OneTestCase europeanCases[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  /* Finnish standard rules on their own. */
  const char* finnishRules[] = {
    "[import fi-u-co-standard]",
  };
  const OneTestCase finnishCases[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* Both European Ordering Rules and Fi Standard Rules. */
  const char* europeanPlusFinnishRules[] = {
    "[import root-u-co-eor][import fi-u-co-standard]",
  };

  /* This is essentially the same as the one before once fi.txt is updated with import. */
  const char* finnishEorRules[] = {
    "[import fi-u-co-eor]",
  };

  /* Expectations for the combined rules (and for fi-u-co-eor once enabled). */
  const OneTestCase finnishEorCases[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  doTestOneTestCase(ducetCases, UPRV_LENGTHOF(ducetCases),
                    ducetRules, UPRV_LENGTHOF(ducetRules));
  doTestOneTestCase(europeanCases, UPRV_LENGTHOF(europeanCases),
                    europeanRules, UPRV_LENGTHOF(europeanRules));
  doTestOneTestCase(finnishCases, UPRV_LENGTHOF(finnishCases),
                    finnishRules, UPRV_LENGTHOF(finnishRules));
  doTestOneTestCase(finnishEorCases, UPRV_LENGTHOF(finnishEorCases),
                    europeanPlusFinnishRules, UPRV_LENGTHOF(europeanPlusFinnishRules));

  log_knownIssue("8962", NULL);
  /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
        eor{
            Sequence{
                "[import root-u-co-eor][import fi-u-co-standard]"
            }
            Version{"21.0"}
        }
  */
  /* doTestOneTestCase(finnishEorCases, UPRV_LENGTHOF(finnishEorCases), finnishEorRules, UPRV_LENGTHOF(finnishEorRules)); */

}
#endif
5494
#if 0
/*
 * This test case tests inclusion with the unihan rules. It cannot be enabled
 * unless the resource files are built with the -includeUnihanColl option.
 * TODO: Uncomment this function and make it work when unihan rules are built by default.
 */
static void TestImportRulesCJKWithUnihan(void)
{
  /* DUCET. */
  const char* ducetRules[] = {
    "&a<b",                                    /* Dummy rule. */
  };
  const OneTestCase ducetCases[] = {
    { {0x3402}, {0x4e1e}, UCOL_GREATER},
  };

  /* Unihan rules imported from the Korean tailoring. */
  const char* importedUnihanRules[] = {
    "[import ko-u-co-unihan]",
  };
  /* The expected order of the same pair is reversed with the import. */
  const OneTestCase importedUnihanCases[] = {
    { {0x3402}, {0x4e1e}, UCOL_LESS},
  };

  doTestOneTestCase(ducetCases, UPRV_LENGTHOF(ducetCases),
                    ducetRules, UPRV_LENGTHOF(ducetRules));
  doTestOneTestCase(importedUnihanCases, UPRV_LENGTHOF(importedUnihanCases),
                    importedUnihanRules, UPRV_LENGTHOF(importedUnihanRules));

}
#endif
5526
5527 static void TestImport(void)
5528 {
5529     UCollator* vicoll;
5530     UCollator* escoll;
5531     UCollator* viescoll;
5532     UCollator* importviescoll;
5533     UParseError error;
5534     UErrorCode status = U_ZERO_ERROR;
5535     UChar* virules;
5536     int32_t viruleslength;
5537     UChar* esrules;
5538     int32_t esruleslength;
5539     UChar* viesrules;
5540     int32_t viesruleslength;
5541     char srules[500] = "[import vi][import es]";
5542     UChar rules[500];
5543     uint32_t length = 0;
5544     int32_t itemCount;
5545     int32_t i, k;
5546     UChar32 start;
5547     UChar32 end;
5548     UChar str[500];
5549     int32_t strLength;
5550
5551     uint8_t sk1[500];
5552     uint8_t sk2[500];
5553
5554     UBool b;
5555     USet* tailoredSet;
5556     USet* importTailoredSet;
5557
5558
5559     vicoll = ucol_open("vi", &status);
5560     if(U_FAILURE(status)){
5561         log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
5562         return;
5563     }
5564
5565     virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
5566     if(viruleslength == 0) {
5567         log_data_err("missing vi tailoring rule string\n");
5568         ucol_close(vicoll);
5569         return;
5570     }
5571     escoll = ucol_open("es", &status);
5572     esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
5573     viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
5574     viesrules[0] = 0;
5575     u_strcat(viesrules, virules);
5576     u_strcat(viesrules, esrules);
5577     viesruleslength = viruleslength + esruleslength;
5578     viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5579
5580     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5581     length = u_unescape(srules, rules, 500);
5582     importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5583     if(U_FAILURE(status)){
5584         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5585         return;
5586     }
5587
5588     tailoredSet = ucol_getTailoredSet(viescoll, &status);
5589     importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
5590
5591     if(!uset_equals(tailoredSet, importTailoredSet)){
5592         log_err("Tailored sets not equal");
5593     }
5594
5595     uset_close(importTailoredSet);
5596
5597     itemCount = uset_getItemCount(tailoredSet);
5598
5599     for( i = 0; i < itemCount; i++){
5600         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5601         if(strLength < 2){
5602             for (; start <= end; start++){
5603                 k = 0;
5604                 U16_APPEND(str, k, 500, start, b);
5605                 (void)b;    /* Suppress set but not used warning. */
5606                 ucol_getSortKey(viescoll, str, 1, sk1, 500);
5607                 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
5608                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5609                     log_err("Sort key for %s not equal\n", str);
5610                     break;
5611                 }
5612             }
5613         }else{
5614             ucol_getSortKey(viescoll, str, strLength, sk1, 500);
5615             ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
5616             if(compare_uint8_t_arrays(sk1, sk2) != 0){
5617                 log_err("ZZSort key for %s not equal\n", str);
5618                 break;
5619             }
5620
5621         }
5622     }
5623
5624     uset_close(tailoredSet);
5625
5626     uprv_free(viesrules);
5627
5628     ucol_close(vicoll);
5629     ucol_close(escoll);
5630     ucol_close(viescoll);
5631     ucol_close(importviescoll);
5632 }
5633
5634 static void TestImportWithType(void)
5635 {
5636     UCollator* vicoll;
5637     UCollator* decoll;
5638     UCollator* videcoll;
5639     UCollator* importvidecoll;
5640     UParseError error;
5641     UErrorCode status = U_ZERO_ERROR;
5642     const UChar* virules;
5643     int32_t viruleslength;
5644     const UChar* derules;
5645     int32_t deruleslength;
5646     UChar* viderules;
5647     int32_t videruleslength;
5648     const char srules[500] = "[import vi][import de-u-co-phonebk]";
5649     UChar rules[500];
5650     uint32_t length = 0;
5651     int32_t itemCount;
5652     int32_t i, k;
5653     UChar32 start;
5654     UChar32 end;
5655     UChar str[500];
5656     int32_t strLength;
5657
5658     uint8_t sk1[500];
5659     uint8_t sk2[500];
5660
5661     USet* tailoredSet;
5662     USet* importTailoredSet;
5663
5664     vicoll = ucol_open("vi", &status);
5665     if(U_FAILURE(status)){
5666         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5667         return;
5668     }
5669     virules = ucol_getRules(vicoll, &viruleslength);
5670     if(viruleslength == 0) {
5671         log_data_err("missing vi tailoring rule string\n");
5672         ucol_close(vicoll);
5673         return;
5674     }
5675     /* decoll = ucol_open("de@collation=phonebook", &status); */
5676     decoll = ucol_open("de-u-co-phonebk", &status);
5677     if(U_FAILURE(status)){
5678         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5679         return;
5680     }
5681
5682
5683     derules = ucol_getRules(decoll, &deruleslength);
5684     viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
5685     viderules[0] = 0;
5686     u_strcat(viderules, virules);
5687     u_strcat(viderules, derules);
5688     videruleslength = viruleslength + deruleslength;
5689     videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5690
5691     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5692     length = u_unescape(srules, rules, 500);
5693     importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5694     if(U_FAILURE(status)){
5695         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5696         return;
5697     }
5698
5699     tailoredSet = ucol_getTailoredSet(videcoll, &status);
5700     importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
5701
5702     if(!uset_equals(tailoredSet, importTailoredSet)){
5703         log_err("Tailored sets not equal");
5704     }
5705
5706     uset_close(importTailoredSet);
5707
5708     itemCount = uset_getItemCount(tailoredSet);
5709
5710     for( i = 0; i < itemCount; i++){
5711         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5712         if(strLength < 2){
5713             for (; start <= end; start++){
5714                 k = 0;
5715                 U16_APPEND_UNSAFE(str, k, start);
5716                 ucol_getSortKey(videcoll, str, 1, sk1, 500);
5717                 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
5718                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5719                     log_err("Sort key for %s not equal\n", str);
5720                     break;
5721                 }
5722             }
5723         }else{
5724             ucol_getSortKey(videcoll, str, strLength, sk1, 500);
5725             ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
5726             if(compare_uint8_t_arrays(sk1, sk2) != 0){
5727                 log_err("Sort key for %s not equal\n", str);
5728                 break;
5729             }
5730
5731         }
5732     }
5733
5734     uset_close(tailoredSet);
5735
5736     uprv_free(viderules);
5737
5738     ucol_close(videcoll);
5739     ucol_close(importvidecoll);
5740     ucol_close(vicoll);
5741     ucol_close(decoll);
5742 }
5743
5744 /* Test string 1, mostly uppercase ASCII text: 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
5745 static const UChar longUpperStr1[]= { /* 155 chars, not NUL-terminated */
5746     0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
5747     0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
5748     0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
5749     0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
5750     0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
5751     0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
5752     0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
5753     0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
5754     0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
5755     0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
5756 };
5757
5758 /* Test string 2: 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels (precomposed letters such as U+00C1), repeated 5 times */
5759 static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements; not NUL-terminated */
5760     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5761     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5762     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5763     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5764     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
5765 };
5766
5767 /* Test string 3: 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
5768 static const UChar longUpperStr3[]= { /* 324 chars, not NUL-terminated */
5769     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5770     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5771     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5772     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5773     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5774     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5775     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5776     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5777     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5778     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5779     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5780     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
5781 };
5782 /* One table entry: a test string given as pointer plus explicit length. */
5783 typedef struct {
5784     const UChar * longUpperStrPtr; /* first UChar of the string; NULL marks the end of the table */
5785     int32_t       longUpperStrLen; /* number of UChars in the string */
5786 } LongUpperStrItem;
5787
5788 /* String pointers must be in reverse collation order of the corresponding strings (each string sorts before the one listed ahead of it); the NULL entry terminates the table */
5789 static const LongUpperStrItem longUpperStrItems[] = {
5790     { longUpperStr1, UPRV_LENGTHOF(longUpperStr1) },
5791     { longUpperStr2, UPRV_LENGTHOF(longUpperStr2) },
5792     { longUpperStr3, UPRV_LENGTHOF(longUpperStr3) },
5793     { NULL,          0                           }
5794 };
5795
5796 enum { kCollKeyLenMax = 850 }; /* capacity in bytes of the sort key buffers used with these strings; may change with collation changes */
5797
5798 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
5799 static void TestCaseLevelBufferOverflow(void)
5800 {
5801     UErrorCode status = U_ZERO_ERROR;
5802     UCollator * ucol = ucol_open("root", &status);
5803     if ( U_SUCCESS(status) ) {
5804         ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
5805         if ( U_SUCCESS(status) ) {
5806             const LongUpperStrItem * itemPtr;
5807             uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
5808             for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
5809                 int32_t sortKeyLen;
5810                 if (itemPtr > longUpperStrItems) {
5811                     uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
5812                 }
5813                 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
5814                 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
5815                     log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
5816                     break;
5817                 }
5818                 if ( itemPtr > longUpperStrItems ) {
5819                     int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
5820                     if (compareResult >= 0) {
5821                         log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
5822                     }
5823                 }
5824             }
5825         } else {
5826             log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
5827         }
5828         ucol_close(ucol);
5829     } else {
5830         log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
5831     }
5832 }
5833
5834 /* Test for #10595 */
5835 static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
5836 #define KEY_PART_SIZE 16
5837
5838 static void TestNextSortKeyPartJaIdentical(void)
5839 {
5840     UErrorCode status = U_ZERO_ERROR;
5841     UCollator *coll;
5842     uint8_t keyPart[KEY_PART_SIZE];
5843     UCharIterator iter;
5844     uint32_t state[2] = {0, 0};
5845     int32_t keyPartLen;
5846
5847     coll = ucol_open("ja", &status);
5848     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
5849     if (U_FAILURE(status)) {
5850         log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
5851         return;
5852     }
5853
5854     uiter_setString(&iter, testJapaneseName, 5);
5855     keyPartLen = KEY_PART_SIZE;
5856     while (keyPartLen == KEY_PART_SIZE) {
5857         keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
5858         if (U_FAILURE(status)) {
5859             log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
5860             break;
5861         }
5862     }
5863
5864     ucol_close(coll);
5865 }
5866
5867 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
5868 /* Passes each test function below, together with its path "tscoll/cmsccoll/<function name>", to addTest() for the tree at root. Entries that are commented out are not passed on. */
5869 void addMiscCollTest(TestNode** root)
5870 {
5871     TEST(TestRuleOptions);
5872     TEST(TestBeforePrefixFailure);
5873     TEST(TestContractionClosure);
5874     TEST(TestPrefixCompose);
5875     TEST(TestStrCollIdenticalPrefix);
5876     TEST(TestPrefix);
5877     TEST(TestNewJapanese);
5878     /*TEST(TestLimitations);*/
5879     TEST(TestNonChars);
5880     TEST(TestExtremeCompression);
5881     TEST(TestSurrogates);
5882     TEST(TestVariableTopSetting);
5883     TEST(TestMaxVariable);
5884     TEST(TestBocsuCoverage);
5885     TEST(TestCyrillicTailoring);
5886     TEST(TestCase);
5887     TEST(IncompleteCntTest);
5888     TEST(BlackBirdTest);
5889     TEST(FunkyATest);
5890     TEST(BillFairmanTest);
5891     TEST(TestChMove);
5892     TEST(TestImplicitTailoring);
5893     TEST(TestFCDProblem);
5894     TEST(TestEmptyRule);
5895     /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
5896     TEST(TestJ815);
5897     /*TEST(TestJ831);*/ /* we changed lv locale */
5898     TEST(TestBefore);
5899     TEST(TestHangulTailoring);
5900     TEST(TestUCARules);
5901     TEST(TestIncrementalNormalize);
5902     TEST(TestComposeDecompose);
5903     TEST(TestCompressOverlap);
5904     TEST(TestContraction);
5905     TEST(TestExpansion);
5906     /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
5907     /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
5908     TEST(TestOptimize);
5909     TEST(TestSuppressContractions);
5910     TEST(Alexis2);
5911     TEST(TestHebrewUCA);
5912     TEST(TestPartialSortKeyTermination);
5913     TEST(TestSettings);
5914     TEST(TestEquals);
5915     TEST(TestJ2726);
5916     TEST(NullRule);
5917     TEST(TestNumericCollation);
5918     TEST(TestTibetanConformance);
5919     TEST(TestPinyinProblem);
5920     TEST(TestSeparateTrees);
5921     TEST(TestBeforePinyin);
5922     TEST(TestBeforeTightening);
5923     /*TEST(TestMoreBefore);*/
5924     TEST(TestTailorNULL);
5925     TEST(TestUpperFirstQuaternary);
5926     TEST(TestJ4960);
5927     TEST(TestJ5223);
5928     TEST(TestJ5232);
5929     TEST(TestJ5367);
5930     TEST(TestHiragana);
5931     TEST(TestSortKeyConsistency);
5932     TEST(TestVI5913);  /* VI, RO tailored rules */
5933     TEST(TestCroatianSortKey);
5934     TEST(TestTailor6179);
5935     TEST(TestUCAPrecontext);
5936     TEST(TestOutOfBuffer5468);
5937     TEST(TestSameStrengthList);
5938
5939     TEST(TestSameStrengthListQuoted);
5940     TEST(TestSameStrengthListSupplemental);
5941     TEST(TestSameStrengthListQwerty);
5942     TEST(TestSameStrengthListQuotedQwerty);
5943     TEST(TestSameStrengthListRanges);
5944     TEST(TestSameStrengthListSupplementalRanges);
5945     TEST(TestSpecialCharacters);
5946     TEST(TestPrivateUseCharacters);
5947     TEST(TestPrivateUseCharactersInList);
5948     TEST(TestPrivateUseCharactersInRange);
5949     TEST(TestInvalidListsAndRanges);
5950     TEST(TestImportRulesDeWithPhonebook);
5951     /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
5952     /* TEST(TestImportRulesCJKWithUnihan); compiled out with #if 0 in this file */
5953     TEST(TestImport);
5954     TEST(TestImportWithType);
5955     /* Reordering tests. */
5956     TEST(TestBeforeRuleWithScriptReordering);
5957     TEST(TestNonLeadBytesDuringCollationReordering);
5958     TEST(TestReorderingAPI);
5959     TEST(TestReorderingAPIWithRuleCreatedCollator);
5960     TEST(TestEquivalentReorderingScripts);
5961     TEST(TestGreekFirstReorder);
5962     TEST(TestGreekLastReorder);
5963     TEST(TestNonScriptReorder);
5964     TEST(TestHaniReorder);
5965     TEST(TestHaniReorderWithOtherRules);
5966     TEST(TestMultipleReorder);
5967     TEST(TestReorderingAcrossCloning);
5968     TEST(TestReorderWithNumericCollation);
5969     /* Tests for tickets #8445 and #10595. */
5970     TEST(TestCaseLevelBufferOverflow);
5971     TEST(TestNextSortKeyPartJaIdentical);
5972 }
5973
5974 #endif /* #if !UCONFIG_NO_COLLATION */