Upstream version 9.38.198.0
[platform/framework/web/crosswalk.git] / src / third_party / icu / source / test / cintltst / cnormtst.c
1 /********************************************************************
2  * COPYRIGHT: 
3  * Copyright (c) 1997-2012, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 /********************************************************************************
7 *
8 * File CNORMTST.C
9 *
10 * Modification History:
11 *        Name                     Description            
12 *     Madhu Katragadda            Ported for C API
13 *     synwee                      added test for quick check
14 *     synwee                      added test for checkFCD
15 *********************************************************************************/
16 /*tests for u_normalization*/
17 #include "unicode/utypes.h"
18 #include "unicode/unorm.h"
19 #include "unicode/utf16.h"
20 #include "cintltst.h"
21
22 #if !UCONFIG_NO_NORMALIZATION
23
24 #include <stdlib.h>
25 #include <time.h>
26 #include "unicode/uchar.h"
27 #include "unicode/ustring.h"
28 #include "unicode/unorm.h"
29 #include "cnormtst.h"
30
/* Element count of a true array (not a pointer), converted to int32_t. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof(*(array))))
32
/* Prototypes of the file-local tests that addNormTest() registers below. */
static void TestAPI(void);
static void TestNormCoverage(void);
static void TestConcatenate(void);
static void TestNextPrevious(void);
static void TestIsNormalized(void);
static void TestFCNFKCClosure(void);
static void TestQuickCheckPerCP(void);
static void TestComposition(void);
static void TestFCD(void);
static void TestGetDecomposition(void);
static void TestGetRawDecomposition(void);
static void TestAppendRestoreMiddle(void);
static void TestGetEasyToUseInstance(void);

67
/*
 * Canonical normalization test data.
 * Each row is { input, expected decomposition, expected composition }.
 * The strings hold backslash-u escape text that is converted with
 * CharsToUChars() before use; the table ends with an all-empty row.
 */
static const char* const canonTests[][3] = {
    /* plain ASCII */
    { "cat", "cat", "cat" },
    /* a-grave followed by ASCII */
    { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" },

    /* D-dot_above */
    { "\\u1e0a", "D\\u0307", "\\u1e0a" },
    /* D + dot_above */
    { "D\\u0307", "D\\u0307", "\\u1e0a" },

    /* D-dot_below + dot_above */
    { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" },
    /* D-dot_above + dot_below */
    { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },
    /* D + dot_above + dot_below */
    { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },

    /* D-cedilla + dot_above + dot_below */
    { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" },
    /* D + dot_above + ogonek + dot_below */
    { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" },

    /* E-macron-grave */
    { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" },
    /* E-macron + grave */
    { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" },
    /* E-grave + macron */
    { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" },

    /* angstrom sign */
    { "\\u212b", "A\\u030a", "\\u00c5" },
    /* A-ring */
    { "\\u00c5", "A\\u030a", "\\u00c5" },

    /* A-diaeresis with plain "ffi" and with the U+FB03 ligature */
    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },

    /* ASCII "IV" and U+2163 */
    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },

    /* ga (Katakana) */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },
    /* ka + ten */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },
    /* hw_ka + hw_ten */
    { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" },
    /* ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" },
    /* hw_ka + ten */
    { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" },
    /* A + grave + grave_below */
    { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" },

    /* empty strings */
    { "", "", "" }
};
104
/*
 * Compatibility normalization test data.
 * Each row is { input, expected decomposition, expected composition };
 * the strings hold backslash-u escape text and the table ends with an
 * all-empty row.
 */
static const char* const compatTests[][3] = {
    /* plain ASCII */
    { "cat", "cat", "cat" },

    /* Alef-Lamed vs. Alef, Lamed */
    { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" },

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    /* ffi ligature -> f + f + i */
    { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" },

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry IV", "Henry IV" },

    /* ga (Katakana) */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },
    /* ka + ten */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },

    /* hw_ka + ten */
    { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" },

    /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later */
    /* hw_ka + hw_ten */
    { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },
    /* ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },

    /* empty strings */
    { "", "", "" }
};
127
/*
 * FCD test data: { input, expected result, unused (NULL) }.
 * Added for testing the below-U+0300 prefix of a NUL-terminated string.
 */
static const char* const fcdTests[][3] = {
    /* D-caron + cedilla */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },
    /* D-caron */
    { "\\u010e",        "\\u010e",         NULL }
};
133
134 void addNormTest(TestNode** root);
135
136 void addNormTest(TestNode** root)
137 {
138     addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI");
139     addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp");
140     addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp");
141     addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompose");
142     addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCompose");
143     addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD");
144     addTest(root, &TestNull, "tsnorm/cnormtst/TestNull");
145     addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck");
146     addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP");
147     addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized");
148     addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD");
149     addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage");
150     addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate");
151     addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious");
152     addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure");
153     addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition");
154     addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition");
155     addTest(root, &TestGetRawDecomposition, "tsnorm/cnormtst/TestGetRawDecomposition");
156     addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMiddle");
157     addTest(root, &TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseInstance");
158 }
159
/* Printable names for log output, looked up as modeStrings[mode]. */
static const char* const modeStrings[]={
    "UNORM_NONE", "UNORM_NFD", "UNORM_NFKD",
    "UNORM_NFC",  "UNORM_NFKC", "UNORM_FCD",
    "UNORM_MODE_COUNT"
};
169
170 static void TestNormCases(UNormalizationMode mode,
171                           const char* const cases[][3], int32_t lengthOfCases) {
172     int32_t x, neededLen, length2;
173     int32_t expIndex= (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1;
174     UChar *source=NULL;
175     UChar result[16];
176     log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);
177     for(x=0; x < lengthOfCases; x++)
178     {
179         UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
180         source=CharsToUChars(cases[x][0]);
181         neededLen= unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status);
182         length2= unorm_normalize(source, -1, mode, 0, NULL, 0, &status2);
183         if(neededLen!=length2) {
184           log_err("ERROR in unorm_normalize(%s)[%d]: "
185                   "preflight length/NUL %d!=%d preflight length/srcLength\n",
186                   modeStrings[mode], (int)x, (int)neededLen, (int)length2);
187         }
188         if(status==U_BUFFER_OVERFLOW_ERROR)
189         {
190             status=U_ZERO_ERROR;
191         }
192         length2=unorm_normalize(source, u_strlen(source), mode, 0, result, LENGTHOF(result), &status); 
193         if(U_FAILURE(status) || neededLen!=length2) {
194             log_data_err("ERROR in unorm_normalize(%s/NUL) at %s:  %s - (Are you missing data?)\n",
195                          modeStrings[mode], austrdup(source), myErrorName(status));
196         } else {
197             assertEqual(result, cases[x][expIndex], x);
198         }
199         length2=unorm_normalize(source, -1, mode, 0, result, LENGTHOF(result), &status); 
200         if(U_FAILURE(status) || neededLen!=length2) {
201             log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s:  %s - (Are you missing data?)\n",
202                          modeStrings[mode], austrdup(source), myErrorName(status));
203         } else {
204             assertEqual(result, cases[x][expIndex], x);
205         }
206         free(source);
207     }
208 }
209
210 void TestDecomp() {
211     TestNormCases(UNORM_NFD, canonTests, LENGTHOF(canonTests));
212 }
213
214 void TestCompatDecomp() {
215     TestNormCases(UNORM_NFKD, compatTests, LENGTHOF(compatTests));
216 }
217
218 void TestCanonDecompCompose() {
219     TestNormCases(UNORM_NFC, canonTests, LENGTHOF(canonTests));
220 }
221
222 void TestCompatDecompCompose() {
223     TestNormCases(UNORM_NFKC, compatTests, LENGTHOF(compatTests));
224 }
225
226 void TestFCD() {
227     TestNormCases(UNORM_FCD, fcdTests, LENGTHOF(fcdTests));
228 }
229
230 static void assertEqual(const UChar* result, const char* expected, int32_t index)
231 {
232     UChar *expectedUni = CharsToUChars(expected);
233     if(u_strcmp(result, expectedUni)!=0){
234         log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
235             austrdup(result) );
236     }
237     free(expectedUni);
238 }
239
240 static void TestNull_check(UChar *src, int32_t srcLen, 
241                     UChar *exp, int32_t expLen,
242                     UNormalizationMode mode,
243                     const char *name)
244 {
245     UErrorCode status = U_ZERO_ERROR;
246     int32_t len, i;
247
248     UChar   result[50];
249
250
251     status = U_ZERO_ERROR;
252
253     for(i=0;i<50;i++)
254       {
255         result[i] = 0xFFFD;
256       }
257
258     len = unorm_normalize(src, srcLen, mode, 0, result, 50, &status); 
259
260     if(U_FAILURE(status)) {
261       log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
262     } else if (len != expLen) {
263       log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len);
264     } 
265
266     {
267       for(i=0;i<len;i++){
268         if(exp[i] != result[i]) {
269           log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
270                   name,
271                   i,
272                   exp[i],
273                   result[i]);
274           return;
275         }
276         log_verbose("     %d: \\u%04X\n", i, result[i]);
277       }
278     }
279     
280     log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
281 }
282
283 void TestNull() 
284 {
285
286     UChar   source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
287     int32_t source_comp_len = 4;
288     UChar   expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
289     int32_t expect_comp_len = 3;
290
291     UChar   source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
292     int32_t source_dcmp_len = 3;
293     UChar   expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
294     int32_t expect_dcmp_len = 5;
295     
296     TestNull_check(source_comp,
297                    source_comp_len,
298                    expect_comp,
299                    expect_comp_len,
300                    UNORM_NFC,
301                    "UNORM_NFC");
302
303     TestNull_check(source_dcmp,
304                    source_dcmp_len,
305                    expect_dcmp,
306                    expect_dcmp_len,
307                    UNORM_NFD,
308                    "UNORM_NFD");
309
310     TestNull_check(source_comp,
311                    source_comp_len,
312                    expect_comp,
313                    expect_comp_len,
314                    UNORM_NFKC,
315                    "UNORM_NFKC");
316
317
318 }
319
320 static void TestQuickCheckResultNO() 
321 {
322   const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C, 
323                          0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
324   const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB, 
325                           0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
326   const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE, 
327                            0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
328   const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE, 
329                            0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
330
331
332   const int SIZE = 10;
333
334   int count = 0;
335   UErrorCode error = U_ZERO_ERROR;
336
337   for (; count < SIZE; count ++)
338   {
339     if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) != 
340                                                               UNORM_NO)
341     {
342       log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
343       return;
344     }
345     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) != 
346                                                               UNORM_NO)
347     {
348       log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
349       return;
350     }
351     if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) != 
352                                                               UNORM_NO)
353     {
354       log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
355       return;
356     }
357     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != 
358                                                               UNORM_NO)
359     {
360       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
361       return;
362     }
363   }
364 }
365
366  
367 static void TestQuickCheckResultYES() 
368 {
369   const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A, 
370                          0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
371   const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500, 
372                          0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
373   const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB, 
374                           0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
375   const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000, 
376                           0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
377
378   const int SIZE = 10;
379   int count = 0;
380   UErrorCode error = U_ZERO_ERROR;
381
382   UChar cp = 0;
383   while (cp < 0xA0)
384   {
385     if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
386     {
387       log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
388       return;
389     }
390     if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) != 
391                                                              UNORM_YES)
392     {
393       log_err("ERROR in NFC quick check at U+%04x\n", cp);
394       return;
395     }
396     if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
397     {
398       log_err("ERROR in NFKD quick check at U+%04x\n", cp);
399       return;
400     }
401     if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) != 
402                                                              UNORM_YES)
403     {
404       log_err("ERROR in NFKC quick check at U+%04x\n", cp);
405       return;
406     }
407     cp ++;
408   }
409
410   for (; count < SIZE; count ++)
411   {
412     if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) != 
413                                                              UNORM_YES)
414     {
415       log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
416       return;
417     }
418     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) 
419                                                           != UNORM_YES)
420     {
421       log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
422       return;
423     }
424     if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) != 
425                                                              UNORM_YES)
426     {
427       log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
428       return;
429     }
430     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != 
431                                                              UNORM_YES)
432     {
433       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
434       return;
435     }
436   }
437 }
438
439 static void TestQuickCheckResultMAYBE() 
440 {
441   const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161, 
442                          0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
443   const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E, 
444                           0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
445
446
447   const int SIZE = 10;
448
449   int count = 0;
450   UErrorCode error = U_ZERO_ERROR;
451
452   /* NFD and NFKD does not have any MAYBE codepoints */
453   for (; count < SIZE; count ++)
454   {
455     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) != 
456                                                            UNORM_MAYBE)
457     {
458       log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
459       return;
460     }
461     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) != 
462                                                            UNORM_MAYBE)
463     {
464       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
465       return;
466     }
467   }
468 }
469
470 static void TestQuickCheckStringResult() 
471 {
472   int count;
473   UChar *d = NULL;
474   UChar *c = NULL;
475   UErrorCode error = U_ZERO_ERROR;
476
477   for (count = 0; count < LENGTHOF(canonTests); count ++)
478   {
479     d = CharsToUChars(canonTests[count][1]);
480     c = CharsToUChars(canonTests[count][2]);
481     if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) != 
482                                                             UNORM_YES)
483     {
484       log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
485       return;
486     }
487
488     if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) == 
489                                                             UNORM_NO)
490     {
491       log_err("ERROR in NFC quick check for string at count %d\n", count);
492       return;
493     }
494
495     free(d);
496     free(c);
497   }
498
499   for (count = 0; count < LENGTHOF(compatTests); count ++)
500   {
501     d = CharsToUChars(compatTests[count][1]);
502     c = CharsToUChars(compatTests[count][2]);
503     if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) != 
504                                                             UNORM_YES)
505     {
506       log_err("ERROR in NFKD quick check for string at count %d\n", count);
507       return;
508     }
509
510     if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) != 
511                                                             UNORM_YES)
512     {
513       log_err("ERROR in NFKC quick check for string at count %d\n", count);
514       return;
515     }
516
517     free(d);
518     free(c);
519   }  
520 }
521
/* Runs the quick check sub-tests: NO, YES and MAYBE code units, then whole strings. */
void TestQuickCheck() 
{
  /* per-code-unit expectations */
  TestQuickCheckResultNO();
  TestQuickCheckResultYES();
  TestQuickCheckResultMAYBE();

  /* expectations for the strings of the normalization tables */
  TestQuickCheckStringResult(); 
}
529
530 /*
531  * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
532  * normalized, and some that are not.
533  * Here we pick some specific cases and test the C API.
534  */
535 static void TestIsNormalized(void) {
536     static const UChar notNFC[][8]={            /* strings that are not in NFC */
537         { 0x62, 0x61, 0x300, 0x63, 0 },         /* 0061 0300 compose */
538         { 0xfb1d, 0 },                          /* excluded from composition */
539         { 0x0627, 0x0653, 0 },                  /* 0627 0653 compose */
540         { 0x3071, 0x306f, 0x309a, 0x3073, 0 }   /* 306F 309A compose */
541     };
542     static const UChar notNFKC[][8]={           /* strings that are not in NFKC */
543         { 0x1100, 0x1161, 0 },                  /* Jamo compose */
544         { 0x1100, 0x314f, 0 },                  /* compatibility Jamo compose */
545         { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 }   /* 1F00 0345 compose */
546     };
547
548     int32_t i;
549     UErrorCode errorCode;
550
551     /* API test */
552
553     /* normal case with length>=0 (length -1 used for special cases below) */
554     errorCode=U_ZERO_ERROR;
555     if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
556         log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
557     }
558
559     /* incoming U_FAILURE */
560     errorCode=U_TRUNCATED_CHAR_FOUND;
561     (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
562     if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
563         log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
564     }
565
566     /* NULL source */
567     errorCode=U_ZERO_ERROR;
568     (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
569     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
570         log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
571     }
572
573     /* bad length */
574     errorCode=U_ZERO_ERROR;
575     (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
576     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
577         log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
578     }
579
580     /* specific cases */
581     for(i=0; i<LENGTHOF(notNFC); ++i) {
582         errorCode=U_ZERO_ERROR;
583         if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
584             log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
585         }
586         errorCode=U_ZERO_ERROR;
587         if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
588             log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
589         }
590     }
591     for(i=0; i<LENGTHOF(notNFKC); ++i) {
592         errorCode=U_ZERO_ERROR;
593         if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
594             log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
595         }
596     }
597 }
598
599 void TestCheckFCD() 
600 {
601   UErrorCode status = U_ZERO_ERROR;
602   static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 
603                          0x0A}; 
604   static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301, 
605                           0x02B9, 0x0314, 0x0315, 0x0316};
606   static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7, 
607                          0x0050, 0x0730, 0x09EE, 0x1E10};
608
609   static const UChar datastr[][5] = 
610   { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
611     {0x0061, 0x030A, 0x00E2, 0x0323, 0},
612     {0x0061, 0x0323, 0x00E2, 0x0323, 0},
613     {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
614   static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
615
616   static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 
617                             0x6a,
618                             0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 
619                             0xea,
620                             0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 
621                             0x0307, 0x0308, 0x0309, 0x030a, 
622                             0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
623                             0x0327, 0x0328, 0x0329, 0x032a,
624                             0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06, 
625                             0x1e07, 0x1e08, 0x1e09, 0x1e0a};
626
627   int count = 0;
628   
629   if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
630     log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
631   if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
632     log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
633   if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
634     log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
635
636   if (U_FAILURE(status))
637     log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
638
639   while (count < 4)
640   {
641     UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
642     if (U_FAILURE(status)) {
643       log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count);
644       break;
645     }
646     else {
647       if (result[count] != fcdresult) {
648         log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count, 
649                  result[count]);
650       }
651     }
652     count ++;
653   }
654
655   /* random checks of long strings */
656   status = U_ZERO_ERROR;
657   srand((unsigned)time( NULL ));
658
659   for (count = 0; count < 50; count ++)
660   {
661     int size = 0;
662     UBool testresult = UNORM_YES;
663     UChar data[20];
664     UChar norm[100];
665     UChar nfd[100];
666     int normsize = 0;
667     int nfdsize = 0;
668     
669     while (size != 19) {
670       data[size] = datachar[(rand() * 50) / RAND_MAX];
671       log_verbose("0x%x", data[size]);
672       normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0, 
673                                   norm + normsize, 100 - normsize, &status);       
674       if (U_FAILURE(status)) {
675         log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
676         break;
677       }
678       size ++;
679     }
680     log_verbose("\n");
681
682     nfdsize = unorm_normalize(data, size, UNORM_NFD, 0, 
683                               nfd, 100, &status);       
684     if (U_FAILURE(status)) {
685       log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
686     }
687
688     if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
689       testresult = UNORM_NO;
690     }
691     if (testresult == UNORM_YES) {
692       log_verbose("result UNORM_YES\n");
693     }
694     else {
695       log_verbose("result UNORM_NO\n");
696     }
697
698     if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
699       log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
700     }
701   }
702 }
703
704 static void
705 TestAPI() {
706     static const UChar in[]={ 0x68, 0xe4 };
707     UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
708     UErrorCode errorCode;
709     int32_t length;
710
711     /* try preflighting */
712     errorCode=U_ZERO_ERROR;
713     length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
714     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
715         log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
716         return;
717     }
718
719     errorCode=U_ZERO_ERROR;
720     length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
721     if(U_FAILURE(errorCode)) {
722         log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
723         return;
724     }
725     if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
726         log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
727         return;
728     }
729     length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
730     if(U_FAILURE(errorCode)) {
731         log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
732         return;
733     }
734     length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
735     if(U_FAILURE(errorCode)) {
736         log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
737         return;
738     }
739 }
740
741 /* test cases to improve test code coverage */
/* Named code points used to build the input of TestNormCoverage(). */
enum {
    /* Hangul compatibility Jamo */
    HANGUL_K_KIYEOK      = 0x3131,  /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO         = 0x315d,  /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS = 0x3133,  /* NFKD->Jamo T U+11aa */

    /* conjoining Jamo */
    HANGUL_KIYEOK        = 0x1100,  /* Jamo L U+1100 */
    HANGUL_WEO           = 0x116f,  /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS   = 0x11aa,  /* Jamo T U+11aa */

    /* precomposed syllables */
    HANGUL_AC00          = 0xac00,  /* Hangul syllable = Jamo LV U+ac00 */
    /* Hangul syllable = U+1100 * U+116f * U+11aa */
    HANGUL_SYLLABLE      = HANGUL_AC00 + 14*28 + 3,

    /* supplementary-plane musical symbols */
    MUSICAL_VOID_NOTEHEAD = 0x1d157,
    MUSICAL_HALF_NOTE     = 0x1d15e, /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM          = 0x1d165, /* cc=216 */
    MUSICAL_STACCATO      = 0x1d17c  /* cc=220 */
};
759
760 static void
761 TestNormCoverage() {
762     UChar input[1000], expect[1000], output[1000];
763     UErrorCode errorCode;
764     int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
765
766     /* create a long and nasty string with NFKC-unsafe characters */
767     inLength=0;
768
769     /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
770     input[inLength++]=HANGUL_KIYEOK;
771     input[inLength++]=HANGUL_WEO;
772     input[inLength++]=HANGUL_KIYEOK_SIOS;
773
774     input[inLength++]=HANGUL_KIYEOK;
775     input[inLength++]=HANGUL_WEO;
776     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
777
778     input[inLength++]=HANGUL_KIYEOK;
779     input[inLength++]=HANGUL_K_WEO;
780     input[inLength++]=HANGUL_KIYEOK_SIOS;
781
782     input[inLength++]=HANGUL_KIYEOK;
783     input[inLength++]=HANGUL_K_WEO;
784     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
785
786     input[inLength++]=HANGUL_K_KIYEOK;
787     input[inLength++]=HANGUL_WEO;
788     input[inLength++]=HANGUL_KIYEOK_SIOS;
789
790     input[inLength++]=HANGUL_K_KIYEOK;
791     input[inLength++]=HANGUL_WEO;
792     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
793
794     input[inLength++]=HANGUL_K_KIYEOK;
795     input[inLength++]=HANGUL_K_WEO;
796     input[inLength++]=HANGUL_KIYEOK_SIOS;
797
798     input[inLength++]=HANGUL_K_KIYEOK;
799     input[inLength++]=HANGUL_K_WEO;
800     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
801
802     /* Hangul LV with normal/compatibility Jamo T */
803     input[inLength++]=HANGUL_AC00;
804     input[inLength++]=HANGUL_KIYEOK_SIOS;
805
806     input[inLength++]=HANGUL_AC00;
807     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
808
809     /* compatibility Jamo L, V */
810     input[inLength++]=HANGUL_K_KIYEOK;
811     input[inLength++]=HANGUL_K_WEO;
812
813     hangulPrefixLength=inLength;
814
815     input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
816     input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
817     for(i=0; i<200; ++i) {
818         input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
819         input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
820         input[inLength++]=U16_LEAD(MUSICAL_STEM);
821         input[inLength++]=U16_TRAIL(MUSICAL_STEM);
822     }
823
824     /* (compatibility) Jamo L, T do not compose */
825     input[inLength++]=HANGUL_K_KIYEOK;
826     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
827
828     /* quick checks */
829     errorCode=U_ZERO_ERROR;
830     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
831         log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
832     }
833     errorCode=U_ZERO_ERROR;
834     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
835         log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
836     }
837     errorCode=U_ZERO_ERROR;
838     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
839         log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
840     }
841     errorCode=U_ZERO_ERROR;
842     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
843         log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
844     }
845     errorCode=U_ZERO_ERROR;
846     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
847         log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
848     }
849
850     /* NFKC */
851     expectLength=0;
852     expect[expectLength++]=HANGUL_SYLLABLE;
853
854     expect[expectLength++]=HANGUL_SYLLABLE;
855
856     expect[expectLength++]=HANGUL_SYLLABLE;
857
858     expect[expectLength++]=HANGUL_SYLLABLE;
859
860     expect[expectLength++]=HANGUL_SYLLABLE;
861
862     expect[expectLength++]=HANGUL_SYLLABLE;
863
864     expect[expectLength++]=HANGUL_SYLLABLE;
865
866     expect[expectLength++]=HANGUL_SYLLABLE;
867
868     expect[expectLength++]=HANGUL_AC00+3;
869
870     expect[expectLength++]=HANGUL_AC00+3;
871
872     expect[expectLength++]=HANGUL_AC00+14*28;
873
874     expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
875     expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
876     expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
877     expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
878     for(i=0; i<200; ++i) {
879         expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
880         expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
881     }
882     for(i=0; i<200; ++i) {
883         expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
884         expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
885     }
886
887     expect[expectLength++]=HANGUL_KIYEOK;
888     expect[expectLength++]=HANGUL_KIYEOK_SIOS;
889
890     /* try destination overflow first */
891     errorCode=U_ZERO_ERROR;
892     preflightLength=unorm_normalize(input, inLength,
893                            UNORM_NFKC, 0,
894                            output, 100, /* too short */
895                            &errorCode);
896     if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
897         log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
898     }
899
900     /* real NFKC */
901     errorCode=U_ZERO_ERROR;
902     length=unorm_normalize(input, inLength,
903                            UNORM_NFKC, 0,
904                            output, sizeof(output)/U_SIZEOF_UCHAR,
905                            &errorCode);
906     if(U_FAILURE(errorCode)) {
907         log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
908     } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
909         log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
910         for(i=0; i<length; ++i) {
911             if(output[i]!=expect[i]) {
912                 log_err("    NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
913                 break;
914             }
915         }
916     }
917     if(length!=preflightLength) {
918         log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
919     }
920
921     /* FCD */
922     u_memcpy(expect, input, hangulPrefixLength);
923     expectLength=hangulPrefixLength;
924
925     expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
926     expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
927     expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
928     expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
929     for(i=0; i<200; ++i) {
930         expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
931         expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
932     }
933     for(i=0; i<200; ++i) {
934         expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
935         expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
936     }
937
938     expect[expectLength++]=HANGUL_K_KIYEOK;
939     expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
940
941     errorCode=U_ZERO_ERROR;
942     length=unorm_normalize(input, inLength,
943                            UNORM_FCD, 0,
944                            output, sizeof(output)/U_SIZEOF_UCHAR,
945                            &errorCode);
946     if(U_FAILURE(errorCode)) {
947         log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
948     } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
949         log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
950         for(i=0; i<length; ++i) {
951             if(output[i]!=expect[i]) {
952                 log_err("    FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
953                 break;
954             }
955         }
956     }
957 }
958
959 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
960 static void
961 TestConcatenate(void) {
962     /* "re + 'sume'" */
963     static const UChar
964     left[]={
965         0x72, 0x65, 0
966     },
967     right[]={
968         0x301, 0x73, 0x75, 0x6d, 0xe9, 0
969     },
970     expect[]={
971         0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
972     };
973
974     UChar buffer[100];
975     UErrorCode errorCode;
976     int32_t length;
977
978     /* left with length, right NUL-terminated */
979     errorCode=U_ZERO_ERROR;
980     length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
981     if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
982         log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
983     }
984
985     /* preflighting */
986     errorCode=U_ZERO_ERROR;
987     length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
988     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
989         log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
990     }
991
992     buffer[2]=0x5555;
993     errorCode=U_ZERO_ERROR;
994     length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
995     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
996         log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
997     }
998
999     /* enter with U_FAILURE */
1000     buffer[2]=0xaaaa;
1001     errorCode=U_UNEXPECTED_TOKEN;
1002     length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1003     if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1004         log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1005     }
1006
1007     /* illegal arguments */
1008     buffer[2]=0xaaaa;
1009     errorCode=U_ZERO_ERROR;
1010     length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1011     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1012         log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1013     }
1014
1015     errorCode=U_ZERO_ERROR;
1016     length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1017     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1018         log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1019     }
1020 }
1021
/* U+002B, used as the separator between expected output pieces in the iteration tests */
enum { _PLUS = 0x2b };
1025
1026 static const char *const _modeString[UNORM_MODE_COUNT]={
1027     "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
1028 };
1029
1030 static void
1031 _testIter(const UChar *src, int32_t srcLength,
1032           UCharIterator *iter, UNormalizationMode mode, UBool forward,
1033           const UChar *out, int32_t outLength,
1034           const int32_t *srcIndexes, int32_t srcIndexesLength) {
1035     UChar buffer[4];
1036     const UChar *expect, *outLimit, *in;
1037     int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
1038     UErrorCode errorCode;
1039     UBool neededToNormalize, expectNeeded;
1040
1041     errorCode=U_ZERO_ERROR;
1042     outLimit=out+outLength;
1043     if(forward) {
1044         expect=out;
1045         i=index=0;
1046     } else {
1047         expect=outLimit;
1048         i=srcIndexesLength-2;
1049         index=srcLength;
1050     }
1051
1052     for(;;) {
1053         prevIndex=index;
1054         if(forward) {
1055             if(!iter->hasNext(iter)) {
1056                 return;
1057             }
1058             length=unorm_next(iter,
1059                               buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1060                               mode, 0,
1061                               (UBool)(out!=NULL), &neededToNormalize,
1062                               &errorCode);
1063             expectIndex=srcIndexes[i+1];
1064             in=src+prevIndex;
1065             inLength=expectIndex-prevIndex;
1066
1067             if(out!=NULL) {
1068                 /* get output piece from between plus signs */
1069                 expectLength=0;
1070                 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
1071                     ++expectLength;
1072                 }
1073                 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1074             } else {
1075                 expect=in;
1076                 expectLength=inLength;
1077                 expectNeeded=FALSE;
1078             }
1079         } else {
1080             if(!iter->hasPrevious(iter)) {
1081                 return;
1082             }
1083             length=unorm_previous(iter,
1084                                   buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1085                                   mode, 0,
1086                                   (UBool)(out!=NULL), &neededToNormalize,
1087                                   &errorCode);
1088             expectIndex=srcIndexes[i];
1089             in=src+expectIndex;
1090             inLength=prevIndex-expectIndex;
1091
1092             if(out!=NULL) {
1093                 /* get output piece from between plus signs */
1094                 expectLength=0;
1095                 while(expect!=out && expect[-1]!=_PLUS) {
1096                     ++expectLength;
1097                     --expect;
1098                 }
1099                 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1100             } else {
1101                 expect=in;
1102                 expectLength=inLength;
1103                 expectNeeded=FALSE;
1104             }
1105         }
1106         index=iter->getIndex(iter, UITER_CURRENT);
1107
1108         if(U_FAILURE(errorCode)) {
1109             log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1110                     forward, _modeString[mode], i, u_errorName(errorCode));
1111             return;
1112         }
1113         if(expectIndex!=index) {
1114             log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1115                     forward, _modeString[mode], i, index, expectIndex);
1116             return;
1117         }
1118         if(expectLength!=length) {
1119             log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1120                     forward, _modeString[mode], i, length, expectLength);
1121             return;
1122         }
1123         if(0!=u_memcmp(expect, buffer, length)) {
1124             log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1125                     forward, _modeString[mode], i);
1126             return;
1127         }
1128         if(neededToNormalize!=expectNeeded) {
1129         }
1130
1131         if(forward) {
1132             expect+=expectLength+1; /* go after the + */
1133             ++i;
1134         } else {
1135             --expect; /* go before the + */
1136             --i;
1137         }
1138     }
1139 }
1140
1141 static void
1142 TestNextPrevious() {
1143     static const UChar
1144     src[]={ /* input string */
1145         0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1146     },
1147     nfd[]={ /* + separates expected output pieces */
1148         0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1149     },
1150     nfkd[]={
1151         0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1152     },
1153     nfc[]={
1154         0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1155     },
1156     nfkc[]={
1157         0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1158     },
1159     fcd[]={
1160         0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1161     };
1162
1163     /* expected iterator indexes in the source string for each iteration piece */
1164     static const int32_t
1165     nfdIndexes[]={
1166         0, 1, 2, 5, 6, 7
1167     },
1168     nfkdIndexes[]={
1169         0, 1, 2, 5, 6, 7
1170     },
1171     nfcIndexes[]={
1172         0, 1, 2, 5, 6, 7
1173     },
1174     nfkcIndexes[]={
1175         0, 1, 2, 5, 7
1176     },
1177     fcdIndexes[]={
1178         0, 1, 2, 5, 6, 7
1179     };
1180
1181     UCharIterator iter;
1182
1183     UChar buffer[4];
1184     int32_t length;
1185
1186     UBool neededToNormalize;
1187     UErrorCode errorCode;
1188
1189     uiter_setString(&iter, src, sizeof(src)/U_SIZEOF_UCHAR);
1190
1191     /* test iteration with doNormalize */
1192     iter.index=0;
1193     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4);
1194     iter.index=0;
1195     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4);
1196     iter.index=0;
1197     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4);
1198     iter.index=0;
1199     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4);
1200     iter.index=0;
1201     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4);
1202
1203     iter.index=iter.length;
1204     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4);
1205     iter.index=iter.length;
1206     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4);
1207     iter.index=iter.length;
1208     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4);
1209     iter.index=iter.length;
1210     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4);
1211     iter.index=iter.length;
1212     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4);
1213
1214     /* test iteration without doNormalize */
1215     iter.index=0;
1216     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1217     iter.index=0;
1218     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1219     iter.index=0;
1220     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1221     iter.index=0;
1222     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1223     iter.index=0;
1224     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1225
1226     iter.index=iter.length;
1227     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1228     iter.index=iter.length;
1229     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1230     iter.index=iter.length;
1231     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1232     iter.index=iter.length;
1233     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1234     iter.index=iter.length;
1235     _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1236
1237     /* try without neededToNormalize */
1238     errorCode=U_ZERO_ERROR;
1239     buffer[0]=5;
1240     iter.index=1;
1241     length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1242                       UNORM_NFD, 0, TRUE, NULL,
1243                       &errorCode);
1244     if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1245         log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
1246         return;
1247     }
1248
1249     /* preflight */
1250     neededToNormalize=9;
1251     iter.index=1;
1252     length=unorm_next(&iter, NULL, 0,
1253                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1254                       &errorCode);
1255     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
1256         log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1257         return;
1258     }
1259
1260     errorCode=U_ZERO_ERROR;
1261     buffer[0]=buffer[1]=5;
1262     neededToNormalize=9;
1263     iter.index=1;
1264     length=unorm_next(&iter, buffer, 1,
1265                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1266                       &errorCode);
1267     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
1268         log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1269         return;
1270     }
1271
1272     /* no iterator */
1273     errorCode=U_ZERO_ERROR;
1274     buffer[0]=buffer[1]=5;
1275     neededToNormalize=9;
1276     iter.index=1;
1277     length=unorm_next(NULL, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1278                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1279                       &errorCode);
1280     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1281         log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1282         return;
1283     }
1284
1285     /* illegal mode */
1286     buffer[0]=buffer[1]=5;
1287     neededToNormalize=9;
1288     iter.index=1;
1289     length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1290                       (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
1291                       &errorCode);
1292     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1293         log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1294         return;
1295     }
1296
1297     /* error coming in */
1298     errorCode=U_MISPLACED_QUANTIFIER;
1299     buffer[0]=5;
1300     iter.index=1;
1301     length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1302                       UNORM_NFD, 0, TRUE, NULL,
1303                       &errorCode);
1304     if(errorCode!=U_MISPLACED_QUANTIFIER) {
1305         log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1306         return;
1307     }
1308 }
1309
1310 static void
1311 TestFCNFKCClosure(void) {
1312     static const struct {
1313         UChar32 c;
1314         const UChar s[6];
1315     } tests[]={
1316         { 0x00C4, { 0 } },
1317         { 0x00E4, { 0 } },
1318         { 0x037A, { 0x0020, 0x03B9, 0 } },
1319         { 0x03D2, { 0x03C5, 0 } },
1320         { 0x20A8, { 0x0072, 0x0073, 0 } },
1321         { 0x210B, { 0x0068, 0 } },
1322         { 0x210C, { 0x0068, 0 } },
1323         { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1324         { 0x2122, { 0x0074, 0x006D, 0 } },
1325         { 0x2128, { 0x007A, 0 } },
1326         { 0x1D5DB, { 0x0068, 0 } },
1327         { 0x1D5ED, { 0x007A, 0 } },
1328         { 0x0061, { 0 } }
1329     };
1330
1331     UChar buffer[8];
1332     UErrorCode errorCode;
1333     int32_t i, length;
1334
1335     for(i=0; i<LENGTHOF(tests); ++i) {
1336         errorCode=U_ZERO_ERROR;
1337         length=u_getFC_NFKC_Closure(tests[i].c, buffer, LENGTHOF(buffer), &errorCode);
1338         if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1339             log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
1340         }
1341     }
1342
1343     /* error handling */
1344     errorCode=U_ZERO_ERROR;
1345     length=u_getFC_NFKC_Closure(0x5c, NULL, LENGTHOF(buffer), &errorCode);
1346     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1347         log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1348     }
1349
1350     length=u_getFC_NFKC_Closure(0x5c, buffer, LENGTHOF(buffer), &errorCode);
1351     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1352         log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1353     }
1354 }
1355
1356 static void
1357 TestQuickCheckPerCP() {
1358     UErrorCode errorCode;
1359     UChar32 c, lead, trail;
1360     UChar s[U16_MAX_LENGTH], nfd[16];
1361     int32_t length, lccc1, lccc2, tccc1, tccc2;
1362     int32_t qc1, qc2;
1363
1364     if(
1365         u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1366         u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1367         u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1368         u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1369         u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1370         u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1371     ) {
1372         log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1373     }
1374
1375     /*
1376      * compare the quick check property values for some code points
1377      * to the quick check results for checking same-code point strings
1378      */
1379     errorCode=U_ZERO_ERROR;
1380     c=0;
1381     while(c<0x110000) {
1382         length=0;
1383         U16_APPEND_UNSAFE(s, length, c);
1384
1385         qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1386         qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1387         if(qc1!=qc2) {
1388             log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1389         }
1390
1391         qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1392         qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1393         if(qc1!=qc2) {
1394             log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1395         }
1396
1397         qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1398         qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1399         if(qc1!=qc2) {
1400             log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1401         }
1402
1403         qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1404         qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1405         if(qc1!=qc2) {
1406             log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1407         }
1408
1409         length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, LENGTHOF(nfd), &errorCode);
1410         /* length-length == 0 is used to get around a compiler warning. */
1411         U16_GET(nfd, 0, length-length, length, lead);
1412         U16_GET(nfd, 0, length-1, length, trail);
1413
1414         lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1415         lccc2=u_getCombiningClass(lead);
1416         tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1417         tccc2=u_getCombiningClass(trail);
1418
1419         if(lccc1!=lccc2) {
1420             log_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1421                     lccc1, lccc2, c);
1422         }
1423         if(tccc1!=tccc2) {
1424             log_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1425                     tccc1, tccc2, c);
1426         }
1427
1428         /* skip some code points */
1429         c=(20*c)/19+1;
1430     }
1431 }
1432
1433 static void
1434 TestComposition(void) {
1435     static const struct {
1436         UNormalizationMode mode;
1437         uint32_t options;
1438         UChar input[12];
1439         UChar expect[12];
1440     } cases[]={
1441         /*
1442          * special cases for UAX #15 bug
1443          * see Unicode Corrigendum #5: Normalization Idempotency
1444          * at http://unicode.org/versions/corrigendum5.html
1445          * (was Public Review Issue #29)
1446          */
1447         { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 },         { 0x1100, 0x0300, 0x1161, 0x0327 } },
1448         { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1449         { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 },         { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1450         { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e },                 { 0x0b47, 0x0300, 0x0b3e } },
1451
1452         /* TODO: add test cases for UNORM_FCC here (j2151) */
1453     };
1454
1455     UChar output[16];
1456     UErrorCode errorCode;
1457     int32_t i, length;
1458
1459     for(i=0; i<LENGTHOF(cases); ++i) {
1460         errorCode=U_ZERO_ERROR;
1461         length=unorm_normalize(
1462                     cases[i].input, -1,
1463                     cases[i].mode, cases[i].options,
1464                     output, LENGTHOF(output),
1465                     &errorCode);
1466         if( U_FAILURE(errorCode) ||
1467             length!=u_strlen(cases[i].expect) ||
1468             0!=u_memcmp(output, cases[i].expect, length)
1469         ) {
1470             log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
1471         }
1472     }
1473 }
1474
1475 static void
1476 TestGetDecomposition() {
1477     UChar decomp[32];
1478     int32_t length;
1479
1480     UErrorCode errorCode=U_ZERO_ERROR;
1481     const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
1482     if(U_FAILURE(errorCode)) {
1483         log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
1484         return;
1485     }
1486
1487     length=unorm2_getDecomposition(n2, 0x20, decomp, LENGTHOF(decomp), &errorCode);
1488     if(U_FAILURE(errorCode) || length>=0) {
1489         log_err("unorm2_getDecomposition(fcc, space) failed\n");
1490     }
1491     errorCode=U_ZERO_ERROR;
1492     length=unorm2_getDecomposition(n2, 0xe4, decomp, LENGTHOF(decomp), &errorCode);
1493     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1494         log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1495     }
1496     errorCode=U_ZERO_ERROR;
1497     length=unorm2_getDecomposition(n2, 0xac01, decomp, LENGTHOF(decomp), &errorCode);
1498     if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
1499         log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1500     }
1501     errorCode=U_ZERO_ERROR;
1502     length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1503     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
1504         log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1505     }
1506     errorCode=U_ZERO_ERROR;
1507     length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1508     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1509         log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1510     }
1511     errorCode=U_ZERO_ERROR;
1512     length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1513     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1514         log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
1515     }
1516 }
1517
1518 static void
1519 TestGetRawDecomposition() {
1520     UChar decomp[32];
1521     int32_t length;
1522
1523     UErrorCode errorCode=U_ZERO_ERROR;
1524     const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
1525     if(U_FAILURE(errorCode)) {
1526         log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1527         return;
1528     }
1529     /*
1530      * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1531      * without recursive decomposition.
1532      */
1533
1534     length=unorm2_getRawDecomposition(n2, 0x20, decomp, LENGTHOF(decomp), &errorCode);
1535     if(U_FAILURE(errorCode) || length>=0) {
1536         log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1537     }
1538     errorCode=U_ZERO_ERROR;
1539     length=unorm2_getRawDecomposition(n2, 0xe4, decomp, LENGTHOF(decomp), &errorCode);
1540     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1541         log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1542     }
1543     /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1544     errorCode=U_ZERO_ERROR;
1545     length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, LENGTHOF(decomp), &errorCode);
1546     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) {
1547         log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1548     }
1549     /* U+212B ANGSTROM SIGN */
1550     errorCode=U_ZERO_ERROR;
1551     length=unorm2_getRawDecomposition(n2, 0x212b, decomp, LENGTHOF(decomp), &errorCode);
1552     if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) {
1553         log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1554     }
1555     errorCode=U_ZERO_ERROR;
1556     length=unorm2_getRawDecomposition(n2, 0xac00, decomp, LENGTHOF(decomp), &errorCode);
1557     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) {
1558         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1559     }
1560     /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1561     errorCode=U_ZERO_ERROR;
1562     length=unorm2_getRawDecomposition(n2, 0xac01, decomp, LENGTHOF(decomp), &errorCode);
1563     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) {
1564         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1565     }
1566     errorCode=U_ZERO_ERROR;
1567     length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1568     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
1569         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1570     }
1571     errorCode=U_ZERO_ERROR;
1572     length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1573     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1574         log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1575     }
1576     errorCode=U_ZERO_ERROR;
1577     length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1578     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1579         log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
1580     }
1581 }
1582
1583 static void
1584 TestAppendRestoreMiddle() {
1585     UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 };  /* last chars are 'A' and 'cedilla' NFC */
1586     static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 };  /* first char is 'ring above' NFC */
1587     /* NFC: C5 is 'A with ring above' */
1588     static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1589     int32_t length;
1590     UErrorCode errorCode=U_ZERO_ERROR;
1591     const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1592     if(U_FAILURE(errorCode)) {
1593         log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1594         return;
1595     }
1596     /*
1597      * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1598      * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1599      * still fits into a[] but the full result still overflows this capacity.
1600      * (Let it modify the destination buffer before reallocating internally.)
1601      */
1602     length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode);
1603     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=LENGTHOF(expected)) {
1604         log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length);
1605         return;
1606     }
1607     /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1608     if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) {
1609         log_err("unorm2_append(overflow) modified the first string\n");
1610         return;
1611     }
1612     errorCode=U_ZERO_ERROR;
1613     length=unorm2_append(n2, a, -1, LENGTHOF(a), b, -1, &errorCode);
1614     if(U_FAILURE(errorCode) || length!=LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) {
1615         log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length);
1616         return;
1617     }
1618 }
1619
1620 static void
1621 TestGetEasyToUseInstance() {
1622     static const UChar in[]={
1623         0xA0,  /* -> <noBreak> 0020 */
1624         0xC7, 0x301  /* = 1E08 = 0043 0327 0301 */
1625     };
1626     UChar out[32];
1627     int32_t length;
1628
1629     UErrorCode errorCode=U_ZERO_ERROR;
1630     const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1631     if(U_FAILURE(errorCode)) {
1632         log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1633         return;
1634     }
1635     length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
1636     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
1637         log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1638                 (int)length, u_errorName(errorCode));
1639     }
1640
1641     errorCode=U_ZERO_ERROR;
1642     n2=unorm2_getNFDInstance(&errorCode);
1643     if(U_FAILURE(errorCode)) {
1644         log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1645         return;
1646     }
1647     length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
1648     if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1649         log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1650                 (int)length, u_errorName(errorCode));
1651     }
1652
1653     errorCode=U_ZERO_ERROR;
1654     n2=unorm2_getNFKCInstance(&errorCode);
1655     if(U_FAILURE(errorCode)) {
1656         log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1657         return;
1658     }
1659     length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
1660     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
1661         log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1662                 (int)length, u_errorName(errorCode));
1663     }
1664
1665     errorCode=U_ZERO_ERROR;
1666     n2=unorm2_getNFKDInstance(&errorCode);
1667     if(U_FAILURE(errorCode)) {
1668         log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
1669         return;
1670     }
1671     length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
1672     if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1673         log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1674                 (int)length, u_errorName(errorCode));
1675     }
1676
1677     errorCode=U_ZERO_ERROR;
1678     n2=unorm2_getNFKCCasefoldInstance(&errorCode);
1679     if(U_FAILURE(errorCode)) {
1680         log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
1681         return;
1682     }
1683     length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
1684     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
1685         log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1686                 (int)length, u_errorName(errorCode));
1687     }
1688 }
1689
1690 #endif /* #if !UCONFIG_NO_NORMALIZATION */