1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
6 * Copyright (C) 2001-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 ******************************************************************************
10 * file name: trietest.c
12 * tab size: 8 (not used)
15 * created on: 2001nov20
16 * created by: Markus W. Scherer
20 #include "unicode/utypes.h"
21 #include "unicode/utf16.h"
29 /* definitions from standalone utrie development */
/*
 * These macros map the test logging calls (log_err, log_verbose) onto plain
 * printf and replace u_errorName() with a fixed placeholder string.
 * NOTE(review): any preprocessor guard around these macros is not visible in
 * this excerpt -- confirm whether they are active in the regular test build.
 */
30 #define log_err printf
31 #define log_verbose printf
34 #define u_errorName(errorCode) "some error code"
37 /* Values for setting possibly overlapping, out-of-order ranges of values */
/*
 * The member lists of SetRange and CheckRange are not visible in this excerpt.
 * Code elsewhere in this file reads SetRange members start, limit, value and
 * overwrite, and CheckRange members limit and value.
 * NOTE(review): member types and order are presumably {start, limit, value,
 * overwrite} and {limit, value}, judging from the initializers in the test
 * data -- confirm against the full declarations.
 */
38 typedef struct SetRange {
46 * value is set from the previous boundary's limit to before
47 * this boundary's limit
49 typedef struct CheckRange {
/*
 * Folded-value callback handed to utrie_serialize() when the caller asks for
 * 32-bit data (see the dataIs32 selection in testTrieRangesWithMalloc()).
 *
 * Reads trie values with utrie_get32() beginning at `start` and returns
 * `offset` shifted into the upper 16 bits, ORed with foldedValue.
 *
 * NOTE(review): the loop header, the accumulation of foldedValue and any
 * other return path are missing from this excerpt -- confirm against the
 * full source before relying on this description.
 */
55 static uint32_t U_CALLCONV
56 _testFoldedValue32(UNewTrie *trie, UChar32 start, int32_t offset) {
57 uint32_t foldedValue, value;
65 value=utrie_get32(trie, start, &inBlockZero);
/* skips ahead by one whole data block; presumably taken only when inBlockZero is set -- the condition is not visible here */
67 start+=UTRIE_DATA_BLOCK_LENGTH;
/* pack: offset in bits 16 and up, folded value below */
75 return ((uint32_t)offset<<16)|foldedValue;
/*
 * Folding-offset callback for 32-bit tries: recovers the offset from a folded
 * value by taking its upper 16 bits, the inverse of the packing done in
 * _testFoldedValue32().
 */
81 static int32_t U_CALLCONV
82 _testFoldingOffset32(uint32_t data) {
83 return (int32_t)(data>>16);
/*
 * Folded-value callback handed to utrie_serialize() when 16-bit data is
 * requested (the non-dataIs32 case).
 *
 * Reads trie values with utrie_get32() beginning at `start`; the visible
 * return statement yields `offset` with bit 0x8000 set.
 *
 * NOTE(review): the loop header, the use of foldedValue and any alternative
 * return path are missing from this excerpt -- confirm against the full
 * source.
 */
86 static uint32_t U_CALLCONV
87 _testFoldedValue16(UNewTrie *trie, UChar32 start, int32_t offset) {
88 uint32_t foldedValue, value;
96 value=utrie_get32(trie, start, &inBlockZero);
/* skips ahead by one whole data block; presumably taken only when inBlockZero is set -- the condition is not visible here */
98 start+=UTRIE_DATA_BLOCK_LENGTH;
/* flag bit 0x8000 marks the result; _testFoldingOffset16() masks it off again */
106 return (uint32_t)(offset|0x8000);
/*
 * Folding-offset callback for 16-bit tries: keeps the low 15 bits of the lead
 * unit data, dropping the 0x8000 bit that _testFoldedValue16() sets.
 * NOTE(review): a line between the signature and the return is missing from
 * this excerpt -- confirm there is no additional condition.
 */
112 static int32_t U_CALLCONV
113 _testFoldingOffset16(uint32_t data) {
115 return (int32_t)(data&0x7fff);
/*
 * Value callback passed to utrie_enum() together with _testEnumRange().
 * NOTE(review): the body is not visible in this excerpt; what it returns for
 * a given value must be checked in the full source.
 */
121 static uint32_t U_CALLCONV
122 _testEnumValue(const void *context, uint32_t value) {
/*
 * Range callback passed to utrie_enum(): checks that each enumerated range
 * matches the next expected CheckRange entry.
 *
 * context points at the caller's "current CheckRange" pointer. Each call
 * takes the current entry and advances that pointer by one. The expected
 * range runs from the previous entry's limit to this entry's limit and
 * carries this entry's value. A mismatch is reported through log_err.
 *
 * Reading (b-1) is what the dummy first entry of each checkRanges[] table is
 * for; callers start the enumeration pointer at checkRanges+1.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
126 static UBool U_CALLCONV
127 _testEnumRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
128 const CheckRange **pb=(const CheckRange **)context;
/* fetch the expected entry and advance the caller's cursor */
129 const CheckRange *b=(*pb)++;
132 if(start!=(b-1)->limit || limit!=b->limit || value!=b->value) {
133 log_err("error: utrie_enum() delivers wrong range [U+%04lx..U+%04lx[.0x%lx instead of [U+%04lx..U+%04lx[.0x%lx\n",
135 (b-1)->limit, b->limit, b->value);
/*
 * Exercises the string-iteration macros of an unserialized trie.
 *
 * Visible steps:
 *  1. Build a UTF-16 string s from the checkRanges[] limits and record the
 *     matching expected value for each appended code point in values[].
 *  2. Walk the string forward with UTRIE_NEXT16/32 and compare each value
 *     with values[i]; for supplementary results also cross-check the
 *     lead/trail lookup macros and the FROM_PAIR macros.
 *  3. Walk the string backward with UTRIE_PREVIOUS16/32 and compare again.
 *
 * The 16-bit or 32-bit macro family is chosen by whether trie->data32 is
 * NULL.
 *
 * NOTE(review): many lines are missing from this excerpt (declarations of s,
 * values, c, c2, value and offset; loop headers; several conditions), so the
 * exact set of code points tested cannot be confirmed from here.
 */
141 testTrieIteration(const char *testName,
143 const CheckRange checkRanges[], int32_t countCheckRanges) {
147 const UChar *p, *limit;
151 int32_t i, length, countValues;
/* write a string: one code point per check range, expected value stored in parallel */
155 length=countValues=0;
156 for(i=0; i<countCheckRanges; ++i) {
157 c=checkRanges[i].limit;
160 U16_APPEND_UNSAFE(s, length, c);
161 values[countValues++]=checkRanges[i].value;
/* forward iteration */
171 if(trie->data32!=NULL) {
172 UTRIE_NEXT32(trie, p, limit, c, c2, value);
174 UTRIE_NEXT16(trie, p, limit, c, c2, value);
176 if(value!=values[i]) {
177 log_err("error: wrong value from UTRIE_NEXT(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n",
178 testName, c, c2, value, values[i]);
183 !U16_IS_LEAD(c) || !U16_IS_TRAIL(c2) || c!=*(p-2) || c2!=*(p-1)
185 log_err("error: wrong (c, c2) from UTRIE_NEXT(%s): (U+%04lx, U+%04lx)\n",
/* same lookup done by hand: lead unit -> folding offset -> trail unit */
192 if(trie->data32==NULL) {
193 value=UTRIE_GET16_FROM_LEAD(trie, c);
194 offset=trie->getFoldingOffset(value);
196 value=UTRIE_GET16_FROM_OFFSET_TRAIL(trie, offset, c2);
198 value=trie->initialValue;
201 value=UTRIE_GET32_FROM_LEAD(trie, c);
202 offset=trie->getFoldingOffset(value);
204 value=UTRIE_GET32_FROM_OFFSET_TRAIL(trie, offset, c2);
206 value=trie->initialValue;
209 if(value!=values[i]) {
210 log_err("error: wrong value from UTRIE_GETXX_FROM_OFFSET_TRAIL(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n",
211 testName, c, c2, value, values[i]);
/* and once more through the surrogate-pair convenience macros */
216 if(trie->data32==NULL) {
217 UTRIE_GET16_FROM_PAIR(trie, c, c2, value);
219 UTRIE_GET32_FROM_PAIR(trie, c, c2, value);
221 if(value!=values[i]) {
222 log_err("error: wrong value from UTRIE_GETXX_FROM_PAIR(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n",
223 testName, c, c2, value, values[i]);
/* backward iteration */
235 if(trie->data32!=NULL) {
236 UTRIE_PREVIOUS32(trie, s, p, c, c2, value);
238 UTRIE_PREVIOUS16(trie, s, p, c, c2, value);
240 if(value!=values[i]) {
241 log_err("error: wrong value from UTRIE_PREVIOUS(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n",
242 testName, c, c2, value, values[i]);
247 !U16_IS_LEAD(c) || !U16_IS_TRAIL(c2) || c!=*p || c2!=*(p+1)
249 log_err("error: wrong (c, c2) from UTRIE_PREVIOUS(%s): (U+%04lx, U+%04lx)\n",
/*
 * Builds a trie from setRanges[], checks it against checkRanges[], serializes
 * it into a heap buffer obtained from uprv_malloc(), unserializes it and
 * checks the result again. The visible steps parallel testTrieRanges(), which
 * uses a local buffer instead.
 *
 * Parameters:
 *   testName          label used in log output
 *   setRanges/count   ranges to write into the new trie, applied in order
 *   checkRanges/count expected boundaries and values; entry 0 is a dummy
 *                     whose value is passed to utrie_open()
 *   dataIs32          selects the 32-bit or 16-bit folding callbacks
 *   latin1Linear      expected Latin-1 linearity of the serialized trie
 *
 * NOTE(review): a number of lines are missing from this excerpt (declarations
 * of newTrie, trie, data, i, length, ok, overwrite; loop headers; returns and
 * any release of `storage`). The review notes below are based only on what is
 * visible.
 */
256 testTrieRangesWithMalloc(const char *testName,
257 const SetRange setRanges[], int32_t countSetRanges,
258 const CheckRange checkRanges[], int32_t countCheckRanges,
259 UBool dataIs32, UBool latin1Linear) {
260 UTrieGetFoldingOffset *getFoldingOffset;
261 const CheckRange *enumRanges;
264 uint32_t value, value2;
265 UChar32 start, limit;
267 UErrorCode errorCode;
269 uint8_t* storage =NULL;
270 static const int32_t DEFAULT_STORAGE_SIZE = 32768;
271 storage = (uint8_t*) uprv_malloc(sizeof(uint8_t)*DEFAULT_STORAGE_SIZE);
/* NOTE(review): no NULL check on the allocation result is visible in this excerpt -- confirm one exists before storage is used */
273 log_verbose("\ntesting Trie '%s'\n", testName);
274 newTrie=utrie_open(NULL, NULL, 2000,
275 checkRanges[0].value, checkRanges[0].value,
278 /* set values from setRanges[] */
280 for(i=0; i<countSetRanges; ++i) {
281 start=setRanges[i].start;
282 limit=setRanges[i].limit;
283 value=setRanges[i].value;
284 overwrite=setRanges[i].overwrite;
/* a one-code-point range with overwrite goes through the single-value setter, everything else through the range setter */
285 if((limit-start)==1 && overwrite) {
286 ok&=utrie_set32(newTrie, start, value);
288 ok&=utrie_setRange32(newTrie, start, limit, value, overwrite);
292 log_err("error: setting values into a trie failed (%s)\n", testName);
296 /* verify that all these values are in the new Trie */
298 for(i=0; i<countCheckRanges; ++i) {
299 limit=checkRanges[i].limit;
300 value=checkRanges[i].value;
/* NOTE(review): start (UChar32) and value (uint32_t) are passed to %lx conversions without the (long) cast that dummyTest() applies -- confirm this is safe on platforms where long is wider than 32 bits */
303 if(value!=utrie_get32(newTrie, start, NULL)) {
304 log_err("error: newTrie(%s)[U+%04lx]==0x%lx instead of 0x%lx\n",
305 testName, start, utrie_get32(newTrie, start, NULL), value);
/* pick the folding-offset callback that matches the requested data width (the selecting condition is not visible here) */
312 getFoldingOffset=_testFoldingOffset32;
314 getFoldingOffset=_testFoldingOffset16;
317 errorCode=U_ZERO_ERROR;
318 length=utrie_serialize(newTrie, storage, DEFAULT_STORAGE_SIZE,
319 dataIs32 ? _testFoldedValue32 : _testFoldedValue16,
322 if(U_FAILURE(errorCode)) {
323 log_err("error: utrie_serialize(%s) failed: %s\n", testName, u_errorName(errorCode));
/* NOTE(review): this failure path closes newTrie, but no release of storage is visible here -- confirm the buffer is freed before returning */
324 utrie_close(newTrie);
328 /* test linear Latin-1 range from utrie_getData() */
333 data=utrie_getData(newTrie, &dataLength);
335 for(i=0; i<countCheckRanges && start<=0xff; ++i) {
336 limit=checkRanges[i].limit;
337 value=checkRanges[i].value;
339 while(start<limit && start<=0xff) {
/* the Latin-1 values are read at index UTRIE_DATA_BLOCK_LENGTH+start of the build-time data array */
340 if(value!=data[UTRIE_DATA_BLOCK_LENGTH+start]) {
341 log_err("error: newTrie(%s).latin1Data[U+%04lx]==0x%lx instead of 0x%lx\n",
342 testName, start, data[UTRIE_DATA_BLOCK_LENGTH+start], value);
/* the builder is no longer needed; from here on only the serialized form in storage is used */
349 utrie_close(newTrie);
351 errorCode=U_ZERO_ERROR;
352 if(!utrie_unserialize(&trie, storage, length, &errorCode)) {
353 log_err("error: utrie_unserialize() failed, %s\n", u_errorName(errorCode));
/* install the test's folding-offset callback on the unserialized trie */
356 trie.getFoldingOffset=getFoldingOffset;
358 if(dataIs32!=(trie.data32!=NULL)) {
359 log_err("error: trie serialization (%s) did not preserve 32-bitness\n", testName);
361 if(latin1Linear!=trie.isLatin1Linear) {
362 log_err("error: trie serialization (%s) did not preserve Latin-1-linearity\n", testName);
365 /* verify that all these values are in the unserialized Trie */
367 for(i=0; i<countCheckRanges; ++i) {
368 limit=checkRanges[i].limit;
369 value=checkRanges[i].value;
372 /* skip surrogates */
380 value2=UTRIE_GET32_FROM_BMP(&trie, start);
382 value2=UTRIE_GET16_FROM_BMP(&trie, start);
385 log_err("error: unserialized trie(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n",
386 testName, start, value2, value);
388 if(!U16_IS_LEAD(start)) {
390 value2=UTRIE_GET32_FROM_LEAD(&trie, start);
392 value2=UTRIE_GET16_FROM_LEAD(&trie, start);
395 log_err("error: unserialized trie(%s).fromLead(U+%04lx)==0x%lx instead of 0x%lx\n",
396 testName, start, value2, value);
401 UTRIE_GET32(&trie, start, value2);
403 UTRIE_GET16(&trie, start, value2);
406 log_err("error: unserialized trie(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n",
407 testName, start, value2, value);
413 /* enumerate and verify all ranges */
/* start at entry 1: _testEnumRange() looks back one entry for the range start */
414 enumRanges=checkRanges+1;
415 utrie_enum(&trie, _testEnumValue, _testEnumRange, &enumRanges);
417 /* test linear Latin-1 range */
418 if(trie.isLatin1Linear) {
419 if(trie.data32!=NULL) {
420 const uint32_t *latin1=UTRIE_GET32_LATIN1(&trie);
422 for(start=0; start<0x100; ++start) {
423 if(latin1[start]!=UTRIE_GET32_FROM_LEAD(&trie, start)) {
424 log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get32(U+%04lx)\n",
425 testName, start, latin1[start], UTRIE_GET32_FROM_LEAD(&trie, start), start);
429 const uint16_t *latin1=UTRIE_GET16_LATIN1(&trie);
431 for(start=0; start<0x100; ++start) {
432 if(latin1[start]!=UTRIE_GET16_FROM_LEAD(&trie, start)) {
433 log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get16(U+%04lx)\n",
434 testName, start, latin1[start], UTRIE_GET16_FROM_LEAD(&trie, start), start);
/* finally run the string-iteration checks on the unserialized trie */
440 testTrieIteration(testName, &trie, checkRanges, countCheckRanges);
/*
 * Builds a trie from setRanges[], checks it against checkRanges[], serializes
 * it into a local 32768-byte buffer (storageHolder.storage), unserializes it
 * and checks the result again.
 *
 * Parameters:
 *   testName          label used in log output
 *   setRanges/count   ranges to write into the new trie, applied in order
 *   checkRanges/count expected boundaries and values; entry 0 is a dummy
 *                     whose value is passed to utrie_open()
 *   dataIs32          selects the 32-bit or 16-bit folding callbacks
 *   latin1Linear      expected Latin-1 linearity of the serialized trie
 *
 * For the combination !dataIs32 && latin1Linear the folding-offset callback
 * is set to NULL, and after unserializing the trie's own getFoldingOffset is
 * then left untouched; the nearby comment fragment says this is done for code
 * coverage of the UTrie defaults.
 *
 * NOTE(review): a number of lines are missing from this excerpt (the
 * declaration wrapping `bogus` and `storage` into storageHolder; declarations
 * of newTrie, trie, data, i, length, ok, overwrite; loop headers; returns).
 */
445 testTrieRanges(const char *testName,
446 const SetRange setRanges[], int32_t countSetRanges,
447 const CheckRange checkRanges[], int32_t countCheckRanges,
448 UBool dataIs32, UBool latin1Linear) {
450 double bogus; /* needed for aligning the storage */
451 uint8_t storage[32768];
453 UTrieGetFoldingOffset *getFoldingOffset;
454 UNewTrieGetFoldedValue *getFoldedValue;
455 const CheckRange *enumRanges;
458 uint32_t value, value2;
459 UChar32 start, limit;
461 UErrorCode errorCode;
464 log_verbose("\ntesting Trie '%s'\n", testName);
465 newTrie=utrie_open(NULL, NULL, 2000,
466 checkRanges[0].value, checkRanges[0].value,
469 /* set values from setRanges[] */
471 for(i=0; i<countSetRanges; ++i) {
472 start=setRanges[i].start;
473 limit=setRanges[i].limit;
474 value=setRanges[i].value;
475 overwrite=setRanges[i].overwrite;
/* a one-code-point range with overwrite goes through the single-value setter, everything else through the range setter */
476 if((limit-start)==1 && overwrite) {
477 ok&=utrie_set32(newTrie, start, value);
479 ok&=utrie_setRange32(newTrie, start, limit, value, overwrite);
483 log_err("error: setting values into a trie failed (%s)\n", testName);
487 /* verify that all these values are in the new Trie */
489 for(i=0; i<countCheckRanges; ++i) {
490 limit=checkRanges[i].limit;
491 value=checkRanges[i].value;
/* NOTE(review): start (UChar32) and value (uint32_t) are passed to %lx conversions without the (long) cast that dummyTest() applies -- confirm this is safe on platforms where long is wider than 32 bits */
494 if(value!=utrie_get32(newTrie, start, NULL)) {
495 log_err("error: newTrie(%s)[U+%04lx]==0x%lx instead of 0x%lx\n",
496 testName, start, utrie_get32(newTrie, start, NULL), value);
/* pick the callback pair that matches the requested data width (the selecting condition is not visible here) */
503 getFoldingOffset=_testFoldingOffset32;
504 getFoldedValue=_testFoldedValue32;
506 getFoldingOffset=_testFoldingOffset16;
507 getFoldedValue=_testFoldedValue16;
511 * code coverage for utrie.c/defaultGetFoldedValue(),
512 * pick some combination of parameters for selecting the UTrie defaults
514 if(!dataIs32 && latin1Linear) {
515 getFoldingOffset=NULL;
519 errorCode=U_ZERO_ERROR;
520 length=utrie_serialize(newTrie, storageHolder.storage, sizeof(storageHolder.storage),
524 if(U_FAILURE(errorCode)) {
525 log_err("error: utrie_serialize(%s) failed: %s\n", testName, u_errorName(errorCode));
526 utrie_close(newTrie);
/* a serialized length that reaches the buffer size is treated as "buffer too small" */
529 if (length >= (int32_t)sizeof(storageHolder.storage)) {
530 log_err("error: utrie_serialize(%s) needs more memory\n", testName);
531 utrie_close(newTrie);
535 /* test linear Latin-1 range from utrie_getData() */
540 data=utrie_getData(newTrie, &dataLength);
542 for(i=0; i<countCheckRanges && start<=0xff; ++i) {
543 limit=checkRanges[i].limit;
544 value=checkRanges[i].value;
546 while(start<limit && start<=0xff) {
/* the Latin-1 values are read at index UTRIE_DATA_BLOCK_LENGTH+start of the build-time data array */
547 if(value!=data[UTRIE_DATA_BLOCK_LENGTH+start]) {
548 log_err("error: newTrie(%s).latin1Data[U+%04lx]==0x%lx instead of 0x%lx\n",
549 testName, start, data[UTRIE_DATA_BLOCK_LENGTH+start], value);
/* the builder is no longer needed; from here on only the serialized form is used */
556 utrie_close(newTrie);
558 errorCode=U_ZERO_ERROR;
559 if(!utrie_unserialize(&trie, storageHolder.storage, length, &errorCode)) {
560 log_err("error: utrie_unserialize() failed, %s\n", u_errorName(errorCode));
/* only override the trie's folding-offset callback when the test selected one */
563 if(getFoldingOffset!=NULL) {
564 trie.getFoldingOffset=getFoldingOffset;
567 if(dataIs32!=(trie.data32!=NULL)) {
568 log_err("error: trie serialization (%s) did not preserve 32-bitness\n", testName);
570 if(latin1Linear!=trie.isLatin1Linear) {
571 log_err("error: trie serialization (%s) did not preserve Latin-1-linearity\n", testName);
574 /* verify that all these values are in the unserialized Trie */
576 for(i=0; i<countCheckRanges; ++i) {
577 limit=checkRanges[i].limit;
578 value=checkRanges[i].value;
581 /* skip surrogates */
589 value2=UTRIE_GET32_FROM_BMP(&trie, start);
591 value2=UTRIE_GET16_FROM_BMP(&trie, start);
594 log_err("error: unserialized trie(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n",
595 testName, start, value2, value);
597 if(!U16_IS_LEAD(start)) {
599 value2=UTRIE_GET32_FROM_LEAD(&trie, start);
601 value2=UTRIE_GET16_FROM_LEAD(&trie, start);
604 log_err("error: unserialized trie(%s).fromLead(U+%04lx)==0x%lx instead of 0x%lx\n",
605 testName, start, value2, value);
610 UTRIE_GET32(&trie, start, value2);
612 UTRIE_GET16(&trie, start, value2);
615 log_err("error: unserialized trie(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n",
616 testName, start, value2, value);
622 /* enumerate and verify all ranges */
/* start at entry 1: _testEnumRange() looks back one entry for the range start */
623 enumRanges=checkRanges+1;
624 utrie_enum(&trie, _testEnumValue, _testEnumRange, &enumRanges);
626 /* test linear Latin-1 range */
627 if(trie.isLatin1Linear) {
628 if(trie.data32!=NULL) {
629 const uint32_t *latin1=UTRIE_GET32_LATIN1(&trie);
631 for(start=0; start<0x100; ++start) {
632 if(latin1[start]!=UTRIE_GET32_FROM_LEAD(&trie, start)) {
633 log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get32(U+%04lx)\n",
634 testName, start, latin1[start], UTRIE_GET32_FROM_LEAD(&trie, start), start);
638 const uint16_t *latin1=UTRIE_GET16_LATIN1(&trie);
640 for(start=0; start<0x100; ++start) {
641 if(latin1[start]!=UTRIE_GET16_FROM_LEAD(&trie, start)) {
642 log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get16(U+%04lx)\n",
643 testName, start, latin1[start], UTRIE_GET16_FROM_LEAD(&trie, start), start);
/* finally run the string-iteration checks on the unserialized trie */
649 testTrieIteration(testName, &trie, checkRanges, countCheckRanges);
/*
 * Runs one data set through both test drivers, testTrieRanges() and
 * testTrieRangesWithMalloc(), first under testName and then again under a
 * derived name with the suffix "-latin1Linear".
 *
 * NOTE(review): the trailing arguments of every call, the remaining
 * parameter(s) of this function, the declaration of `name` and the callee of
 * the third call are missing from this excerpt. Presumably the second pair of
 * calls passes latin1Linear as TRUE -- confirm against the full source.
 */
653 testTrieRanges2(const char *testName,
654 const SetRange setRanges[], int32_t countSetRanges,
655 const CheckRange checkRanges[], int32_t countCheckRanges,
/* first pass: original test name */
659 testTrieRanges(testName,
660 setRanges, countSetRanges,
661 checkRanges, countCheckRanges,
663 testTrieRangesWithMalloc(testName,
664 setRanges, countSetRanges,
665 checkRanges, countCheckRanges,
/* second pass: same data under "<testName>-latin1Linear"; NOTE(review): these copies are not length-checked here -- confirm name is large enough for every testName used */
668 uprv_strcpy(name, testName);
669 uprv_strcat(name, "-latin1Linear");
671 setRanges, countSetRanges,
672 checkRanges, countCheckRanges,
674 testTrieRangesWithMalloc(name,
675 setRanges, countSetRanges,
676 checkRanges, countCheckRanges,
/*
 * Runs one data set through testTrieRanges2() twice, under the names
 * "<testName>.32" and "<testName>.16".
 *
 * NOTE(review): the final argument of each testTrieRanges2() call and the
 * declaration of `name` are missing from this excerpt; presumably the suffix
 * reflects the dataIs32 flag passed along -- confirm against the full source.
 */
681 testTrieRanges4(const char *testName,
682 const SetRange setRanges[], int32_t countSetRanges,
683 const CheckRange checkRanges[], int32_t countCheckRanges) {
/* variant tagged ".32" */
686 uprv_strcpy(name, testName);
687 uprv_strcat(name, ".32");
688 testTrieRanges2(name,
689 setRanges, countSetRanges,
690 checkRanges, countCheckRanges,
/* variant tagged ".16" */
693 uprv_strcpy(name, testName);
694 uprv_strcat(name, ".16");
695 testTrieRanges2(name,
696 setRanges, countSetRanges,
697 checkRanges, countCheckRanges,
701 /* test data ----------------------------------------------------------------*/
703 /* set consecutive ranges, even with value 0 */
/*
 * Data set used by the "set1" run in TrieTest.
 * Each SetRange initializer is presumably {start, limit, value, overwrite},
 * matching the members read in the setRanges loops -- the struct body and the
 * array names are not visible in this excerpt. Every entry here uses
 * overwrite FALSE.
 */
704 static const SetRange
707 {0x20, 0xa7, 0x1234, FALSE},
708 {0xa7, 0x3400, 0, FALSE},
709 {0x3400, 0x9fa6, 0x6162, FALSE},
710 {0x9fa6, 0xda9e, 0x3132, FALSE},
711 {0xdada, 0xeeee, 0x87ff, FALSE}, /* try to disrupt _testFoldingOffset16() */
712 {0xeeee, 0x11111, 1, FALSE},
713 {0x11111, 0x44444, 0x6162, FALSE},
714 {0x44444, 0x60003, 0, FALSE},
715 {0xf0003, 0xf0004, 0xf, FALSE},
716 {0xf0004, 0xf0006, 0x10, FALSE},
717 {0xf0006, 0xf0007, 0x11, FALSE},
718 {0xf0007, 0xf0020, 0x12, FALSE},
719 {0xf0020, 0x110000, 0, FALSE}
/* expected boundaries for the data above; only the leading dummy entry is visible in this excerpt */
722 static const CheckRange
724 {0, 0}, /* dummy start range to make _testEnumRange() simpler */
742 /* set some interesting overlapping ranges */
/*
 * Data set used by the "set2-overlap" run in TrieTest. The entries are
 * applied in array order by the test drivers and mix overwrite TRUE and
 * FALSE.
 * NOTE(review): presumably overwrite FALSE leaves already-set code points
 * untouched -- confirm against utrie_setRange32(), which is not part of this
 * excerpt.
 */
743 static const SetRange
745 {0x21, 0x7f, 0x5555, TRUE},
746 {0x2f800,0x2fedc, 0x7a, TRUE},
747 {0x72, 0xdd, 3, TRUE},
748 {0xdd, 0xde, 4, FALSE},
749 {0x201, 0x220, 6, TRUE}, /* 3 consecutive blocks with the same pattern but discontiguous value ranges */
750 {0x221, 0x240, 6, TRUE},
751 {0x241, 0x260, 6, TRUE},
752 {0x2f987,0x2fa98, 5, TRUE},
753 {0x2f777,0x2f833, 0, TRUE},
754 {0x2f900,0x2ffee, 1, FALSE},
755 {0x2ffee,0x2ffef, 2, TRUE}
/* expected boundaries for the data above; only the leading dummy entry is visible in this excerpt */
758 static const CheckRange
760 {0, 0}, /* dummy start range to make _testEnumRange() simpler */
780 /* use a non-zero initial value */
/*
 * Data set used by the "set3-initial-9" run in TrieTest. The value 9 in the
 * first CheckRange entry is what the test drivers pass to utrie_open() as
 * checkRanges[0].value.
 */
781 static const SetRange
783 {0x31, 0xa4, 1, FALSE},
784 {0x3400, 0x6789, 2, FALSE},
785 {0x30000,0x34567,9, TRUE},
786 {0x45678,0x56789,3, TRUE}
/* expected boundaries for the data above; only the leading entry is visible in this excerpt */
789 static const CheckRange
791 {0, 9}, /* dummy start range, also carries the initial value */
/*
 * Run every test variant (via testTrieRanges4) on each of the three data
 * sets. NOTE(review): the header of the enclosing function is not visible in
 * this excerpt; addTrieTest() registers it as TrieTest.
 */
803 testTrieRanges4("set1",
804 setRanges1, UPRV_LENGTHOF(setRanges1),
805 checkRanges1, UPRV_LENGTHOF(checkRanges1));
806 testTrieRanges4("set2-overlap",
807 setRanges2, UPRV_LENGTHOF(setRanges2),
808 checkRanges2, UPRV_LENGTHOF(checkRanges2));
809 testTrieRanges4("set3-initial-9",
810 setRanges3, UPRV_LENGTHOF(setRanges3),
811 checkRanges3, UPRV_LENGTHOF(checkRanges3));
814 /* test utrie_unserializeDummy() -------------------------------------------- */
/*
 * Folding-offset callback installed on the dummy trie by dummyTest().
 * Ignores its argument and always returns -1.
 */
816 static int32_t U_CALLCONV
817 dummyGetFoldingOffset(uint32_t data) {
818 return -1; /* never get non-initialValue data for supplementary code points */
/*
 * Tests utrie_unserializeDummy(): creates a dummy trie in a local buffer and
 * checks that every code point U+0000..U+10FFFF maps to initialValue and that
 * every lead surrogate code unit U+D800..U+DBFF maps to leadUnitValue.
 *
 * make16BitTrie selects the 16-bit or 32-bit lookup macros and is forwarded
 * to utrie_unserializeDummy().
 *
 * NOTE(review): the declarations of trie and c, the branch that picks the
 * test values for each width, and the conditions selecting the macro family
 * are missing from this excerpt.
 */
822 dummyTest(UBool make16BitTrie) {
823 int32_t mem[UTRIE_DUMMY_SIZE/4];
826 UErrorCode errorCode;
829 uint32_t value, initialValue, leadUnitValue;
833 leadUnitValue=0xaffe;
835 initialValue=0x01234567;
836 leadUnitValue=0x89abcdef;
839 errorCode=U_ZERO_ERROR;
840 utrie_unserializeDummy(&trie, mem, sizeof(mem), initialValue, leadUnitValue, make16BitTrie, &errorCode);
841 if(U_FAILURE(errorCode)) {
842 log_err("utrie_unserializeDummy(make16BitTrie=%d) failed - %s\n", make16BitTrie, u_errorName(errorCode));
/* supplementary lookups go through a callback that always reports "no data" (-1) */
845 trie.getFoldingOffset=dummyGetFoldingOffset;
847 /* test that all code points have initialValue */
848 for(c=0; c<=0x10ffff; ++c) {
850 UTRIE_GET16(&trie, c, value);
852 UTRIE_GET32(&trie, c, value);
854 if(value!=initialValue) {
855 log_err("UTRIE_GET%s(dummy, U+%04lx)=0x%lx instead of 0x%lx\n",
856 make16BitTrie ? "16" : "32", (long)c, (long)value, (long)initialValue);
860 /* test that the lead surrogate code units have leadUnitValue */
861 for(c=0xd800; c<=0xdbff; ++c) {
863 value=UTRIE_GET16_FROM_LEAD(&trie, c);
865 value=UTRIE_GET32_FROM_LEAD(&trie, c);
867 if(value!=leadUnitValue) {
868 log_err("UTRIE_GET%s_FROM_LEAD(dummy, U+%04lx)=0x%lx instead of 0x%lx\n",
869 make16BitTrie ? "16" : "32", (long)c, (long)value, (long)leadUnitValue);
/*
 * Test entry point registered by addTrieTest() under
 * "tsutil/trietest/DummyTrieTest".
 * NOTE(review): the body is not visible in this excerpt; presumably it calls
 * dummyTest() for both trie widths -- confirm against the full source.
 */
875 DummyTrieTest(void) {
/*
 * Registers this file's two tests, TrieTest and DummyTrieTest, on the test
 * tree rooted at *root under the "tsutil/trietest/" path prefix.
 * The first line below is the forward declaration, the rest the definition.
 */
881 addTrieTest(TestNode** root);
884 addTrieTest(TestNode** root) {
885 addTest(root, &TrieTest, "tsutil/trietest/TrieTest");
886 addTest(root, &DummyTrieTest, "tsutil/trietest/DummyTrieTest");