1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 * Copyright (c) 1999-2016, International Business Machines Corporation and
5 * others. All Rights Reserved.
7 * Generator for source/i18n/collunsafe.h
12 #include "unicode/uversion.h"
13 #include "unicode/uniset.h"
14 #include "collationroot.h"
15 #include "collationtailoring.h"
18 * Define the type of generator to use. Choose one.
20 #define SERIALIZE 1 //< Default: use UnicodeSet::serialize() output together with a new internal c'tor
21 #define RANGES 0 //< Enumerate the set's ranges (works, but not as fast; no support in collationdatareader.cpp)
22 #define PATTERN 0 //< Generate a UnicodeSet pattern (depends on #11891 and is probably slower; no support in collationdatareader.cpp)
24 int main(int argc, const char *argv[]) {
25 UErrorCode errorCode = U_ZERO_ERROR;
27 // Get the unsafeBackwardsSet
28 const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode);
29 if(U_FAILURE(errorCode)) {
30 fprintf(stderr, "Err: %s getting root cache entry\n", u_errorName(errorCode));
33 const UVersionInfo &version = rootEntry->tailoring->version;
34 const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet;
36 u_versionToString(version, verString);
37 fprintf(stderr, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION, verString);
38 int32_t rangeCount = unsafeBackwardSet->getRangeCount();
41 fprintf(stderr, ".. serializing\n");
42 // UnicodeSet serialization
44 UErrorCode preflightCode = U_ZERO_ERROR;
46 int32_t serializedCount = unsafeBackwardSet->serialize(NULL,0,preflightCode);
47 if(U_FAILURE(preflightCode) && preflightCode != U_BUFFER_OVERFLOW_ERROR) {
48 fprintf(stderr, "Err: %s preflighting unicode set\n", u_errorName(preflightCode));
51 uint16_t *serializedData = new uint16_t[serializedCount];
53 unsafeBackwardSet->serialize(serializedData, serializedCount, errorCode);
54 if(U_FAILURE(errorCode)) {
55 delete [] serializedData;
56 fprintf(stderr, "Err: %s serializing unicodeset\n", u_errorName(errorCode));
62 fprintf(stderr,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n");
63 // attempt to use pattern
65 UnicodeString pattern;
66 UnicodeSet set(*unsafeBackwardSet);
68 set.toPattern(pattern, FALSE);
70 if(U_SUCCESS(errorCode)) {
71 // This fails (bug# ?) - which is why this method was abandoned.
73 // UnicodeSet usA(pattern, errorCode);
74 // fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode));
79 const UChar *buf = pattern.getBuffer();
80 int32_t needed = pattern.length();
85 int32_t len2 = pattern.extract(0, pattern.length(), buf2, "utf-8");
87 fprintf(stderr,"===\n%s\n===\n", buf2);
90 const UnicodeString unsafeBackwardPattern(FALSE, buf, needed);
91 if(U_SUCCESS(errorCode)) {
92 //UnicodeSet us(unsafeBackwardPattern, errorCode);
93 // fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode));
95 fprintf(stderr, "Uset OK - \n");
100 // Generate the output file.
102 printf("// collunsafe.h\n");
103 printf("// %s\n", U_COPYRIGHT_STRING);
105 printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n");
106 printf("// Machine generated, do not edit.\n");
108 printf("#ifndef COLLUNSAFE_H\n"
109 "#define COLLUNSAFE_H\n"
111 "#include \"unicode/utypes.h\"\n"
113 "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION "\"\n");
114 printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString);
119 printf("#define COLLUNSAFE_PATTERN 1\n");
120 printf("static const int32_t collunsafe_len = %d;\n", needed);
121 printf("static const UChar collunsafe_pattern[collunsafe_len] = {\n");
122 for(int i=0;i<needed;i++) {
123 if( (i>0) && (i%8 == 0) ) {
124 printf(" // %d\n", i);
126 printf("0x%04X", buf[i]); // TODO check
127 if(i != (needed-1)) {
131 printf(" //%d\n};\n", (needed-1));
135 fprintf(stderr, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n");
136 printf("#define COLLUNSAFE_RANGE 1\n");
137 printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount);
138 printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2);
139 for(int32_t i=0;i<rangeCount;i++) {
140 printf(" 0x%04X, 0x%04X, // %d\n",
141 unsafeBackwardSet->getRangeStart(i),
142 unsafeBackwardSet->getRangeEnd(i),
149 printf("#define COLLUNSAFE_SERIALIZE 1\n");
150 printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount);
151 printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount);
152 for(int32_t i=0;i<serializedCount;i++) {
153 if( (i>0) && (i%8 == 0) ) {
154 printf(" // %d\n", i);
156 printf("0x%04X", serializedData[i]); // TODO check
157 if(i != (serializedCount-1)) {
167 return(U_SUCCESS(errorCode)?0:1);