4eea504bc26b774494f499834307aaccdd23c121
[platform/upstream/icu.git] / source / common / uloc_keytype.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2014-2016, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 */
9 #include "unicode/utypes.h"
10
11 #include "cstring.h"
12 #include "uassert.h"
13 #include "ucln_cmn.h"
14 #include "uhash.h"
15 #include "umutex.h"
16 #include "uresimp.h"
17 #include "uvector.h"
18 #include "udataswp.h" /* for InvChar functions */
19
20 static UHashtable* gLocExtKeyMap = NULL;
21 static icu::UInitOnce gLocExtKeyMapInitOnce = U_INITONCE_INITIALIZER;
22 static icu::UVector* gKeyTypeStringPool = NULL;
23 static icu::UVector* gLocExtKeyDataEntries = NULL;
24 static icu::UVector* gLocExtTypeEntries = NULL;
25
// Bit flags for "special" types: values that are accepted by pattern
// (see the isSpecialType*() functions) instead of by table lookup.
// The flags are OR-ed together into LocExtKeyData::specialTypes.
typedef enum {
    SPECIALTYPE_NONE         = 0,
    SPECIALTYPE_CODEPOINTS   = 1 << 0,
    SPECIALTYPE_REORDER_CODE = 1 << 1,
    SPECIALTYPE_RG_KEY_VALUE = 1 << 2
} SpecialType;
33
34 typedef struct LocExtKeyData {
35     const char*     legacyId;
36     const char*     bcpId;
37     UHashtable*     typeMap;
38     uint32_t        specialTypes;
39 } LocExtKeyData;
40
// One type value of a locale extension key, in both spellings.
struct LocExtType {
    // Legacy type id.
    const char*     legacyId;
    // BCP 47 type id; the same pointer as legacyId when the resource value is empty.
    const char*     bcpId;
};
45
46 U_CDECL_BEGIN
47
48 static UBool U_CALLCONV
49 uloc_key_type_cleanup(void) {
50     if (gLocExtKeyMap != NULL) {
51         uhash_close(gLocExtKeyMap);
52         gLocExtKeyMap = NULL;
53     }
54
55     delete gLocExtKeyDataEntries;
56     gLocExtKeyDataEntries = NULL;
57
58     delete gLocExtTypeEntries;
59     gLocExtTypeEntries = NULL;
60
61     delete gKeyTypeStringPool;
62     gKeyTypeStringPool = NULL;
63
64     gLocExtKeyMapInitOnce.reset();
65     return TRUE;
66 }
67
68 static void U_CALLCONV
69 uloc_deleteKeyTypeStringPoolEntry(void* obj) {
70     uprv_free(obj);
71 }
72
73 static void U_CALLCONV
74 uloc_deleteKeyDataEntry(void* obj) {
75     LocExtKeyData* keyData = (LocExtKeyData*)obj;
76     if (keyData->typeMap != NULL) {
77         uhash_close(keyData->typeMap);
78     }
79     uprv_free(keyData);
80 }
81
82 static void U_CALLCONV
83 uloc_deleteTypeEntry(void* obj) {
84     uprv_free(obj);
85 }
86
87 U_CDECL_END
88
89
90 static void U_CALLCONV
91 initFromResourceBundle(UErrorCode& sts) {
92     U_NAMESPACE_USE
93     ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup);
94
95     gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
96
97     LocalUResourceBundlePointer keyTypeDataRes(ures_openDirect(NULL, "keyTypeData", &sts));
98     LocalUResourceBundlePointer keyMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "keyMap", NULL, &sts));
99     LocalUResourceBundlePointer typeMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeMap", NULL, &sts));
100
101     if (U_FAILURE(sts)) {
102         return;
103     }
104
105     UErrorCode tmpSts = U_ZERO_ERROR;
106     LocalUResourceBundlePointer typeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeAlias", NULL, &tmpSts));
107     tmpSts = U_ZERO_ERROR;
108     LocalUResourceBundlePointer bcpTypeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "bcpTypeAlias", NULL, &tmpSts));
109
110     // initialize vectors storing dynamically allocated objects
111     gKeyTypeStringPool = new UVector(uloc_deleteKeyTypeStringPoolEntry, NULL, sts);
112     if (gKeyTypeStringPool == NULL) {
113         if (U_SUCCESS(sts)) {
114             sts = U_MEMORY_ALLOCATION_ERROR;
115         }
116     }
117     if (U_FAILURE(sts)) {
118         return;
119     }
120     gLocExtKeyDataEntries = new UVector(uloc_deleteKeyDataEntry, NULL, sts);
121     if (gLocExtKeyDataEntries == NULL) {
122         if (U_SUCCESS(sts)) {
123             sts = U_MEMORY_ALLOCATION_ERROR;
124         }
125     }
126     if (U_FAILURE(sts)) {
127         return;
128     }
129     gLocExtTypeEntries = new UVector(uloc_deleteTypeEntry, NULL, sts);
130     if (gLocExtTypeEntries == NULL) {
131         if (U_SUCCESS(sts)) {
132             sts = U_MEMORY_ALLOCATION_ERROR;
133         }
134     }
135     if (U_FAILURE(sts)) {
136         return;
137     }
138
139     // iterate through keyMap resource
140     LocalUResourceBundlePointer keyMapEntry;
141
142     while (ures_hasNext(keyMapRes.getAlias())) {
143         keyMapEntry.adoptInstead(ures_getNextResource(keyMapRes.getAlias(), keyMapEntry.orphan(), &sts));
144         if (U_FAILURE(sts)) {
145             break;
146         }
147         const char* legacyKeyId = ures_getKey(keyMapEntry.getAlias());
148         int32_t bcpKeyIdLen = 0;
149         const UChar* uBcpKeyId = ures_getString(keyMapEntry.getAlias(), &bcpKeyIdLen, &sts);
150         if (U_FAILURE(sts)) {
151             break;
152         }
153
154         // empty value indicates that BCP key is same with the legacy key.
155         const char* bcpKeyId = legacyKeyId;
156         if (bcpKeyIdLen > 0) {
157             char* bcpKeyIdBuf = (char*)uprv_malloc(bcpKeyIdLen + 1);
158             if (bcpKeyIdBuf == NULL) {
159                 sts = U_MEMORY_ALLOCATION_ERROR;
160                 break;
161             }
162             u_UCharsToChars(uBcpKeyId, bcpKeyIdBuf, bcpKeyIdLen);
163             bcpKeyIdBuf[bcpKeyIdLen] = 0;
164             gKeyTypeStringPool->addElement(bcpKeyIdBuf, sts);
165             if (U_FAILURE(sts)) {
166                 break;
167             }
168             bcpKeyId = bcpKeyIdBuf;
169         }
170
171         UBool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0;
172
173         UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
174         if (U_FAILURE(sts)) {
175             break;
176         }
177         uint32_t specialTypes = SPECIALTYPE_NONE;
178
179         LocalUResourceBundlePointer typeAliasResByKey;
180         LocalUResourceBundlePointer bcpTypeAliasResByKey;
181
182         if (typeAliasRes.isValid()) {
183             tmpSts = U_ZERO_ERROR;
184             typeAliasResByKey.adoptInstead(ures_getByKey(typeAliasRes.getAlias(), legacyKeyId, NULL, &tmpSts));
185             if (U_FAILURE(tmpSts)) {
186                 typeAliasResByKey.orphan();
187             }
188         }
189         if (bcpTypeAliasRes.isValid()) {
190             tmpSts = U_ZERO_ERROR;
191             bcpTypeAliasResByKey.adoptInstead(ures_getByKey(bcpTypeAliasRes.getAlias(), bcpKeyId, NULL, &tmpSts));
192             if (U_FAILURE(tmpSts)) {
193                 bcpTypeAliasResByKey.orphan();
194             }
195         }
196
197         // look up type map for the key, and walk through the mapping data
198         tmpSts = U_ZERO_ERROR;
199         LocalUResourceBundlePointer typeMapResByKey(ures_getByKey(typeMapRes.getAlias(), legacyKeyId, NULL, &tmpSts));
200         if (U_FAILURE(tmpSts)) {
201             // type map for each key must exist
202             U_ASSERT(FALSE);
203         } else {
204             LocalUResourceBundlePointer typeMapEntry;
205
206             while (ures_hasNext(typeMapResByKey.getAlias())) {
207                 typeMapEntry.adoptInstead(ures_getNextResource(typeMapResByKey.getAlias(), typeMapEntry.orphan(), &sts));
208                 if (U_FAILURE(sts)) {
209                     break;
210                 }
211                 const char* legacyTypeId = ures_getKey(typeMapEntry.getAlias());
212
213                 // special types
214                 if (uprv_strcmp(legacyTypeId, "CODEPOINTS") == 0) {
215                     specialTypes |= SPECIALTYPE_CODEPOINTS;
216                     continue;
217                 }
218                 if (uprv_strcmp(legacyTypeId, "REORDER_CODE") == 0) {
219                     specialTypes |= SPECIALTYPE_REORDER_CODE;
220                     continue;
221                 }
222                 if (uprv_strcmp(legacyTypeId, "RG_KEY_VALUE") == 0) {
223                     specialTypes |= SPECIALTYPE_RG_KEY_VALUE;
224                     continue;
225                 }
226
227                 if (isTZ) {
228                     // a timezone key uses a colon instead of a slash in the resource.
229                     // e.g. America:Los_Angeles
230                     if (uprv_strchr(legacyTypeId, ':') != NULL) {
231                         int32_t legacyTypeIdLen = uprv_strlen(legacyTypeId);
232                         char* legacyTypeIdBuf = (char*)uprv_malloc(legacyTypeIdLen + 1);
233                         if (legacyTypeIdBuf == NULL) {
234                             sts = U_MEMORY_ALLOCATION_ERROR;
235                             break;
236                         }
237                         const char* p = legacyTypeId;
238                         char* q = legacyTypeIdBuf;
239                         while (*p) {
240                             if (*p == ':') {
241                                 *q++ = '/';
242                             } else {
243                                 *q++ = *p;
244                             }
245                             p++;
246                         }
247                         *q = 0;
248
249                         gKeyTypeStringPool->addElement(legacyTypeIdBuf, sts);
250                         if (U_FAILURE(sts)) {
251                             break;
252                         }
253                         legacyTypeId = legacyTypeIdBuf;
254                     }
255                 }
256
257                 int32_t bcpTypeIdLen = 0;
258                 const UChar* uBcpTypeId = ures_getString(typeMapEntry.getAlias(), &bcpTypeIdLen, &sts);
259                 if (U_FAILURE(sts)) {
260                     break;
261                 }
262
263                 // empty value indicates that BCP type is same with the legacy type.
264                 const char* bcpTypeId = legacyTypeId;
265                 if (bcpTypeIdLen > 0) {
266                     char* bcpTypeIdBuf = (char*)uprv_malloc(bcpTypeIdLen + 1);
267                     if (bcpTypeIdBuf == NULL) {
268                         sts = U_MEMORY_ALLOCATION_ERROR;
269                         break;
270                     }
271                     u_UCharsToChars(uBcpTypeId, bcpTypeIdBuf, bcpTypeIdLen);
272                     bcpTypeIdBuf[bcpTypeIdLen] = 0;
273                     gKeyTypeStringPool->addElement(bcpTypeIdBuf, sts);
274                     if (U_FAILURE(sts)) {
275                         break;
276                     }
277                     bcpTypeId = bcpTypeIdBuf;
278                 }
279
280                 // Note: legacy type value should never be
281                 // equivalent to bcp type value of a different
282                 // type under the same key. So we use a single
283                 // map for lookup.
284                 LocExtType* t = (LocExtType*)uprv_malloc(sizeof(LocExtType));
285                 if (t == NULL) {
286                     sts = U_MEMORY_ALLOCATION_ERROR;
287                     break;
288                 }
289                 t->bcpId = bcpTypeId;
290                 t->legacyId = legacyTypeId;
291                 gLocExtTypeEntries->addElement((void*)t, sts);
292                 if (U_FAILURE(sts)) {
293                     break;
294                 }
295
296                 uhash_put(typeDataMap, (void*)legacyTypeId, t, &sts);
297                 if (bcpTypeId != legacyTypeId) {
298                     // different type value
299                     uhash_put(typeDataMap, (void*)bcpTypeId, t, &sts);
300                 }
301                 if (U_FAILURE(sts)) {
302                     break;
303                 }
304
305                 // also put aliases in the map
306                 if (typeAliasResByKey.isValid()) {
307                     LocalUResourceBundlePointer typeAliasDataEntry;
308
309                     ures_resetIterator(typeAliasResByKey.getAlias());
310                     while (ures_hasNext(typeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
311                         int32_t toLen;
312                         typeAliasDataEntry.adoptInstead(ures_getNextResource(typeAliasResByKey.getAlias(), typeAliasDataEntry.orphan(), &sts));
313                         const UChar* to = ures_getString(typeAliasDataEntry.getAlias(), &toLen, &sts);
314                         if (U_FAILURE(sts)) {
315                             break;
316                         }
317                         // check if this is an alias of canoncal legacy type
318                         if (uprv_compareInvWithUChar(NULL, legacyTypeId, -1, to, toLen) == 0) {
319                             const char* from = ures_getKey(typeAliasDataEntry.getAlias());
320                             if (isTZ) {
321                                 // replace colon with slash if necessary
322                                 if (uprv_strchr(from, ':') != NULL) {
323                                     int32_t fromLen = uprv_strlen(from);
324                                     char* fromBuf = (char*)uprv_malloc(fromLen + 1);
325                                     if (fromBuf == NULL) {
326                                         sts = U_MEMORY_ALLOCATION_ERROR;
327                                         break;
328                                     }
329                                     const char* p = from;
330                                     char* q = fromBuf;
331                                     while (*p) {
332                                         if (*p == ':') {
333                                             *q++ = '/';
334                                         } else {
335                                             *q++ = *p;
336                                         }
337                                         p++;
338                                     }
339                                     *q = 0;
340
341                                     gKeyTypeStringPool->addElement(fromBuf, sts);
342                                     if (U_FAILURE(sts)) {
343                                         break;
344                                     }
345                                     from = fromBuf;
346                                 }
347                             }
348                             uhash_put(typeDataMap, (void*)from, t, &sts);
349                         }
350                     }
351                     if (U_FAILURE(sts)) {
352                         break;
353                     }
354                 }
355
356                 if (bcpTypeAliasResByKey.isValid()) {
357                     LocalUResourceBundlePointer bcpTypeAliasDataEntry;
358
359                     ures_resetIterator(bcpTypeAliasResByKey.getAlias());
360                     while (ures_hasNext(bcpTypeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
361                         int32_t toLen;
362                         bcpTypeAliasDataEntry.adoptInstead(ures_getNextResource(bcpTypeAliasResByKey.getAlias(), bcpTypeAliasDataEntry.orphan(), &sts));
363                         const UChar* to = ures_getString(bcpTypeAliasDataEntry.getAlias(), &toLen, &sts);
364                         if (U_FAILURE(sts)) {
365                             break;
366                         }
367                         // check if this is an alias of bcp type
368                         if (uprv_compareInvWithUChar(NULL, bcpTypeId, -1, to, toLen) == 0) {
369                             const char* from = ures_getKey(bcpTypeAliasDataEntry.getAlias());
370                             uhash_put(typeDataMap, (void*)from, t, &sts);
371                         }
372                     }
373                     if (U_FAILURE(sts)) {
374                         break;
375                     }
376                 }
377             }
378         }
379         if (U_FAILURE(sts)) {
380             break;
381         }
382
383         LocExtKeyData* keyData = (LocExtKeyData*)uprv_malloc(sizeof(LocExtKeyData));
384         if (keyData == NULL) {
385             sts = U_MEMORY_ALLOCATION_ERROR;
386             break;
387         }
388         keyData->bcpId = bcpKeyId;
389         keyData->legacyId = legacyKeyId;
390         keyData->specialTypes = specialTypes;
391         keyData->typeMap = typeDataMap;
392
393         gLocExtKeyDataEntries->addElement((void*)keyData, sts);
394         if (U_FAILURE(sts)) {
395             break;
396         }
397
398         uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts);
399         if (legacyKeyId != bcpKeyId) {
400             // different key value
401             uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts);
402         }
403         if (U_FAILURE(sts)) {
404             break;
405         }
406     }
407 }
408
409 static UBool
410 init() {
411     UErrorCode sts = U_ZERO_ERROR;
412     umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, sts);
413     if (U_FAILURE(sts)) {
414         return FALSE;
415     }
416     return TRUE;
417 }
418
419 static UBool
420 isSpecialTypeCodepoints(const char* val) {
421     int32_t subtagLen = 0;
422     const char* p = val;
423     while (*p) {
424         if (*p == '-') {
425             if (subtagLen < 4 || subtagLen > 6) {
426                 return FALSE;
427             }
428             subtagLen = 0;
429         } else if ((*p >= '0' && *p <= '9') ||
430                     (*p >= 'A' && *p <= 'F') || // A-F/a-f are contiguous
431                     (*p >= 'a' && *p <= 'f')) { // also in EBCDIC
432             subtagLen++;
433         } else {
434             return FALSE;
435         }
436         p++;
437     }
438     return (subtagLen >= 4 && subtagLen <= 6);
439 }
440
441 static UBool
442 isSpecialTypeReorderCode(const char* val) {
443     int32_t subtagLen = 0;
444     const char* p = val;
445     while (*p) {
446         if (*p == '-') {
447             if (subtagLen < 3 || subtagLen > 8) {
448                 return FALSE;
449             }
450             subtagLen = 0;
451         } else if (uprv_isASCIILetter(*p)) {
452             subtagLen++;
453         } else {
454             return FALSE;
455         }
456         p++;
457     }
458     return (subtagLen >=3 && subtagLen <=8);
459 }
460
461 static UBool
462 isSpecialTypeRgKeyValue(const char* val) {
463     int32_t subtagLen = 0;
464     const char* p = val;
465     while (*p) {
466         if ( (subtagLen < 2 && uprv_isASCIILetter(*p)) ||
467                     (subtagLen >= 2 && (*p == 'Z' || *p == 'z')) ) {
468             subtagLen++;
469         } else {
470             return FALSE;
471         }
472         p++;
473     }
474     return (subtagLen == 6);
475     return TRUE;
476 }
477
478 U_CFUNC const char*
479 ulocimp_toBcpKey(const char* key) {
480     if (!init()) {
481         return NULL;
482     }
483
484     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
485     if (keyData != NULL) {
486         return keyData->bcpId;
487     }
488     return NULL;
489 }
490
491 U_CFUNC const char*
492 ulocimp_toLegacyKey(const char* key) {
493     if (!init()) {
494         return NULL;
495     }
496
497     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
498     if (keyData != NULL) {
499         return keyData->legacyId;
500     }
501     return NULL;
502 }
503
504 U_CFUNC const char*
505 ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
506     if (isKnownKey != NULL) {
507         *isKnownKey = FALSE;
508     }
509     if (isSpecialType != NULL) {
510         *isSpecialType = FALSE;
511     }
512
513     if (!init()) {
514         return NULL;
515     }
516
517     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
518     if (keyData != NULL) {
519         if (isKnownKey != NULL) {
520             *isKnownKey = TRUE;
521         }
522         LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap, type);
523         if (t != NULL) {
524             return t->bcpId;
525         }
526         if (keyData->specialTypes != SPECIALTYPE_NONE) {
527             UBool matched = FALSE;
528             if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
529                 matched = isSpecialTypeCodepoints(type);
530             }
531             if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
532                 matched = isSpecialTypeReorderCode(type);
533             }
534             if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) {
535                 matched = isSpecialTypeRgKeyValue(type);
536             }
537             if (matched) {
538                 if (isSpecialType != NULL) {
539                     *isSpecialType = TRUE;
540                 }
541                 return type;
542             }
543         }
544     }
545     return NULL;
546 }
547
548
549 U_CFUNC const char*
550 ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
551     if (isKnownKey != NULL) {
552         *isKnownKey = FALSE;
553     }
554     if (isSpecialType != NULL) {
555         *isSpecialType = FALSE;
556     }
557
558     if (!init()) {
559         return NULL;
560     }
561
562     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
563     if (keyData != NULL) {
564         if (isKnownKey != NULL) {
565             *isKnownKey = TRUE;
566         }
567         LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap, type);
568         if (t != NULL) {
569             return t->legacyId;
570         }
571         if (keyData->specialTypes != SPECIALTYPE_NONE) {
572             UBool matched = FALSE;
573             if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
574                 matched = isSpecialTypeCodepoints(type);
575             }
576             if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
577                 matched = isSpecialTypeReorderCode(type);
578             }
579             if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) {
580                 matched = isSpecialTypeRgKeyValue(type);
581             }
582             if (matched) {
583                 if (isSpecialType != NULL) {
584                     *isSpecialType = TRUE;
585                 }
586                 return type;
587             }
588         }
589     }
590     return NULL;
591 }
592