1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 1996-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: ucol_res.cpp
10 * tab size: 8 (not used)
14 * This file contains dependencies that the collation run-time doesn't normally
15 * need. This mainly contains resource bundle usage and collation meta information
17 * Modification history
19 * 1996-1999 various members of ICU team maintained C API for collation framework
20 * 02/16/2001 synwee Added internal method getPrevSpecialCE
21 * 03/01/2001 synwee Added maxexpansion functionality.
22 * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
23 * 12/08/2004 grhoten Split part of ucol.cpp into ucol_res.cpp
24 * 2012-2014 markus Rewritten in C++ again.
27 #include "unicode/utypes.h"
29 #if !UCONFIG_NO_COLLATION
31 #include "unicode/coll.h"
32 #include "unicode/localpointer.h"
33 #include "unicode/locid.h"
34 #include "unicode/tblcoll.h"
35 #include "unicode/ucol.h"
36 #include "unicode/uloc.h"
37 #include "unicode/unistr.h"
38 #include "unicode/ures.h"
42 #include "collationdatareader.h"
43 #include "collationroot.h"
44 #include "collationtailoring.h"
53 #include "unifiedcache.h"
// File-scope state for the root collation rules.
// rootRules/rootRulesLength receive the "UCARules" string that
// CollationLoader::loadRootRules() reads from rootBundle.
// NOTE(review): presumably rootRules points into rootBundle's data and is only
// valid while that bundle stays open -- confirm.
62 static const UChar *rootRules = NULL;
63 static int32_t rootRulesLength = 0;
// Bundle opened for the root locale by loadRootRules().
64 static UResourceBundle *rootBundle = NULL;
// One-time gate passed to umtx_initOnce() by appendRootRules().
65 static UInitOnce gInitOnce = U_INITONCE_INITIALIZER;
// Cleanup routine for this file's static state: closes the root bundle.
// NOTE(review): only part of this function is visible here; presumably it is the
// ucol_res_cleanup callback that loadRootRules() registers -- confirm.
71 static UBool U_CALLCONV
75 ures_close(rootBundle);
// Opens the root collation bundle and stores its "UCARules" string in the
// file-scope rootRules/rootRulesLength variables.
// errorCode is an in/out ICU error code; the function returns immediately if it
// already indicates a failure.
82 CollationLoader::loadRootRules(UErrorCode &errorCode) {
83 if(U_FAILURE(errorCode)) { return; }
84 rootBundle = ures_open(U_ICUDATA_COLL, kRootLocaleName, &errorCode);
85 if(U_FAILURE(errorCode)) { return; }
86 rootRules = ures_getStringByKey(rootBundle, "UCARules", &rootRulesLength, &errorCode);
87 if(U_FAILURE(errorCode)) {
// The string lookup failed: release the bundle that was just opened.
88 ures_close(rootBundle);
// Register ucol_res_cleanup under the UCLN_I18N_UCOL_RES slot.
92 ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES, ucol_res_cleanup);
// Appends the root collation rules to s.
// The rules are loaded through umtx_initOnce() with a local error code, so no
// error is reported to the caller; the append happens only when that local
// error code indicates success.
98 CollationLoader::appendRootRules(UnicodeString &s) {
99 UErrorCode errorCode = U_ZERO_ERROR;
100 umtx_initOnce(gInitOnce, CollationLoader::loadRootRules, errorCode);
101 if(U_SUCCESS(errorCode)) {
102 s.append(rootRules, rootRulesLength);
// Loads the "Sequence" rule string of collations/<type> for a locale into rules.
//   localeID      - locale ID whose collation bundle is opened
//   collationType - collation type; asserted to be non-NULL and non-empty
//   rules         - receives the rule string
//   errorCode     - in/out ICU error code; nothing is done if it is already a failure
107 CollationLoader::loadRules(const char *localeID, const char *collationType,
108 UnicodeString &rules, UErrorCode &errorCode) {
109 if(U_FAILURE(errorCode)) { return; }
110 U_ASSERT(collationType != NULL && *collationType != 0);
111 // Copy the type for lowercasing.
113 int32_t typeLength = uprv_strlen(collationType);
// A type that does not fit into the local buffer is rejected as an illegal argument.
114 if(typeLength >= UPRV_LENGTHOF(type)) {
115 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
// typeLength + 1 bytes are copied, one more than the string length.
118 uprv_memcpy(type, collationType, typeLength + 1);
119 T_CString_toLowerCase(type);
// Walk bundle -> "collations" -> <type> -> "Sequence". The same errorCode is
// passed to every lookup and checked once after the last one.
121 LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_COLL, localeID, &errorCode));
122 LocalUResourceBundlePointer collations(
123 ures_getByKey(bundle.getAlias(), "collations", NULL, &errorCode));
124 LocalUResourceBundlePointer data(
125 ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
127 const UChar *s = ures_getStringByKey(data.getAlias(), "Sequence", &length, &errorCode);
128 if(U_FAILURE(errorCode)) { return; }
130 // No string pointer aliasing so that we need not hold onto the resource bundle.
131 rules.setTo(s, length);
// A bogus string after setTo() is reported as an allocation failure.
132 if(rules.isBogus()) {
133 errorCode = U_MEMORY_ALLOCATION_ERROR;
// Specialization of LocaleCacheKey<CollationCacheEntry>::createObject().
// creationContext is cast back to a CollationLoader (getCacheEntry() passes
// `this` as the context argument of cache->get()), and the work is delegated
// to that loader's createCacheEntry().
137 template<> U_I18N_API
138 const CollationCacheEntry *
139 LocaleCacheKey<CollationCacheEntry>::createObject(const void *creationContext,
140 UErrorCode &errorCode) const {
141 CollationLoader *loader =
142 reinterpret_cast<CollationLoader *>(
143 const_cast<void *>(creationContext));
144 return loader->createCacheEntry(errorCode);
// Entry point for loading the collation cache entry for a locale.
// Fetches the root cache entry first and returns NULL if that fails.
// An empty locale name and the name "root" are handled by a separate branch;
// all other locales go through a CollationLoader and its getCacheEntry().
147 const CollationCacheEntry *
148 CollationLoader::loadTailoring(const Locale &locale, UErrorCode &errorCode) {
149 const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode);
150 if(U_FAILURE(errorCode)) { return NULL; }
151 const char *name = locale.getName();
// NOTE(review): the body of this branch is not shown; per the comment below it
// presumably returns rootEntry with an added reference -- confirm.
152 if(*name == 0 || uprv_strcmp(name, "root") == 0) {
154 // Have to add a ref.
159 // Clear warning codes before loading where they get cached.
160 errorCode = U_ZERO_ERROR;
161 CollationLoader loader(rootEntry, locale, errorCode);
163 // getCacheEntry adds a ref for us.
164 return loader.getCacheEntry(errorCode);
// Constructs a loader for one lookup.
//   re        - root cache entry; its validLocale seeds this loader's validLocale
//   requested - requested locale; copied into `locale` and then canonicalized
//   errorCode - in/out ICU error code, also passed to UnifiedCache::getInstance()
// The resource bundle members start out as NULL, and the type-fallback
// bookkeeping (typesTried, typeFallback) starts out cleared.
167 CollationLoader::CollationLoader(const CollationCacheEntry *re, const Locale &requested,
168 UErrorCode &errorCode)
169 : cache(UnifiedCache::getInstance(errorCode)), rootEntry(re),
170 validLocale(re->validLocale), locale(requested),
171 typesTried(0), typeFallback(FALSE),
172 bundle(NULL), collations(NULL), data(NULL) {
175 if(U_FAILURE(errorCode)) { return; }
177 // Canonicalize the locale ID: Ignore all irrelevant keywords.
178 const char *baseName = locale.getBaseName();
// The name differs from the base name only when something follows the base
// name; in that case the locale is rebuilt from the base name alone.
179 if(uprv_strcmp(locale.getName(), baseName) != 0) {
180 locale = Locale(baseName);
182 // Fetch the collation type from the locale ID.
// The capacity passed is one less than the buffer size, which leaves room for
// the terminator written at type[typeLength] below.
183 int32_t typeLength = requested.getKeywordValue("collation",
184 type, UPRV_LENGTHOF(type) - 1, errorCode);
// Any failure while reading the keyword is reported as an illegal argument.
185 if(U_FAILURE(errorCode)) {
186 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
189 type[typeLength] = 0; // in case of U_NOT_TERMINATED_WARNING
190 if(typeLength == 0) {
191 // No collation type.
192 } else if(uprv_stricmp(type, "default") == 0) {
193 // Ignore "default" (case-insensitive).
196 // Copy the collation type.
// The type is lowercased and put back on the canonicalized locale.
197 T_CString_toLowerCase(type);
198 locale.setKeywordValue("collation", type, errorCode);
// Releases the resource bundle handles held by the loader.
// NOTE(review): only the close of `collations` is visible here; presumably
// `data` and `bundle` are closed as well -- confirm.
203 CollationLoader::~CollationLoader() {
205 ures_close(collations);
// Dispatches to the next loading stage based on which resource handles have
// already been set: loadFromLocale(), loadFromBundle(), loadFromCollations()
// or loadFromData().
// NOTE(review): the condition guarding the loadFromLocale() branch is not
// shown; presumably it tests bundle == NULL -- confirm.
209 const CollationCacheEntry *
210 CollationLoader::createCacheEntry(UErrorCode &errorCode) {
211 // This is a linear lookup and fallback flow turned into a state machine.
212 // Most local variables have been turned into instance fields.
213 // In a cache miss, cache.get() calls CacheKey::createObject(),
214 // which means that we progress via recursion.
215 // loadFromCollations() will recurse to itself as well for collation type fallback.
217 return loadFromLocale(errorCode);
218 } else if(collations == NULL) {
219 return loadFromBundle(errorCode);
220 } else if(data == NULL) {
221 return loadFromCollations(errorCode);
223 return loadFromData(errorCode);
// Stage 1: opens the collation bundle for the base name of `locale` and
// derives the valid locale from the bundle that was actually opened.
// Returns NULL if errorCode is, or becomes, a failure other than the
// missing-resource case handled below.
227 const CollationCacheEntry *
228 CollationLoader::loadFromLocale(UErrorCode &errorCode) {
229 if(U_FAILURE(errorCode)) { return NULL; }
230 U_ASSERT(bundle == NULL);
231 bundle = ures_openNoDefault(U_ICUDATA_COLL, locale.getBaseName(), &errorCode);
// A missing bundle is downgraded to a "using default" warning.
// NOTE(review): the rest of this branch is not shown; presumably it returns the
// root entry with an added reference -- confirm.
232 if(errorCode == U_MISSING_RESOURCE_ERROR) {
233 errorCode = U_USING_DEFAULT_WARNING;
235 // Have to add that ref that we promise.
// Keep a copy of the locale as requested so that a change can be detected below.
239 Locale requestedLocale(locale);
240 const char *vLocale = ures_getLocaleByType(bundle, ULOC_ACTUAL_LOCALE, &errorCode);
241 if(U_FAILURE(errorCode)) { return NULL; }
242 locale = validLocale = Locale(vLocale); // no type until loadFromCollations()
244 locale.setKeywordValue("collation", type, errorCode);
// If the resolved locale differs from the requested one, look the resolved
// locale up through the cache; otherwise continue with this bundle.
246 if(locale != requestedLocale) {
247 return getCacheEntry(errorCode);
249 return loadFromBundle(errorCode);
// Stage 2: fetches the "collations" table from the opened bundle, determines
// the bundle's default collation type, and records in typesTried which of the
// default/"search"/"standard" types the current type already covers.
// Returns NULL on failure; a missing "collations" table yields the root
// tailoring with a U_USING_DEFAULT_WARNING.
253 const CollationCacheEntry *
254 CollationLoader::loadFromBundle(UErrorCode &errorCode) {
255 if(U_FAILURE(errorCode)) { return NULL; }
256 U_ASSERT(collations == NULL);
257 // There are zero or more tailorings in the collations table.
258 collations = ures_getByKey(bundle, "collations", NULL, &errorCode);
259 if(errorCode == U_MISSING_RESOURCE_ERROR) {
260 errorCode = U_USING_DEFAULT_WARNING;
261 // Return the root tailoring with the validLocale, without collation type.
262 return makeCacheEntryFromRoot(validLocale, errorCode);
264 if(U_FAILURE(errorCode)) { return NULL; }
266 // Fetch the default type from the data.
// A separate error code is used so that a missing "default" entry does not
// affect the caller's errorCode.
268 UErrorCode internalErrorCode = U_ZERO_ERROR;
269 LocalUResourceBundlePointer def(
270 ures_getByKeyWithFallback(collations, "default", NULL, &internalErrorCode));
272 const UChar *s = ures_getString(def.getAlias(), &length, &internalErrorCode);
// Accept the value only if it is non-empty and shorter than the defaultType
// buffer; "standard" is the other value written to defaultType.
273 if(U_SUCCESS(internalErrorCode) && 0 < length && length < UPRV_LENGTHOF(defaultType)) {
274 u_UCharsToChars(s, defaultType, length + 1);
276 uprv_strcpy(defaultType, "standard");
280 // Record which collation types we have looked for already,
281 // so that we do not deadlock in the cache.
283 // If there is no explicit type, then we look in the cache
284 // for the entry with the default type.
285 // If the explicit type is the default type, then we do not look in the cache
286 // for the entry with an empty type.
287 // Otherwise, two concurrent requests with opposite fallbacks would deadlock each other.
288 // Also, it is easier to always enter the next method with a non-empty type.
// This path adopts the default type, sets it on the locale, and looks it up
// through the cache.
290 uprv_strcpy(type, defaultType);
291 typesTried |= TRIED_DEFAULT;
292 if(uprv_strcmp(type, "search") == 0) {
293 typesTried |= TRIED_SEARCH;
295 if(uprv_strcmp(type, "standard") == 0) {
296 typesTried |= TRIED_STANDARD;
298 locale.setKeywordValue("collation", type, errorCode);
299 return getCacheEntry(errorCode);
// This path keeps the current type: mark what it already covers, then load it
// from the collations table.
301 if(uprv_strcmp(type, defaultType) == 0) {
302 typesTried |= TRIED_DEFAULT;
304 if(uprv_strcmp(type, "search") == 0) {
305 typesTried |= TRIED_SEARCH;
307 if(uprv_strcmp(type, "standard") == 0) {
308 typesTried |= TRIED_STANDARD;
310 return loadFromCollations(errorCode);
// Stage 3: looks up collations/<type> and, if that type is missing, falls back
// through other types: a "search" prefix, then the default type, then
// "standard", and finally the root tailoring. Each fallback is recorded in
// typesTried and re-enters the cache via getCacheEntry().
// On success the resource is stored in `data`, and the function reuses the
// root entry, fetches the actual locale's entry from the cache, or continues
// with loadFromData().
314 const CollationCacheEntry *
315 CollationLoader::loadFromCollations(UErrorCode &errorCode) {
316 if(U_FAILURE(errorCode)) { return NULL; }
317 U_ASSERT(data == NULL);
318 // Load the collations/type tailoring, with type fallback.
// The result is held in a local owner first; it is handed over to the `data`
// member only after the error checks below.
319 LocalUResourceBundlePointer localData(
320 ures_getByKeyWithFallback(collations, type, NULL, &errorCode));
321 int32_t typeLength = uprv_strlen(type);
322 if(errorCode == U_MISSING_RESOURCE_ERROR) {
323 errorCode = U_USING_DEFAULT_WARNING;
325 if((typesTried & TRIED_SEARCH) == 0 &&
326 typeLength > 6 && uprv_strncmp(type, "search", 6) == 0) {
327 // fall back from something like "searchjl" to "search"
328 typesTried |= TRIED_SEARCH;
330 } else if((typesTried & TRIED_DEFAULT) == 0) {
331 // fall back to the default type
332 typesTried |= TRIED_DEFAULT;
333 uprv_strcpy(type, defaultType);
334 } else if((typesTried & TRIED_STANDARD) == 0) {
335 // fall back to the "standard" type
336 typesTried |= TRIED_STANDARD;
337 uprv_strcpy(type, "standard");
339 // Return the root tailoring with the validLocale, without collation type.
340 return makeCacheEntryFromRoot(validLocale, errorCode);
// A fallback type was chosen: set it on the locale and look it up through the cache.
342 locale.setKeywordValue("collation", type, errorCode);
343 return getCacheEntry(errorCode);
345 if(U_FAILURE(errorCode)) { return NULL; }
347 data = localData.orphan();
348 const char *actualLocale = ures_getLocaleByType(data, ULOC_ACTUAL_LOCALE, &errorCode);
349 if(U_FAILURE(errorCode)) { return NULL; }
350 const char *vLocale = validLocale.getBaseName();
351 UBool actualAndValidLocalesAreDifferent = uprv_strcmp(actualLocale, vLocale) != 0;
353 // Set the collation types on the informational locales,
354 // except when they match the default types (for brevity and backwards compatibility).
355 // For the valid locale, suppress the default type.
356 if(uprv_strcmp(type, defaultType) != 0) {
357 validLocale.setKeywordValue("collation", type, errorCode);
358 if(U_FAILURE(errorCode)) { return NULL; }
361 // Is this the same as the root collator? If so, then use that instead.
362 if((*actualLocale == 0 || uprv_strcmp(actualLocale, "root") == 0) &&
363 uprv_strcmp(type, "standard") == 0) {
365 errorCode = U_USING_DEFAULT_WARNING;
367 return makeCacheEntryFromRoot(validLocale, errorCode);
370 locale = Locale(actualLocale);
// When the data comes from a different locale than the valid one, fetch the
// actual locale's entry from the cache and pass it to makeCacheEntry() together
// with validLocale.
371 if(actualAndValidLocalesAreDifferent) {
372 locale.setKeywordValue("collation", type, errorCode);
373 const CollationCacheEntry *entry = getCacheEntry(errorCode);
374 return makeCacheEntry(validLocale, entry, errorCode);
376 return loadFromData(errorCode);
// Stage 4: builds a CollationTailoring from the "%%CollationBin" binary in
// `data`, attaches the optional "Sequence" rule string, sets the tailoring's
// actual locale, and creates a CollationCacheEntry from validLocale and the
// tailoring.
// Returns NULL on failure; allocation failures are reported as
// U_MEMORY_ALLOCATION_ERROR.
380 const CollationCacheEntry *
381 CollationLoader::loadFromData(UErrorCode &errorCode) {
382 if(U_FAILURE(errorCode)) { return NULL; }
// The new tailoring is constructed from the root tailoring's settings.
383 LocalPointer<CollationTailoring> t(new CollationTailoring(rootEntry->tailoring->settings));
384 if(t.isNull() || t->isBogus()) {
385 errorCode = U_MEMORY_ALLOCATION_ERROR;
390 LocalUResourceBundlePointer binary(ures_getByKey(data, "%%CollationBin", NULL, &errorCode));
391 // Note: U_MISSING_RESOURCE_ERROR --> The old code built from rules if available
392 // but that created undesirable dependencies.
394 const uint8_t *inBytes = ures_getBinary(binary.getAlias(), &length, &errorCode);
395 CollationDataReader::read(rootEntry->tailoring, inBytes, length, *t, errorCode);
396 // Note: U_COLLATOR_VERSION_MISMATCH --> The old code built from rules if available
397 // but that created undesirable dependencies.
398 if(U_FAILURE(errorCode)) { return NULL; }
400 // Try to fetch the optional rules string.
// A separate error code is used so that a missing "Sequence" string is not an error.
402 UErrorCode internalErrorCode = U_ZERO_ERROR;
404 const UChar *s = ures_getStringByKey(data, "Sequence", &length,
406 if(U_SUCCESS(internalErrorCode)) {
// NOTE(review): this is the setTo(TRUE, s, length) form, unlike loadRules()
// which uses setTo(s, length) to avoid aliasing. Presumably this aliases the
// resource string, so `data` must outlive t->rules -- confirm.
407 t->rules.setTo(TRUE, s, length);
411 const char *actualLocale = locale.getBaseName(); // without type
412 const char *vLocale = validLocale.getBaseName();
413 UBool actualAndValidLocalesAreDifferent = uprv_strcmp(actualLocale, vLocale) != 0;
415 // For the actual locale, suppress the default type *according to the actual locale*.
416 // For example, zh has default=pinyin and contains all of the Chinese tailorings.
417 // zh_Hant has default=stroke but has no other data.
418 // For the valid locale "zh_Hant" we need to suppress stroke.
419 // For the actual locale "zh" we need to suppress pinyin instead.
420 if(actualAndValidLocalesAreDifferent) {
421 // Opening a bundle for the actual locale should always succeed.
422 LocalUResourceBundlePointer actualBundle(
423 ures_open(U_ICUDATA_COLL, actualLocale, &errorCode));
424 if(U_FAILURE(errorCode)) { return NULL; }
425 UErrorCode internalErrorCode = U_ZERO_ERROR;
426 LocalUResourceBundlePointer def(
427 ures_getByKeyWithFallback(actualBundle.getAlias(), "collations/default", NULL,
428 &internalErrorCode));
430 const UChar *s = ures_getString(def.getAlias(), &length, &internalErrorCode);
// NOTE(review): unlike the corresponding check in loadFromBundle(), this one
// has no "0 < length" condition, so an empty default string would be copied
// into defaultType -- confirm whether that is intended.
431 if(U_SUCCESS(internalErrorCode) && length < UPRV_LENGTHOF(defaultType)) {
432 u_UCharsToChars(s, defaultType, length + 1);
434 uprv_strcpy(defaultType, "standard");
437 t->actualLocale = locale;
// Keep the type on the actual locale only when it is not the default type.
438 if(uprv_strcmp(type, defaultType) != 0) {
439 t->actualLocale.setKeywordValue("collation", type, errorCode);
440 } else if(uprv_strcmp(locale.getName(), locale.getBaseName()) != 0) {
441 // Remove the collation keyword if it was set.
442 t->actualLocale.setKeywordValue("collation", NULL, errorCode);
444 if(U_FAILURE(errorCode)) { return NULL; }
447 errorCode = U_USING_DEFAULT_WARNING;
// The entry is created from t.getAlias(), which does not release t's ownership here.
451 const CollationCacheEntry *entry = new CollationCacheEntry(validLocale, t.getAlias());
453 errorCode = U_MEMORY_ALLOCATION_ERROR;
457 // Have to add that reference that we promise.
// Looks up the entry for the current `locale` in the unified cache.
// `this` is passed as the creation context, which
// LocaleCacheKey<CollationCacheEntry>::createObject() casts back to a
// CollationLoader when it is called.
462 const CollationCacheEntry *
463 CollationLoader::getCacheEntry(UErrorCode &errorCode) {
464 LocaleCacheKey<CollationCacheEntry> key(locale);
465 const CollationCacheEntry *entry = NULL;
466 cache->get(key, this, entry, errorCode);
// Produces a cache entry based on the root entry by calling makeCacheEntry()
// with the validLocale member and rootEntry.
// The Locale parameter is unnamed and not used; the member validLocale is used
// instead.
470 const CollationCacheEntry *
471 CollationLoader::makeCacheEntryFromRoot(
472 const Locale &/*loc*/,
473 UErrorCode &errorCode) const {
474 if (U_FAILURE(errorCode)) {
478 return makeCacheEntry(validLocale, rootEntry, errorCode);
// Returns a cache entry for `loc` that shares the tailoring of entryFromCache.
// If errorCode is already a failure, or loc equals entryFromCache's
// validLocale, entryFromCache itself is returned unchanged.
// Otherwise a new CollationCacheEntry is allocated from loc and the same
// tailoring, and removeRef() is called on entryFromCache both when that
// allocation fails and when it succeeds.
481 const CollationCacheEntry *
482 CollationLoader::makeCacheEntry(
484 const CollationCacheEntry *entryFromCache,
485 UErrorCode &errorCode) {
486 if(U_FAILURE(errorCode) || loc == entryFromCache->validLocale) {
487 return entryFromCache;
489 CollationCacheEntry *entry = new CollationCacheEntry(loc, entryFromCache->tailoring);
491 errorCode = U_MEMORY_ALLOCATION_ERROR;
492 entryFromCache->removeRef();
496 entryFromCache->removeRef();
// C API: opens a collator for the locale ID loc.
// Creates a C++ Collator via Collator::createInstance() and, when *status
// indicates success, converts it with toUCollator(); otherwise result stays
// NULL. Entry and exit are traced with the UTRACE macros.
505 ucol_open(const char *loc,
508 UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN);
509 UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc);
510 UCollator *result = NULL;
512 Collator *coll = Collator::createInstance(loc, *status);
513 if(U_SUCCESS(*status)) {
514 result = coll->toUCollator();
516 UTRACE_EXIT_PTR_STATUS(result, *status);
// C API: writes the display name of the collator locale objLoc, localized for
// dispLoc, into result.
// Returns -1 if *status is already a failure; otherwise returns the value of
// dst.extract(result, resultLength, *status).
521 U_CAPI int32_t U_EXPORT2
522 ucol_getDisplayName( const char *objLoc,
525 int32_t resultLength,
528 if(U_FAILURE(*status)) return -1;
// Unless the caller passed (NULL, 0), dst is set up over the caller's buffer.
530 if(!(result==NULL && resultLength==0)) {
531 // NULL destination for pure preflighting: empty dummy string
532 // otherwise, alias the destination buffer
533 dst.setTo(result, 0, resultLength);
535 Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst);
536 return dst.extract(result, resultLength, *status);
// C API: returns the name of the index-th locale reported by
// Collator::getAvailableLocales().
// NOTE(review): the visible condition checks only index < count; a negative
// index is not rejected here and would be used to subscript loc -- confirm
// whether a lower-bound check (index >= 0) is needed.
539 U_CAPI const char* U_EXPORT2
540 ucol_getAvailable(int32_t index)
543 const Locale *loc = Collator::getAvailableLocales(count);
544 if (loc != NULL && index < count) {
545 return loc[index].getName();
// C API: asks Collator::getAvailableLocales() to fill in `count`; the returned
// locale array itself is ignored.
// NOTE(review): the return statement is not shown; presumably `count` is
// returned -- confirm.
550 U_CAPI int32_t U_EXPORT2
551 ucol_countAvailable()
554 Collator::getAvailableLocales(count);
558 #if !UCONFIG_NO_SERVICE
// C API: wraps the StringEnumeration returned by
// icu::Collator::getAvailableLocales() in a UEnumeration via
// uenum_openFromStringEnumeration().
// *status is an in/out ICU error code; it is set to
// U_MEMORY_ALLOCATION_ERROR on the allocation-failure path.
559 U_CAPI UEnumeration* U_EXPORT2
560 ucol_openAvailableLocales(UErrorCode *status) {
561 // This is a wrapper over Collator::getAvailableLocales()
562 if (U_FAILURE(*status)) {
565 StringEnumeration *s = icu::Collator::getAvailableLocales();
567 *status = U_MEMORY_ALLOCATION_ERROR;
570 return uenum_openFromStringEnumeration(s, status);
// Constants for the keyword functions below: the resource table is named
// "collations" while the locale keyword is "collation".
574 // Note: KEYWORDS[0] != RESOURCE_NAME - alan
// Name of the resource table passed to the ures_* lookups.
576 static const char RESOURCE_NAME[] = "collations";
// The list of keywords returned by ucol_getKeywords().
578 static const char* const KEYWORDS[] = { "collation" };
// Number of entries in KEYWORDS.
580 #define KEYWORD_COUNT UPRV_LENGTHOF(KEYWORDS)
// C API: returns an enumeration over the KEYWORDS array.
// When *status indicates success, the enumeration is created with
// uenum_openCharStringsEnumeration() and returned directly.
582 U_CAPI UEnumeration* U_EXPORT2
583 ucol_getKeywords(UErrorCode *status) {
584 UEnumeration *result = NULL;
585 if (U_SUCCESS(*status)) {
586 return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status);
// C API: returns the values available for a collation keyword.
// keyword must be non-NULL and equal to KEYWORDS[0] ("collation"); anything
// else sets *status to U_ILLEGAL_ARGUMENT_ERROR.
// The values are obtained with ures_getKeywordValues() for the RESOURCE_NAME
// table of the collation data.
591 U_CAPI UEnumeration* U_EXPORT2
592 ucol_getKeywordValues(const char *keyword, UErrorCode *status) {
593 if (U_FAILURE(*status)) {
596 // hard-coded to accept exactly one collation keyword
597 // modify if additional collation keyword is added later
598 if (keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0)
600 *status = U_ILLEGAL_ARGUMENT_ERROR;
603 return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status);
// Template UEnumeration whose callbacks are the ulist_* keyword-value
// functions. ucol_getKeywordValuesForLocale() copies it with memcpy() and then
// sets the copy's context to the list of values.
606 static const UEnumeration defaultKeywordValues = {
609 ulist_close_keyword_values_iterator,
610 ulist_count_keyword_values,
612 ulist_next_keyword_value,
613 ulist_reset_keyword_values_iterator
// ResourceSink that collects collation type names into the `values` list.
// The constructor creates an empty list and clears hasDefault; the destructor
// deletes the list.
618 struct KeywordsSink : public ResourceSink {
620 KeywordsSink(UErrorCode &errorCode) :
621 values(ulist_createEmptyList(&errorCode)), hasDefault(FALSE) {}
622 virtual ~KeywordsSink();
// Called with a resource value that is read as a table of collations.
// For each table entry:
//   - a string entry with key "default" is handled only while hasDefault is
//     still false: its value is moved to the front of the list;
//   - a table entry whose key does not start with "private-" is appended to
//     the end of the list unless the list already contains it.
// Note that the `key` and `value` parameters are reused as the loop's outputs.
624 virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/,
625 UErrorCode &errorCode) {
626 if (U_FAILURE(errorCode)) { return; }
627 ResourceTable collations = value.getTable(errorCode);
628 for (int32_t i = 0; collations.getKeyAndValue(i, key, value); ++i) {
629 UResType type = value.getType();
630 if (type == URES_STRING) {
631 if (!hasDefault && uprv_strcmp(key, "default") == 0) {
633 defcoll.appendInvariantChars(value.getUnicodeString(errorCode), errorCode);
634 if (U_SUCCESS(errorCode) && !defcoll.isEmpty()) {
// The default name is duplicated with uprv_strdup() before it is added to the list.
635 char *ownedDefault = uprv_strdup(defcoll.data());
636 if (ownedDefault == NULL) {
637 errorCode = U_MEMORY_ALLOCATION_ERROR;
// Remove the same name from the list first, then insert the copy at the
// beginning of the list.
// NOTE(review): the TRUE argument presumably tells the list to free the
// string; confirm against the ulist API.
640 ulist_removeString(values, defcoll.data());
641 ulist_addItemBeginList(values, ownedDefault, TRUE, &errorCode);
645 } else if (type == URES_TABLE && uprv_strncmp(key, "private-", 8) != 0) {
646 if (!ulist_containsString(values, key, (int32_t)uprv_strlen(key))) {
// The key pointer itself is added here (with FALSE), not a copy of the string.
647 ulist_addItemEndList(values, key, FALSE, &errorCode);
650 if (U_FAILURE(errorCode)) { return; }
// Deletes the collected list. ucol_getKeywordValuesForLocale() sets `values`
// to NULL after handing the list to its enumeration, to avoid this deletion.
658 KeywordsSink::~KeywordsSink() {
659 ulist_deleteList(values);
// C API: returns an enumeration of the collation keyword values available for
// `locale`.
// The first parameter (key) and the third (commonlyUsed) are unnamed and not
// used. *status is an in/out ICU error code; NULL is returned when collecting
// the values fails, and U_MEMORY_ALLOCATION_ERROR is set on the
// allocation-failure path.
664 U_CAPI UEnumeration* U_EXPORT2
665 ucol_getKeywordValuesForLocale(const char* /*key*/, const char* locale,
666 UBool /*commonlyUsed*/, UErrorCode* status) {
667 // Note: The parameter commonlyUsed is not used.
668 // The switch is in the method signature for consistency
669 // with other locale services.
671 // Read available collation values from collation bundles.
672 LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_COLL, locale, status));
673 KeywordsSink sink(*status);
674 ures_getAllItemsWithFallback(bundle.getAlias(), RESOURCE_NAME, sink, *status);
675 if (U_FAILURE(*status)) { return NULL; }
677 UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
679 *status = U_MEMORY_ALLOCATION_ERROR;
// Start from the template callbacks, then attach the collected list as the
// enumeration's context.
682 memcpy(en, &defaultKeywordValues, sizeof(UEnumeration));
683 en->context = sink.values;
684 sink.values = NULL; // Avoid deletion in the sink destructor.
// C API: thin wrapper that forwards to ures_getFunctionalEquivalent() for the
// "collations" resource of the collation data, passing through result,
// resultCapacity, keyword, locale, isAvailable and status unchanged.
// NOTE(review): the meaning of the literal TRUE argument is not visible here;
// see the ures_getFunctionalEquivalent() declaration.
688 U_CAPI int32_t U_EXPORT2
689 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
690 const char* keyword, const char* locale,
691 UBool* isAvailable, UErrorCode* status)
693 // N.B.: Resource name is "collations" but keyword is "collation"
694 return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL,
695 "collations", keyword, locale,
696 isAvailable, TRUE, status);
699 #endif /* #if !UCONFIG_NO_COLLATION */