2 *******************************************************************************
3 * Copyright (C) 2013, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
10 * Modification History:*
11 * Date Name Description
12 * 01/15/13 Emmons Original Port from ICU4J
13 ********************************************************************************
18 * \brief C++ API: Region classes (territory containment)
21 #include "unicode/region.h"
22 #include "unicode/utypes.h"
23 #include "unicode/uobject.h"
24 #include "unicode/unistr.h"
25 #include "unicode/ures.h"
26 #include "unicode/decimfmt.h"
32 #include "region_impl.h"
34 #if !UCONFIG_NO_FORMATTING
39 static void U_CALLCONV
40 deleteRegion(void *obj) {
41 delete (icu::Region *)obj;
45 * Cleanup callback func
47 static UBool U_CALLCONV region_cleanup(void)
49 icu::Region::cleanupRegionData();
58 static UMutex gRegionDataLock = U_MUTEX_INITIALIZER;
59 static UBool regionDataIsLoaded = false;
60 static UVector* availableRegions[URGN_LIMIT];
62 static UHashtable *regionAliases;
63 static UHashtable *regionIDMap;
64 static UHashtable *numericCodeMap;
66 static const UChar UNKNOWN_REGION_ID [] = { 0x5A, 0x5A, 0 }; /* "ZZ" */
67 static const UChar OUTLYING_OCEANIA_REGION_ID [] = { 0x51, 0x4F, 0 }; /* "QO" */
68 static const UChar WORLD_ID [] = { 0x30, 0x30, 0x31, 0 }; /* "001" */
70 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegionNameEnumeration)
73 * Initializes the region data from the ICU resource bundles. The region data
74 * contains the basic relationships such as which regions are known, what the numeric
75 * codes are, any known aliases, and the territory containment data.
77 * If the region data has already loaded, then this method simply returns without doing
78 * anything meaningful.
80 void Region::loadRegionData() {
82 if (regionDataIsLoaded) {
86 umtx_lock(&gRegionDataLock);
88 if (regionDataIsLoaded) { // In case another thread gets to it before we do...
89 umtx_unlock(&gRegionDataLock);
94 UErrorCode status = U_ZERO_ERROR;
96 UResourceBundle* regionCodes = NULL;
97 UResourceBundle* territoryAlias = NULL;
98 UResourceBundle* codeMappings = NULL;
99 UResourceBundle* worldContainment = NULL;
100 UResourceBundle* territoryContainment = NULL;
101 UResourceBundle* groupingContainment = NULL;
103 DecimalFormat *df = new DecimalFormat(status);
104 if (U_FAILURE(status)) {
105 umtx_unlock(&gRegionDataLock);
108 df->setParseIntegerOnly(TRUE);
110 regionIDMap = uhash_open(uhash_hashUnicodeString,uhash_compareUnicodeString,NULL,&status);
111 uhash_setValueDeleter(regionIDMap, deleteRegion);
113 numericCodeMap = uhash_open(uhash_hashLong,uhash_compareLong,NULL,&status);
115 regionAliases = uhash_open(uhash_hashUnicodeString,uhash_compareUnicodeString,NULL,&status);
116 uhash_setKeyDeleter(regionAliases,uprv_deleteUObject);
118 UResourceBundle *rb = ures_openDirect(NULL,"metadata",&status);
119 regionCodes = ures_getByKey(rb,"regionCodes",NULL,&status);
120 territoryAlias = ures_getByKey(rb,"territoryAlias",NULL,&status);
122 UResourceBundle *rb2 = ures_openDirect(NULL,"supplementalData",&status);
123 codeMappings = ures_getByKey(rb2,"codeMappings",NULL,&status);
125 territoryContainment = ures_getByKey(rb2,"territoryContainment",NULL,&status);
126 worldContainment = ures_getByKey(territoryContainment,"001",NULL,&status);
127 groupingContainment = ures_getByKey(territoryContainment,"grouping",NULL,&status);
129 UVector *continents = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
131 while ( ures_hasNext(worldContainment) ) {
132 UnicodeString *continentName = new UnicodeString(ures_getNextUnicodeString(worldContainment,NULL,&status));
133 continents->addElement(continentName,status);
136 UVector *groupings = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
137 while ( ures_hasNext(groupingContainment) ) {
138 UnicodeString *groupingName = new UnicodeString(ures_getNextUnicodeString(groupingContainment,NULL,&status));
139 groupings->addElement(groupingName,status);
142 while ( ures_hasNext(regionCodes) ) {
143 UnicodeString regionID = ures_getNextUnicodeString(regionCodes,NULL,&status);
144 Region *r = new Region();
146 r->idStr.extract(0,r->idStr.length(),r->id,sizeof(r->id),US_INV);
147 r->type = URGN_TERRITORY; // Only temporary - figure out the real type later once the aliases are known.
149 uhash_put(regionIDMap,(void *)&(r->idStr),(void *)r,&status);
151 UErrorCode ps = U_ZERO_ERROR;
152 df->parse(r->idStr,result,ps);
153 if ( U_SUCCESS(ps) ) {
154 r->code = result.getLong(); // Convert string to number
155 uhash_iput(numericCodeMap,r->code,(void *)r,&status);
156 r->type = URGN_SUBCONTINENT;
163 // Process the territory aliases
164 while ( ures_hasNext(territoryAlias) ) {
165 UResourceBundle *res = ures_getNextResource(territoryAlias,NULL,&status);
166 const char *aliasFrom = ures_getKey(res);
167 UnicodeString* aliasFromStr = new UnicodeString(aliasFrom, -1, US_INV);
168 UnicodeString aliasTo = ures_getUnicodeString(res,&status);
171 Region *aliasToRegion = (Region *) uhash_get(regionIDMap,&aliasTo);
172 Region *aliasFromRegion = (Region *)uhash_get(regionIDMap,aliasFromStr);
174 if ( aliasToRegion != NULL && aliasFromRegion == NULL ) { // This is just an alias from some string to a region
175 uhash_put(regionAliases,(void *)aliasFromStr, (void *)aliasToRegion,&status);
177 if ( aliasFromRegion == NULL ) { // Deprecated region code not in the master codes list - so need to create a deprecated region for it.
178 aliasFromRegion = new Region();
179 aliasFromRegion->idStr.setTo(*aliasFromStr);
180 aliasFromRegion->idStr.extract(0,aliasFromRegion->idStr.length(),aliasFromRegion->id,sizeof(aliasFromRegion->id),US_INV);
181 uhash_put(regionIDMap,(void *)&(aliasFromRegion->idStr),(void *)aliasFromRegion,&status);
183 UErrorCode ps = U_ZERO_ERROR;
184 df->parse(aliasFromRegion->idStr,result,ps);
185 if ( U_SUCCESS(ps) ) {
186 aliasFromRegion->code = result.getLong(); // Convert string to number
187 uhash_iput(numericCodeMap,aliasFromRegion->code,(void *)aliasFromRegion,&status);
189 aliasFromRegion->code = -1;
191 aliasFromRegion->type = URGN_DEPRECATED;
193 aliasFromRegion->type = URGN_DEPRECATED;
197 aliasFromRegion->preferredValues = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
198 UnicodeString currentRegion;
199 currentRegion.remove();
200 for (int32_t i = 0 ; i < aliasTo.length() ; i++ ) {
201 if ( aliasTo.charAt(i) != 0x0020 ) {
202 currentRegion.append(aliasTo.charAt(i));
204 if ( aliasTo.charAt(i) == 0x0020 || i+1 == aliasTo.length() ) {
205 Region *target = (Region *)uhash_get(regionIDMap,(void *)¤tRegion);
207 UnicodeString *preferredValue = new UnicodeString(target->idStr);
208 aliasFromRegion->preferredValues->addElement((void *)preferredValue,status);
210 currentRegion.remove();
216 // Process the code mappings - This will allow us to assign numeric codes to most of the territories.
217 while ( ures_hasNext(codeMappings) ) {
218 UResourceBundle *mapping = ures_getNextResource(codeMappings,NULL,&status);
219 if ( ures_getType(mapping) == URES_ARRAY && ures_getSize(mapping) == 3) {
220 UnicodeString codeMappingID = ures_getUnicodeStringByIndex(mapping,0,&status);
221 UnicodeString codeMappingNumber = ures_getUnicodeStringByIndex(mapping,1,&status);
222 UnicodeString codeMapping3Letter = ures_getUnicodeStringByIndex(mapping,2,&status);
224 Region *r = (Region *)uhash_get(regionIDMap,(void *)&codeMappingID);
227 UErrorCode ps = U_ZERO_ERROR;
228 df->parse(codeMappingNumber,result,ps);
229 if ( U_SUCCESS(ps) ) {
230 r->code = result.getLong(); // Convert string to number
231 uhash_iput(numericCodeMap,r->code,(void *)r,&status);
233 UnicodeString *code3 = new UnicodeString(codeMapping3Letter);
234 uhash_put(regionAliases,(void *)code3, (void *)r,&status);
240 // Now fill in the special cases for WORLD, UNKNOWN, CONTINENTS, and GROUPINGS
242 UnicodeString WORLD_ID_STRING(WORLD_ID);
243 r = (Region *) uhash_get(regionIDMap,(void *)&WORLD_ID_STRING);
245 r->type = URGN_WORLD;
248 UnicodeString UNKNOWN_REGION_ID_STRING(UNKNOWN_REGION_ID);
249 r = (Region *) uhash_get(regionIDMap,(void *)&UNKNOWN_REGION_ID_STRING);
251 r->type = URGN_UNKNOWN;
254 for ( int32_t i = 0 ; i < continents->size() ; i++ ) {
255 r = (Region *) uhash_get(regionIDMap,(void *)continents->elementAt(i));
257 r->type = URGN_CONTINENT;
262 for ( int32_t i = 0 ; i < groupings->size() ; i++ ) {
263 r = (Region *) uhash_get(regionIDMap,(void *)groupings->elementAt(i));
265 r->type = URGN_GROUPING;
270 // Special case: The region code "QO" (Outlying Oceania) is a subcontinent code added by CLDR
271 // even though it looks like a territory code. Need to handle it here.
273 UnicodeString OUTLYING_OCEANIA_REGION_ID_STRING(OUTLYING_OCEANIA_REGION_ID);
274 r = (Region *) uhash_get(regionIDMap,(void *)&OUTLYING_OCEANIA_REGION_ID_STRING);
276 r->type = URGN_SUBCONTINENT;
279 // Load territory containment info from the supplemental data.
280 while ( ures_hasNext(territoryContainment) ) {
281 UResourceBundle *mapping = ures_getNextResource(territoryContainment,NULL,&status);
282 const char *parent = ures_getKey(mapping);
283 UnicodeString parentStr = UnicodeString(parent, -1 , US_INV);
284 Region *parentRegion = (Region *) uhash_get(regionIDMap,(void *)&parentStr);
286 for ( int j = 0 ; j < ures_getSize(mapping); j++ ) {
287 UnicodeString child = ures_getUnicodeStringByIndex(mapping,j,&status);
288 Region *childRegion = (Region *) uhash_get(regionIDMap,(void *)&child);
289 if ( parentRegion != NULL && childRegion != NULL ) {
291 // Add the child region to the set of regions contained by the parent
292 if (parentRegion->containedRegions == NULL) {
293 parentRegion->containedRegions = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
296 UnicodeString *childStr = new UnicodeString();
297 childStr->fastCopyFrom(childRegion->idStr);
298 parentRegion->containedRegions->addElement((void *)childStr,status);
300 // Set the parent region to be the containing region of the child.
301 // Regions of type GROUPING can't be set as the parent, since another region
302 // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent.
303 if ( parentRegion->type != URGN_GROUPING) {
304 childRegion->containingRegion = parentRegion;
311 // Create the availableRegions lists
313 while ( const UHashElement* element = uhash_nextElement(regionIDMap,&pos)) {
314 Region *ar = (Region *)element->value.pointer;
315 if ( availableRegions[ar->type] == NULL ) {
316 availableRegions[ar->type] = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
318 UnicodeString *arString = new UnicodeString(ar->idStr);
319 availableRegions[ar->type]->addElement((void *)arString,status);
322 ures_close(territoryContainment);
323 ures_close(worldContainment);
324 ures_close(groupingContainment);
326 ures_close(codeMappings);
328 ures_close(territoryAlias);
329 ures_close(regionCodes);
334 ucln_i18n_registerCleanup(UCLN_I18N_REGION, region_cleanup);
336 regionDataIsLoaded = true;
337 umtx_unlock(&gRegionDataLock);
341 void Region::cleanupRegionData() {
343 for (int32_t i = 0 ; i < URGN_LIMIT ; i++ ) {
344 if ( availableRegions[i] ) {
345 delete availableRegions[i];
350 uhash_close(regionAliases);
353 if (numericCodeMap) {
354 uhash_close(numericCodeMap);
358 uhash_close(regionIDMap);
365 containingRegion(NULL),
366 containedRegions(NULL),
367 preferredValues(NULL) {
372 if (containedRegions) {
373 delete containedRegions;
375 if (preferredValues) {
376 delete preferredValues;
381 * Returns true if the two regions are equal.
384 Region::operator==(const Region &that) const {
385 return (idStr == that.idStr);
389 * Returns true if the two regions are NOT equal; that is, if operator ==() returns false.
392 Region::operator!=(const Region &that) const {
393 return (idStr != that.idStr);
397 * Returns a pointer to a Region using the given region code. The region code can be either 2-letter ISO code,
398 * 3-letter ISO code, UNM.49 numeric code, or other valid Unicode Region Code as defined by the LDML specification.
399 * The identifier will be canonicalized internally using the supplemental metadata as defined in the CLDR.
400 * If the region code is NULL or not recognized, the appropriate error code will be set ( U_ILLEGAL_ARGUMENT_ERROR )
402 const Region* U_EXPORT2
403 Region::getInstance(const char *region_code, UErrorCode &status) {
405 if ( !region_code ) {
406 status = U_ILLEGAL_ARGUMENT_ERROR;
412 if (regionIDMap == NULL) {
413 status = U_ILLEGAL_ARGUMENT_ERROR;
417 UnicodeString regionCodeString = UnicodeString(region_code, -1, US_INV);
418 Region *r = (Region *)uhash_get(regionIDMap,(void *)®ionCodeString);
421 r = (Region *)uhash_get(regionAliases,(void *)®ionCodeString);
424 if ( !r ) { // Unknown region code
425 status = U_ILLEGAL_ARGUMENT_ERROR;
429 if ( r->type == URGN_DEPRECATED && r->preferredValues->size() == 1) {
430 StringEnumeration *pv = r->getPreferredValues();
432 const UnicodeString *ustr = pv->snext(status);
433 r = (Region *)uhash_get(regionIDMap,(void *)ustr);
442 * Returns a pointer to a Region using the given numeric region code. If the numeric region code is not recognized,
443 * the appropriate error code will be set ( U_ILLEGAL_ARGUMENT_ERROR ).
445 const Region* U_EXPORT2
446 Region::getInstance (int32_t code, UErrorCode &status) {
450 if (numericCodeMap == NULL) {
451 status = U_ILLEGAL_ARGUMENT_ERROR;
455 Region *r = (Region *)uhash_iget(numericCodeMap,code);
457 if ( !r ) { // Just in case there's an alias that's numeric, try to find it.
458 UErrorCode fs = U_ZERO_ERROR;
459 UnicodeString pat = UNICODE_STRING_SIMPLE("00#");
460 DecimalFormat *df = new DecimalFormat(pat,fs);
466 r = (Region *)uhash_get(regionAliases,&id);
470 status = U_ILLEGAL_ARGUMENT_ERROR;
474 if ( r->type == URGN_DEPRECATED && r->preferredValues->size() == 1) {
475 StringEnumeration *pv = r->getPreferredValues();
477 const UnicodeString *ustr = pv->snext(status);
478 r = (Region *)uhash_get(regionIDMap,(void *)ustr);
487 * Returns an enumeration over the IDs of all known regions that match the given type.
489 StringEnumeration* U_EXPORT2
490 Region::getAvailable(URegionType type) {
493 UErrorCode status = U_ZERO_ERROR;
494 return new RegionNameEnumeration(availableRegions[type],status);
500 * Returns a pointer to the region that contains this region. Returns NULL if this region is code "001" (World)
501 * or "ZZ" (Unknown region). For example, calling this method with region "IT" (Italy) returns the
502 * region "039" (Southern Europe).
505 Region::getContainingRegion() const {
507 return containingRegion;
511 * Return a pointer to the region that geographically contains this region and matches the given type,
512 * moving multiple steps up the containment chain if necessary. Returns NULL if no containing region can be found
513 * that matches the given type. Note: The URegionTypes = "URGN_GROUPING", "URGN_DEPRECATED", or "URGN_UNKNOWN"
514 * are not appropriate for use in this API. NULL will be returned in this case. For example, calling this method
515 * with region "IT" (Italy) for type "URGN_CONTINENT" returns the region "150" ( Europe ).
518 Region::getContainingRegion(URegionType type) const {
520 if ( containingRegion == NULL ) {
524 if ( containingRegion->type == type ) {
525 return containingRegion;
527 return containingRegion->getContainingRegion(type);
532 * Return an enumeration over the IDs of all the regions that are immediate children of this region in the
533 * region hierarchy. These returned regions could be either macro regions, territories, or a mixture of the two,
534 * depending on the containment data as defined in CLDR. This API may return NULL if this region doesn't have
535 * any sub-regions. For example, calling this method with region "150" (Europe) returns an enumeration containing
536 * the various sub regions of Europe - "039" (Southern Europe) - "151" (Eastern Europe) - "154" (Northern Europe)
537 * and "155" (Western Europe).
540 Region::getContainedRegions() const {
542 UErrorCode status = U_ZERO_ERROR;
543 return new RegionNameEnumeration(containedRegions,status);
547 * Returns an enumeration over the IDs of all the regions that are children of this region anywhere in the region
548 * hierarchy and match the given type. This API may return an empty enumeration if this region doesn't have any
549 * sub-regions that match the given type. For example, calling this method with region "150" (Europe) and type
550 * "URGN_TERRITORY" returns a set containing all the territories in Europe ( "FR" (France) - "IT" (Italy) - "DE" (Germany) etc. )
553 Region::getContainedRegions( URegionType type ) const {
556 UErrorCode status = U_ZERO_ERROR;
557 UVector *result = new UVector(NULL, uhash_compareChars, status);
559 StringEnumeration *cr = getContainedRegions();
561 for ( int32_t i = 0 ; i < cr->count(status) ; i++ ) {
562 const char *id = cr->next(NULL,status);
563 const Region *r = Region::getInstance(id,status);
564 if ( r->getType() == type ) {
565 result->addElement((void *)&r->idStr,status);
567 StringEnumeration *children = r->getContainedRegions(type);
568 for ( int32_t j = 0 ; j < children->count(status) ; j++ ) {
569 const char *id2 = children->next(NULL,status);
570 const Region *r2 = Region::getInstance(id2,status);
571 result->addElement((void *)&r2->idStr,status);
577 StringEnumeration* resultEnumeration = new RegionNameEnumeration(result,status);
579 return resultEnumeration;
583 * Returns true if this region contains the supplied other region anywhere in the region hierarchy.
586 Region::contains(const Region &other) const {
589 if (!containedRegions) {
592 if (containedRegions->contains((void *)&other.idStr)) {
595 for ( int32_t i = 0 ; i < containedRegions->size() ; i++ ) {
596 UnicodeString *crStr = (UnicodeString *)containedRegions->elementAt(i);
597 Region *cr = (Region *) uhash_get(regionIDMap,(void *)crStr);
598 if ( cr && cr->contains(other) ) {
608 * For deprecated regions, return an enumeration over the IDs of the regions that are the preferred replacement
609 * regions for this region. Returns NULL for a non-deprecated region. For example, calling this method with region
610 * "SU" (Soviet Union) would return a list of the regions containing "RU" (Russia), "AM" (Armenia), "AZ" (Azerbaijan), etc...
613 Region::getPreferredValues() const {
615 UErrorCode status = U_ZERO_ERROR;
616 if ( type == URGN_DEPRECATED ) {
617 return new RegionNameEnumeration(preferredValues,status);
625 * Return this region's canonical region code.
628 Region::getRegionCode() const {
633 Region::getNumericCode() const {
638 * Returns the region type of this region.
641 Region::getType() const {
645 RegionNameEnumeration::RegionNameEnumeration(UVector *fNameList, UErrorCode& status) {
647 if (fNameList && U_SUCCESS(status)) {
648 fRegionNames = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, fNameList->size(),status);
649 for ( int32_t i = 0 ; i < fNameList->size() ; i++ ) {
650 UnicodeString* this_region_name = (UnicodeString *)fNameList->elementAt(i);
651 UnicodeString* new_region_name = new UnicodeString(*this_region_name);
652 fRegionNames->addElement((void *)new_region_name,status);
661 RegionNameEnumeration::snext(UErrorCode& status) {
662 if (U_FAILURE(status) || (fRegionNames==NULL)) {
665 const UnicodeString* nextStr = (const UnicodeString *)fRegionNames->elementAt(pos);
673 RegionNameEnumeration::reset(UErrorCode& /*status*/) {
678 RegionNameEnumeration::count(UErrorCode& /*status*/) const {
679 return (fRegionNames==NULL) ? 0 : fRegionNames->size();
682 RegionNameEnumeration::~RegionNameEnumeration() {
688 #endif /* #if !UCONFIG_NO_FORMATTING */