1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (C) 1997-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
11 * Created by: Richard Gillam
13 * Modification History:
15 * Date Name Description
16 * 02/11/97 aliu Changed gLocPath to fgDataDirectory and added
17 * methods to get and set it.
18 * 04/02/97 aliu Made operator!= inline; fixed return value
20 * 04/15/97 aliu Cleanup for AIX/Win32.
21 * 04/24/97 aliu Numerous changes per code review.
22 * 08/18/98 stephen Changed getDisplayName()
23 * Added SIMPLIFIED_CHINESE, TRADITIONAL_CHINESE
24 * Added getISOCountries(), getISOLanguages(),
25 * getLanguagesForCountry()
26 * 03/16/99 bertrand rehaul.
27 * 07/21/99 stephen Added U_CFUNC setDefault
28 * 11/09/99 weiv Added const char * getName() const;
29 * 04/12/00 srl removing unicodestring api's and cached hash code
30 * 08/10/01 grhoten Change the static Locales to accessor functions
31 ******************************************************************************
35 #include "unicode/locid.h"
36 #include "unicode/uloc.h"
50 static UBool U_CALLCONV locale_cleanup(void);
// Array of the predefined locales; allocated by locale_init() and released by
// locale_cleanup().
55 static Locale *gLocaleCache = NULL;
// Init-once guard passed to umtx_initOnce() so that locale_init() runs a single time.
56 static UInitOnce gLocaleCacheInitOnce = U_INITONCE_INITIALIZER;
58 // gDefaultLocaleMutex protects all access to gDefaultLocalesHashT and gDefaultLocale.
59 static UMutex gDefaultLocaleMutex = U_MUTEX_INITIALIZER;
// Hash table keyed by locale name whose values are the Locale objects created by
// locale_set_default_internal().
60 static UHashtable *gDefaultLocalesHashT = NULL;
// The current default locale; set to a value stored in gDefaultLocalesHashT.
61 static Locale *gDefaultLocale = NULL;
64 * \def ULOC_STRING_LIMIT
65 * strings beyond this value crash in CharString
67 #define ULOC_STRING_LIMIT 357913941
71 typedef enum ELocalePos {
85 eCHINA, /* Alias for PRC */
98 U_CFUNC int32_t locale_getKeywords(const char *localeID,
100 char *keywords, int32_t keywordCapacity,
101 char *values, int32_t valuesCapacity, int32_t *valLen,
107 // Deleter function for Locales owned by the default Locale hash table.
// Installed on gDefaultLocalesHashT with uhash_setValueDeleter(). The hash table
// hands values over as void*, so the pointer is cast back to icu::Locale before
// being deleted.
109 static void U_CALLCONV
110 deleteLocale(void *obj) {
111 delete (icu::Locale *) obj;
// Cleanup callback registered through
// ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup).
// Releases the predefined-locale array, resets its init-once guard and closes
// the default-locale hash table.
114 static UBool U_CALLCONV locale_cleanup(void)
118 delete [] gLocaleCache;
// Reset the guard so that a later umtx_initOnce() call runs locale_init() again.
120 gLocaleCacheInitOnce.reset();
122 if (gDefaultLocalesHashT) {
123 uhash_close(gDefaultLocalesHashT); // Automatically deletes all elements, using deleter func.
124 gDefaultLocalesHashT = NULL;
// gDefaultLocale referred to a value held by the table closed above, so it must
// not be used any more.
126 gDefaultLocale = NULL;
// One-time initializer for gLocaleCache, invoked through umtx_initOnce() from
// Locale::getLocaleCache().
// Allocates an array of eMAX_LOCALES default-constructed Locale objects and then
// assigns each predefined slot. On allocation failure status is set to
// U_MEMORY_ALLOCATION_ERROR.
131 static void U_CALLCONV locale_init(UErrorCode &status) {
134 U_ASSERT(gLocaleCache == NULL);
135 gLocaleCache = new Locale[(int)eMAX_LOCALES];
136 if (gLocaleCache == NULL) {
137 status = U_MEMORY_ALLOCATION_ERROR;
// Register the cleanup hook so the array is released by locale_cleanup().
140 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
// Language-only locales.
141 gLocaleCache[eROOT] = Locale("");
142 gLocaleCache[eENGLISH] = Locale("en");
143 gLocaleCache[eFRENCH] = Locale("fr");
144 gLocaleCache[eGERMAN] = Locale("de");
145 gLocaleCache[eITALIAN] = Locale("it");
146 gLocaleCache[eJAPANESE] = Locale("ja");
147 gLocaleCache[eKOREAN] = Locale("ko");
148 gLocaleCache[eCHINESE] = Locale("zh");
// Language + country locales.
149 gLocaleCache[eFRANCE] = Locale("fr", "FR");
150 gLocaleCache[eGERMANY] = Locale("de", "DE");
151 gLocaleCache[eITALY] = Locale("it", "IT");
152 gLocaleCache[eJAPAN] = Locale("ja", "JP");
153 gLocaleCache[eKOREA] = Locale("ko", "KR");
154 gLocaleCache[eCHINA] = Locale("zh", "CN");
155 gLocaleCache[eTAIWAN] = Locale("zh", "TW");
156 gLocaleCache[eUK] = Locale("en", "GB");
157 gLocaleCache[eUS] = Locale("en", "US");
158 gLocaleCache[eCANADA] = Locale("en", "CA");
159 gLocaleCache[eCANADA_FRENCH] = Locale("fr", "CA");
// Makes the locale named by id the default locale and returns it.
//
// One Locale object is created with new for each distinct name and stored in
// gDefaultLocalesHashT; later requests for the same name reuse that object.
// On every failure path visible here the function returns gDefaultLocale
// without changing it, and status carries the error.
//
// id      locale ID to install as default (see the existing note below about NULL).
// status  in/out ICU error code.
// returns the resulting default locale, or the previous gDefaultLocale on failure.
166 Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
167 // Synchronize this entire function.
168 Mutex lock(&gDefaultLocaleMutex);
170 UBool canonicalize = FALSE;
172 // If given a NULL string for the locale id, grab the default
173 // name from the system.
174 // (Different from most other locale APIs, where a null name means use
175 // the current ICU default locale.)
177 id = uprv_getDefaultLocaleID(); // This function not thread safe? TODO: verify.
178 canonicalize = TRUE; // always canonicalize host ID
181 char localeNameBuf[512];
// The capacity passed is one less than the buffer size so that the terminator
// written below always has room.
184 uloc_canonicalize(id, localeNameBuf, sizeof(localeNameBuf)-1, &status);
186 uloc_getName(id, localeNameBuf, sizeof(localeNameBuf)-1, &status);
188 localeNameBuf[sizeof(localeNameBuf)-1] = 0; // Force null termination in event of
189 // a long name filling the buffer.
190 // (long names are truncated.)
192 if (U_FAILURE(status)) {
193 return gDefaultLocale;
// Create the name -> Locale table on first use; its values are deleted by
// deleteLocale() and the table itself is closed by locale_cleanup().
196 if (gDefaultLocalesHashT == NULL) {
197 gDefaultLocalesHashT = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
198 if (U_FAILURE(status)) {
199 return gDefaultLocale;
201 uhash_setValueDeleter(gDefaultLocalesHashT, deleteLocale);
202 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
// Reuse the Locale already stored under this name, or create and store a new one.
205 Locale *newDefault = (Locale *)uhash_get(gDefaultLocalesHashT, localeNameBuf);
206 if (newDefault == NULL) {
207 newDefault = new Locale(Locale::eBOGUS);
208 if (newDefault == NULL) {
209 status = U_MEMORY_ALLOCATION_ERROR;
210 return gDefaultLocale;
212 newDefault->init(localeNameBuf, FALSE);
// The key is the pointer returned by newDefault->getName(), not the stack buffer
// localeNameBuf.
// NOTE(review): ownership of newDefault when uhash_put() fails is not visible in
// this excerpt - confirm it is not leaked.
213 uhash_put(gDefaultLocalesHashT, (char*) newDefault->getName(), newDefault, &status);
214 if (U_FAILURE(status)) {
215 return gDefaultLocale;
218 gDefaultLocale = newDefault;
219 return gDefaultLocale;
// Sets the default locale from id by delegating to locale_set_default_internal().
// The error code is a local variable here; no visible line reports it to the caller.
226 locale_set_default(const char *id)
229 UErrorCode status = U_ZERO_ERROR;
230 locale_set_default_internal(id, status);
// Returns the name of the current default locale, i.e. Locale::getDefault().getName().
235 locale_get_default(void)
238 return Locale::getDefault().getName();
244 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
246 /*Character separating the posix id fields*/
248 // In the platform codepage.
253 if (baseName != fullName) {
257 /*if fullName is on the heap, we free it*/
258 if (fullName != fullNameBuffer)
266 : UObject(), fullName(fullNameBuffer), baseName(NULL)
272 * Internal constructor to allow construction of a locale object with
273 * NO side effects. (Default constructor tries to get
274 * the default locale.)
// Constructor taking an ELocaleType tag whose value is not used (the parameter is
// unnamed). Starts with fullName pointing at the in-object fullNameBuffer and no
// baseName. The constructor body is not part of this excerpt.
276 Locale::Locale(Locale::ELocaleType)
277 : UObject(), fullName(fullNameBuffer), baseName(NULL)
// Constructs a Locale from separate language, country, variant and keyword
// strings. The pieces are concatenated into one locale ID in a CharString
// ("togo") and that ID is then parsed by init().
//
// newLanguage  language part; may be NULL.
// newCountry   country part; may be NULL.
// newVariant   variant part; may be NULL. Leading and trailing '_' are trimmed.
// newKeywords  trailing keyword (or other) text; may be NULL.
283 Locale::Locale( const char * newLanguage,
284 const char * newCountry,
285 const char * newVariant,
286 const char * newKeywords)
287 : UObject(), fullName(fullNameBuffer), baseName(NULL)
289 if( (newLanguage==NULL) && (newCountry == NULL) && (newVariant == NULL) )
291 init(NULL, FALSE); /* shortcut */
295 UErrorCode status = U_ZERO_ERROR;
302 // Calculate the size of the resulting string.
305 if ( newLanguage != NULL )
307 lsize = (int32_t)uprv_strlen(newLanguage);
// Each length is checked for having gone negative in the int32_t cast and for
// exceeding ULOC_STRING_LIMIT; the handling of a rejected length is on lines not
// shown in this excerpt.
308 if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap
315 CharString togo(newLanguage, lsize, status); // start with newLanguage
318 if ( newCountry != NULL )
320 csize = (int32_t)uprv_strlen(newCountry);
321 if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap
329 if ( newVariant != NULL )
331 // remove leading _'s
332 while(newVariant[0] == SEP_CHAR)
337 // remove trailing _'s
338 vsize = (int32_t)uprv_strlen(newVariant);
339 if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap
// Trailing separators are dropped by shrinking vsize; at least one character is kept.
343 while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) )
357 size += 2; // at least: __v
359 else if ( csize > 0 )
361 size += 1; // at least: _v
364 if ( newKeywords != NULL)
366 ksize = (int32_t)uprv_strlen(newKeywords);
367 if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) {
374 // NOW we have the full locale string..
375 // Now, copy it back.
377 // newLanguage is already copied
// A separator follows the language whenever a country or a variant is present.
379 if ( ( vsize != 0 ) || (csize != 0) ) // at least: __v
381 togo.append(SEP_CHAR, status);
386 togo.append(newCountry, status);
// Only the trimmed vsize characters of the variant are appended.
391 togo.append(SEP_CHAR, status)
392 .append(newVariant, vsize, status);
// Text containing '=' is joined with '@'; the '_' joiners below apply in the other
// cases (their conditions are on lines not shown here).
397 if (uprv_strchr(newKeywords, '=')) {
398 togo.append('@', status); /* keyword parsing */
401 togo.append('_', status); /* Variant parsing with a script */
403 togo.append('_', status); /* No country found */
406 togo.append(newKeywords, status);
409 if (U_FAILURE(status)) {
410 // Something went wrong with appending, etc.
414 // Parse it, because for example 'language' might really be a complete
416 init(togo.data(), FALSE);
// Copy constructor. Members start in the empty state (fullName pointing at the
// in-object buffer, no baseName); the body that copies from other is not part of
// this excerpt.
420 Locale::Locale(const Locale &other)
421 : UObject(other), fullName(fullNameBuffer), baseName(NULL)
// Copy assignment. Releases this object's current name storage, then copies
// other's full name, base name, language/script/country fields, variant offset
// and bogus flag.
426 Locale &Locale::operator=(const Locale &other)
// Self-assignment guard: the storage released below would otherwise be the source.
428 if (this == &other) {
432 /* Free our current storage */
433 if (baseName != fullName) {
437 if(fullName != fullNameBuffer) {
439 fullName = fullNameBuffer;
442 /* Allocate the full name if necessary */
// other.fullName not pointing at other.fullNameBuffer means other keeps its name
// in separately allocated storage, so an equally sized block is allocated here.
443 if(other.fullName != other.fullNameBuffer) {
444 fullName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(other.fullName)+1));
445 if (fullName == NULL) {
449 /* Copy the full name */
450 uprv_strcpy(fullName, other.fullName);
452 /* Copy the baseName if it differs from fullName. */
453 if (other.baseName == other.fullName) {
456 if (other.baseName) {
// NOTE(review): whether a NULL result from uprv_strdup() is handled is not
// visible in this excerpt - confirm.
457 baseName = uprv_strdup(other.baseName);
461 /* Copy the language and country fields */
462 uprv_strcpy(language, other.language);
463 uprv_strcpy(script, other.script);
464 uprv_strcpy(country, other.country);
466 /* The variantBegin is an offset, just copy it */
467 variantBegin = other.variantBegin;
468 fIsBogus = other.fIsBogus;
// Returns a copy of this Locale allocated with new, made through the copy constructor.
473 Locale::clone() const {
474 return new Locale(*this);
// Two locales compare equal when their fullName strings are identical
// (byte-wise comparison with uprv_strcmp).
478 Locale::operator==( const Locale& other) const
480 return (uprv_strcmp(other.fullName, fullName) == 0);
483 #define ISASCIIALPHA(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
485 /*This function initializes a Locale from a C locale ID*/
// localeID      ID to parse; NULL selects the current default locale.
// canonicalize  TRUE to normalize with uloc_canonicalize(), FALSE to use uloc_getName().
// returns       *this.
//
// The normalized ID is stored in fullName (the in-object buffer, or a heap block
// when it does not fit) and then split on '_' to fill language, script, country
// and variantBegin.
486 Locale& Locale::init(const char* localeID, UBool canonicalize)
489 /* Free our current storage */
490 if (baseName != fullName) {
494 if(fullName != fullNameBuffer) {
496 fullName = fullNameBuffer;
500 // just an easy way to have a common error-exit
501 // without goto and without another function
// field[i] points at the start of the i-th '_'-separated piece inside fullName;
// fieldLen[i] is that piece's length.
504 char *field[5] = {0};
505 int32_t fieldLen[5] = {0};
507 int32_t variantField;
511 if(localeID == NULL) {
512 // not an error, just set the default locale
513 return *this = getDefault();
516 /* preset all fields to empty */
517 language[0] = script[0] = country[0] = 0;
519 // "canonicalize" the locale ID to ICU/Java format
// First attempt: write into the in-object buffer.
521 length = canonicalize ?
522 uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err) :
523 uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
525 if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) {
526 /*Go to heap for the fullName if necessary*/
527 fullName = (char *)uprv_malloc(sizeof(char)*(length + 1));
529 fullName = fullNameBuffer;
530 break; // error: out of memory
// Second attempt: the heap block is sized from the length reported by the first call.
533 length = canonicalize ?
534 uloc_canonicalize(localeID, fullName, length+1, &err) :
535 uloc_getName(localeID, fullName, length+1, &err);
537 if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) {
538 /* should never occur */
// Until a variant is found, variantBegin is the length of the whole name.
542 variantBegin = length;
544 /* after uloc_getName/canonicalize() we know that only '_' are separators */
545 separator = field[0] = fullName;
// Split on '_' while leaving the last slot of field[] unused by this loop.
547 while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) && fieldIdx < UPRV_LENGTHOF(field)-1) {
548 field[fieldIdx] = separator + 1;
549 fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
552 // variant may contain @foo or .foo POSIX cruft; remove it
553 separator = uprv_strchr(field[fieldIdx-1], '@');
554 char* sep2 = uprv_strchr(field[fieldIdx-1], '.');
555 if (separator!=NULL || sep2!=NULL) {
// The earlier of '@' and '.' ends the last field.
556 if (separator==NULL || (sep2!=NULL && separator > sep2)) {
559 fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
// Neither '@' nor '.' present: the last field runs to the end of fullName.
561 fieldLen[fieldIdx-1] = length - (int32_t)(field[fieldIdx-1] - fullName);
564 if (fieldLen[0] >= (int32_t)(sizeof(language)))
566 break; // error: the language field is too long
569 variantField = 1; /* Usually the 2nd one, except when a script or country is also used. */
570 if (fieldLen[0] > 0) {
571 /* We have a language */
572 uprv_memcpy(language, fullName, fieldLen[0]);
573 language[fieldLen[0]] = 0;
// A second field of exactly four ASCII letters is taken as the script.
575 if (fieldLen[1] == 4 && ISASCIIALPHA(field[1][0]) &&
576 ISASCIIALPHA(field[1][1]) && ISASCIIALPHA(field[1][2]) &&
577 ISASCIIALPHA(field[1][3])) {
578 /* We have at least a script */
579 uprv_memcpy(script, field[1], fieldLen[1]);
580 script[fieldLen[1]] = 0;
// A field of length 2 or 3 at this position is taken as the country.
584 if (fieldLen[variantField] == 2 || fieldLen[variantField] == 3) {
585 /* We have a country */
586 uprv_memcpy(country, field[variantField], fieldLen[variantField]);
587 country[fieldLen[variantField]] = 0;
589 } else if (fieldLen[variantField] == 0) {
590 variantField++; /* script or country empty but variant in next field (i.e. en__POSIX) */
593 if (fieldLen[variantField] > 0) {
594 /* We have a variant */
// variantBegin is stored as an offset into fullName rather than as a pointer.
595 variantBegin = (int32_t)(field[variantField] - fullName);
600 if (U_FAILURE(err)) {
604 // successful end of init()
606 } while(0); /*loop doesn't iterate*/
608 // when an error occurs, then set this object to "bogus" (there is no UErrorCode here)
615 * Set up the base name.
616 * If there are no key words, it's exactly the full name.
617 * If key words exist, it's the full name truncated at the '@' character.
618 * Need to set up both at init() and after setting a keyword.
// Derives baseName from fullName.
// When fullName contains an '@' that precedes an '=', the text before the '@' is
// copied into a newly allocated baseName, and variantBegin is capped at that
// length. Does nothing visible here if status already indicates a failure.
//
// status  in/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR when the
//         baseName allocation fails.
621 Locale::initBaseName(UErrorCode &status) {
622 if (U_FAILURE(status)) {
// Precondition: no separately allocated baseName is currently held.
625 U_ASSERT(baseName==NULL || baseName==fullName);
626 const char *atPtr = uprv_strchr(fullName, '@');
627 const char *eqPtr = uprv_strchr(fullName, '=');
628 if (atPtr && eqPtr && atPtr < eqPtr) {
630 int32_t baseNameLength = (int32_t)(atPtr - fullName);
631 baseName = (char *)uprv_malloc(baseNameLength + 1);
632 if (baseName == NULL) {
633 status = U_MEMORY_ALLOCATION_ERROR;
// uprv_strncpy is given exactly baseNameLength characters, so the terminator is
// written explicitly on the next line.
636 uprv_strncpy(baseName, fullName, baseNameLength);
637 baseName[baseNameLength] = 0;
639 // The original computation of variantBegin leaves it equal to the length
640 // of fullName if there is no variant. It should instead be
641 // the length of the baseName.
642 if (variantBegin > baseNameLength) {
643 variantBegin = baseNameLength;
// Hash of the complete fullName string, computed with ustr_hashCharsN().
// Based on the same string that operator== compares.
652 Locale::hashCode() const
654 return ustr_hashCharsN(fullName, uprv_strlen(fullName));
// Puts this Locale into the "bogus" state. The visible part releases any
// separately held name storage and points fullName back at the in-object buffer;
// the remaining field resets are on lines not shown in this excerpt.
658 Locale::setToBogus() {
659 /* Free our current storage */
660 if(baseName != fullName) {
664 if(fullName != fullNameBuffer) {
666 fullName = fullNameBuffer;
// Returns the current default locale.
// NOTE(review): the line carrying the function name is missing from this excerpt;
// the body matches Locale::getDefault() as called by locale_get_default().
676 const Locale& U_EXPORT2
// Fast path: read gDefaultLocale under gDefaultLocaleMutex.
680 Mutex lock(&gDefaultLocaleMutex);
681 if (gDefaultLocale != NULL) {
682 return *gDefaultLocale;
// No default set yet: passing NULL makes locale_set_default_internal() take the
// ID from uprv_getDefaultLocaleID().
// NOTE(review): the returned pointer is dereferenced unconditionally; it is
// gDefaultLocale (possibly still NULL) on the internal function's failure paths.
685 UErrorCode status = U_ZERO_ERROR;
686 return *locale_set_default_internal(NULL, status);
// Installs newLocale as the default locale. Only its name string is passed on;
// locale_set_default_internal() looks up or creates the Locale object that is
// actually stored.
692 Locale::setDefault( const Locale& newLocale,
// Checks for an error already present in status on entry.
695 if (U_FAILURE(status)) {
699 /* Set the default from the full name string of the supplied locale.
700 * This is a convenient way to access the default locale caching mechanisms.
702 const char *localeID = newLocale.getName();
703 locale_set_default_internal(localeID, status);
707 Locale::createFromName (const char *name)
// Builds a Locale from name with canonicalization enabled (init() is called with
// canonicalize == TRUE). The declaration and return of `loc` are on lines not
// shown in this excerpt.
720 Locale::createCanonical(const char* name) {
722 loc.init(name, TRUE);
// Thin wrapper: forwards this locale's fullName to uloc_getISO3Language().
727 Locale::getISO3Language() const
729 return uloc_getISO3Language(fullName);
// Thin wrapper: forwards this locale's fullName to uloc_getISO3Country().
734 Locale::getISO3Country() const
736 return uloc_getISO3Country(fullName);
740 * Return the LCID value as specified in the "LocaleID" resource for this
741 * locale. The LocaleID must be expressed as a hexadecimal number, from
742 * one to four digits. If the LocaleID resource is not present, or is
743 * in an incorrect format, 0 is returned. The LocaleID is for use in
744 * Windows (it is an LCID), but is available on all platforms.
// Thin wrapper: forwards this locale's fullName to uloc_getLCID().
747 Locale::getLCID() const
749 return uloc_getLCID(fullName);
// Static accessor: returns whatever uloc_getISOCountries() returns.
752 const char* const* U_EXPORT2 Locale::getISOCountries()
754 return uloc_getISOCountries();
// Static accessor: returns whatever uloc_getISOLanguages() returns.
757 const char* const* U_EXPORT2 Locale::getISOLanguages()
759 return uloc_getISOLanguages();
762 // Set the locale's data based on a posix id.
763 void Locale::setFromPOSIXID(const char *posixID)
// Accessors for the predefined locales. Each one returns a reference to the
// matching slot of the cache through getLocale(), using the ELocalePos value
// that locale_init() assigned.
// Language-only locales.
768 const Locale & U_EXPORT2
769 Locale::getRoot(void)
771 return getLocale(eROOT);
774 const Locale & U_EXPORT2
775 Locale::getEnglish(void)
777 return getLocale(eENGLISH);
780 const Locale & U_EXPORT2
781 Locale::getFrench(void)
783 return getLocale(eFRENCH);
786 const Locale & U_EXPORT2
787 Locale::getGerman(void)
789 return getLocale(eGERMAN);
792 const Locale & U_EXPORT2
793 Locale::getItalian(void)
795 return getLocale(eITALIAN);
798 const Locale & U_EXPORT2
799 Locale::getJapanese(void)
801 return getLocale(eJAPANESE);
804 const Locale & U_EXPORT2
805 Locale::getKorean(void)
807 return getLocale(eKOREAN);
810 const Locale & U_EXPORT2
811 Locale::getChinese(void)
813 return getLocale(eCHINESE);
// Simplified and Traditional Chinese reuse the eCHINA and eTAIWAN slots.
816 const Locale & U_EXPORT2
817 Locale::getSimplifiedChinese(void)
819 return getLocale(eCHINA);
822 const Locale & U_EXPORT2
823 Locale::getTraditionalChinese(void)
825 return getLocale(eTAIWAN);
// Language + country locales.
829 const Locale & U_EXPORT2
830 Locale::getFrance(void)
832 return getLocale(eFRANCE);
835 const Locale & U_EXPORT2
836 Locale::getGermany(void)
838 return getLocale(eGERMANY);
841 const Locale & U_EXPORT2
842 Locale::getItaly(void)
844 return getLocale(eITALY);
847 const Locale & U_EXPORT2
848 Locale::getJapan(void)
850 return getLocale(eJAPAN);
853 const Locale & U_EXPORT2
854 Locale::getKorea(void)
856 return getLocale(eKOREA);
859 const Locale & U_EXPORT2
860 Locale::getChina(void)
862 return getLocale(eCHINA);
// NOTE(review): the name line of this accessor is missing from the excerpt; it
// returns the eCHINA slot, presumably the PRC alias - confirm.
865 const Locale & U_EXPORT2
868 return getLocale(eCHINA);
871 const Locale & U_EXPORT2
872 Locale::getTaiwan(void)
874 return getLocale(eTAIWAN);
// NOTE(review): name line missing; returns the eUK slot.
877 const Locale & U_EXPORT2
880 return getLocale(eUK);
// NOTE(review): name line missing; returns the eUS slot.
883 const Locale & U_EXPORT2
886 return getLocale(eUS);
889 const Locale & U_EXPORT2
890 Locale::getCanada(void)
892 return getLocale(eCANADA);
895 const Locale & U_EXPORT2
896 Locale::getCanadaFrench(void)
898 return getLocale(eCANADA_FRENCH);
// Returns the predefined locale stored at index locid of the cache obtained from
// getLocaleCache().
// locid  an ELocalePos value; asserted to lie in [0, eMAX_LOCALES).
902 Locale::getLocale(int locid)
904 Locale *localeCache = getLocaleCache();
905 U_ASSERT((locid < eMAX_LOCALES)&&(locid>=0));
906 if (localeCache == NULL) {
907 // Failure allocating the locale cache.
908 // The best we can do is return a NULL reference.
// NOTE(review): per the original remarks, the failure case ends up indexing
// through a NULL pointer; callers have no way to detect this.
911 return localeCache[locid]; /*operating on NULL*/
915 This function is defined this way in order to get around static
916 initialization and static destruction.
// Ensures locale_init() has run (exactly once, via umtx_initOnce) to set up the
// predefined-locale array. The status is a local variable; the return statement
// is on a line not shown in this excerpt.
919 Locale::getLocaleCache(void)
921 UErrorCode status = U_ZERO_ERROR;
922 umtx_initOnce(gLocaleCacheInitOnce, locale_init, status);
// StringEnumeration over a private copy of a keyword list.
// count() and next() step through the buffer one NUL-terminated string at a
// time, so the list appears to be stored as consecutive NUL-terminated keys
// ended by an empty string - confirm against locale_getKeywords().
926 class KeywordEnumeration : public StringEnumeration {
931 UnicodeString currUSKey;
932 static const char fgClassID;/* Warning this is used beyond the typical RTTI usage. */
935 static UClassID U_EXPORT2 getStaticClassID(void) { return (UClassID)&fgClassID; }
936 virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }
// keys          keyword list to copy; must be non-NULL when keywordLen != 0.
// keywordLen    number of bytes to copy from keys; negative is rejected.
// currentIndex  offset into the copy at which iteration starts.
// status        in/out error code: U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
//               U_MEMORY_ALLOCATION_ERROR if the copy cannot be allocated.
//
// Until a copy is made, keywords and current both point at fgClassID, whose
// value is '\0', so *current reads as an empty list.
938 KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
939 : keywords((char *)&fgClassID), current((char *)&fgClassID), length(0) {
940 if(U_SUCCESS(status) && keywordLen != 0) {
941 if(keys == NULL || keywordLen < 0) {
942 status = U_ILLEGAL_ARGUMENT_ERROR;
944 keywords = (char *)uprv_malloc(keywordLen+1);
945 if (keywords == NULL) {
946 status = U_MEMORY_ALLOCATION_ERROR;
// One extra byte is allocated so the copy can be terminated explicitly.
949 uprv_memcpy(keywords, keys, keywordLen);
950 keywords[keywordLen] = 0;
951 current = keywords + currentIndex;
958 virtual ~KeywordEnumeration();
// Creates a new enumeration over a copy of the same buffer, positioned at the
// same offset. The status is a local variable, so a failure inside the
// constructor is not reported to the caller.
960 virtual StringEnumeration * clone() const
962 UErrorCode status = U_ZERO_ERROR;
963 return new KeywordEnumeration(keywords, length, (int32_t)(current - keywords), status);
// Counts entries by stepping over each string and its terminator; status is unused.
966 virtual int32_t count(UErrorCode &/*status*/) const {
971 kw += uprv_strlen(kw)+1;
// Produces the entry at the current position; an empty string at `current`
// (or a failed status) takes the other branch. resultLength may be NULL.
976 virtual const char* next(int32_t* resultLength, UErrorCode& status) {
979 if(U_SUCCESS(status) && *current != 0) {
981 len = (int32_t)uprv_strlen(current);
983 if(resultLength != NULL) {
987 if(resultLength != NULL) {
// UnicodeString flavour of next(): converts the char* result with setChars().
995 virtual const UnicodeString* snext(UErrorCode& status) {
996 int32_t resultLength = 0;
997 const char *s = next(&resultLength, status);
998 return setChars(s, resultLength, status);
// The body is not part of this excerpt; status is unused.
1001 virtual void reset(UErrorCode& /*status*/) {
// Definition of the class-ID byte. Its value '\0' also lets it stand in as an
// empty keyword list in the constructor.
1006 const char KeywordEnumeration::fgClassID = '\0';
// Releases the keyword buffer.
// NOTE(review): the constructor leaves keywords pointing at &fgClassID when no
// buffer is allocated (keywordLen == 0, bad arguments, or failed status on
// entry); in that case a non-heap pointer reaches uprv_free(). Confirm that
// callers never construct such an instance or add a guard.
1008 KeywordEnumeration::~KeywordEnumeration() {
1009 uprv_free(keywords);
// Creates a StringEnumeration over the keywords of this locale, i.e. the part of
// fullName that follows '@'.
// status  in/out error code; U_INVALID_FORMAT_ERROR is set on the branch at the
//         end of the visible code.
1013 Locale::createKeywords(UErrorCode &status) const
// Capacity handed to locale_getKeywords() for the keyword name buffer.
1016 int32_t keywordCapacity = 256;
1017 StringEnumeration *result = NULL;
1019 const char* variantStart = uprv_strchr(fullName, '@');
1020 const char* assignment = uprv_strchr(fullName, '=');
// An '=' located after the '@' indicates a keyword section.
// NOTE(review): either pointer is NULL when its character is absent; the guard
// around this comparison, if any, is on a line not shown here.
1022 if(assignment > variantStart) {
// Only keyword names are requested: the values buffer is NULL with capacity 0.
1023 int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status);
1025 result = new KeywordEnumeration(keywords, keyLen, 0, status);
1028 status = U_INVALID_FORMAT_ERROR;
// Thin wrapper: looks up keywordName in this locale's fullName with
// uloc_getKeywordValue(), writing into buffer (capacity bufLen), and returns
// that function's result.
1035 Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const
1037 return uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status);
// Sets keywordName to keywordValue by editing fullName in place through
// uloc_setKeywordValue(), then rebuilds baseName if it was still shared with
// fullName.
1041 Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status)
// NOTE(review): the capacity passed is always ULOC_FULLNAME_CAPACITY, but init()
// may have moved fullName to a heap block of exactly length+1 bytes. Confirm the
// in-place edit cannot write past that block.
1043 uloc_setKeywordValue(keywordName, keywordValue, fullName, ULOC_FULLNAME_CAPACITY, &status);
1044 if (U_SUCCESS(status) && baseName == fullName) {
1045 // May have added the first keyword, meaning that the fullName is no longer also the baseName.
1046 initBaseName(status);
1051 Locale::getBaseName() const {