1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (C) 2009-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
10 #include "unicode/utypes.h"
11 #include "unicode/ures.h"
12 #include "unicode/putil.h"
13 #include "unicode/uloc.h"
23 /* struct holding a single variant */
/* Singly linked list node; `next` links to another VariantListEntry.
   NOTE(review): the remaining members (for example the `variant` string that
   other code in this file reads as var->variant) are not visible in this
   excerpt. */
24 typedef struct VariantListEntry {
26 struct VariantListEntry *next;
29 /* struct holding a single attribute value */
/* Singly linked list node: `attribute` is the attribute string and `next`
   links to another AttributeListEntry. */
30 typedef struct AttributeListEntry {
31 const char *attribute;
32 struct AttributeListEntry *next;
35 /* struct holding a single extension */
/* Singly linked list node; `next` links to another ExtensionListEntry.
   NOTE(review): the `key` and `value` members that other code in this file
   reads (ext->key, ext->value) are not visible in this excerpt. */
36 typedef struct ExtensionListEntry {
39 struct ExtensionListEntry *next;
/* Parsed representation of a language tag. The string members point at
   individual subtags; `variants` and `extensions` are heads of linked lists.
   NOTE(review): members such as language, script and region, which
   _initializeULanguageTag assigns, are not visible in this excerpt. */
43 typedef struct ULanguageTag {
44 char *buf; /* holding parsed subtags */
46 const char *extlang[MAXEXTLANG];
49 VariantListEntry *variants;
50 ExtensionListEntry *extensions;
51 const char *privateuse;
52 const char *grandfathered;
/* Singleton used for private use subtags. */
57 #define PRIVATEUSE 'x'
/* Delimiters of the locale ID syntax handled by this file. */
60 #define LOCALE_SEP '_'
61 #define LOCALE_EXT_SEP '@'
62 #define LOCALE_KEYWORD_SEP ';'
63 #define LOCALE_KEY_TYPE_SEP '='
/* Character classifiers. ISNUMERIC evaluates its argument twice, so do not
   pass an expression with side effects. */
65 #define ISALPHA(c) uprv_isASCIILetter(c)
66 #define ISNUMERIC(c) ((c)>='0' && (c)<='9')
/* Shared string constants. EMPTY is the default value stored into
   ULanguageTag string members by _initializeULanguageTag. */
68 static const char EMPTY[] = "";
69 static const char LANG_UND[] = "und";
70 static const char PRIVATEUSE_KEY[] = "x";
71 static const char _POSIX[] = "_POSIX";
/* Key and value of the extension entry created for the POSIX variant. */
72 static const char POSIX_KEY[] = "va";
73 static const char POSIX_VALUE[] = "posix";
74 static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";
75 static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
76 static const char LOCALE_TYPE_YES[] = "yes";
/* Length of LANG_UND without the terminating NUL; keep in sync with it. */
78 #define LANG_UND_LEN 3
/* Table of string pairs: a grandfathered tag followed by its preferred
   replacement. The parser walks it two entries at a time until it reaches a
   NULL entry. NOTE(review): only part of the table is visible in this
   excerpt. */
80 static const char* const GRANDFATHERED[] = {
81 /* grandfathered preferred */
83 "cel-gaulish", "xtg-x-cel-gaulish",
84 "en-GB-oed", "en-GB-x-oed",
87 "i-default", "en-x-i-default",
88 "i-enochian", "und-x-i-enochian",
92 "i-mingo", "see-x-i-mingo",
105 "zh-min", "nan-x-zh-min",
/* Table of language code pairs read two entries at a time by
   _appendLanguageToLanguageTag: a match on entry i is replaced by entry
   i + 1. Each entry holds at most 3 characters plus the terminating NUL.
   NOTE(review): the table contents are not visible in this excerpt. */
111 static const char DEPRECATEDLANGS[][4] = {
119 * -------------------------------------------------
121 * These ultag_ functions may be exposed as APIs later
123 * -------------------------------------------------
/* Forward declarations of the ultag_ parser and accessor functions.
   NOTE(review): the return type lines of these declarations are not visible
   in this excerpt. */
/* Parses tagLen bytes of tag; parsedLen and status are output parameters. */
127 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
130 ultag_close(ULanguageTag* langtag);
133 ultag_getLanguage(const ULanguageTag* langtag);
137 ultag_getJDKLanguage(const ULanguageTag* langtag);
/* Indexed accessors take a zero-based idx -- presumably bounded by the
   matching Size function; confirm against the definitions. */
141 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
144 ultag_getExtlangSize(const ULanguageTag* langtag);
147 ultag_getScript(const ULanguageTag* langtag);
150 ultag_getRegion(const ULanguageTag* langtag);
153 ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
156 ultag_getVariantsSize(const ULanguageTag* langtag);
159 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
162 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
165 ultag_getExtensionsSize(const ULanguageTag* langtag);
168 ultag_getPrivateUse(const ULanguageTag* langtag);
172 ultag_getGrandfathered(const ULanguageTag* langtag);
176 * -------------------------------------------------
178 * Language subtag syntax validation functions
180 * -------------------------------------------------
/* Tests each of the first len bytes of s with ISALPHA. Callers use the
   result as "every byte is an ASCII letter".
   NOTE(review): the return statements are not visible in this excerpt. */
184 _isAlphaString(const char* s, int32_t len) {
186 for (i = 0; i < len; i++) {
187 if (!ISALPHA(*(s + i))) {
/* Tests each of the first len bytes of s with ISNUMERIC ('0'..'9'). Callers
   use the result as "every byte is an ASCII digit".
   NOTE(review): the return statements are not visible in this excerpt. */
195 _isNumericString(const char* s, int32_t len) {
197 for (i = 0; i < len; i++) {
198 if (!ISNUMERIC(*(s + i))) {
/* Tests each of the first len bytes of s; a byte that is neither ISALPHA nor
   ISNUMERIC triggers the failure branch.
   NOTE(review): the return statements are not visible in this excerpt. */
206 _isAlphaNumericString(const char* s, int32_t len) {
208 for (i = 0; i < len; i++) {
209 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
/* Checks s against the language subtag syntax quoted below: 2 to 8 bytes,
   all ASCII letters. len may be recomputed from uprv_strlen(s); callers in
   this file pass -1 to similar validators, so that presumably happens for a
   negative len -- the guard itself is not visible in this excerpt. */
217 _isLanguageSubtag(const char* s, int32_t len) {
219 * language = 2*3ALPHA ; shortest ISO 639 code
220 * ["-" extlang] ; sometimes followed by
221 * ; extended language subtags
222 * / 4ALPHA ; or reserved for future use
223 * / 5*8ALPHA ; or registered language subtag
226 len = (int32_t)uprv_strlen(s);
228 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
/* Checks s against the extlang subtag syntax: exactly 3 ASCII letters.
   len may be recomputed from uprv_strlen(s); the guard for that is not
   visible in this excerpt. */
235 _isExtlangSubtag(const char* s, int32_t len) {
237 * extlang = 3ALPHA ; selected ISO 639 codes
238 * *2("-" 3ALPHA) ; permanently reserved
241 len = (int32_t)uprv_strlen(s);
243 if (len == 3 && _isAlphaString(s, len)) {
/* Checks s against the script subtag syntax: exactly 4 ASCII letters.
   len may be recomputed from uprv_strlen(s); the guard for that is not
   visible in this excerpt. */
250 _isScriptSubtag(const char* s, int32_t len) {
252 * script = 4ALPHA ; ISO 15924 code
255 len = (int32_t)uprv_strlen(s);
257 if (len == 4 && _isAlphaString(s, len)) {
/* Checks s against the region subtag syntax: either 2 ASCII letters or
   3 ASCII digits. len may be recomputed from uprv_strlen(s); the guard for
   that is not visible in this excerpt. */
264 _isRegionSubtag(const char* s, int32_t len) {
266 * region = 2ALPHA ; ISO 3166-1 code
267 * / 3DIGIT ; UN M.49 code
270 len = (int32_t)uprv_strlen(s);
272 if (len == 2 && _isAlphaString(s, len)) {
275 if (len == 3 && _isNumericString(s, len)) {
/* Checks s against the variant subtag syntax: 5 to 8 alphanumerics, or a
   digit followed by 3 alphanumerics. Callers in this file pass len == -1 for
   NUL-terminated input; len is then presumably taken from uprv_strlen(s)
   (the guard is not visible in this excerpt). */
282 _isVariantSubtag(const char* s, int32_t len) {
284 * variant = 5*8alphanum ; registered variants
285 * / (DIGIT 3alphanum)
288 len = (int32_t)uprv_strlen(s);
290 if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
293 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
/* Relaxed variant check used for variants carried in private use: accepts
   1 to 8 alphanumerics. len may be recomputed from uprv_strlen(s); the guard
   for that is not visible in this excerpt. */
300 _isPrivateuseVariantSubtag(const char* s, int32_t len) {
302 * variant = 1*8alphanum ; registered variants
303 * / (DIGIT 3alphanum)
306 len = (int32_t)uprv_strlen(s);
308 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
/* Checks that s is a single ASCII letter other than the private use
   singleton 'x' (compared case-insensitively via uprv_tolower). len may be
   recomputed from uprv_strlen(s); the guard for that is not visible in this
   excerpt. */
315 _isExtensionSingleton(const char* s, int32_t len) {
317 * extension = singleton 1*("-" (2*8alphanum))
320 len = (int32_t)uprv_strlen(s);
322 if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
/* Checks a single extension value subtag: 2 to 8 alphanumerics. len may be
   recomputed from uprv_strlen(s); the guard for that is not visible in this
   excerpt. */
329 _isExtensionSubtag(const char* s, int32_t len) {
331 * extension = singleton 1*("-" (2*8alphanum))
334 len = (int32_t)uprv_strlen(s);
336 if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
/* Validates a sequence of extension subtags held in the first len bytes of s.
   p walks the input while pSubtag marks the start of the subtag in progress;
   each completed subtag is checked with _isExtensionSubtag and the trailing
   one is checked by the final return.
   NOTE(review): the separator test and the handling of an empty subtag are
   not visible in this excerpt. */
343 _isExtensionSubtags(const char* s, int32_t len) {
345 const char *pSubtag = NULL;
348 len = (int32_t)uprv_strlen(s);
351 while ((p - s) < len) {
353 if (pSubtag == NULL) {
356 if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
360 } else if (pSubtag == NULL) {
365 if (pSubtag == NULL) {
368 return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
/* Checks a single private use value subtag: 1 to 8 alphanumerics. Callers in
   this file pass len == -1 for NUL-terminated input; len is then presumably
   taken from uprv_strlen(s) (the guard is not visible in this excerpt). */
372 _isPrivateuseValueSubtag(const char* s, int32_t len) {
374 * privateuse = "x" 1*("-" (1*8alphanum))
377 len = (int32_t)uprv_strlen(s);
379 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
/* Validates a sequence of private use value subtags held in the first len
   bytes of s. p walks the input while pSubtag marks the start of the subtag
   in progress; each completed subtag is checked with
   _isPrivateuseValueSubtag and the trailing one by the final return.
   NOTE(review): the separator test and the handling of an empty subtag are
   not visible in this excerpt. */
386 _isPrivateuseValueSubtags(const char* s, int32_t len) {
388 const char *pSubtag = NULL;
391 len = (int32_t)uprv_strlen(s);
394 while ((p - s) < len) {
396 if (pSubtag == NULL) {
399 if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
403 } else if (pSubtag == NULL) {
408 if (pSubtag == NULL) {
411 return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
/* Checks that s is a Unicode locale extension key as accepted here: exactly
   2 alphanumerics. len may be recomputed from uprv_strlen(s); the guard for
   that is not visible in this excerpt. */
415 ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
417 len = (int32_t)uprv_strlen(s);
419 if (len == 2 && _isAlphaNumericString(s, len)) {
/* Validates a Unicode locale extension type. Scans len bytes of s, tracking
   the length of the subtag in progress in subtagLen; alphanumeric bytes are
   accepted as subtag content and the final subtag must be at least 3 bytes
   long.
   NOTE(review): the separator branch and the per-subtag length limits are
   not visible in this excerpt. */
426 ultag_isUnicodeLocaleType(const char*s, int32_t len) {
428 int32_t subtagLen = 0;
431 len = (int32_t)uprv_strlen(s);
434 for (p = s; len > 0; p++, len--) {
440 } else if (ISALPHA(*p) || ISNUMERIC(*p)) {
450 return (subtagLen >= 3);
453 * -------------------------------------------------
457 * -------------------------------------------------
/* Adds var to the variant list whose head is *first. An empty list takes var
   as its head; otherwise the list is walked, var->variant is compared with
   each existing entry, and a duplicate is rejected. Callers treat a false
   result as "duplicate, caller still owns var".
   NOTE(review): the linking and return statements are not visible in this
   excerpt. */
461 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
464 if (*first == NULL) {
468 VariantListEntry *prev, *cur;
471 /* variants order should be preserved */
481 /* Checking for duplicate variant */
482 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
484 /* duplicated variant */
/* Adds attr to the attribute list whose head is *first. An empty list takes
   attr as its head; otherwise the insertion point is chosen by comparing
   attr->attribute with existing entries, and a duplicate is rejected.
   Callers treat a false result as "duplicate, caller still owns attr".
   NOTE(review): the linking and return statements are not visible in this
   excerpt. */
497 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
500 if (*first == NULL) {
504 AttributeListEntry *prev, *cur;
507 /* reorder attributes in alphabetical order */
516 cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
527 /* duplicated attribute */
/* Adds ext to the extension list whose head is *first, keeping the list
   ordered by key and rejecting a duplicate key. When localeToBCP handling
   applies, ordering is by BCP 47 position: single-character keys compare by
   character with the private use key 'x' handled specially, multi-character
   keys are positioned as if they were the LDMLEXT singleton, and among
   multi-character keys the `attribute` key gets special treatment.
   Callers treat a false result as "duplicate, caller still owns ext".
   NOTE(review): the values assigned to cmp in several branches and the
   linking/return statements are not visible in this excerpt. */
541 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
544 if (*first == NULL) {
548 ExtensionListEntry *prev, *cur;
551 /* reorder extensions in alphabetical order */
561 /* special handling for locale to bcp conversion */
564 len = (int32_t)uprv_strlen(ext->key);
565 curlen = (int32_t)uprv_strlen(cur->key);
567 if (len == 1 && curlen == 1) {
568 if (*(ext->key) == *(cur->key)) {
570 } else if (*(ext->key) == PRIVATEUSE) {
572 } else if (*(cur->key) == PRIVATEUSE) {
575 cmp = *(ext->key) - *(cur->key);
577 } else if (len == 1) {
578 cmp = *(ext->key) - LDMLEXT;
579 } else if (curlen == 1) {
580 cmp = LDMLEXT - *(cur->key);
582 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
583 /* Both are u extension keys - we need special handling for 'attribute' */
585 if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) {
587 } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
593 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
605 /* duplicated extension key */
/* Resets langtag to its empty state: string members point at the shared
   EMPTY constant, every extlang slot is NULL, and both list heads are NULL.
   NOTE(review): whether langtag->buf is also reset is not visible in this
   excerpt. */
618 _initializeULanguageTag(ULanguageTag* langtag) {
623 langtag->language = EMPTY;
624 for (i = 0; i < MAXEXTLANG; i++) {
625 langtag->extlang[i] = NULL;
628 langtag->script = EMPTY;
629 langtag->region = EMPTY;
631 langtag->variants = NULL;
632 langtag->extensions = NULL;
634 langtag->grandfathered = EMPTY;
635 langtag->privateuse = EMPTY;
/* Writes the language subtag for localeID into appendAt (at most capacity
   bytes). The language is read with uloc_getLanguage; depending on the
   result the function writes LANG_UND ("und"), reports
   U_ILLEGAL_ARGUMENT_ERROR, or writes the language code after replacing a
   deprecated code found in DEPRECATEDLANGS. Copies are clamped to the
   remaining capacity, and the output is finished with u_terminateChars.
   NOTE(review): the conditions selecting between the error and the "und"
   fallback (presumably `strict`) and the return statement are not visible
   in this excerpt. */
639 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
640 char buf[ULOC_LANG_CAPACITY];
641 UErrorCode tmpStatus = U_ZERO_ERROR;
645 if (U_FAILURE(*status)) {
649 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
650 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
652 *status = U_ILLEGAL_ARGUMENT_ERROR;
658 /* Note: returned language code is in lower case letters */
661 if (reslen < capacity) {
662 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
664 reslen += LANG_UND_LEN;
665 } else if (!_isLanguageSubtag(buf, len)) {
666 /* invalid language code */
668 *status = U_ILLEGAL_ARGUMENT_ERROR;
671 if (reslen < capacity) {
672 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
674 reslen += LANG_UND_LEN;
676 /* resolve deprecated */
677 for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
678 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
/* The replacement is at most 3 characters plus NUL (DEPRECATEDLANGS entries
   are char[4]); assumes ULOC_LANG_CAPACITY is at least 4 -- TODO confirm. */
679 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
680 len = (int32_t)uprv_strlen(buf);
684 if (reslen < capacity) {
685 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
689 u_terminateChars(appendAt, capacity, reslen, status);
/* Writes SEP followed by the script subtag for localeID into appendAt (at
   most capacity bytes). The script is read with uloc_getScript and checked
   with _isScriptSubtag; a read failure or an invalid script can report
   U_ILLEGAL_ARGUMENT_ERROR. Writes are guarded by `reslen < capacity`, and
   the output is finished with u_terminateChars.
   NOTE(review): the role of `strict`, the handling of an empty script and
   the return statement are not visible in this excerpt. */
694 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
695 char buf[ULOC_SCRIPT_CAPACITY];
696 UErrorCode tmpStatus = U_ZERO_ERROR;
700 if (U_FAILURE(*status)) {
704 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
705 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
707 *status = U_ILLEGAL_ARGUMENT_ERROR;
713 if (!_isScriptSubtag(buf, len)) {
714 /* invalid script code */
716 *status = U_ILLEGAL_ARGUMENT_ERROR;
720 if (reslen < capacity) {
721 *(appendAt + reslen) = SEP;
725 if (reslen < capacity) {
726 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
731 u_terminateChars(appendAt, capacity, reslen, status);
/* Writes SEP followed by the region subtag for localeID into appendAt (at
   most capacity bytes). The region is read with uloc_getCountry and checked
   with _isRegionSubtag; a read failure or an invalid region can report
   U_ILLEGAL_ARGUMENT_ERROR. Writes are guarded by `reslen < capacity`, and
   the output is finished with u_terminateChars.
   NOTE(review): the role of `strict`, the handling of an empty region and
   the return statement are not visible in this excerpt. */
736 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
737 char buf[ULOC_COUNTRY_CAPACITY];
738 UErrorCode tmpStatus = U_ZERO_ERROR;
742 if (U_FAILURE(*status)) {
746 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
747 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
749 *status = U_ILLEGAL_ARGUMENT_ERROR;
755 if (!_isRegionSubtag(buf, len)) {
756 /* invalid region code */
758 *status = U_ILLEGAL_ARGUMENT_ERROR;
762 if (reslen < capacity) {
763 *(appendAt + reslen) = SEP;
767 if (reslen < capacity) {
768 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
773 u_terminateChars(appendAt, capacity, reslen, status);
/* Writes the variant subtags of localeID into appendAt (at most capacity
   bytes). The variant string is read with uloc_getVariant and split in place
   at SEP, LOCALE_SEP or the terminating NUL. Each piece is lower-cased and
   then classified: a valid variant subtag is queued in a VariantListEntry
   list (duplicates report U_ILLEGAL_ARGUMENT_ERROR), the POSIX variant is
   remembered so that it can be emitted as an extension later, and pieces
   that are only valid as private use values are left for separate handling.
   On success the queued variants are written as SEP + variant; the list is
   then released and the output finished with u_terminateChars.
   NOTE(review): the role of `strict`, the assignment to *hadPosix and the
   return statement are not visible in this excerpt. */
778 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
779 char buf[ULOC_FULLNAME_CAPACITY];
780 UErrorCode tmpStatus = U_ZERO_ERROR;
784 if (U_FAILURE(*status)) {
788 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
789 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
791 *status = U_ILLEGAL_ARGUMENT_ERROR;
799 VariantListEntry *var;
800 VariantListEntry *varFirst = NULL;
805 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
809 *p = 0; /* terminate */
813 *status = U_ILLEGAL_ARGUMENT_ERROR;
816 /* ignore empty variant */
818 /* ICU uses upper case letters for variants, but
819 the canonical format is lowercase in BCP47 */
820 for (i = 0; *(pVar + i) != 0; i++) {
821 *(pVar + i) = uprv_tolower(*(pVar + i));
825 if (_isVariantSubtag(pVar, -1)) {
/* NOTE(review): `len` is compared with the uncast result of uprv_strlen,
   while other call sites in this file cast that result to int32_t; this
   looks like a signed/unsigned comparison -- confirm. `len` is also the
   length of the whole variant string, not of the current piece. */
826 if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) {
827 /* emit the variant to the list */
828 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
830 *status = U_MEMORY_ALLOCATION_ERROR;
834 if (!_addVariantToList(&varFirst, var)) {
835 /* duplicated variant */
838 *status = U_ILLEGAL_ARGUMENT_ERROR;
843 /* Special handling for POSIX variant, need to remember that we had it and then */
844 /* treat it like an extension later. */
848 *status = U_ILLEGAL_ARGUMENT_ERROR;
850 } else if (_isPrivateuseValueSubtag(pVar, -1)) {
851 /* Handle private use subtags separately */
855 /* reset variant starting position */
857 } else if (pVar == NULL) {
863 if (U_SUCCESS(*status)) {
864 if (varFirst != NULL) {
867 /* write out validated/normalized variants to the target */
869 while (var != NULL) {
870 if (reslen < capacity) {
871 *(appendAt + reslen) = SEP;
874 varLen = (int32_t)uprv_strlen(var->variant);
875 if (reslen < capacity) {
876 uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
/* Release the temporary variant list. */
886 while (var != NULL) {
887 VariantListEntry *tmpVar = var->next;
892 if (U_FAILURE(*status)) {
897 u_terminateChars(appendAt, capacity, reslen, status);
/* Writes the keywords of localeID into appendAt (at most capacity bytes) as
   BCP 47 extensions and private use.
   Keywords are enumerated with uloc_openKeywords / uenum_next and each value
   is read with uloc_getKeywordValue. Every keyword becomes an
   ExtensionListEntry inserted with _addExtensionToList:
     - the `attribute` keyword is split into AttributeListEntry items kept
       sorted by _addAttributeToList, plus one placeholder extension entry;
     - keys longer than one character are mapped with
       uloc_toUnicodeLocaleKey and uloc_toUnicodeLocaleType;
     - single-character keys are validated as private use ('x') or as an
       extension singleton, and the value is copied into extBuf.
   When hadPosix is set, an extension entry POSIX_KEY / POSIX_VALUE is added.
   On success the sorted entries are written, the lists and the enumeration
   are released, and the result of u_terminateChars is returned.
   NOTE(review): the role of `strict` on the error paths and the exact
   delimiter used to split attribute values are not visible in this
   excerpt. */
902 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
903 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
904 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
905 int32_t attrBufLength = 0;
906 UEnumeration *keywordEnum = NULL;
909 keywordEnum = uloc_openKeywords(localeID, status);
910 if (U_FAILURE(*status) && !hadPosix) {
911 uenum_close(keywordEnum);
914 if (keywordEnum != NULL || hadPosix) {
915 /* reorder extensions */
918 ExtensionListEntry *firstExt = NULL;
919 ExtensionListEntry *ext;
920 AttributeListEntry *firstAttr = NULL;
921 AttributeListEntry *attr;
/* extBuf is the backing store for normalized values; pExtBuf and
   extBufCapacity track the unused remainder. */
923 char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
924 char *pExtBuf = extBuf;
925 int32_t extBufCapacity = sizeof(extBuf);
926 const char *bcpKey, *bcpValue;
927 UErrorCode tmpStatus = U_ZERO_ERROR;
932 key = uenum_next(keywordEnum, NULL, status);
936 len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
937 /* buf must be null-terminated */
938 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
940 *status = U_ILLEGAL_ARGUMENT_ERROR;
943 /* ignore this keyword */
944 tmpStatus = U_ZERO_ERROR;
948 keylen = (int32_t)uprv_strlen(key);
949 isBcpUExt = (keylen > 1);
951 /* special keyword used for representing Unicode locale attributes */
952 if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
/* attrBuf has the same capacity as buf and len was accepted only when buf
   is NUL-terminated, so the copy below stays within attrBuf. */
957 for (; i < len; i++) {
959 attrBuf[attrBufLength++] = buf[i];
965 if (attrBufLength > 0) {
966 attrBuf[attrBufLength] = 0;
968 } else if (i >= len){
972 /* create AttributeListEntry */
973 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
975 *status = U_MEMORY_ALLOCATION_ERROR;
978 attrValue = (char*)uprv_malloc(attrBufLength + 1);
979 if (attrValue == NULL) {
980 *status = U_MEMORY_ALLOCATION_ERROR;
/* NOTE(review): on this failure path `attr` has already been allocated and
   is not yet linked into firstAttr; no release of it is visible here --
   confirm that it is freed, otherwise it leaks. */
983 uprv_strcpy(attrValue, attrBuf);
984 attr->attribute = attrValue;
986 if (!_addAttributeToList(&firstAttr, attr)) {
988 uprv_free(attrValue);
990 *status = U_ILLEGAL_ARGUMENT_ERROR;
995 /* for a place holder ExtensionListEntry */
996 bcpKey = LOCALE_ATTRIBUTE_KEY;
999 } else if (isBcpUExt) {
1000 bcpKey = uloc_toUnicodeLocaleKey(key);
1001 if (bcpKey == NULL) {
1003 *status = U_ILLEGAL_ARGUMENT_ERROR;
1009 /* we've checked buf is null-terminated above */
1010 bcpValue = uloc_toUnicodeLocaleType(key, buf);
1011 if (bcpValue == NULL) {
1013 *status = U_ILLEGAL_ARGUMENT_ERROR;
1018 if (bcpValue == buf) {
1020 When uloc_toUnicodeLocaleType(key, buf) returns the
1021 input value as is, the value is well-formed, but has
1022 no known mapping. This implementation normalizes the
1023 the value to lower case
/* NOTE(review): the result of uprv_strlen is stored in an int32_t without
   the cast used at other call sites in this file -- confirm. */
1025 int32_t bcpValueLen = uprv_strlen(bcpValue);
1026 if (bcpValueLen < extBufCapacity) {
1027 uprv_strcpy(pExtBuf, bcpValue);
1028 T_CString_toLowerCase(pExtBuf);
1032 pExtBuf += (bcpValueLen + 1);
1033 extBufCapacity -= (bcpValueLen + 1);
1036 *status = U_ILLEGAL_ARGUMENT_ERROR;
1043 if (*key == PRIVATEUSE) {
1044 if (!_isPrivateuseValueSubtags(buf, len)) {
1046 *status = U_ILLEGAL_ARGUMENT_ERROR;
1052 if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
1054 *status = U_ILLEGAL_ARGUMENT_ERROR;
1061 if ((len + 1) < extBufCapacity) {
1062 uprv_memcpy(pExtBuf, buf, len);
1070 extBufCapacity -= (len + 1);
1072 *status = U_ILLEGAL_ARGUMENT_ERROR;
1077 /* create ExtensionListEntry */
1078 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1080 *status = U_MEMORY_ALLOCATION_ERROR;
1084 ext->value = bcpValue;
1086 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1089 *status = U_ILLEGAL_ARGUMENT_ERROR;
1095 /* Special handling for POSIX variant - add the keywords for POSIX */
1097 /* create ExtensionListEntry for POSIX */
1098 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1100 *status = U_MEMORY_ALLOCATION_ERROR;
1103 ext->key = POSIX_KEY;
1104 ext->value = POSIX_VALUE;
1106 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1111 if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
/* startLDMLExtension becomes TRUE once the SEP + LDMLEXT singleton has been
   written for the first multi-character key. */
1112 UBool startLDMLExtension = FALSE;
1113 for (ext = firstExt; ext; ext = ext->next) {
1114 if (!startLDMLExtension && uprv_strlen(ext->key) > 1) {
1115 /* first LDML u singleton extension */
1116 if (reslen < capacity) {
1117 *(appendAt + reslen) = SEP;
1120 if (reslen < capacity) {
1121 *(appendAt + reslen) = LDMLEXT;
1125 startLDMLExtension = TRUE;
1128 /* write out the sorted BCP47 attributes, extensions and private use */
1129 if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
1130 /* write the value for the attributes */
1131 for (attr = firstAttr; attr; attr = attr->next) {
1132 if (reslen < capacity) {
1133 *(appendAt + reslen) = SEP;
1136 len = (int32_t)uprv_strlen(attr->attribute);
1137 if (reslen < capacity) {
1138 uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
1143 if (reslen < capacity) {
1144 *(appendAt + reslen) = SEP;
1147 len = (int32_t)uprv_strlen(ext->key);
1148 if (reslen < capacity) {
1149 uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
1152 if (reslen < capacity) {
1153 *(appendAt + reslen) = SEP;
1156 len = (int32_t)uprv_strlen(ext->value);
1157 if (reslen < capacity) {
1158 uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
/* Release the temporary extension list. */
1167 while (ext != NULL) {
1168 ExtensionListEntry *tmpExt = ext->next;
/* Release the temporary attribute list; each attribute string was allocated
   with uprv_malloc above, hence the cast away from const. */
1174 while (attr != NULL) {
1175 AttributeListEntry *tmpAttr = attr->next;
1176 char *pValue = (char *)attr->attribute;
1182 uenum_close(keywordEnum);
1184 if (U_FAILURE(*status)) {
1189 return u_terminateChars(appendAt, capacity, reslen, status);
1193 * Append keywords parsed from LDML extension value
1194 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1195 * Note: char* buf is used for storing keywords
/* Converts the value of an LDML (u) extension into keyword entries and adds
   them to *appendTo.
   ldmlext     the extension value; subtags are separated by SEP.
   appendTo    head of the ExtensionListEntry list that receives the result.
   buf/bufSize caller-provided storage for generated keys and types; the
               entries added to *appendTo may point into it.
   posixVariant on entry, whether a variant already exists; reset to FALSE
               and set to TRUE when u-va-posix is seen and no variant
               existed.
   status      receives U_ILLEGAL_ARGUMENT_ERROR, U_MEMORY_ALLOCATION_ERROR
               or U_BUFFER_OVERFLOW_ERROR on failure.
   Leading subtags up to the first Unicode locale key are collected as
   attributes (sorted, duplicates rejected) and emitted as a single
   `attribute` keyword; the remaining subtags are grouped into key/type
   pairs, mapped with uloc_toLegacyKey / uloc_toLegacyType, and collected in
   a temporary list that is merged into *appendTo at the end.
   NOTE(review): the cleanup labels and several loop headers are not visible
   in this excerpt. */
1198 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
1199 const char *pTag; /* beginning of current subtag */
1200 const char *pKwds; /* beginning of key-type pairs */
1201 UBool variantExists = *posixVariant;
1203 ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */
1204 ExtensionListEntry *kwd, *nextKwd;
1206 AttributeListEntry *attrFirst = NULL; /* first attribute */
1207 AttributeListEntry *attr, *nextAttr;
1212 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1213 int32_t attrBufIdx = 0;
1215 /* Reset the posixVariant value */
1216 *posixVariant = FALSE;
1221 /* Iterate through u extension attributes */
1223 /* locate next separator char */
1224 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1226 if (ultag_isUnicodeLocaleKey(pTag, len)) {
1231 /* add this attribute to the list */
1232 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1234 *status = U_MEMORY_ALLOCATION_ERROR;
/* Attribute strings are packed back to back, each NUL-terminated, into the
   local attrBuf. */
1238 if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
1239 uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
1240 attrBuf[attrBufIdx + len] = 0;
1241 attr->attribute = &attrBuf[attrBufIdx];
1242 attrBufIdx += (len + 1);
1244 *status = U_ILLEGAL_ARGUMENT_ERROR;
1248 if (!_addAttributeToList(&attrFirst, attr)) {
1249 *status = U_ILLEGAL_ARGUMENT_ERROR;
1257 /* next to the separator */
1263 /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
1265 if (attrBufIdx > bufSize) {
1266 /* attrBufIdx == <total length of attribute subtag> + 1 */
1267 *status = U_ILLEGAL_ARGUMENT_ERROR;
1271 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1273 *status = U_MEMORY_ALLOCATION_ERROR;
1277 kwd->key = LOCALE_ATTRIBUTE_KEY;
1280 /* attribute subtags sorted in alphabetical order as type */
1282 while (attr != NULL) {
1283 nextAttr = attr->next;
1285 /* buffer size check is done above */
1286 if (attr != attrFirst) {
1287 *(buf + bufIdx) = SEP;
/* NOTE(review): the result of uprv_strlen is assigned without the int32_t
   cast used at other call sites in this file -- confirm. */
1291 len = uprv_strlen(attr->attribute);
1292 uprv_memcpy(buf + bufIdx, attr->attribute, len);
1297 *(buf + bufIdx) = 0;
1300 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1301 *status = U_ILLEGAL_ARGUMENT_ERROR;
1306 /* once keyword entry is created, delete the attribute list */
1308 while (attr != NULL) {
1309 nextAttr = attr->next;
1317 const char *pBcpKey = NULL; /* u extension key subtag */
1318 const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */
1319 int32_t bcpKeyLen = 0;
1320 int32_t bcpTypeLen = 0;
1321 UBool isDone = FALSE;
1324 /* BCP47 representation of LDML key/type pairs */
1326 const char *pNextBcpKey = NULL;
1327 int32_t nextBcpKeyLen = 0;
1328 UBool emitKeyword = FALSE;
1331 /* locate next separator char */
1332 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1334 if (ultag_isUnicodeLocaleKey(pTag, len)) {
1338 nextBcpKeyLen = len;
1344 U_ASSERT(pBcpKey != NULL);
1345 /* within LDML type subtags */
1347 bcpTypeLen += (len + 1);
1357 /* next to the separator */
1361 /* processing last one */
1367 const char *pKey = NULL; /* LDML key */
1368 const char *pType = NULL; /* LDML type */
1370 char bcpKeyBuf[9]; /* BCP key length is always 2 for now */
1372 U_ASSERT(pBcpKey != NULL);
/* NOTE(review): int32_t compared with a sizeof result (signed/unsigned);
   bcpKeyLen is not expected to be negative here, but a cast would make the
   intent explicit -- confirm. */
1374 if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
1375 /* the BCP key is invalid */
1376 *status = U_ILLEGAL_ARGUMENT_ERROR;
/* The length check above leaves room for the terminator written below. */
1380 uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
1381 bcpKeyBuf[bcpKeyLen] = 0;
1383 /* u extension key to LDML key */
1384 pKey = uloc_toLegacyKey(bcpKeyBuf);
1386 *status = U_ILLEGAL_ARGUMENT_ERROR;
1389 if (pKey == bcpKeyBuf) {
1391 The key returned by toLegacyKey points to the input buffer.
1392 We normalize the result key to lower case.
1394 T_CString_toLowerCase(bcpKeyBuf);
1395 if (bufSize - bufIdx - 1 >= bcpKeyLen) {
1396 uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen);
1397 pKey = buf + bufIdx;
1398 bufIdx += bcpKeyLen;
1399 *(buf + bufIdx) = 0;
1402 *status = U_BUFFER_OVERFLOW_ERROR;
1408 char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */
/* NOTE(review): same signed/unsigned comparison pattern as for bcpKeyLen. */
1409 if (bcpTypeLen >= sizeof(bcpTypeBuf)) {
1410 /* the BCP type is too long */
1411 *status = U_ILLEGAL_ARGUMENT_ERROR;
1415 uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
1416 bcpTypeBuf[bcpTypeLen] = 0;
1418 /* BCP type to locale type */
1419 pType = uloc_toLegacyType(pKey, bcpTypeBuf);
1420 if (pType == NULL) {
1421 *status = U_ILLEGAL_ARGUMENT_ERROR;
1424 if (pType == bcpTypeBuf) {
1426 The type returned by toLegacyType points to the input buffer.
1427 We normalize the result type to lower case.
1429 /* normalize to lower case */
1430 T_CString_toLowerCase(bcpTypeBuf);
1431 if (bufSize - bufIdx - 1 >= bcpTypeLen) {
1432 uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen);
1433 pType = buf + bufIdx;
1434 bufIdx += bcpTypeLen;
1435 *(buf + bufIdx) = 0;
1438 *status = U_BUFFER_OVERFLOW_ERROR;
1443 /* typeless - default type value is "yes" */
1444 pType = LOCALE_TYPE_YES;
1447 /* Special handling for u-va-posix, since we want to treat this as a variant,
1449 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
1450 *posixVariant = TRUE;
1452 /* create an ExtensionListEntry for this keyword */
1453 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1455 *status = U_MEMORY_ALLOCATION_ERROR;
1462 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1463 *status = U_ILLEGAL_ARGUMENT_ERROR;
1469 pBcpKey = pNextBcpKey;
1470 bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
/* Merge the temporary keyword list into the caller's list. The result of
   _addExtensionToList is not checked here. */
1478 while (kwd != NULL) {
1479 nextKwd = kwd->next;
1480 _addExtensionToList(appendTo, kwd, FALSE);
/* Walk and release the temporary lists -- presumably the error cleanup
   path; the labels are not visible in this excerpt. */
1488 while (attr != NULL) {
1489 nextAttr = attr->next;
1495 while (kwd != NULL) {
1496 nextKwd = kwd->next;
/* Writes the locale-ID keyword section for a parsed language tag into
   appendAt (at most capacity bytes).
   Extensions of langtag are turned into ExtensionListEntry items: the
   LDMLEXT extension is expanded by _appendLDMLExtensionAsKeywords (using a
   scratch buffer of `capacity` bytes), other extensions are added as they
   are, and a non-empty private use value is added under PRIVATEUSE_KEY.
   Duplicate keys report U_ILLEGAL_ARGUMENT_ERROR. If the LDML extension
   signalled a POSIX variant, _POSIX is written first. The keywords are then
   written as key LOCALE_KEY_TYPE_SEP value, the first one introduced by
   LOCALE_EXT_SEP and later ones by LOCALE_KEYWORD_SEP. Returns the result
   of u_terminateChars.
   NOTE(review): the release of kwdBuf is not visible in this excerpt --
   confirm it is freed on every path. */
1504 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
1508 ExtensionListEntry *kwdFirst = NULL;
1509 ExtensionListEntry *kwd;
1510 const char *key, *type;
1511 char *kwdBuf = NULL;
1512 int32_t kwdBufLength = capacity;
1513 UBool posixVariant = FALSE;
1515 if (U_FAILURE(*status)) {
1519 kwdBuf = (char*)uprv_malloc(kwdBufLength);
1520 if (kwdBuf == NULL) {
1521 *status = U_MEMORY_ALLOCATION_ERROR;
1525 /* Determine if variants already exists */
/* posixVariant is an in/out flag for _appendLDMLExtensionAsKeywords: TRUE on
   input means a variant already exists. */
1526 if (ultag_getVariantsSize(langtag)) {
1527 posixVariant = TRUE;
1530 n = ultag_getExtensionsSize(langtag);
1532 /* resolve locale keywords and reordering keys */
1533 for (i = 0; i < n; i++) {
1534 key = ultag_getExtensionKey(langtag, i);
1535 type = ultag_getExtensionValue(langtag, i);
1536 if (*key == LDMLEXT) {
1537 _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
1538 if (U_FAILURE(*status)) {
1542 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1544 *status = U_MEMORY_ALLOCATION_ERROR;
1549 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1551 *status = U_ILLEGAL_ARGUMENT_ERROR;
1557 if (U_SUCCESS(*status)) {
1558 type = ultag_getPrivateUse(langtag);
1559 if ((int32_t)uprv_strlen(type) > 0) {
1560 /* add private use as a keyword */
1561 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1563 *status = U_MEMORY_ALLOCATION_ERROR;
1565 kwd->key = PRIVATEUSE_KEY;
1567 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1569 *status = U_ILLEGAL_ARGUMENT_ERROR;
1575 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1577 if (U_SUCCESS(*status) && posixVariant) {
1578 len = (int32_t) uprv_strlen(_POSIX);
1579 if (reslen < capacity) {
1580 uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
1585 if (U_SUCCESS(*status) && kwdFirst != NULL) {
1586 /* write out the sorted keywords */
1587 UBool firstValue = TRUE;
1590 if (reslen < capacity) {
1593 *(appendAt + reslen) = LOCALE_EXT_SEP;
1597 *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
1603 len = (int32_t)uprv_strlen(kwd->key);
1604 if (reslen < capacity) {
1605 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
1610 if (reslen < capacity) {
1611 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
1616 len = (int32_t)uprv_strlen(kwd->value);
1617 if (reslen < capacity) {
1618 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
/* Release the temporary keyword list. */
1628 while (kwd != NULL) {
1629 ExtensionListEntry *tmpKwd = kwd->next;
1636 if (U_FAILURE(*status)) {
1640 return u_terminateChars(appendAt, capacity, reslen, status);
/* Writes the private use form of those variant pieces of localeID that are
   not valid BCP 47 variant subtags.
   The variant string is read with uloc_getVariant and split in place at SEP,
   LOCALE_SEP or the terminating NUL; each piece is lower-cased. Pieces that
   pass _isPrivateuseValueSubtag are staged in tmpAppend; the first staged
   piece is preceded by SEP, PRIVATEUSE_KEY, SEP, PRIVUSE_VARIANT_PREFIX and
   SEP. An invalid piece reports U_ILLEGAL_ARGUMENT_ERROR when strict is set.
   On success the staged text is copied to appendAt and finished with
   u_terminateChars.
   NOTE(review): hadPosix is not used in the visible lines, and the return
   statement is not visible in this excerpt. */
1644 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1645 char buf[ULOC_FULLNAME_CAPACITY];
/* NOTE(review): every write into tmpAppend below is guarded by
   `reslen < capacity` (the caller's capacity), not by sizeof(tmpAppend).
   If capacity can exceed ULOC_FULLNAME_CAPACITY and the staged text grows
   past it (prefix plus a long variant), these writes could run past the
   array -- confirm the bound. */
1646 char tmpAppend[ULOC_FULLNAME_CAPACITY];
1647 UErrorCode tmpStatus = U_ZERO_ERROR;
1651 if (U_FAILURE(*status)) {
1655 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1656 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1658 *status = U_ILLEGAL_ARGUMENT_ERROR;
1666 UBool firstValue = TRUE;
1673 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1677 *p = 0; /* terminate */
1679 if (pPriv != NULL) {
1680 /* Private use in the canonical format is lowercase in BCP47 */
1681 for (i = 0; *(pPriv + i) != 0; i++) {
1682 *(pPriv + i) = uprv_tolower(*(pPriv + i));
1686 if (_isPrivateuseValueSubtag(pPriv, -1)) {
1688 if (!_isVariantSubtag(pPriv, -1)) {
1694 } else if (strict) {
1695 *status = U_ILLEGAL_ARGUMENT_ERROR;
1702 if (reslen < capacity) {
1703 tmpAppend[reslen++] = SEP;
1707 if (reslen < capacity) {
1708 tmpAppend[reslen++] = *PRIVATEUSE_KEY;
1711 if (reslen < capacity) {
1712 tmpAppend[reslen++] = SEP;
1715 len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
1716 if (reslen < capacity) {
1717 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
1721 if (reslen < capacity) {
1722 tmpAppend[reslen++] = SEP;
1728 len = (int32_t)uprv_strlen(pPriv);
1729 if (reslen < capacity) {
1730 uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
1735 /* reset private use starting position */
1737 } else if (pPriv == NULL) {
1743 if (U_FAILURE(*status)) {
1748 if (U_SUCCESS(*status)) {
1750 if (reslen < capacity) {
/* NOTE(review): the destination offset is 0, yet the copy length is clamped
   to `capacity - reslen`. If `len` holds the staged length at this point
   (its assignment is not visible in this excerpt), output longer than half
   of capacity would be copied only partially even though it fits; clamping
   to `capacity` looks like the intent -- confirm. */
1751 uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
1755 u_terminateChars(appendAt, capacity, reslen, status);
1761 * -------------------------------------------------
1765 * -------------------------------------------------
1768 /* Bit flags used by the parser */
1779 * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing
1780 * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ )
1781 * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above.
1783 #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
1784 #pragma optimize( "", off )
/*
 * Parse a language tag string into a heap-allocated ULanguageTag.
 *
 * tag       - input language tag text.
 * tagLen    - length of tag; the length can also be taken from
 *             uprv_strlen(tag) (uloc_forLanguageTag passes -1).
 * parsedLen - optional output (may be NULL). Receives the original input
 *             length when the tag matched a grandfathered entry, otherwise
 *             the offset of the last successfully parsed position.
 * status    - ICU error code; set to U_MEMORY_ALLOCATION_ERROR when an
 *             allocation fails.
 *
 * The input is copied into a private buffer; subtags are terminated and
 * case-normalized in place inside that copy, and the ULanguageTag fields
 * point into it. The "next" bit flags (EXTL, SCRT, REGN, VART, EXTS, EXTV,
 * PRIV) record which kinds of subtag may legally follow the one just
 * accepted. Parsing stops at the first subtag that fits none of them.
 */
1787 static ULanguageTag*
1788 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
1792 char *pSubtag, *pNext, *pLastGoodPosition;
1795 ExtensionListEntry *pExtension;
1796 char *pExtValueSubtag, *pExtValueSubtagEnd;
1798 UBool privateuseVar = FALSE;
1799 int32_t grandfatheredLen = 0;
1801 if (parsedLen != NULL) {
1805 if (U_FAILURE(*status)) {
1810 tagLen = (int32_t)uprv_strlen(tag);
1813 /* copy the entire string */
1814 tagBuf = (char*)uprv_malloc(tagLen + 1);
1815 if (tagBuf == NULL) {
1816 *status = U_MEMORY_ALLOCATION_ERROR;
1819 uprv_memcpy(tagBuf, tag, tagLen);
1820 *(tagBuf + tagLen) = 0;
1822 /* create a ULanguageTag */
1823 t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
1826 *status = U_MEMORY_ALLOCATION_ERROR;
1829 _initializeULanguageTag(t);
1832 if (tagLen < MINLEN) {
1833 /* the input tag is too short - return empty ULanguageTag */
1837 /* check if the tag is grandfathered */
1838 for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
/* the table is laid out as (grandfathered, preferred) pairs, hence the stride of 2 */
1839 if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
1840 int32_t newTagLength;
1842 grandfatheredLen = tagLen; /* back up for output parsedLen */
1843 newTagLength = uprv_strlen(GRANDFATHERED[i+1]);
/* the preferred form can be longer than the copied input; a larger buffer is needed then */
1844 if (tagLen < newTagLength) {
1846 tagBuf = (char*)uprv_malloc(newTagLength + 1);
1847 if (tagBuf == NULL) {
1848 *status = U_MEMORY_ALLOCATION_ERROR;
1853 tagLen = newTagLength;
1855 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
1861 * langtag = language
1870 pNext = pLastGoodPosition = tagBuf;
1873 pExtValueSubtag = NULL;
1874 pExtValueSubtagEnd = NULL;
1881 /* locate next separator char */
1895 subtagLen = (int32_t)(pSep - pSubtag);
1898 if (_isLanguageSubtag(pSubtag, subtagLen)) {
1899 *pSep = 0; /* terminate */
1900 t->language = T_CString_toLowerCase(pSubtag);
1902 pLastGoodPosition = pSep;
1903 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1908 if (_isExtlangSubtag(pSubtag, subtagLen)) {
1910 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
1912 pLastGoodPosition = pSep;
1913 if (extlangIdx < 3) {
1914 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1916 next = SCRT | REGN | VART | EXTS | PRIV;
1922 if (_isScriptSubtag(pSubtag, subtagLen)) {
1928 *p = uprv_toupper(*p);
1931 *p = uprv_tolower(*p);
1934 t->script = pSubtag;
1936 pLastGoodPosition = pSep;
1937 next = REGN | VART | EXTS | PRIV;
1942 if (_isRegionSubtag(pSubtag, subtagLen)) {
1944 t->region = T_CString_toUpperCase(pSubtag);
1946 pLastGoodPosition = pSep;
1947 next = VART | EXTS | PRIV;
1952 if (_isVariantSubtag(pSubtag, subtagLen) ||
1953 (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
1954 VariantListEntry *var;
1957 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
1959 *status = U_MEMORY_ALLOCATION_ERROR;
1963 var->variant = T_CString_toUpperCase(pSubtag);
1964 isAdded = _addVariantToList(&(t->variants), var);
1966 /* duplicated variant entry */
1970 pLastGoodPosition = pSep;
1971 next = VART | EXTS | PRIV;
1976 if (_isExtensionSingleton(pSubtag, subtagLen)) {
1977 if (pExtension != NULL) {
1978 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
1979 /* the previous extension is incomplete */
1980 uprv_free(pExtension);
1985 /* terminate the previous extension value */
1986 *pExtValueSubtagEnd = 0;
1987 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
1989 /* insert the extension to the list */
1990 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
1991 pLastGoodPosition = pExtValueSubtagEnd;
1993 /* stop parsing here */
1994 uprv_free(pExtension);
2000 /* create a new extension */
2001 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
2002 if (pExtension == NULL) {
2003 *status = U_MEMORY_ALLOCATION_ERROR;
2007 pExtension->key = T_CString_toLowerCase(pSubtag);
2008 pExtension->value = NULL; /* will be set later */
2011 * reset the start and the end location of extension value
2012 * subtags for this extension
2014 pExtValueSubtag = NULL;
2015 pExtValueSubtagEnd = NULL;
2022 if (_isExtensionSubtag(pSubtag, subtagLen)) {
2023 if (pExtValueSubtag == NULL) {
2024 /* if the start position of this extension's value is not yet set,
2025 this one is the first value subtag */
2026 pExtValueSubtag = pSubtag;
2029 /* Mark the end of this subtag */
2030 pExtValueSubtagEnd = pSep;
2031 next = EXTS | EXTV | PRIV;
2037 if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
2040 if (pExtension != NULL) {
2041 /* Process the last extension */
2042 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2043 /* the previous extension is incomplete */
2044 uprv_free(pExtension);
2048 /* terminate the previous extension value */
2049 *pExtValueSubtagEnd = 0;
2050 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2052 /* insert the extension to the list */
2053 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2054 pLastGoodPosition = pExtValueSubtagEnd;
2057 /* stop parsing here */
2058 uprv_free(pExtension);
2065 /* The rest of part will be private use value subtags */
2066 if (pNext == NULL) {
2067 /* empty private use subtag */
2070 /* back up the private use value start position */
2071 pPrivuseVal = pNext;
2073 /* validate private use value subtags */
2089 subtagLen = (int32_t)(pSep - pSubtag);
2091 if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
2094 privateuseVar = TRUE;
2096 } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
2097 pLastGoodPosition = pSep;
2107 if (pLastGoodPosition - pPrivuseVal > 0) {
/* cut the buffer at the last accepted position and keep the lower-cased value */
2108 *pLastGoodPosition = 0;
2109 t->privateuse = T_CString_toLowerCase(pPrivuseVal);
2111 /* No more subtags, exiting the parse loop */
2117 /* If we fell through here, it means this subtag is illegal - quit parsing */
2121 if (pExtension != NULL) {
2122 /* Process the last extension */
2123 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2124 /* the previous extension is incomplete */
2125 uprv_free(pExtension);
2127 /* terminate the previous extension value */
2128 *pExtValueSubtagEnd = 0;
2129 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2130 /* insert the extension to the list */
2131 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2132 pLastGoodPosition = pExtValueSubtagEnd;
2134 uprv_free(pExtension);
2139 if (parsedLen != NULL) {
/* a grandfathered match reports the original input length; otherwise report how far into the buffer parsing succeeded */
2140 *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
2151 * Ticket #12705 - Turn optimization back on.
2153 #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
2154 #pragma optimize( "", on )
2158 ultag_close(ULanguageTag* langtag) {
/*
 * Release the resources held by a ULanguageTag. A NULL langtag is checked
 * for explicitly. The parsed-subtag buffer (langtag->buf) is freed, and the
 * variant and extension lists are walked with each entry's next pointer
 * saved in a local before moving on.
 */
2160 if (langtag == NULL) {
2164 uprv_free(langtag->buf);
2166 if (langtag->variants) {
2167 VariantListEntry *curVar = langtag->variants;
2169 VariantListEntry *nextVar = curVar->next;
2175 if (langtag->extensions) {
2176 ExtensionListEntry *curExt = langtag->extensions;
2178 ExtensionListEntry *nextExt = curExt->next;
2188 ultag_getLanguage(const ULanguageTag* langtag) {
/* Returns the language field of langtag as stored; langtag is dereferenced without a NULL check. */
2189 return langtag->language;
2194 ultag_getJDKLanguage(const ULanguageTag* langtag) {
/*
 * Looks up langtag->language in the NULL-terminated DEPRECATEDLANGS table,
 * which is scanned two entries at a time: entry i is compared against the
 * language and entry i + 1 is returned on a match. When nothing matches,
 * the stored language is returned unchanged.
 */
2196 for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
2197 if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) {
2198 return DEPRECATEDLANGS[i + 1];
2201 return langtag->language;
2206 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
/* Returns the extlang slot at idx when idx is within [0, MAXEXTLANG); the slot itself may be unset. */
2207 if (idx >= 0 && idx < MAXEXTLANG) {
2208 return langtag->extlang[idx];
2214 ultag_getExtlangSize(const ULanguageTag* langtag) {
/* Scans all MAXEXTLANG extlang slots and tests each for a non-NULL entry (presumably counting them - confirm against the full body). */
2217 for (i = 0; i < MAXEXTLANG; i++) {
2218 if (langtag->extlang[i]) {
2226 ultag_getScript(const ULanguageTag* langtag) {
/* Returns the script field of langtag as stored; langtag is dereferenced without a NULL check. */
2227 return langtag->script;
2231 ultag_getRegion(const ULanguageTag* langtag) {
/* Returns the region field of langtag as stored; langtag is dereferenced without a NULL check. */
2232 return langtag->region;
2236 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
/* Variant lookup by idx: the result starts out NULL and the search starts at the head of langtag->variants (presumably yields the idx-th entry - confirm). */
2237 const char *var = NULL;
2238 VariantListEntry *cur = langtag->variants;
2252 ultag_getVariantsSize(const ULanguageTag* langtag) {
/* Starts from the head of the langtag->variants linked list (presumably counts its entries - confirm against the full body). */
2254 VariantListEntry *cur = langtag->variants;
2266 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
/* Extension key lookup by idx: the result starts out NULL and the search starts at the head of langtag->extensions (presumably yields the idx-th entry's key - confirm). */
2267 const char *key = NULL;
2268 ExtensionListEntry *cur = langtag->extensions;
2282 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
/* Extension value lookup by idx: the result starts out NULL and the search starts at the head of langtag->extensions (presumably yields the idx-th entry's value - confirm). */
2283 const char *val = NULL;
2284 ExtensionListEntry *cur = langtag->extensions;
2298 ultag_getExtensionsSize(const ULanguageTag* langtag) {
/* Starts from the head of the langtag->extensions linked list (presumably counts its entries - confirm against the full body). */
2300 ExtensionListEntry *cur = langtag->extensions;
2312 ultag_getPrivateUse(const ULanguageTag* langtag) {
/* Returns the privateuse field of langtag as stored; langtag is dereferenced without a NULL check. */
2313 return langtag->privateuse;
2318 ultag_getGrandfathered(const ULanguageTag* langtag) {
/* Returns the grandfathered field of langtag as stored; langtag is dereferenced without a NULL check. */
2319 return langtag->grandfathered;
2325 * -------------------------------------------------
2327 * Locale/BCP47 conversion APIs, exposed as uloc_*
2329 * -------------------------------------------------
/*
 * Convert an ICU locale ID into a language tag written to langtag.
 *
 * localeID        - source locale ID; when non-empty it is first
 *                   canonicalized into a fixed 256-byte local buffer.
 * langtagCapacity - size of the langtag output buffer.
 * status          - ICU error code. Any canonicalization result other than
 *                   U_ZERO_ERROR (warnings included) is reported as
 *                   U_ILLEGAL_ARGUMENT_ERROR.
 *
 * A locale whose keyword section starts at the very beginning of the
 * canonical ID is treated as a possible "private use only" tag and handled
 * separately. Otherwise the tag is assembled by appending, in order:
 * language, script, region, variants, keywords and private use, with each
 * helper's returned length accumulated into reslen.
 */
2331 U_CAPI int32_t U_EXPORT2
2332 uloc_toLanguageTag(const char* localeID,
2334 int32_t langtagCapacity,
2336 UErrorCode* status) {
2337 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2338 char canonical[256];
2340 UErrorCode tmpStatus = U_ZERO_ERROR;
2341 UBool hadPosix = FALSE;
2342 const char* pKeywordStart;
2344 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
2346 if (uprv_strlen(localeID) > 0) {
2347 uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
/* strict comparison: warnings from canonicalization are rejected as well as errors */
2348 if (tmpStatus != U_ZERO_ERROR) {
2349 *status = U_ILLEGAL_ARGUMENT_ERROR;
2354 /* For handling special case - private use only tag */
2355 pKeywordStart = locale_getKeywordsStart(canonical);
2356 if (pKeywordStart == canonical) {
2357 UEnumeration *kwdEnum;
2361 kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
2362 if (kwdEnum != NULL) {
2363 kwdCnt = uenum_count(kwdEnum, &tmpStatus);
2368 key = uenum_next(kwdEnum, &len, &tmpStatus);
2369 if (len == 1 && *key == PRIVATEUSE) {
2370 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2371 buf[0] = PRIVATEUSE;
/* the keyword value is read into buf starting at offset 2, leaving room for the two leading bytes */
2373 len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
2374 if (U_SUCCESS(tmpStatus)) {
2375 if (_isPrivateuseValueSubtags(&buf[2], len)) {
2376 /* return private use only tag */
2378 uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
2379 u_terminateChars(langtag, langtagCapacity, reslen, status);
2381 } else if (strict) {
2382 *status = U_ILLEGAL_ARGUMENT_ERROR;
2385 /* if not strict mode, then "und" will be returned */
2387 *status = U_ILLEGAL_ARGUMENT_ERROR;
2392 uenum_close(kwdEnum);
2399 reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
2400 reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2401 reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2402 reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
2403 reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2404 reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
/*
 * Convert a language tag into an ICU locale ID written to localeID.
 *
 * langtag          - NUL-terminated language tag (parsed with length -1).
 * localeIDCapacity - size of the localeID output buffer.
 * parsedLength     - forwarded to ultag_parse as its parsed-length output.
 * status           - ICU error code.
 *
 * The locale ID is built from the parsed tag in this order: language (the
 * first extlang is used instead when one exists; "und" is not written),
 * script in title case, region in upper case, variants in upper case, then
 * extensions / private use via _appendKeywords. Every write is guarded by
 * reslen < localeIDCapacity, and the result is finished with
 * u_terminateChars, whose return value is returned.
 */
2410 U_CAPI int32_t U_EXPORT2
2411 uloc_forLanguageTag(const char* langtag,
2413 int32_t localeIDCapacity,
2414 int32_t* parsedLength,
2415 UErrorCode* status) {
2418 const char *subtag, *p;
2421 UBool noRegion = TRUE;
2423 lt = ultag_parse(langtag, -1, parsedLength, status);
2424 if (U_FAILURE(*status)) {
2429 subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
2430 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
2431 len = (int32_t)uprv_strlen(subtag);
2433 if (reslen < localeIDCapacity) {
2434 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
2441 subtag = ultag_getScript(lt);
2442 len = (int32_t)uprv_strlen(subtag);
2444 if (reslen < localeIDCapacity) {
2445 *(localeID + reslen) = LOCALE_SEP;
2449 /* write out the script in title case */
2452 if (reslen < localeIDCapacity) {
2454 *(localeID + reslen) = uprv_toupper(*p);
2456 *(localeID + reslen) = *p;
2465 subtag = ultag_getRegion(lt);
2466 len = (int32_t)uprv_strlen(subtag);
2468 if (reslen < localeIDCapacity) {
2469 *(localeID + reslen) = LOCALE_SEP;
2472 /* write out the region in upper case */
2475 if (reslen < localeIDCapacity) {
2476 *(localeID + reslen) = uprv_toupper(*p);
2485 n = ultag_getVariantsSize(lt);
2488 if (reslen < localeIDCapacity) {
2489 *(localeID + reslen) = LOCALE_SEP;
2494 for (i = 0; i < n; i++) {
2495 subtag = ultag_getVariant(lt, i);
2496 if (reslen < localeIDCapacity) {
2497 *(localeID + reslen) = LOCALE_SEP;
2500 /* write out the variant in upper case */
2503 if (reslen < localeIDCapacity) {
2504 *(localeID + reslen) = uprv_toupper(*p);
2513 n = ultag_getExtensionsSize(lt);
2514 subtag = ultag_getPrivateUse(lt);
2515 if (n > 0 || uprv_strlen(subtag) > 0) {
/* nothing has been written yet but extensions exist, so "und" is emitted as the language */
2516 if (reslen == 0 && n > 0) {
2517 /* need a language */
2518 if (reslen < localeIDCapacity) {
2519 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
2521 reslen += LANG_UND_LEN;
2523 len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
2528 return u_terminateChars(localeID, localeIDCapacity, reslen, status);