2 **********************************************************************
3 * Copyright (C) 2009-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
8 #include "unicode/utypes.h"
9 #include "unicode/ures.h"
10 #include "unicode/putil.h"
11 #include "unicode/uloc.h"
20 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) /* element count as int32_t; the argument must be a real array, not a pointer */
22 /* struct holding a single variant */
23 typedef struct VariantListEntry {
25 struct VariantListEntry *next;
28 /* struct holding a single attribute value */
29 typedef struct AttributeListEntry {
30 const char *attribute;
31 struct AttributeListEntry *next;
34 /* struct holding a single extension */
35 typedef struct ExtensionListEntry {
38 struct ExtensionListEntry *next;
42 typedef struct ULanguageTag {
43 char *buf; /* holding parsed subtags */
45 const char *extlang[MAXEXTLANG];
48 VariantListEntry *variants;
49 ExtensionListEntry *extensions;
50 const char *privateuse;
51 const char *grandfathered;
56 #define PRIVATEUSE 'x' /* singleton reserved for private use; rejected as an extension singleton */
59 #define LOCALE_SEP '_' /* accepted alongside SEP as a delimiter when splitting locale variants */
60 #define LOCALE_EXT_SEP '@' /* NOTE(review): not referenced in the visible code; by name, starts the keyword part of a locale ID - confirm */
61 #define LOCALE_KEYWORD_SEP ';' /* NOTE(review): not referenced in the visible code; by name, separates keywords - confirm */
62 #define LOCALE_KEY_TYPE_SEP '=' /* NOTE(review): not referenced in the visible code; by name, separates a keyword from its value - confirm */
64 #define ISALPHA(c) uprv_isASCIILetter(c) /* letter test, delegated to uprv_isASCIILetter */
65 #define ISNUMERIC(c) ((c)>='0' && (c)<='9') /* digit test; evaluates c twice, so pass no side effects */
67 static const char EMPTY[] = ""; /* initial value for the string fields of a ULanguageTag */
68 static const char LANG_UND[] = "und"; /* language subtag written when the locale language is missing or invalid */
69 static const char PRIVATEUSE_KEY[] = "x"; /* keyword key under which the private use value is stored */
70 static const char _POSIX[] = "_POSIX"; /* appended to the locale ID when a POSIX variant was found in the extensions */
71 static const char POSIX_KEY[] = "va"; /* extension key used to carry the POSIX variant */
72 static const char POSIX_VALUE[] = "posix"; /* extension value used to carry the POSIX variant */
73 static const char LOCALE_ATTRIBUTE_KEY[] = "attribute"; /* special keyword representing Unicode locale attributes */
74 static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant"; /* NOTE(review): not referenced in the visible code; by name, prefixes variants kept in private use - confirm */
75 static const char LOCALE_TYPE_YES[] = "yes"; /* default type for a typeless u extension key */
77 #define LANG_UND_LEN 3 /* length of LANG_UND without the terminator */
79 static const char* const GRANDFATHERED[] = {
80 /* grandfathered preferred */
82 "cel-gaulish", "xtg-x-cel-gaulish",
83 "en-GB-oed", "en-GB-x-oed",
86 "i-default", "en-x-i-default",
87 "i-enochian", "und-x-i-enochian",
91 "i-mingo", "see-x-i-mingo",
104 "zh-min", "nan-x-zh-min",
110 static const char DEPRECATEDLANGS[][4] = {
118 * -------------------------------------------------
120 * These ultag_ functions may be exposed as APIs later
122 * -------------------------------------------------
126 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
129 ultag_close(ULanguageTag* langtag);
132 ultag_getLanguage(const ULanguageTag* langtag);
136 ultag_getJDKLanguage(const ULanguageTag* langtag);
140 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
143 ultag_getExtlangSize(const ULanguageTag* langtag);
146 ultag_getScript(const ULanguageTag* langtag);
149 ultag_getRegion(const ULanguageTag* langtag);
152 ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
155 ultag_getVariantsSize(const ULanguageTag* langtag);
158 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
161 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
164 ultag_getExtensionsSize(const ULanguageTag* langtag);
167 ultag_getPrivateUse(const ULanguageTag* langtag);
171 ultag_getGrandfathered(const ULanguageTag* langtag);
175 * -------------------------------------------------
177 * Language subtag syntax validation functions
179 * -------------------------------------------------
183 _isAlphaString(const char* s, int32_t len) {
185 for (i = 0; i < len; i++) {
186 if (!ISALPHA(*(s + i))) {
194 _isNumericString(const char* s, int32_t len) {
196 for (i = 0; i < len; i++) {
197 if (!ISNUMERIC(*(s + i))) {
205 _isAlphaNumericString(const char* s, int32_t len) {
207 for (i = 0; i < len; i++) {
208 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
216 _isLanguageSubtag(const char* s, int32_t len) {
218 * language = 2*3ALPHA ; shortest ISO 639 code
219 * ["-" extlang] ; sometimes followed by
220 * ; extended language subtags
221 * / 4ALPHA ; or reserved for future use
222 * / 5*8ALPHA ; or registered language subtag
225 len = (int32_t)uprv_strlen(s);
227 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
234 _isExtlangSubtag(const char* s, int32_t len) {
236 * extlang = 3ALPHA ; selected ISO 639 codes
237 * *2("-" 3ALPHA) ; permanently reserved
240 len = (int32_t)uprv_strlen(s);
242 if (len == 3 && _isAlphaString(s, len)) {
249 _isScriptSubtag(const char* s, int32_t len) {
251 * script = 4ALPHA ; ISO 15924 code
254 len = (int32_t)uprv_strlen(s);
256 if (len == 4 && _isAlphaString(s, len)) {
263 _isRegionSubtag(const char* s, int32_t len) {
265 * region = 2ALPHA ; ISO 3166-1 code
266 * / 3DIGIT ; UN M.49 code
269 len = (int32_t)uprv_strlen(s);
271 if (len == 2 && _isAlphaString(s, len)) {
274 if (len == 3 && _isNumericString(s, len)) {
281 _isVariantSubtag(const char* s, int32_t len) {
283 * variant = 5*8alphanum ; registered variants
284 * / (DIGIT 3alphanum)
287 len = (int32_t)uprv_strlen(s);
289 if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
292 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
299 _isPrivateuseVariantSubtag(const char* s, int32_t len) {
301 * variant = 1*8alphanum ; private use variant (not a registered variant)
302 * ; DIGIT 3alphanum is already covered by this range
305 len = (int32_t)uprv_strlen(s);
307 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
314 _isExtensionSingleton(const char* s, int32_t len) {
316 * extension = singleton 1*("-" (2*8alphanum))
319 len = (int32_t)uprv_strlen(s);
321 if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
328 _isExtensionSubtag(const char* s, int32_t len) {
330 * extension = singleton 1*("-" (2*8alphanum))
333 len = (int32_t)uprv_strlen(s);
335 if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
342 _isExtensionSubtags(const char* s, int32_t len) {
344 const char *pSubtag = NULL;
347 len = (int32_t)uprv_strlen(s);
350 while ((p - s) < len) {
352 if (pSubtag == NULL) {
355 if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
359 } else if (pSubtag == NULL) {
364 if (pSubtag == NULL) {
367 return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
371 _isPrivateuseValueSubtag(const char* s, int32_t len) {
373 * privateuse = "x" 1*("-" (1*8alphanum))
376 len = (int32_t)uprv_strlen(s);
378 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
385 _isPrivateuseValueSubtags(const char* s, int32_t len) {
387 const char *pSubtag = NULL;
390 len = (int32_t)uprv_strlen(s);
393 while ((p - s) < len) {
395 if (pSubtag == NULL) {
398 if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
402 } else if (pSubtag == NULL) {
407 if (pSubtag == NULL) {
410 return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
414 _isLDMLKey(const char* s, int32_t len) {
416 len = (int32_t)uprv_strlen(s);
418 if (len == 2 && _isAlphaNumericString(s, len)) {
425 _isLDMLType(const char* s, int32_t len) {
427 len = (int32_t)uprv_strlen(s);
429 if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) {
436 * -------------------------------------------------
440 * -------------------------------------------------
444 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
447 if (*first == NULL) {
451 VariantListEntry *prev, *cur;
454 /* variants order should be preserved */
464 /* Checking for duplicate variant */
465 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
467 /* duplicated variant */
480 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
483 if (*first == NULL) {
487 AttributeListEntry *prev, *cur;
490 /* reorder attributes in alphabetical order */
499 cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
510 /* duplicated attribute */
524 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
527 if (*first == NULL) {
531 ExtensionListEntry *prev, *cur;
534 /* reorder extensions in alphabetical order */
544 /* special handling for locale to bcp conversion */
547 len = (int32_t)uprv_strlen(ext->key);
548 curlen = (int32_t)uprv_strlen(cur->key);
550 if (len == 1 && curlen == 1) {
551 if (*(ext->key) == *(cur->key)) {
553 } else if (*(ext->key) == PRIVATEUSE) {
555 } else if (*(cur->key) == PRIVATEUSE) {
558 cmp = *(ext->key) - *(cur->key);
560 } else if (len == 1) {
561 cmp = *(ext->key) - LDMLEXT;
562 } else if (curlen == 1) {
563 cmp = LDMLEXT - *(cur->key);
565 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
568 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
580 /* duplicated extension key */
593 _initializeULanguageTag(ULanguageTag* langtag) {
598 langtag->language = EMPTY;
599 for (i = 0; i < MAXEXTLANG; i++) {
600 langtag->extlang[i] = NULL;
603 langtag->script = EMPTY;
604 langtag->region = EMPTY;
606 langtag->variants = NULL;
607 langtag->extensions = NULL;
609 langtag->grandfathered = EMPTY;
610 langtag->privateuse = EMPTY;
613 #define KEYTYPEDATA "keyTypeData" /* name of the resource bundle opened with ures_openDirect */
614 #define KEYMAP "keyMap" /* table in keyTypeData looked up by LDML key to obtain the BCP47 key */
615 #define TYPEMAP "typeMap" /* table in keyTypeData looked up by key, then by type, to obtain the BCP47 type */
616 #define TYPEALIAS "typeAlias" /* table in keyTypeData consulted for a canonical type when typeMap has no entry */
617 #define MAX_BCP47_SUBTAG_LEN 9 /* including null terminator */
618 #define MAX_LDML_KEY_LEN 22 /* size of the key buffers; keys of this length or longer are rejected */
619 #define MAX_LDML_TYPE_LEN 32 /* size of the type buffer; types of this length or longer are rejected */
622 _ldmlKeyToBCP47(const char* key, int32_t keyLen,
623 char* bcpKey, int32_t bcpKeyCapacity,
624 UErrorCode *status) {
626 char keyBuf[MAX_LDML_KEY_LEN];
627 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
628 int32_t resultLen = 0;
630 UErrorCode tmpStatus = U_ZERO_ERROR;
631 const UChar *uBcpKey;
635 keyLen = (int32_t)uprv_strlen(key);
638 if (keyLen >= sizeof(keyBuf)) {
639 /* no known valid LDML key exceeding 21 */
640 *status = U_ILLEGAL_ARGUMENT_ERROR;
644 uprv_memcpy(keyBuf, key, keyLen);
648 for (i = 0; i < keyLen; i++) {
649 keyBuf[i] = uprv_tolower(keyBuf[i]);
652 rb = ures_openDirect(NULL, KEYTYPEDATA, status);
653 ures_getByKey(rb, KEYMAP, rb, status);
655 if (U_FAILURE(*status)) {
660 uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus);
661 if (U_SUCCESS(tmpStatus)) {
662 u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen);
663 bcpKeyBuf[bcpKeyLen] = 0;
664 resultLen = bcpKeyLen;
666 if (_isLDMLKey(key, keyLen)) {
667 uprv_memcpy(bcpKeyBuf, key, keyLen);
668 bcpKeyBuf[keyLen] = 0;
671 /* mapping not available */
672 *status = U_ILLEGAL_ARGUMENT_ERROR;
677 if (U_FAILURE(*status)) {
681 uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity));
682 return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status);
686 _bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen,
687 char* key, int32_t keyCapacity,
688 UErrorCode *status) {
690 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
691 int32_t resultLen = 0;
693 const char *resKey = NULL;
694 UResourceBundle *mapData;
697 bcpKeyLen = (int32_t)uprv_strlen(bcpKey);
700 if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
701 *status = U_ILLEGAL_ARGUMENT_ERROR;
705 uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen);
706 bcpKeyBuf[bcpKeyLen] = 0;
709 for (i = 0; i < bcpKeyLen; i++) {
710 bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]);
713 rb = ures_openDirect(NULL, KEYTYPEDATA, status);
714 ures_getByKey(rb, KEYMAP, rb, status);
715 if (U_FAILURE(*status)) {
720 mapData = ures_getNextResource(rb, NULL, status);
721 while (U_SUCCESS(*status)) {
722 const UChar *uBcpKey;
723 char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
724 int32_t tmpBcpKeyLen;
726 uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status);
727 if (U_FAILURE(*status)) {
730 u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen);
731 tmpBcpKeyBuf[tmpBcpKeyLen] = 0;
732 if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKeyBuf) == 0) {
733 /* found a matching BCP47 key */
734 resKey = ures_getKey(mapData);
735 resultLen = (int32_t)uprv_strlen(resKey);
738 if (!ures_hasNext(rb)) {
741 ures_getNextResource(rb, mapData, status);
746 if (U_FAILURE(*status)) {
750 if (resKey == NULL) {
752 resultLen = bcpKeyLen;
755 uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity));
756 return u_terminateChars(key, keyCapacity, resultLen, status);
760 _ldmlTypeToBCP47(const char* key, int32_t keyLen,
761 const char* type, int32_t typeLen,
762 char* bcpType, int32_t bcpTypeCapacity,
763 UErrorCode *status) {
764 UResourceBundle *rb, *keyTypeData, *typeMapForKey;
765 char keyBuf[MAX_LDML_KEY_LEN];
766 char typeBuf[MAX_LDML_TYPE_LEN];
767 char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
768 int32_t resultLen = 0;
770 UErrorCode tmpStatus = U_ZERO_ERROR;
771 const UChar *uBcpType, *uCanonicalType;
772 int32_t bcpTypeLen, canonicalTypeLen;
773 UBool isTimezone = FALSE;
776 keyLen = (int32_t)uprv_strlen(key);
778 if (keyLen >= sizeof(keyBuf)) {
779 /* no known valid LDML key exceeding 21 */
780 *status = U_ILLEGAL_ARGUMENT_ERROR;
783 uprv_memcpy(keyBuf, key, keyLen);
787 for (i = 0; i < keyLen; i++) {
788 keyBuf[i] = uprv_tolower(keyBuf[i]);
790 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
795 typeLen = (int32_t)uprv_strlen(type);
797 if (typeLen >= sizeof(typeBuf)) {
798 *status = U_ILLEGAL_ARGUMENT_ERROR;
803 /* replace '/' with ':' */
804 for (i = 0; i < typeLen; i++) {
805 if (*(type + i) == '/') {
808 typeBuf[i] = *(type + i);
811 typeBuf[typeLen] = 0;
815 keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status);
816 rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status);
817 if (U_FAILURE(*status)) {
819 ures_close(keyTypeData);
823 typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus);
824 uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus);
825 if (U_SUCCESS(tmpStatus)) {
826 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
827 resultLen = bcpTypeLen;
828 } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
829 /* is this type alias? */
830 tmpStatus = U_ZERO_ERROR;
831 ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus);
832 ures_getByKey(rb, keyBuf, rb, &tmpStatus);
833 uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpStatus);
834 if (U_SUCCESS(tmpStatus)) {
835 u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen);
837 /* replace '/' with ':' */
838 for (i = 0; i < canonicalTypeLen; i++) {
839 if (typeBuf[i] == '/') {
844 typeBuf[canonicalTypeLen] = 0;
846 /* look up the canonical type */
847 uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus);
848 if (U_SUCCESS(tmpStatus)) {
849 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
850 resultLen = bcpTypeLen;
853 if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
854 if (_isLDMLType(type, typeLen)) {
855 uprv_memcpy(bcpTypeBuf, type, typeLen);
858 /* mapping not available */
859 *status = U_ILLEGAL_ARGUMENT_ERROR;
866 ures_close(typeMapForKey);
867 ures_close(keyTypeData);
869 if (U_FAILURE(*status)) {
873 uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity));
874 return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status);
878 _bcp47ToLDMLType(const char* key, int32_t keyLen,
879 const char* bcpType, int32_t bcpTypeLen,
880 char* type, int32_t typeCapacity,
881 UErrorCode *status) {
883 char keyBuf[MAX_LDML_KEY_LEN];
884 char bcpTypeBuf[ULOC_KEYWORDS_CAPACITY]; /* ensure buffer is large enough for multiple values (e.g. buddhist-greg) */
885 int32_t resultLen = 0;
887 const char *resType = NULL;
888 UResourceBundle *mapData;
889 UErrorCode tmpStatus = U_ZERO_ERROR;
893 keyLen = (int32_t)uprv_strlen(key);
896 if (keyLen >= sizeof(keyBuf)) {
897 /* no known valid LDML key exceeding 21 */
898 *status = U_ILLEGAL_ARGUMENT_ERROR;
901 uprv_memcpy(keyBuf, key, keyLen);
905 for (i = 0; i < keyLen; i++) {
906 keyBuf[i] = uprv_tolower(keyBuf[i]);
910 if (bcpTypeLen < 0) {
911 bcpTypeLen = (int32_t)uprv_strlen(bcpType);
915 for (i = 0; i < bcpTypeLen; i++) {
916 if (bcpType[i] == SEP) {
917 if (typeSize >= MAX_BCP47_SUBTAG_LEN) {
918 *status = U_ILLEGAL_ARGUMENT_ERROR;
927 uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen);
928 bcpTypeBuf[bcpTypeLen] = 0;
931 for (i = 0; i < bcpTypeLen; i++) {
932 bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]);
935 rb = ures_openDirect(NULL, KEYTYPEDATA, status);
936 ures_getByKey(rb, TYPEMAP, rb, status);
937 if (U_FAILURE(*status)) {
942 ures_getByKey(rb, keyBuf, rb, &tmpStatus);
943 mapData = ures_getNextResource(rb, NULL, &tmpStatus);
944 while (U_SUCCESS(tmpStatus)) {
945 const UChar *uBcpType;
946 char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
947 int32_t tmpBcpTypeLen;
949 uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus);
950 if (U_FAILURE(tmpStatus)) {
953 u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen);
954 tmpBcpTypeBuf[tmpBcpTypeLen] = 0;
955 if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpTypeBuf) == 0) {
956 /* found a matching BCP47 type */
957 resType = ures_getKey(mapData);
958 resultLen = (int32_t)uprv_strlen(resType);
961 if (!ures_hasNext(rb)) {
964 ures_getNextResource(rb, mapData, &tmpStatus);
969 if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) {
974 if (resType == NULL) {
975 resType = bcpTypeBuf;
976 resultLen = bcpTypeLen;
979 copyLen = uprv_min(resultLen, typeCapacity);
980 uprv_memcpy(type, resType, copyLen);
982 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
983 for (i = 0; i < copyLen; i++) {
984 if (*(type + i) == ':') {
990 return u_terminateChars(type, typeCapacity, resultLen, status);
994 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
995 char buf[ULOC_LANG_CAPACITY];
996 UErrorCode tmpStatus = U_ZERO_ERROR;
1000 if (U_FAILURE(*status)) {
1004 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
1005 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1007 *status = U_ILLEGAL_ARGUMENT_ERROR;
1013 /* Note: returned language code is in lower case letters */
1016 if (reslen < capacity) {
1017 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
1019 reslen += LANG_UND_LEN;
1020 } else if (!_isLanguageSubtag(buf, len)) {
1021 /* invalid language code */
1023 *status = U_ILLEGAL_ARGUMENT_ERROR;
1026 if (reslen < capacity) {
1027 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
1029 reslen += LANG_UND_LEN;
1031 /* resolve deprecated */
1032 for (i = 0; i < LENGTHOF(DEPRECATEDLANGS); i += 2) {
1033 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
1034 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
1035 len = (int32_t)uprv_strlen(buf);
1039 if (reslen < capacity) {
1040 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
1044 u_terminateChars(appendAt, capacity, reslen, status);
1049 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
1050 char buf[ULOC_SCRIPT_CAPACITY];
1051 UErrorCode tmpStatus = U_ZERO_ERROR;
1055 if (U_FAILURE(*status)) {
1059 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
1060 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1062 *status = U_ILLEGAL_ARGUMENT_ERROR;
1068 if (!_isScriptSubtag(buf, len)) {
1069 /* invalid script code */
1071 *status = U_ILLEGAL_ARGUMENT_ERROR;
1075 if (reslen < capacity) {
1076 *(appendAt + reslen) = SEP;
1080 if (reslen < capacity) {
1081 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
1086 u_terminateChars(appendAt, capacity, reslen, status);
1091 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
1092 char buf[ULOC_COUNTRY_CAPACITY];
1093 UErrorCode tmpStatus = U_ZERO_ERROR;
1097 if (U_FAILURE(*status)) {
1101 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
1102 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1104 *status = U_ILLEGAL_ARGUMENT_ERROR;
1110 if (!_isRegionSubtag(buf, len)) {
1111 /* invalid region code */
1113 *status = U_ILLEGAL_ARGUMENT_ERROR;
1117 if (reslen < capacity) {
1118 *(appendAt + reslen) = SEP;
1122 if (reslen < capacity) {
1123 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
1128 u_terminateChars(appendAt, capacity, reslen, status);
1133 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
1134 char buf[ULOC_FULLNAME_CAPACITY];
1135 UErrorCode tmpStatus = U_ZERO_ERROR;
1139 if (U_FAILURE(*status)) {
1143 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1144 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1146 *status = U_ILLEGAL_ARGUMENT_ERROR;
1154 VariantListEntry *var;
1155 VariantListEntry *varFirst = NULL;
1160 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1164 *p = 0; /* terminate */
1168 *status = U_ILLEGAL_ARGUMENT_ERROR;
1171 /* ignore empty variant */
1173 /* ICU uses upper case letters for variants, but
1174 the canonical format is lowercase in BCP47 */
1175 for (i = 0; *(pVar + i) != 0; i++) {
1176 *(pVar + i) = uprv_tolower(*(pVar + i));
1180 if (_isVariantSubtag(pVar, -1)) {
1181 if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) {
1182 /* emit the variant to the list */
1183 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
1185 *status = U_MEMORY_ALLOCATION_ERROR;
1188 var->variant = pVar;
1189 if (!_addVariantToList(&varFirst, var)) {
1190 /* duplicated variant */
1193 *status = U_ILLEGAL_ARGUMENT_ERROR;
1198 /* Special handling for POSIX variant, need to remember that we had it and then */
1199 /* treat it like an extension later. */
1202 } else if (strict) {
1203 *status = U_ILLEGAL_ARGUMENT_ERROR;
1205 } else if (_isPrivateuseValueSubtag(pVar, -1)) {
1206 /* Handle private use subtags separately */
1210 /* reset variant starting position */
1212 } else if (pVar == NULL) {
1218 if (U_SUCCESS(*status)) {
1219 if (varFirst != NULL) {
1222 /* write out validated/normalized variants to the target */
1224 while (var != NULL) {
1225 if (reslen < capacity) {
1226 *(appendAt + reslen) = SEP;
1229 varLen = (int32_t)uprv_strlen(var->variant);
1230 if (reslen < capacity) {
1231 uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
1241 while (var != NULL) {
1242 VariantListEntry *tmpVar = var->next;
1247 if (U_FAILURE(*status)) {
1252 u_terminateChars(appendAt, capacity, reslen, status);
1257 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1258 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1259 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
1260 int32_t attrBufLength = 0;
1261 UBool isAttribute = FALSE;
1262 UEnumeration *keywordEnum = NULL;
1265 keywordEnum = uloc_openKeywords(localeID, status);
1266 if (U_FAILURE(*status) && !hadPosix) {
1267 uenum_close(keywordEnum);
1270 if (keywordEnum != NULL || hadPosix) {
1271 /* reorder extensions */
1274 ExtensionListEntry *firstExt = NULL;
1275 ExtensionListEntry *ext;
1276 AttributeListEntry *firstAttr = NULL;
1277 AttributeListEntry *attr;
1279 char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1280 char *pExtBuf = extBuf;
1281 int32_t extBufCapacity = sizeof(extBuf);
1282 const char *bcpKey, *bcpValue;
1283 UErrorCode tmpStatus = U_ZERO_ERROR;
1285 UBool isLDMLKeyword;
1288 isAttribute = FALSE;
1289 key = uenum_next(keywordEnum, NULL, status);
1293 len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
1294 if (U_FAILURE(tmpStatus)) {
1296 *status = U_ILLEGAL_ARGUMENT_ERROR;
1299 /* ignore this keyword */
1300 tmpStatus = U_ZERO_ERROR;
1304 keylen = (int32_t)uprv_strlen(key);
1305 isLDMLKeyword = (keylen > 1);
1307 /* special keyword used for representing Unicode locale attributes */
1308 if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
1314 for (; i < len; i++) {
1315 if (buf[i] != '-') {
1316 attrBuf[attrBufLength++] = buf[i];
1322 if (attrBufLength > 0) {
1323 attrBuf[attrBufLength] = 0;
1325 } else if (i >= len){
1329 /* create AttributeListEntry */
1330 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1332 *status = U_MEMORY_ALLOCATION_ERROR;
1335 attrValue = (char*)uprv_malloc(attrBufLength + 1);
1336 if (attrValue == NULL) {
1337 *status = U_MEMORY_ALLOCATION_ERROR;
1340 uprv_strcpy(attrValue, attrBuf);
1341 attr->attribute = attrValue;
1343 if (!_addAttributeToList(&firstAttr, attr)) {
1345 uprv_free(attrValue);
1347 *status = U_ILLEGAL_ARGUMENT_ERROR;
1353 } else if (isLDMLKeyword) {
1356 /* transform key and value to bcp47 style */
1357 modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus);
1358 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1360 *status = U_ILLEGAL_ARGUMENT_ERROR;
1363 tmpStatus = U_ZERO_ERROR;
1368 pExtBuf += (modKeyLen + 1);
1369 extBufCapacity -= (modKeyLen + 1);
1371 len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus);
1372 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1374 *status = U_ILLEGAL_ARGUMENT_ERROR;
1377 tmpStatus = U_ZERO_ERROR;
1381 pExtBuf += (len + 1);
1382 extBufCapacity -= (len + 1);
1384 if (*key == PRIVATEUSE) {
1385 if (!_isPrivateuseValueSubtags(buf, len)) {
1387 *status = U_ILLEGAL_ARGUMENT_ERROR;
1393 if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
1395 *status = U_ILLEGAL_ARGUMENT_ERROR;
1402 if ((len + 1) < extBufCapacity) {
1403 uprv_memcpy(pExtBuf, buf, len);
1411 extBufCapacity -= (len + 1);
1413 *status = U_ILLEGAL_ARGUMENT_ERROR;
1419 /* create ExtensionListEntry */
1420 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1422 *status = U_MEMORY_ALLOCATION_ERROR;
1426 ext->value = bcpValue;
1428 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1431 *status = U_ILLEGAL_ARGUMENT_ERROR;
1438 /* Special handling for POSIX variant - add the keywords for POSIX */
1440 /* create ExtensionListEntry for POSIX */
1441 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1443 *status = U_MEMORY_ALLOCATION_ERROR;
1446 ext->key = POSIX_KEY;
1447 ext->value = POSIX_VALUE;
1449 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1454 if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
1455 UBool startLDMLExtension = FALSE;
1460 if (!startLDMLExtension && (ext && uprv_strlen(ext->key) > 1)) {
1461 /* write LDML singleton extension */
1462 if (reslen < capacity) {
1463 *(appendAt + reslen) = SEP;
1466 if (reslen < capacity) {
1467 *(appendAt + reslen) = LDMLEXT;
1471 startLDMLExtension = TRUE;
1474 /* write out the sorted BCP47 attributes, extensions and private use */
1475 if (ext && (uprv_strlen(ext->key) == 1 || attr == NULL)) {
1476 if (reslen < capacity) {
1477 *(appendAt + reslen) = SEP;
1480 len = (int32_t)uprv_strlen(ext->key);
1481 if (reslen < capacity) {
1482 uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
1485 if (reslen < capacity) {
1486 *(appendAt + reslen) = SEP;
1489 len = (int32_t)uprv_strlen(ext->value);
1490 if (reslen < capacity) {
1491 uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
1497 /* write the value for the attributes */
1498 if (reslen < capacity) {
1499 *(appendAt + reslen) = SEP;
1502 len = (int32_t)uprv_strlen(attr->attribute);
1503 if (reslen < capacity) {
1504 uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
1510 } while (attr != NULL || ext != NULL);
1515 while (ext != NULL) {
1516 ExtensionListEntry *tmpExt = ext->next;
1522 while (attr != NULL) {
1523 AttributeListEntry *tmpAttr = attr->next;
1524 char *pValue = (char *)attr->attribute;
1530 uenum_close(keywordEnum);
1532 if (U_FAILURE(*status)) {
1537 return u_terminateChars(appendAt, capacity, reslen, status);
1541 * Append keywords parsed from LDML extension value
1542 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1543 * Note: char* buf is used for storing keywords
1546 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
1547 const char *pTag; /* beginning of current subtag */
1548 const char *pKwds; /* beginning of key-type pairs */
1549 UBool variantExists = *posixVariant;
1551 ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */
1552 ExtensionListEntry *kwd, *nextKwd;
1554 AttributeListEntry *attrFirst = NULL; /* first attribute */
1555 AttributeListEntry *attr, *nextAttr;
1560 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1561 int32_t attrBufIdx = 0;
1563 /* Reset the posixVariant value */
1564 *posixVariant = FALSE;
1569 /* Iterate through u extension attributes */
1571 /* locate next separator char */
1572 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1574 if (_isLDMLKey(pTag, len)) {
1579 /* add this attribute to the list */
1580 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1582 *status = U_MEMORY_ALLOCATION_ERROR;
1586 if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
1587 uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
1588 attrBuf[attrBufIdx + len] = 0;
1589 attr->attribute = &attrBuf[attrBufIdx];
1590 attrBufIdx += (len + 1);
1592 *status = U_ILLEGAL_ARGUMENT_ERROR;
1596 if (!_addAttributeToList(&attrFirst, attr)) {
1597 *status = U_ILLEGAL_ARGUMENT_ERROR;
1605 /* next to the separator */
1611 /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
1613 if (attrBufIdx > bufSize) {
1614 /* attrBufIdx == <total length of attribute subtag> + 1 */
1615 *status = U_ILLEGAL_ARGUMENT_ERROR;
1619 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1621 *status = U_MEMORY_ALLOCATION_ERROR;
1625 kwd->key = LOCALE_ATTRIBUTE_KEY;
1628 /* attribute subtags sorted in alphabetical order as type */
1630 while (attr != NULL) {
1631 nextAttr = attr->next;
1633 /* buffer size check is done above */
1634 if (attr != attrFirst) {
1635 *(buf + bufIdx) = SEP;
1639 len = uprv_strlen(attr->attribute);
1640 uprv_memcpy(buf + bufIdx, attr->attribute, len);
1645 *(buf + bufIdx) = 0;
1648 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1649 *status = U_ILLEGAL_ARGUMENT_ERROR;
1654 /* once keyword entry is created, delete the attribute list */
1656 while (attr != NULL) {
1657 nextAttr = attr->next;
1665 const char *pBcpKey = NULL; /* u extension key subtag */
1666 const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */
1667 int32_t bcpKeyLen = 0;
1668 int32_t bcpTypeLen = 0;
1669 UBool isDone = FALSE;
1672 /* BCP47 representation of LDML key/type pairs */
1674 const char *pNextBcpKey = NULL;
1675 int32_t nextBcpKeyLen;
1676 UBool emitKeyword = FALSE;
1679 /* locate next separator char */
1680 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1682 if (_isLDMLKey(pTag, len)) {
1686 nextBcpKeyLen = len;
1692 U_ASSERT(pBcpKey != NULL);
1693 /* within LDML type subtags */
1695 bcpTypeLen += (len + 1);
1705 /* next to the separator */
1709 /* processing last one */
1715 const char *pKey = NULL; /* LDML key */
1716 const char *pType = NULL; /* LDML type */
1718 U_ASSERT(pBcpKey != NULL);
1720 /* u extension key to LDML key */
1721 len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status);
1722 if (U_FAILURE(*status)) {
1725 pKey = buf + bufIdx;
1727 *(buf + bufIdx) = 0;
1731 /* BCP type to locale type */
1732 len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
1733 if (U_FAILURE(*status)) {
1736 pType = buf + bufIdx;
1738 *(buf + bufIdx) = 0;
1741 /* typeless - default type value is "yes" */
1742 pType = LOCALE_TYPE_YES;
1745 /* Special handling for u-va-posix, since we want to treat this as a variant,
1747 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
1748 *posixVariant = TRUE;
1750 /* create an ExtensionListEntry for this keyword */
1751 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1753 *status = U_MEMORY_ALLOCATION_ERROR;
1760 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1761 *status = U_ILLEGAL_ARGUMENT_ERROR;
1767 pBcpKey = pNextBcpKey;
1768 bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
1776 while (kwd != NULL) {
1777 nextKwd = kwd->next;
1778 _addExtensionToList(appendTo, kwd, FALSE);
1786 while (attr != NULL) {
1787 nextAttr = attr->next;
1793 while (kwd != NULL) {
1794 nextKwd = kwd->next;
/*
 * Writes the extensions and the private use part of a parsed language tag
 * to appendAt as locale keywords.
 *
 * langtag   parsed tag whose extensions / private use are read
 * appendAt  destination buffer
 * capacity  size of appendAt; every write below is guarded by
 *           "reslen < capacity" and clipped to the remaining room
 * status    in/out ICU error code
 *
 * Returns the value of u_terminateChars(appendAt, capacity, reslen, status).
 *
 * NOTE(review): capacity is also used as the size of the scratch buffer
 * handed to uprv_malloc(). uloc_forLanguageTag passes
 * "localeIDCapacity - reslen" here, so a zero or negative value looks
 * possible when the caller is only measuring the required length - confirm
 * how uprv_malloc() behaves for such sizes.
 */
1802 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
/* head of the keyword list built below */
1806 ExtensionListEntry *kwdFirst = NULL;
1807 ExtensionListEntry *kwd;
1808 const char *key, *type;
/* scratch storage passed to _appendLDMLExtensionAsKeywords */
1809 char *kwdBuf = NULL;
1810 int32_t kwdBufLength = capacity;
1811 UBool posixVariant = FALSE;
1813 if (U_FAILURE(*status)) {
1817 kwdBuf = (char*)uprv_malloc(kwdBufLength);
1818 if (kwdBuf == NULL) {
1819 *status = U_MEMORY_ALLOCATION_ERROR;
1823 /* Determine if variants already exist */
/* posixVariant is passed by address to _appendLDMLExtensionAsKeywords
   below; presumably it means "variants exist" on the way in and
   "u-va-posix was seen" on the way out - confirm against that function */
1824 if (ultag_getVariantsSize(langtag)) {
1825 posixVariant = TRUE;
1828 n = ultag_getExtensionsSize(langtag);
1830 /* resolve locale keywords and reordering keys */
1831 for (i = 0; i < n; i++) {
1832 key = ultag_getExtensionKey(langtag, i);
1833 type = ultag_getExtensionValue(langtag, i);
/* the LDML extension is expanded into individual keywords by the helper */
1834 if (*key == LDMLEXT) {
1835 _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
1836 if (U_FAILURE(*status)) {
/* any other extension becomes a single list entry */
1840 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1842 *status = U_MEMORY_ALLOCATION_ERROR;
/* a rejected insertion is reported as an illegal argument */
1847 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1849 *status = U_ILLEGAL_ARGUMENT_ERROR;
1855 if (U_SUCCESS(*status)) {
1856 type = ultag_getPrivateUse(langtag);
1857 if ((int32_t)uprv_strlen(type) > 0) {
1858 /* add private use as a keyword */
1859 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1861 *status = U_MEMORY_ALLOCATION_ERROR;
1863 kwd->key = PRIVATEUSE_KEY;
1865 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1867 *status = U_ILLEGAL_ARGUMENT_ERROR;
1873 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1875 if (U_SUCCESS(*status) && posixVariant) {
1876 len = (int32_t) uprv_strlen(_POSIX);
1877 if (reslen < capacity) {
1878 uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
1883 if (U_SUCCESS(*status) && kwdFirst != NULL) {
1884 /* write out the sorted keywords */
/* firstValue presumably selects LOCALE_EXT_SEP for the first keyword and
   LOCALE_KEYWORD_SEP for the following ones */
1885 UBool firstValue = TRUE;
1888 if (reslen < capacity) {
1891 *(appendAt + reslen) = LOCALE_EXT_SEP;
1895 *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
/* keyword key */
1901 len = (int32_t)uprv_strlen(kwd->key);
1902 if (reslen < capacity) {
1903 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
/* separator between key and value */
1908 if (reslen < capacity) {
1909 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
/* keyword value */
1914 len = (int32_t)uprv_strlen(kwd->value);
1915 if (reslen < capacity) {
1916 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
/* walk the list keeping the successor first - presumably so that each
   entry can be released */
1926 while (kwd != NULL) {
1927 ExtensionListEntry *tmpKwd = kwd->next;
1934 if (U_FAILURE(*status)) {
1938 return u_terminateChars(appendAt, capacity, reslen, status);
/*
 * Converts variant subtags of localeID into a BCP 47 private use sequence
 * (separator, PRIVATEUSE_KEY, separator, PRIVUSE_VARIANT_PREFIX, separator,
 * subtag ...) and copies the result to appendAt.
 *
 * localeID  locale ID whose variant is read with uloc_getVariant()
 * appendAt  destination buffer
 * capacity  size of appendAt
 * strict    when TRUE, a subtag that fails validation sets
 *           U_ILLEGAL_ARGUMENT_ERROR
 * hadPosix  not referenced in the statements shown here
 * status    in/out ICU error code
 *
 * NOTE(review): the output is staged in tmpAppend[ULOC_FULLNAME_CAPACITY],
 * but every write into it is guarded by "reslen < capacity", i.e. by the
 * size of the caller's buffer, not by the size of tmpAppend. Confirm that
 * the staged text can never grow past ULOC_FULLNAME_CAPACITY.
 */
1942 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
/* buf receives the variant and is tokenized in place */
1943 char buf[ULOC_FULLNAME_CAPACITY];
/* staging area for the private use text */
1944 char tmpAppend[ULOC_FULLNAME_CAPACITY];
1945 UErrorCode tmpStatus = U_ZERO_ERROR;
1949 if (U_FAILURE(*status)) {
1953 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
/* an unterminated result is handled like a failure */
1954 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1956 *status = U_ILLEGAL_ARGUMENT_ERROR;
1964 UBool firstValue = TRUE;
/* a subtag ends at '-', '_' or the end of the string */
1971 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1975 *p = 0; /* terminate */
1977 if (pPriv != NULL) {
1978 /* Private use in the canonical format is lowercase in BCP47 */
1979 for (i = 0; *(pPriv + i) != 0; i++) {
1980 *(pPriv + i) = uprv_tolower(*(pPriv + i));
1984 if (_isPrivateuseValueSubtag(pPriv, -1)) {
1986 if (!_isVariantSubtag(pPriv, -1)) {
1992 } else if (strict) {
1993 *status = U_ILLEGAL_ARGUMENT_ERROR;
/* '-' */
2000 if (reslen < capacity) {
2001 tmpAppend[reslen++] = SEP;
/* 'x' */
2005 if (reslen < capacity) {
2006 tmpAppend[reslen++] = *PRIVATEUSE_KEY;
2009 if (reslen < capacity) {
2010 tmpAppend[reslen++] = SEP;
/* "lvariant" marker */
2013 len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
2014 if (reslen < capacity) {
2015 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
2019 if (reslen < capacity) {
2020 tmpAppend[reslen++] = SEP;
/* the subtag itself */
2026 len = (int32_t)uprv_strlen(pPriv);
2027 if (reslen < capacity) {
2028 uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
2033 /* reset private use starting position */
2035 } else if (pPriv == NULL) {
2041 if (U_FAILURE(*status)) {
2046 if (U_SUCCESS(*status)) {
/* NOTE(review): the destination is appendAt at offset 0, yet the copy
   length is limited by "capacity - reslen". If len equals reslen here,
   this copies fewer bytes than fit whenever capacity < 2 * reslen -
   confirm whether uprv_min(len, capacity) was intended. */
2048 if (reslen < capacity) {
2049 uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
2053 u_terminateChars(appendAt, capacity, reslen, status);
2059 * -------------------------------------------------
2063 * -------------------------------------------------
2066 /* Bit flags used by the parser */
/*
 * Parses a BCP 47 language tag into a newly allocated ULanguageTag.
 *
 * tag        input tag text
 * tagLen     length of tag; it is recomputed with uprv_strlen() in one
 *            branch below (presumably when a negative length is passed)
 * parsedLen  optional output; when non-NULL it receives the original tag
 *            length for a grandfathered tag, otherwise the offset of the
 *            last well-formed position inside the working copy
 * status     in/out ICU error code; allocation failures set
 *            U_MEMORY_ALLOCATION_ERROR
 *
 * The tag is copied into a private, mutable buffer. Subtags are terminated
 * and case-normalized in place, and the fields of the result point into
 * that buffer. The "next" bit mask records which subtag kinds are still
 * acceptable at the current position.
 *
 * NOTE(review): the "finish the pending extension" sequence (terminate the
 * value, lowercase it, insert into t->extensions) appears three times in
 * this function; keep the copies in sync when changing one of them.
 */
2076 static ULanguageTag*
2077 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
2081 char *pSubtag, *pNext, *pLastGoodPosition;
/* extension currently being collected, and the span of its value subtags */
2084 ExtensionListEntry *pExtension;
2085 char *pExtValueSubtag, *pExtValueSubtagEnd;
/* set once the private use "lvariant" marker has been seen */
2087 UBool privateuseVar = FALSE;
2088 int32_t grandfatheredLen = 0;
2090 if (parsedLen != NULL) {
2094 if (U_FAILURE(*status)) {
2099 tagLen = (int32_t)uprv_strlen(tag);
2102 /* copy the entire string */
2103 tagBuf = (char*)uprv_malloc(tagLen + 1);
2104 if (tagBuf == NULL) {
2105 *status = U_MEMORY_ALLOCATION_ERROR;
2108 uprv_memcpy(tagBuf, tag, tagLen);
2109 *(tagBuf + tagLen) = 0;
2111 /* create a ULanguageTag */
2112 t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
2115 *status = U_MEMORY_ALLOCATION_ERROR;
2118 _initializeULanguageTag(t);
2121 if (tagLen < MINLEN) {
2122 /* the input tag is too short - return empty ULanguageTag */
2126 /* check if the tag is grandfathered */
/* GRANDFATHERED holds (grandfathered, preferred) pairs, hence the step of 2;
   the comparison ignores case */
2127 for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
2128 if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
2129 int32_t newTagLength;
2131 grandfatheredLen = tagLen; /* back up for output parsedLen */
2132 newTagLength = uprv_strlen(GRANDFATHERED[i+1]);
/* the preferred form may be longer than the input, so the working buffer
   is reallocated when it does not fit */
2133 if (tagLen < newTagLength) {
2135 tagBuf = (char*)uprv_malloc(newTagLength + 1);
2136 if (tagBuf == NULL) {
2137 *status = U_MEMORY_ALLOCATION_ERROR;
2141 tagLen = newTagLength;
/* parsing continues on the preferred form */
2143 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
2149 * langtag = language
2158 pNext = pLastGoodPosition = tagBuf;
2161 pExtValueSubtag = NULL;
2162 pExtValueSubtagEnd = NULL;
2169 /* locate next separator char */
2183 subtagLen = (int32_t)(pSep - pSubtag);
/* language: stored in lower case */
2186 if (_isLanguageSubtag(pSubtag, subtagLen)) {
2187 *pSep = 0; /* terminate */
2188 t->language = T_CString_toLowerCase(pSubtag);
2190 pLastGoodPosition = pSep;
2191 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
/* extlang: stored in lower case */
2196 if (_isExtlangSubtag(pSubtag, subtagLen)) {
2198 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
2200 pLastGoodPosition = pSep;
/* NOTE(review): the literal 3 presumably mirrors MAXEXTLANG, which sizes
   the extlang array - confirm and consider using the macro */
2201 if (extlangIdx < 3) {
2202 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
2204 next = SCRT | REGN | VART | EXTS | PRIV;
/* script: case is adjusted character by character */
2210 if (_isScriptSubtag(pSubtag, subtagLen)) {
2216 *p = uprv_toupper(*p);
2219 *p = uprv_tolower(*p);
2222 t->script = pSubtag;
2224 pLastGoodPosition = pSep;
2225 next = REGN | VART | EXTS | PRIV;
/* region: stored in upper case */
2230 if (_isRegionSubtag(pSubtag, subtagLen)) {
2232 t->region = T_CString_toUpperCase(pSubtag);
2234 pLastGoodPosition = pSep;
2235 next = VART | EXTS | PRIV;
/* variant: stored in upper case; after the "lvariant" marker the private
   use variant syntax is accepted as well */
2240 if (_isVariantSubtag(pSubtag, subtagLen) ||
2241 (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
2242 VariantListEntry *var;
2245 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
2247 *status = U_MEMORY_ALLOCATION_ERROR;
2251 var->variant = T_CString_toUpperCase(pSubtag);
2252 isAdded = _addVariantToList(&(t->variants), var);
2254 /* duplicated variant entry */
2258 pLastGoodPosition = pSep;
2259 next = VART | EXTS | PRIV;
/* extension singleton: finish the pending extension, then start a new one */
2264 if (_isExtensionSingleton(pSubtag, subtagLen)) {
2265 if (pExtension != NULL) {
2266 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2267 /* the previous extension is incomplete */
2268 uprv_free(pExtension);
2273 /* terminate the previous extension value */
2274 *pExtValueSubtagEnd = 0;
2275 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2277 /* insert the extension to the list */
2278 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2279 pLastGoodPosition = pExtValueSubtagEnd;
2281 /* stop parsing here */
2282 uprv_free(pExtension);
2288 /* create a new extension */
2289 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
2290 if (pExtension == NULL) {
2291 *status = U_MEMORY_ALLOCATION_ERROR;
2295 pExtension->key = T_CString_toLowerCase(pSubtag);
2296 pExtension->value = NULL; /* will be set later */
2299 * reset the start and the end location of extension value
2300 * subtags for this extension
2302 pExtValueSubtag = NULL;
2303 pExtValueSubtagEnd = NULL;
/* extension value subtag: only the span is tracked, the value string is
   terminated when the extension is finished */
2310 if (_isExtensionSubtag(pSubtag, subtagLen)) {
2311 if (pExtValueSubtag == NULL) {
2312 /* if the start position of this extension's value is not yet set,
2313 this one is the first value subtag */
2314 pExtValueSubtag = pSubtag;
2317 /* Mark the end of this subtag */
2318 pExtValueSubtagEnd = pSep;
2319 next = EXTS | EXTV | PRIV;
/* private use singleton 'x' (matched without regard to case) */
2325 if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
2328 if (pExtension != NULL) {
2329 /* Process the last extension */
2330 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2331 /* the previous extension is incomplete */
2332 uprv_free(pExtension);
2336 /* terminate the previous extension value */
2337 *pExtValueSubtagEnd = 0;
2338 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2340 /* insert the extension to the list */
2341 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2342 pLastGoodPosition = pExtValueSubtagEnd;
2345 /* stop parsing here */
2346 uprv_free(pExtension);
2353 /* The rest of part will be private use value subtags */
2354 if (pNext == NULL) {
2355 /* empty private use subtag */
2358 /* back up the private use value start position */
2359 pPrivuseVal = pNext;
2361 /* validate private use value subtags */
2377 subtagLen = (int32_t)(pSep - pSubtag);
/* NOTE(review): this is a case-sensitive comparison of the first
   strlen("lvariant") characters only; subtagLen is not part of the
   condition, so a longer subtag starting with "lvariant" also matches
   here - confirm that this is intended */
2379 if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
2382 privateuseVar = TRUE;
2384 } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
2385 pLastGoodPosition = pSep;
/* keep the private use value only if at least one subtag was accepted */
2395 if (pLastGoodPosition - pPrivuseVal > 0) {
2396 *pLastGoodPosition = 0;
2397 t->privateuse = T_CString_toLowerCase(pPrivuseVal);
2399 /* No more subtags, exiting the parse loop */
2405 /* If we fell through here, it means this subtag is illegal - quit parsing */
/* an extension may still be pending at this point */
2409 if (pExtension != NULL) {
2410 /* Process the last extension */
2411 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2412 /* the previous extension is incomplete */
2413 uprv_free(pExtension);
2415 /* terminate the previous extension value */
2416 *pExtValueSubtagEnd = 0;
2417 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2418 /* insert the extension to the list */
2419 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2420 pLastGoodPosition = pExtValueSubtagEnd;
2422 uprv_free(pExtension);
/* a grandfathered input reports its own full length, because the offsets
   computed above refer to the substituted preferred form */
2427 if (parsedLen != NULL) {
2428 *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
/*
 * Releases a ULanguageTag. A NULL langtag is checked for first. The subtag
 * buffer is freed, then the variant list and the extension list are walked.
 */
2439 ultag_close(ULanguageTag* langtag) {
2441 if (langtag == NULL) {
/* buffer that holds the parsed subtag text */
2445 uprv_free(langtag->buf);
2447 if (langtag->variants) {
2448 VariantListEntry *curVar = langtag->variants;
/* the successor is read before the current node is dealt with */
2450 VariantListEntry *nextVar = curVar->next;
2456 if (langtag->extensions) {
2457 ExtensionListEntry *curExt = langtag->extensions;
/* same pattern as for the variant list */
2459 ExtensionListEntry *nextExt = curExt->next;
/* Returns the language field of langtag; langtag is dereferenced without a NULL check. */
2469 ultag_getLanguage(const ULanguageTag* langtag) {
2470 return langtag->language;
/*
 * Returns the language of langtag after a lookup in DEPRECATEDLANGS.
 * The table is read as pairs: when the language equals the first element
 * of a pair, the second element is returned; without a match the language
 * is returned unchanged.
 */
2475 ultag_getJDKLanguage(const ULanguageTag* langtag) {
/* step of 2 because entries come in pairs; the table ends with NULL */
2477 for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
2478 if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) {
2479 return DEPRECATEDLANGS[i + 1];
2482 return langtag->language;
/*
 * Returns the extlang subtag at position idx when idx lies in
 * [0, MAXEXTLANG). The result for any other idx is not shown in these
 * statements (presumably NULL - confirm).
 */
2487 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
2488 if (idx >= 0 && idx < MAXEXTLANG) {
2489 return langtag->extlang[idx];
/*
 * Scans all MAXEXTLANG extlang slots of langtag and tests each for a
 * non-NULL entry (presumably counting them - confirm).
 */
2495 ultag_getExtlangSize(const ULanguageTag* langtag) {
2498 for (i = 0; i < MAXEXTLANG; i++) {
2499 if (langtag->extlang[i]) {
/* Returns the script field of langtag; langtag is dereferenced without a NULL check. */
2507 ultag_getScript(const ULanguageTag* langtag) {
2508 return langtag->script;
/* Returns the region field of langtag; langtag is dereferenced without a NULL check. */
2512 ultag_getRegion(const ULanguageTag* langtag) {
2513 return langtag->region;
/*
 * Looks up a variant by position, starting from the head of the variant
 * list of langtag. The result variable starts out as NULL (presumably the
 * value returned when idx is out of range - confirm).
 */
2517 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
2518 const char *var = NULL;
2519 VariantListEntry *cur = langtag->variants;
/*
 * Starts at the head of the variant list of langtag (presumably counting
 * the entries - confirm).
 */
2533 ultag_getVariantsSize(const ULanguageTag* langtag) {
2535 VariantListEntry *cur = langtag->variants;
/*
 * Looks up an extension key by position, starting from the head of the
 * extension list of langtag. The result variable starts out as NULL
 * (presumably the value returned when idx is out of range - confirm).
 */
2547 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
2548 const char *key = NULL;
2549 ExtensionListEntry *cur = langtag->extensions;
/*
 * Looks up an extension value by position, starting from the head of the
 * extension list of langtag. The result variable starts out as NULL
 * (presumably the value returned when idx is out of range - confirm).
 */
2563 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
2564 const char *val = NULL;
2565 ExtensionListEntry *cur = langtag->extensions;
/*
 * Starts at the head of the extension list of langtag (presumably counting
 * the entries - confirm).
 */
2579 ultag_getExtensionsSize(const ULanguageTag* langtag) {
2581 ExtensionListEntry *cur = langtag->extensions;
/* Returns the privateuse field of langtag; langtag is dereferenced without a NULL check. */
2593 ultag_getPrivateUse(const ULanguageTag* langtag) {
2594 return langtag->privateuse;
/* Returns the grandfathered field of langtag; langtag is dereferenced without a NULL check. */
2599 ultag_getGrandfathered(const ULanguageTag* langtag) {
2600 return langtag->grandfathered;
2606 * -------------------------------------------------
2608 * Locale/BCP47 conversion APIs, exposed as uloc_*
2610 * -------------------------------------------------
/*
 * Converts an ICU locale ID into a BCP 47 language tag.
 *
 * localeID         input locale ID
 * langtag          output buffer
 * langtagCapacity  size of langtag
 * strict           forwarded to every _append...ToLanguageTag helper; in the
 *                  private-use-only branch an invalid value sets
 *                  U_ILLEGAL_ARGUMENT_ERROR when strict is TRUE
 * status           in/out ICU error code
 *
 * A non-empty localeID is canonicalized first. The tag is then assembled
 * from language, script, region, variants, keywords and private use, in
 * that order; each helper writes at langtag + reslen and its return value
 * is added to reslen.
 */
2612 U_CAPI int32_t U_EXPORT2
2613 uloc_toLanguageTag(const char* localeID,
2615 int32_t langtagCapacity,
2617 UErrorCode* status) {
2618 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2619 char canonical[256];
2621 UErrorCode tmpStatus = U_ZERO_ERROR;
2622 UBool hadPosix = FALSE;
2623 const char* pKeywordStart;
2625 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
2627 if (uprv_strlen(localeID) > 0) {
2628 uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
/* anything other than U_ZERO_ERROR is rejected, so warnings (for example
   a result that is not terminated) count as errors here */
2629 if (tmpStatus != U_ZERO_ERROR) {
2630 *status = U_ILLEGAL_ARGUMENT_ERROR;
2635 /* For handling special case - private use only tag */
/* keywords starting at the very beginning mean there is no language part */
2636 pKeywordStart = locale_getKeywordsStart(canonical);
2637 if (pKeywordStart == canonical) {
2638 UEnumeration *kwdEnum;
2642 kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
2643 if (kwdEnum != NULL) {
2644 kwdCnt = uenum_count(kwdEnum, &tmpStatus);
2649 key = uenum_next(kwdEnum, &len, &tmpStatus);
/* the key must be exactly "x" */
2650 if (len == 1 && *key == PRIVATEUSE) {
2651 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2652 buf[0] = PRIVATEUSE;
/* the value is written from buf[2] on, leaving room for 'x' and one more
   character in front of it.
   NOTE(review): the value is read from localeID, while the keyword
   enumeration above was opened on canonical - confirm this is intended */
2654 len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
2655 if (U_SUCCESS(tmpStatus)) {
2656 if (_isPrivateuseValueSubtags(&buf[2], len)) {
2657 /* return private use only tag */
2659 uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
2660 u_terminateChars(langtag, langtagCapacity, reslen, status);
2662 } else if (strict) {
2663 *status = U_ILLEGAL_ARGUMENT_ERROR;
2666 /* if not strict mode, then "und" will be returned */
2668 *status = U_ILLEGAL_ARGUMENT_ERROR;
2673 uenum_close(kwdEnum);
/* regular path: append each part in BCP 47 order; hadPosix is filled in by
   the variants step and passed on to the two steps after it */
2680 reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
2681 reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2682 reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2683 reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
2684 reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2685 reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
/*
 * Converts a BCP 47 language tag into an ICU locale ID.
 *
 * langtag           input tag, passed to ultag_parse() with length -1
 * localeID          output buffer
 * localeIDCapacity  size of localeID; every write is guarded by
 *                   "reslen < localeIDCapacity"
 * parsedLength      forwarded to ultag_parse() as its parsedLen output
 * status            in/out ICU error code
 *
 * Returns the value of
 * u_terminateChars(localeID, localeIDCapacity, reslen, status).
 *
 * The locale ID is assembled from language, script, region, variants, and
 * finally the extensions / private use via _appendKeywords().
 */
2691 U_CAPI int32_t U_EXPORT2
2692 uloc_forLanguageTag(const char* langtag,
2694 int32_t localeIDCapacity,
2695 int32_t* parsedLength,
2696 UErrorCode* status) {
2699 const char *subtag, *p;
2702 UBool noRegion = TRUE;
2704 lt = ultag_parse(langtag, -1, parsedLength, status);
2705 if (U_FAILURE(*status)) {
/* when the tag has an extlang, the first extlang is used in place of the
   primary language */
2710 subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
/* "und" is not written as a language */
2711 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
2712 len = (int32_t)uprv_strlen(subtag);
2714 if (reslen < localeIDCapacity) {
2715 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
2722 subtag = ultag_getScript(lt);
2723 len = (int32_t)uprv_strlen(subtag);
2725 if (reslen < localeIDCapacity) {
2726 *(localeID + reslen) = LOCALE_SEP;
2730 /* write out the script in title case */
2733 if (reslen < localeIDCapacity) {
2735 *(localeID + reslen) = uprv_toupper(*p);
2737 *(localeID + reslen) = *p;
2746 subtag = ultag_getRegion(lt);
2747 len = (int32_t)uprv_strlen(subtag);
2749 if (reslen < localeIDCapacity) {
2750 *(localeID + reslen) = LOCALE_SEP;
2753 /* write out the region in upper case */
2756 if (reslen < localeIDCapacity) {
2757 *(localeID + reslen) = uprv_toupper(*p);
2766 n = ultag_getVariantsSize(lt);
/* this separator is written once, before the variant loop; presumably it
   stands in for a missing region (see noRegion) - confirm */
2769 if (reslen < localeIDCapacity) {
2770 *(localeID + reslen) = LOCALE_SEP;
2775 for (i = 0; i < n; i++) {
2776 subtag = ultag_getVariant(lt, i);
2777 if (reslen < localeIDCapacity) {
2778 *(localeID + reslen) = LOCALE_SEP;
2781 /* write out the variant in upper case */
2784 if (reslen < localeIDCapacity) {
2785 *(localeID + reslen) = uprv_toupper(*p);
2794 n = ultag_getExtensionsSize(lt);
2795 subtag = ultag_getPrivateUse(lt);
2796 if (n > 0 || uprv_strlen(subtag) > 0) {
/* nothing has been written so far but extensions exist: emit "und" so
   that the keywords have a language to attach to */
2797 if (reslen == 0 && n > 0) {
2798 /* need a language */
2799 if (reslen < localeIDCapacity) {
2800 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
2802 reslen += LANG_UND_LEN;
/* the remaining room handed over is localeIDCapacity - reslen */
2804 len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
2809 return u_terminateChars(localeID, localeIDCapacity, reslen, status);