2 **********************************************************************
3 * Copyright (C) 1997-2010, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
9 * Modification History:
11 * Date Name Description
12 * 04/01/97 aliu Creation.
13 * 08/21/98 stephen JDK 1.2 sync
14 * 12/08/98 rtg New Locale implementation and C API
15 * 03/15/99 damiba overhaul.
16 * 04/06/99 stephen changed setDefault() to realloc and copy
17 * 06/14/99 stephen Changed calls to ures_open for new params
18 * 07/21/99 stephen Modified setDefault() to propagate to C++
19 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20 * brought canonicalization code into line with spec
21 *****************************************************************************/
24 POSIX's locale format, from putil.c: [no spaces]
26 ll [ _CC ] [ . MM ] [ @ VV]
28 l = lang, C = ctry, M = charmap, V = variant
31 #include "unicode/utypes.h"
32 #include "unicode/ustring.h"
33 #include "unicode/uloc.h"
47 #include <stdio.h> /* for sprintf */
49 /* ### Declarations **************************************************/
51 /* Locale stuff from locid.cpp */
/* Declarations of the default-locale setter and getter. Per the comment above,
   the implementations live in locid.cpp rather than in this file. */
52 U_CFUNC void locale_set_default(const char *id);
53 U_CFUNC const char *locale_get_default(void);
/* Forward declaration of locale_getKeywords(); the definition further down in
   this file forwards its arguments to _getKeywords(). */
55 locale_getKeywords(const char *localeID,
57 char *keywords, int32_t keywordCapacity,
58 char *values, int32_t valuesCapacity, int32_t *valLen,
62 /* ### Data tables **************************************************/
65 * Table of language codes, both 2- and 3-letter, with preference
66 * given to 2-letter codes where possible. Includes 3-letter codes
67 * that lack a 2-letter equivalent.
69 * This list must be in sorted order. This list is returned directly
70 * to the user by some API.
72 * This list must be kept in sync with LANGUAGES_3, with corresponding
75 * This table should be terminated with a NULL entry, followed by a
76 * second list, and another NULL entry. The first list is visible to
77 * user code when this array is returned by API. The second list
78 * contains codes we support, but do not expose through user API.
82 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
83 * include the revisions up to 2001/7/27 *CWB*
85 * The 3 character codes are the terminology codes like RFC 3066. This
86 * is compatible with prior ICU codes
88 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
89 * table but now at the end of the table because 3 character codes are
90 * duplicates. This avoids bad searches going from 3 to 2 character
93 * The range qaa-qtz is reserved for local use
/* Language codes: 2-letter where one exists, otherwise the 3-letter code.
   Entries are paired by index with LANGUAGES_3, so any insertion or removal
   here must be mirrored there.
   NOTE(review): LANGUAGES_3 and its mirror comments list "ang", "anp", "apa"
   right after "an"; confirm the matching entries are present here, since a
   missing entry shifts every later index pair. */
95 static const char * const LANGUAGES[] = {
96 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa",
97 "afh", "ain", "ak", "akk", "ale", "alg", "alt", "am", "an",
99 "ar", "arc", "arn", "arp", "art", "arw", "as", "ast",
100 "ath", "aus", "av", "awa", "ay", "az", "ba", "bad",
101 "bai", "bal", "ban", "bas", "bat", "be", "bej",
102 "bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin",
103 "bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs",
104 "btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau",
105 "cch", "ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm",
106 "chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop",
107 "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus",
108 "cv", "cy", "da", "dak", "dar", "day", "de", "del", "den",
109 "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu",
110 "dz", "ee", "efi", "egy", "eka", "el", "elx", "en",
111 "enm", "eo", "es", "et", "eu", "ewo", "fa",
112 "fan", "fat", "ff", "fi", "fil", "fiu", "fj", "fo", "fon",
113 "fr", "frm", "fro", "frr", "frs", "fur", "fy",
114 "ga", "gaa", "gay", "gba", "gd", "gem", "gez", "gil",
115 "gl", "gmh", "gn", "goh", "gon", "gor", "got", "grb",
116 "grc", "gsw", "gu", "gv", "gwi",
117 "ha", "hai", "haw", "he", "hi", "hil", "him",
118 "hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz",
119 "ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik",
120 "ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it",
121 "iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab",
122 "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg", "kha", "khi",
123 "kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn",
124 "ko", "kok", "kos", "kpe", "kr", "krc", "krl", "kro", "kru", "ks",
125 "ku", "kum", "kut", "kv", "kw", "ky", "la", "lad",
126 "lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol",
127 "loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus",
128 "lv", "mad", "mag", "mai", "mak", "man", "map", "mas",
129 "mdf", "mdr", "men", "mfe", "mg", "mga", "mh", "mi", "mic", "min",
130 "mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno",
131 "mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun",
132 "mus", "mwl", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap",
133 "nb", "nd", "nds", "ne", "new", "ng", "nia", "nic",
134 "niu", "nl", "nn", "no", "nog", "non", "nqo", "nr", "nso", "nub",
135 "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj",
136 "om", "or", "os", "osa", "ota", "oto", "pa", "paa",
137 "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
138 "pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu",
139 "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom",
140 "ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam",
141 "sas", "sat", "sc", "scn", "sco", "sd", "se", "sel", "sem",
142 "sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit",
143 "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn",
144 "sms", "sn", "snk", "so", "sog", "son", "sq", "sr",
145 "srn", "srr", "ss", "ssa", "st", "su", "suk", "sus", "sux",
146 "sv", "sw", "syc", "syr", "ta", "tai", "te", "tem", "ter",
147 "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl",
148 "tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv",
149 "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw",
150 "ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur",
151 "uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak",
152 "wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap",
153 "yi", "yo", "ypk", "za", "zap", "zbl", "zen", "zh", "znd",
154 "zu", "zun", "zxx", "zza",
/* Second group: withdrawn codes, kept at the end so 3-to-2 letter searches
   reach the current code first (see the table comment above). */
156 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
/* Deprecated 2-letter language codes. Paired by index with
   REPLACEMENT_LANGUAGES; the list ends with two NULL entries. */
159 static const char* const DEPRECATED_LANGUAGES[]={
160 "in", "iw", "ji", "jw", NULL, NULL
/* Current codes for the entries of DEPRECATED_LANGUAGES, same index order:
   in->id, iw->he, ji->yi, jw->jv. Ends with two NULL entries. */
162 static const char* const REPLACEMENT_LANGUAGES[]={
163 "id", "he", "yi", "jv", NULL, NULL
167 * Table of 3-letter language codes.
169 * This is a lookup table used to convert 3-letter language codes to
170 * their 2-letter equivalent, where possible. It must be kept in sync
171 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
172 * same language as LANGUAGES_3[i]. The commented-out lines are
173 * copied from LANGUAGES to make eyeballing this baby easier.
175 * Where a 3-letter language code has no 2-letter equivalent, the
176 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
178 * This table should be terminated with a NULL entry, followed by a
179 * second list, and another NULL entry. The two lists correspond to
180 * the two lists in LANGUAGES.
/* 3-letter language codes, paired by index with LANGUAGES. Each data row is
   preceded by a comment row that mirrors the corresponding LANGUAGES entries,
   so the two tables can be compared by eye. */
182 static const char * const LANGUAGES_3[] = {
183 /* "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa", */
184 "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
185 /* "afh", "ain", "ak", "akk", "ale", "alg", "alt", "am", "an", "ang", "anp", "apa", */
186 "afh", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa",
187 /* "ar", "arc", "arn", "arp", "art", "arw", "as", "ast", */
188 "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
189 /* "ath", "aus", "av", "awa", "ay", "az", "ba", "bad", */
190 "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
191 /* "bai", "bal", "ban", "bas", "bat", "be", "bej", */
192 "bai", "bal", "ban", "bas", "bat", "bel", "bej",
193 /* "bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin", */
194 "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
195 /* "bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs", */
196 "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
197 /* "btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau", */
198 "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
199 /* "cch", "ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm", */
200 "cch", "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
201 /* "chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop", */
202 "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
203 /* "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus", */
204 "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
205 /* "cv", "cy", "da", "dak", "dar", "day", "de", "del", "den", */
206 "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
207 /* "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu", */
208 "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
209 /* "dz", "ee", "efi", "egy", "eka", "el", "elx", "en", */
210 "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
211 /* "enm", "eo", "es", "et", "eu", "ewo", "fa", */
212 "enm", "epo", "spa", "est", "eus", "ewo", "fas",
213 /* "fan", "fat", "ff", "fi", "fil", "fiu", "fj", "fo", "fon", */
214 "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon",
215 /* "fr", "frm", "fro", "frr", "frs", "fur", "fy", "ga", "gaa", "gay", */
216 "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gay",
217 /* "gba", "gd", "gem", "gez", "gil", "gl", "gmh", "gn", */
218 "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
219 /* "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu", "gv", */
220 "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "glv",
221 /* "gwi", "ha", "hai", "haw", "he", "hi", "hil", "him", */
222 "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
223 /* "hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz", */
224 "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
225 /* "ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik", */
226 "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
227 /* "ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it", */
228 "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
229 /* "iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab", */
230 "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab",
231 /* "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg", "kha", "khi",*/
/* NOTE(review): the entry paired with "kg" below is the 2-letter string "kg"
   itself, the only 2-letter string in this table. Every other 2-letter
   LANGUAGES entry maps to a 3-letter code here; ISO 639-2 lists "kon" for
   Kongo. Confirm whether this entry was left unconverted. */
232 "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg", "kha", "khi",
233 /* "kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn", */
234 "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
235 /* "ko", "kok", "kos", "kpe", "kr", "krc", "krl", "kro", "kru", "ks", */
236 "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas",
237 /* "ku", "kum", "kut", "kv", "kw", "ky", "la", "lad", */
238 "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
239 /* "lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol", */
240 "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
241 /* "loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus", */
242 "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
243 /* "lv", "mad", "mag", "mai", "mak", "man", "map", "mas", */
244 "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
245 /* "mdf", "mdr", "men", "mfe", "mg", "mga", "mh", "mi", "mic", "min", */
246 "mdf", "mdr", "men", "mfe", "mlg", "mga", "mah", "mri", "mic", "min",
247 /* "mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno", */
248 "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
249 /* "mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun", */
250 "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
251 /* "mus", "mwl", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap", */
252 "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
253 /* "nb", "nd", "nds", "ne", "new", "ng", "nia", "nic", */
254 "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
255 /* "niu", "nl", "nn", "no", "nog", "non", "nqo", "nr", "nso", "nub", */
256 "niu", "nld", "nno", "nor", "nog", "non", "nqo", "nbl", "nso", "nub",
257 /* "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj", */
258 "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
259 /* "om", "or", "os", "osa", "ota", "oto", "pa", "paa", */
260 "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
261 /* "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn", */
262 "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
263 /* "pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu", */
264 "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
265 /* "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom", */
266 "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
267 /* "ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam", */
268 "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
269 /* "sas", "sat", "sc", "scn", "sco", "sd", "se", "sel", "sem", */
270 "sas", "sat", "srd", "scn", "sco", "snd", "sme", "sel", "sem",
271 /* "sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit", */
272 "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
273 /* "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn", */
274 "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
275 /* "sms", "sn", "snk", "so", "sog", "son", "sq", "sr", */
276 "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
277 /* "srn", "srr", "ss", "ssa", "st", "su", "suk", "sus", "sux", */
278 "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
279 /* "sv", "sw", "syc", "syr", "ta", "tai", "te", "tem", "ter", */
280 "swe", "swa", "syc", "syr", "tam", "tai", "tel", "tem", "ter",
281 /* "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl", */
282 "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
283 /* "tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv", */
284 "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
285 /* "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw", */
286 "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
287 /* "ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur", */
288 "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
289 /* "uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak", */
290 "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
291 /* "wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap", */
292 "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
293 /* "yi", "yo", "ypk", "za", "zap", "zbl", "zen", "zh", "znd", */
294 "yid", "yor", "ypk", "zha", "zap", "zbl", "zen", "zho", "znd",
295 /* "zu", "zun", "zxx", "zza", */
296 "zul", "zun", "zxx", "zza",
/* Second group: 3-letter codes for the obsolete 2-letter codes. Three of them
   ("ind", "heb", "yid") repeat codes already used in the first group. */
298 /* "in", "iw", "ji", "jw", "sh", */
299 "ind", "heb", "yid", "jaw", "srp",
304 * Table of 2-letter country codes.
306 * This list must be in sorted order. This list is returned directly
307 * to the user by some API.
309 * This list must be kept in sync with COUNTRIES_3, with corresponding
312 * This table should be terminated with a NULL entry, followed by a
313 * second list, and another NULL entry. The first list is visible to
314 * user code when this array is returned by API. The second list
315 * contains codes we support, but do not expose through user API.
319 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
320 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
321 * new codes keeping the old ones for compatibility updated to include
322 * 1999/12/03 revisions *CWB*
324 * RO(ROM) is now RO(ROU) according to
325 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
/* 2-letter country codes, paired by index with COUNTRIES_3. The trailing group
   holds obsolete codes. "RO" appears in both groups; COUNTRIES_3 pairs the
   first with "ROU" and the obsolete one with "ROM". */
327 static const char * const COUNTRIES[] = {
328 "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN",
329 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
330 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
331 "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV",
332 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
333 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
334 "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK",
335 "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
336 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
337 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
338 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
339 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
340 "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
341 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
342 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
343 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
344 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
345 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
346 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
347 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
348 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
349 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
350 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
351 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
352 "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV",
353 "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
354 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
355 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
356 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
357 "WS", "YE", "YT", "ZA", "ZM", "ZW",
359 "FX", "CS", "RO", "TP", "YU", "ZR", /* obsolete country codes */
/* Deprecated 2-letter country codes. Paired by index with
   REPLACEMENT_COUNTRIES; the list ends with two NULL entries. */
363 static const char* const DEPRECATED_COUNTRIES[] ={
364 "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR", NULL, NULL /* deprecated country list */
/* Current codes for the entries of DEPRECATED_COUNTRIES, same index order.
   The comment row inside the initializer repeats the deprecated codes so each
   pair can be read vertically. Both "CS" and "YU" map to "RS". */
366 static const char* const REPLACEMENT_COUNTRIES[] = {
367 /* "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */
368 "MM", "RS", "BJ", "FR", "BF", "VU", "ZW", "TL", "RS", "CD", NULL, NULL /* replacement country codes */
372 * Table of 3-letter country codes.
374 * This is a lookup table used to convert 3-letter country codes to
375 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
376 * For all valid i, COUNTRIES[i] must refer to the same country as
377 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
378 * to make eyeballing this baby easier.
380 * This table should be terminated with a NULL entry, followed by a
381 * second list, and another NULL entry. The two lists correspond to
382 * the two lists in COUNTRIES.
/* 3-letter country codes, paired by index with COUNTRIES. Each data row is
   preceded by a comment row that mirrors the corresponding COUNTRIES entries.
   The 3-letter code is not always an extension of the 2-letter one
   (for example GS -> "SGS", KY -> "CYM", TF -> "ATF"). */
384 static const char * const COUNTRIES_3[] = {
385 /* "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN", */
386 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
387 /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
388 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
389 /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
390 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
391 /* "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV", */
392 "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
393 /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
394 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
395 /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
396 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
397 /* "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", */
398 "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
399 /* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
400 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
401 /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
402 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
403 /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
404 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
405 /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
406 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
407 /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
408 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
409 /* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
410 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
411 /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
412 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
413 /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
414 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
415 /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
416 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
417 /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
418 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
419 /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
420 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
421 /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
422 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
423 /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
424 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
425 /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
426 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
427 /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
428 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
429 /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
430 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
431 /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
432 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
433 /* "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV", */
434 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
435 /* "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
436 "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
437 /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
438 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
439 /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
440 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
441 /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
442 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
443 /* "WS", "YE", "YT", "ZA", "ZM", "ZW", */
444 "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
/* Second group: 3-letter codes for the obsolete 2-letter codes. */
446 /* "FX", "CS", "RO", "TP", "YU", "ZR", */
447 "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR",
/* One row of the locale-ID canonicalization table (CANONICALIZE_MAP): an input
   ID, the ID it canonicalizes to, and an optional keyword/value pair attached
   to that row. keyword and value are either both set or both NULL. */
451 typedef struct CanonicalizationMap {
452 const char *id; /* input ID */
453 const char *canonicalID; /* canonicalized output ID */
454 const char *keyword; /* keyword, or NULL if none */
455 const char *value; /* keyword value, or NULL if kw==NULL */
456 } CanonicalizationMap;
459 * A map to canonicalize locale IDs. This handles a variety of
460 * different semantic kinds of transformations.
/* Rows are { id, canonicalID, keyword, value }. Rows with a non-NULL keyword
   carry a keyword=value pair alongside the canonical ID; in this table those
   keywords are "currency", "collation" and "calendar". The trailing comment on
   a row records where the input spelling comes from. */
462 static const CanonicalizationMap CANONICALIZE_MAP[] = {
463 { "", "en_US_POSIX", NULL, NULL }, /* .NET name */
464 { "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */
465 { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
466 { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */
467 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
468 { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */
469 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
470 { "cel_GAULISH", "cel__GAULISH", NULL, NULL }, /* registered name */
471 { "de_1901", "de__1901", NULL, NULL }, /* registered name */
472 { "de_1906", "de__1906", NULL, NULL }, /* registered name */
473 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
474 { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
475 { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
476 { "de_LU_PREEURO", "de_LU", "currency", "LUF" },
477 { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
478 { "en_BOONT", "en__BOONT", NULL, NULL }, /* registered name */
479 { "en_SCOUSE", "en__SCOUSE", NULL, NULL }, /* registered name */
480 { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
481 { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
482 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
483 { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
484 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
485 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
486 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
487 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
488 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
489 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
490 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
491 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
492 { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
493 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
494 { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */
495 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
496 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
497 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
498 { "sl_ROZAJ", "sl__ROZAJ", NULL, NULL }, /* registered name */
499 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
500 { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */
501 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
502 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
503 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
504 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
505 { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */
506 { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */
507 { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */
508 { "zh_GAN", "zh__GAN", NULL, NULL }, /* registered name */
509 { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */
510 { "zh_HAKKA", "zh__HAKKA", NULL, NULL }, /* registered name */
511 { "zh_MIN", "zh__MIN", NULL, NULL }, /* registered name */
512 { "zh_MIN_NAN", "zh__MINNAN", NULL, NULL }, /* registered name */
513 { "zh_WUU", "zh__WUU", NULL, NULL }, /* registered name */
514 { "zh_XIANG", "zh__XIANG", NULL, NULL }, /* registered name */
515 { "zh_YUE", "zh__YUE", NULL, NULL }, /* registered name */
/* One row of VARIANT_MAP: a variant string together with the keyword/value
   pair associated with it. */
518 typedef struct VariantMap {
519 const char *variant; /* input ID */
520 const char *keyword; /* keyword, or NULL if none */
521 const char *value; /* keyword value, or NULL if kw==NULL */
/* Rows are { variant, keyword, value }: EURO pairs with currency=EUR, PINYIN
   and STROKE pair with the matching collation value. */
524 static const VariantMap VARIANT_MAP[] = {
525 { "EURO", "currency", "EUR" },
526 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
527 { "STROKE", "collation", "stroke" } /* Solaris variant */
530 /* ### BCP47 Conversion *******************************************/
/* Test whether the locale id looks like a BCP47 tag with an extension: the id is
   non-NULL, contains no '@' keyword separator, and its shortest subtag (as reported
   by getShortestSubtagLength) is exactly one character long.
   The length check is applied to the macro argument itself; it previously named the
   caller's `localeID` variable, which only worked when the argument had that name.
   The argument is evaluated more than once, so pass an expression without side
   effects. */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
533 /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
/* Calls uloc_forLanguageTag(id, buffer, length, NULL, err) and enters the
   branch below when the call returns <= 0 or leaves *err in a failure state.
   NOTE(review): this expands to a bare `if` statement rather than a single
   expression or a do { } while (0) block; check call sites for dangling-else
   hazards. */
534 #define _ConvertBCP47(finalID, id, buffer, length,err) \
535 if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
540 /* Gets the size of the shortest subtag in the given localeID. */
/* Walks localeID once, treating '_' and '-' as subtag separators.
   `length` starts at the whole-string length. `tmpLength` presumably counts
   the characters of the subtag being scanned, and a non-empty subtag shorter
   than `length` presumably lowers it. The updating statements are not shown
   alongside the conditions, so confirm before relying on this. */
541 static int32_t getShortestSubtagLength(const char *localeID) {
542 int32_t localeIDLength = uprv_strlen(localeID);
543 int32_t length = localeIDLength;
544 int32_t tmpLength = 0;
548 for (i = 0; i < localeIDLength; i++) {
/* any character other than a separator belongs to the current subtag */
549 if (localeID[i] != '_' && localeID[i] != '-') {
/* empty subtags (tmpLength == 0, e.g. between doubled separators) are ignored */
556 if (tmpLength != 0 && tmpLength < length) {
566 /* ### Keywords **************************************************/
/* Size of the fixed keyword-name buffers (KeywordStruct.keyword and the local
   name buffers). Names of this length or longer are rejected, which leaves
   room for the terminating NUL. */
568 #define ULOC_KEYWORD_BUFFER_LEN 25
/* Capacity of the keywordList array in _getKeywords(); reaching it sets
   U_INTERNAL_PROGRAM_ERROR. */
569 #define ULOC_MAX_NO_KEYWORDS 25
/* Locates the start of the keyword section of a locale ID by searching for
   '@' with uprv_strchr(). On EBCDIC builds the '@' literal is not trusted:
   a zero-terminated table of candidate byte values is tried instead, one
   uprv_strchr() call per candidate. The table is presumably walked until a
   match or the 0x00 sentinel; confirm against the loop. */
571 U_CAPI const char * U_EXPORT2
572 locale_getKeywordsStart(const char *localeID) {
573 const char *result = NULL;
574 if((result = uprv_strchr(localeID, '@')) != NULL) {
577 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
579 /* We do this because the @ sign is variant, and the @ sign used on one
580 EBCDIC machine won't be compiled the same way on other EBCDIC based
582 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
583 const uint8_t *charToFind = ebcdicSigns;
585 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
596 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
597 * @param keywordName incoming name to be canonicalized
598 * @param status return status (keyword too long)
599 * @return length of the keyword name
/* Writes a lowercased copy of keywordName into buf and returns the name's
   length. A name that does not fit the buffer sets *status to
   U_INTERNAL_PROGRAM_ERROR. */
601 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
604 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
/* >= rather than >: one byte of buf is kept free, presumably for the
   terminating NUL */
606 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
607 /* keyword name too long for internal buffer */
608 *status = U_INTERNAL_PROGRAM_ERROR;
612 /* normalize the keyword name */
613 for(i = 0; i < keywordNameLen; i++) {
614 buf[i] = uprv_tolower(keywordName[i]);
/* the returned length does not count any terminator */
618 return keywordNameLen;
/* Lowercased, NUL-terminated keyword name, stored inline. */
622 char keyword[ULOC_KEYWORD_BUFFER_LEN];
/* Start of the value text. _getKeywords() points this into the locale ID
   string (or at addValue); it is not a copy and is not NUL-terminated at the
   value's end, so it must be read together with the entry's valueLen. */
624 const char *valueStart;
/* Comparator for KeywordStruct entries: orders two entries by their keyword
   strings using uprv_strcmp(). The context argument is not used.
   _getKeywords() passes this function to uprv_sortArray(). */
628 static int32_t U_CALLCONV
629 compareKeywordStructs(const void *context, const void *left, const void *right) {
630 const char* leftString = ((const KeywordStruct *)left)->keyword;
631 const char* rightString = ((const KeywordStruct *)right)->keyword;
632 return uprv_strcmp(leftString, rightString);
636 * Both addKeyword and addValue must already be in canonical form.
637 * Either both addKeyword and addValue are NULL, or neither is NULL.
638 * If they are not NULL they must be zero terminated.
639 * If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
/*
 * Collects the keyword=value pairs of a locale ID into keywordList[], sorts
 * them by keyword name, and writes them out.
 *
 * localeID   text to parse; parsing only happens when prev is '@'.
 * keywords   output buffer of keywordCapacity bytes for the sorted keywords.
 * values     output buffer of valuesCapacity bytes for the NUL-separated values.
 * addKeyword/addValue  optional extra pair, added only if the keyword is not
 *            already present in localeID.
 * status     set to U_INTERNAL_PROGRAM_ERROR when the keyword list is full or
 *            a keyword name is too long for KeywordStruct.keyword, and to
 *            U_INVALID_FORMAT_ERROR for a missing '=', a ';' before '=', an
 *            empty keyword or an empty value.
 *
 * Returns the result of u_terminateChars(keywords, keywordCapacity,
 * keywordsLen, status). keywordsLen and valuesLen appear to keep growing even
 * when a piece does not fit its buffer, so they describe the needed size.
 */
642 _getKeywords(const char *localeID,
644 char *keywords, int32_t keywordCapacity,
645 char *values, int32_t valuesCapacity, int32_t *valLen,
647 const char* addKeyword,
648 const char* addValue,
651 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
653 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
654 int32_t numKeywords = 0;
655 const char* pos = localeID;
656 const char* equalSign = NULL;
657 const char* semicolon = NULL;
659 int32_t keywordsLen = 0;
660 int32_t valuesLen = 0;
662 if(prev == '@') { /* start of keyword definition */
663 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
665 UBool duplicate = FALSE;
666 /* skip leading spaces */
670 if (!*pos) { /* handle trailing "; " */
/* the fixed-size keywordList is full */
673 if(numKeywords == maxKeywords) {
674 *status = U_INTERNAL_PROGRAM_ERROR;
/* both searches start at the current pair, so they describe this pair only
   when '=' comes before the next ';' */
677 equalSign = uprv_strchr(pos, '=');
678 semicolon = uprv_strchr(pos, ';');
679 /* lack of '=' [foo@currency] is illegal */
680 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
681 if(!equalSign || (semicolon && semicolon<equalSign)) {
682 *status = U_INVALID_FORMAT_ERROR;
685 /* need to normalize both keyword and keyword name */
/* >= keeps one byte of KeywordStruct.keyword free for the terminating NUL */
686 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
687 /* keyword name too long for internal buffer */
688 *status = U_INTERNAL_PROGRAM_ERROR;
/* copy the keyword name in lower case; n counts the characters kept */
691 for(i = 0, n = 0; i < equalSign - pos; ++i) {
693 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
697 /* zero-length keyword is an error. */
699 *status = U_INVALID_FORMAT_ERROR;
703 keywordList[numKeywords].keyword[n] = 0;
704 keywordList[numKeywords].keywordLen = n;
705 /* now grab the value part. First we skip the '=' */
707 /* then we skip leading spaces */
708 while(*equalSign == ' ') {
712 /* Premature end or zero-length value */
/* NOTE(review): equalSign has already been dereferenced by the loop above, so
   `!equalSign` cannot be true here. A premature end of string would show up
   as `*equalSign == 0`; confirm whether `!*equalSign` was intended. */
713 if (!equalSign || equalSign == semicolon) {
714 *status = U_INVALID_FORMAT_ERROR;
/* the value is referenced in place inside localeID, not copied */
718 keywordList[numKeywords].valueStart = equalSign;
/* value followed by more text: count back over trailing spaces so they are
   left out of valueLen */
723 while(*(pos - i - 1) == ' ') {
726 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
/* value runs to the end of the string: same trailing-space trimming */
729 i = (int32_t)uprv_strlen(equalSign);
730 while(i && equalSign[i-1] == ' ') {
733 keywordList[numKeywords].valueLen = i;
735 /* If this is a duplicate keyword, then ignore it */
736 for (j=0; j<numKeywords; ++j) {
737 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
747 /* Handle addKeyword/addValue. */
748 if (addKeyword != NULL) {
749 UBool duplicate = FALSE;
750 U_ASSERT(addValue != NULL);
751 /* Search for duplicate; if found, do nothing. Explicit keyword
752 overrides addKeyword. */
753 for (j=0; j<numKeywords; ++j) {
754 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
760 if (numKeywords == maxKeywords) {
761 *status = U_INTERNAL_PROGRAM_ERROR;
/* unchecked copy: relies on the documented precondition that addKeyword fits
   in KeywordStruct.keyword */
764 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
765 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
766 keywordList[numKeywords].valueStart = addValue;
767 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
771 U_ASSERT(addValue == NULL);
774 /* now we have a list of keywords */
775 /* we need to sort it */
776 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
778 /* Now construct the keyword part */
779 for(i = 0; i < numKeywords; i++) {
/* the "+ 1" accounts for the byte written right after the name: '=' in one
   output form, NUL in the other */
780 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
781 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
783 keywords[keywordsLen + keywordList[i].keywordLen] = '=';
785 keywords[keywordsLen + keywordList[i].keywordLen] = 0;
788 keywordsLen += keywordList[i].keywordLen + 1;
790 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
/* exactly valueLen bytes are copied and no NUL is written here; the ';'
   separator or the final u_terminateChars() call follows the value */
791 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
793 keywordsLen += keywordList[i].valueLen;
/* separator between pairs, not after the last one */
795 if(i < numKeywords - 1) {
796 if(keywordsLen < keywordCapacity) {
797 keywords[keywordsLen] = ';';
803 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
/* NOTE(review): the capacity check above is based on valueLen, but valueStart
   points into the locale ID and is not terminated at the value's end. If
   uprv_strcpy copies up to the source's NUL (as strcpy does), a value that is
   followed by trailing spaces or by ";next=..." copies more than valueLen + 1
   bytes before the next line cuts it back. Confirm that `values` cannot be
   overrun; a length-bounded copy would match the keyword branch above. */
804 uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
805 values[valuesLen + keywordList[i].valueLen] = 0;
807 valuesLen += keywordList[i].valueLen + 1;
811 values[valuesLen] = 0;
816 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
823 locale_getKeywords(const char *localeID,
825 char *keywords, int32_t keywordCapacity,
826 char *values, int32_t valuesCapacity, int32_t *valLen,
828 UErrorCode *status) {
829 return _getKeywords(localeID, prev, keywords, keywordCapacity,
830 values, valuesCapacity, valLen, valuesToo,
834 U_CAPI int32_t U_EXPORT2
835 uloc_getKeywordValue(const char* localeID,
836 const char* keywordName,
837 char* buffer, int32_t bufferCapacity,
840 const char* startSearchHere = NULL;
841 const char* nextSeparator = NULL;
842 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
843 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
847 if(status && U_SUCCESS(*status) && localeID) {
848 char tempBuffer[ULOC_FULLNAME_CAPACITY];
849 const char* tmpLocaleID;
851 if (_hasBCP47Extension(localeID)) {
852 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
854 tmpLocaleID=localeID;
857 startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
858 if(startSearchHere == NULL) {
859 /* no keywords, return at once */
863 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
864 if(U_FAILURE(*status)) {
868 /* find the first keyword */
869 while(startSearchHere) {
871 /* skip leading spaces (allowed?) */
872 while(*startSearchHere == ' ') {
875 nextSeparator = uprv_strchr(startSearchHere, '=');
876 /* need to normalize both keyword and keyword name */
880 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
881 /* keyword name too long for internal buffer */
882 *status = U_INTERNAL_PROGRAM_ERROR;
885 for(i = 0; i < nextSeparator - startSearchHere; i++) {
886 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
888 /* trim trailing spaces */
889 while(startSearchHere[i-1] == ' ') {
892 localeKeywordNameBuffer[i] = 0;
894 startSearchHere = uprv_strchr(nextSeparator, ';');
896 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
898 while(*nextSeparator == ' ') {
901 /* we actually found the keyword. Copy the value */
902 if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
903 while(*(startSearchHere-1) == ' ') {
906 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
907 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
908 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
909 i = (int32_t)uprv_strlen(nextSeparator);
910 while(nextSeparator[i - 1] == ' ') {
913 uprv_strncpy(buffer, nextSeparator, i);
914 result = u_terminateChars(buffer, bufferCapacity, i, status);
916 /* give a bigger buffer, please */
917 *status = U_BUFFER_OVERFLOW_ERROR;
918 if(startSearchHere) {
919 result = (int32_t)(startSearchHere - nextSeparator);
921 result = (int32_t)uprv_strlen(nextSeparator);
931 U_CAPI int32_t U_EXPORT2
932 uloc_setKeywordValue(const char* keywordName,
933 const char* keywordValue,
934 char* buffer, int32_t bufferCapacity,
937 /* TODO: sorting. removal. */
938 int32_t keywordNameLen;
939 int32_t keywordValueLen;
942 int32_t foundValueLen;
943 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
944 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
945 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
948 char* nextSeparator = NULL;
949 char* nextEqualsign = NULL;
950 char* startSearchHere = NULL;
951 char* keywordStart = NULL;
952 char *insertHere = NULL;
953 if(U_FAILURE(*status)) {
956 if(bufferCapacity>1) {
957 bufLen = (int32_t)uprv_strlen(buffer);
959 *status = U_ILLEGAL_ARGUMENT_ERROR;
962 if(bufferCapacity<bufLen) {
963 /* The capacity is less than the length?! Is this NULL terminated? */
964 *status = U_ILLEGAL_ARGUMENT_ERROR;
967 if(keywordValue && !*keywordValue) {
971 keywordValueLen = (int32_t)uprv_strlen(keywordValue);
975 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
976 if(U_FAILURE(*status)) {
979 startSearchHere = (char*)locale_getKeywordsStart(buffer);
980 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
981 if(!keywordValue) { /* no keywords = nothing to remove */
985 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
986 if(startSearchHere) { /* had a single @ */
987 needLen--; /* already had the @ */
988 /* startSearchHere points at the @ */
990 startSearchHere=buffer+bufLen;
992 if(needLen >= bufferCapacity) {
993 *status = U_BUFFER_OVERFLOW_ERROR;
994 return needLen; /* no change */
996 *startSearchHere = '@';
998 uprv_strcpy(startSearchHere, keywordNameBuffer);
999 startSearchHere += keywordNameLen;
1000 *startSearchHere = '=';
1002 uprv_strcpy(startSearchHere, keywordValue);
1003 startSearchHere+=keywordValueLen;
1005 } /* end shortcut - no @ */
1007 keywordStart = startSearchHere;
1008 /* search for keyword */
1009 while(keywordStart) {
1011 /* skip leading spaces (allowed?) */
1012 while(*keywordStart == ' ') {
1015 nextEqualsign = uprv_strchr(keywordStart, '=');
1016 /* need to normalize both keyword and keyword name */
1017 if(!nextEqualsign) {
1020 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
1021 /* keyword name too long for internal buffer */
1022 *status = U_INTERNAL_PROGRAM_ERROR;
1025 for(i = 0; i < nextEqualsign - keywordStart; i++) {
1026 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
1028 /* trim trailing spaces */
1029 while(keywordStart[i-1] == ' ') {
1032 localeKeywordNameBuffer[i] = 0;
1034 nextSeparator = uprv_strchr(nextEqualsign, ';');
1035 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1038 while(*nextEqualsign == ' ') {
1041 /* we actually found the keyword. Change the value */
1042 if (nextSeparator) {
1044 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
1047 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
1049 if(keywordValue) { /* adding a value - not removing */
1050 if(foundValueLen == keywordValueLen) {
1051 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1052 return bufLen; /* no change in size */
1053 } else if(foundValueLen > keywordValueLen) {
1054 int32_t delta = foundValueLen - keywordValueLen;
1055 if(nextSeparator) { /* RH side */
1056 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1058 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1062 } else { /* FVL < KVL */
1063 int32_t delta = keywordValueLen - foundValueLen;
1064 if((bufLen+delta) >= bufferCapacity) {
1065 *status = U_BUFFER_OVERFLOW_ERROR;
1066 return bufLen+delta;
1068 if(nextSeparator) { /* RH side */
1069 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1071 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1076 } else { /* removing a keyword */
1078 /* zero out the ';' or '@' just before startSearchhere */
1079 keywordStart[-1] = 0;
1080 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
1082 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1083 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
1084 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
1087 } else if(rc<0){ /* end match keyword */
1088 /* could insert at this location. */
1089 insertHere = keywordStart;
1091 keywordStart = nextSeparator;
1092 } /* end loop searching */
1095 return bufLen; /* removal of non-extant keyword - no change */
1098 /* we know there is at least one keyword. */
1099 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1100 if(needLen >= bufferCapacity) {
1101 *status = U_BUFFER_OVERFLOW_ERROR;
1102 return needLen; /* no change */
1106 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1107 keywordStart = insertHere;
1109 keywordStart = buffer+bufLen;
1110 *keywordStart = ';';
1113 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1114 keywordStart += keywordNameLen;
1115 *keywordStart = '=';
1117 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1118 keywordStart+=keywordValueLen;
1120 *keywordStart = ';';
1127 /* ### ID parsing implementation **************************************************/
/* TRUE if 'a' is one of the letters x, X, i, I.
 * Arguments are parenthesized so that an expression argument such as
 * (c | 0x20) is compared as a whole; 'a' is still evaluated several
 * times, so do not pass expressions with side effects. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* returns TRUE if one of the special prefixes is here (s=string):
   a prefix letter (see above) immediately followed by an ID separator */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
/*
 * Length-limited strchr(): inspect at most 'len' chars of 'str', giving up
 * early when a NUL is reached.  Returns a pointer to the first occurrence
 * of 'c', or NULL if none was seen.  The match test comes before the NUL
 * test, so searching for c==0 returns the position of the NUL itself.
 */
static char* _strnchr(const char* str, int32_t len, char c) {
    U_ASSERT(str != 0 && len >= 0);
    for (; len != 0; --len, ++str) {
        const char here = *str;
        if (here == c) {
            return (char*) str;
        }
        if (here == 0) {
            break;
        }
    }
    return NULL;
}
/**
 * Find 'key' in 'list' and return its position, or -1 if it is absent.
 *
 * 'list' is expected to be laid out as two consecutive NULL-terminated
 * sections (entries, NULL, more entries, NULL); both sections are
 * searched.  The returned index counts from the start of 'list', so the
 * NULL that separates the sections occupies an index of its own.
 *
 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
 * COUNTRIES_3.
 */
static int16_t _findIndex(const char* const* list, const char* key)
{
    const char* const* cursor = list;
    int32_t section;

    for (section = 0; section < 2; ++section) {
        for (; *cursor != NULL; ++cursor) {
            if (uprv_strcmp(key, *cursor) == 0) {
                return (int16_t)(cursor - list);
            }
        }
        ++cursor;     /* step over the NULL that closes this section */
    }
    return -1;
}
1179 /* count the length of src while copying it to dest; return strlen(src) */
1180 static U_INLINE int32_t
1181 _copyCount(char *dest, int32_t destCapacity, const char *src) {
1188 return (int32_t)(src-anchor);
1190 if(destCapacity<=0) {
1191 return (int32_t)((src-anchor)+uprv_strlen(src));
1200 uloc_getCurrentCountryID(const char* oldID){
1201 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1203 return REPLACEMENT_COUNTRIES[offset];
1208 uloc_getCurrentLanguageID(const char* oldID){
1209 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1211 return REPLACEMENT_LANGUAGES[offset];
1216 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1217 * avoid duplicating code to handle the earlier locale ID pieces
1218 * in the functions for the later ones by
1219 * setting the *pEnd pointer to where they stopped parsing
1221 * TODO try to use this in Locale
1224 ulocimp_getLanguage(const char *localeID,
1225 char *language, int32_t languageCapacity,
1226 const char **pEnd) {
1229 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1231 /* if it starts with i- or x- then copy that prefix */
1232 if(_isIDPrefix(localeID)) {
1233 if(i<languageCapacity) {
1234 language[i]=(char)uprv_tolower(*localeID);
1236 if(i<languageCapacity) {
1243 /* copy the language as far as possible and count its length */
1244 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1245 if(i<languageCapacity) {
1246 language[i]=(char)uprv_tolower(*localeID);
1249 lang[i]=(char)uprv_tolower(*localeID);
1256 /* convert 3 character code to 2 character code if possible *CWB*/
1257 offset=_findIndex(LANGUAGES_3, lang);
1259 i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1270 ulocimp_getScript(const char *localeID,
1271 char *script, int32_t scriptCapacity,
1280 /* copy the second item as far as possible and count its length */
1281 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1285 /* If it's exactly 4 characters long, then it's a script and not a country. */
1289 *pEnd = localeID+idLen;
1291 if(idLen > scriptCapacity) {
1292 idLen = scriptCapacity;
1295 script[0]=(char)uprv_toupper(*(localeID++));
1297 for (i = 1; i < idLen; i++) {
1298 script[i]=(char)uprv_tolower(*(localeID++));
1308 ulocimp_getCountry(const char *localeID,
1309 char *country, int32_t countryCapacity,
1313 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1316 /* copy the country as far as possible and count its length */
1317 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1318 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
1319 cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
1324 /* the country should be either length 2 or 3 */
1325 if (idLen == 2 || idLen == 3) {
1326 UBool gotCountry = FALSE;
1327 /* convert 3 character code to 2 character code if possible *CWB*/
1329 offset=_findIndex(COUNTRIES_3, cnty);
1331 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1337 for (i = 0; i < idLen; i++) {
1338 if (i < countryCapacity) {
1339 country[i]=(char)uprv_toupper(localeID[i]);
1356 * @param needSeparator if true, then add leading '_' if any variants
1357 * are added to 'variant'
1360 _getVariantEx(const char *localeID,
1362 char *variant, int32_t variantCapacity,
1363 UBool needSeparator) {
1366 /* get one or more variant tags and separate them with '_' */
1367 if(_isIDSeparator(prev)) {
1368 /* get a variant string after a '-' or '_' */
1369 while(!_isTerminator(*localeID)) {
1370 if (needSeparator) {
1371 if (i<variantCapacity) {
1375 needSeparator = FALSE;
1377 if(i<variantCapacity) {
1378 variant[i]=(char)uprv_toupper(*localeID);
1379 if(variant[i]=='-') {
1388 /* if there is no variant tag after a '-' or '_' then look for '@' */
1392 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1393 ++localeID; /* point after the '@' */
1397 while(!_isTerminator(*localeID)) {
1398 if (needSeparator) {
1399 if (i<variantCapacity) {
1403 needSeparator = FALSE;
1405 if(i<variantCapacity) {
1406 variant[i]=(char)uprv_toupper(*localeID);
1407 if(variant[i]=='-' || variant[i]==',') {
1420 _getVariant(const char *localeID,
1422 char *variant, int32_t variantCapacity) {
1423 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1427 * Delete ALL instances of a variant from the given list of one or
1428 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1429 * @param variants the source string of one or more variants,
1430 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1431 * terminated; if it is, trailing zero will NOT be maintained.
1432 * @param variantsLen length of variants
1433 * @param toDelete variant to delete, without separators, e.g. "EURO"
1434 * or "PREEURO"; not zero terminated
1435 * @param toDeleteLen length of toDelete
1436 * @return number of characters deleted from variants
1439 _deleteVariant(char* variants, int32_t variantsLen,
1440 const char* toDelete, int32_t toDeleteLen)
1442 int32_t delta = 0; /* number of chars deleted */
1445 if (variantsLen < toDeleteLen) {
1448 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1449 (variantsLen == toDeleteLen ||
1450 (flag=(variants[toDeleteLen] == '_'))))
1452 int32_t d = toDeleteLen + (flag?1:0);
1455 if (variantsLen > 0) {
1456 uprv_memmove(variants, variants+d, variantsLen);
1459 char* p = _strnchr(variants, variantsLen, '_');
1464 variantsLen -= (int32_t)(p - variants);
1470 /* Keyword enumeration */
/* State behind a keyword UEnumeration (stored in UEnumeration.context). */
typedef struct UKeywordsContext {
    char* keywords;   /* heap copy of the keyword list: NUL-separated names, ended by an empty string */
    char* current;    /* next name to hand out; points into 'keywords' */
} UKeywordsContext;
1477 static void U_CALLCONV
1478 uloc_kw_closeKeywords(UEnumeration *enumerator) {
1479 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1480 uprv_free(enumerator->context);
1481 uprv_free(enumerator);
1484 static int32_t U_CALLCONV
1485 uloc_kw_countKeywords(UEnumeration *en, UErrorCode *status) {
1486 char *kw = ((UKeywordsContext *)en->context)->keywords;
1490 kw += uprv_strlen(kw)+1;
1495 static const char* U_CALLCONV
1496 uloc_kw_nextKeyword(UEnumeration* en,
1497 int32_t* resultLength,
1498 UErrorCode* status) {
1499 const char* result = ((UKeywordsContext *)en->context)->current;
1502 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1503 ((UKeywordsContext *)en->context)->current += len+1;
1508 *resultLength = len;
1513 static void U_CALLCONV
1514 uloc_kw_resetKeywords(UEnumeration* en,
1515 UErrorCode* status) {
1516 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1519 static const UEnumeration gKeywordsEnum = {
1522 uloc_kw_closeKeywords,
1523 uloc_kw_countKeywords,
1525 uloc_kw_nextKeyword,
1526 uloc_kw_resetKeywords
1529 U_CAPI UEnumeration* U_EXPORT2
1530 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1532 UKeywordsContext *myContext = NULL;
1533 UEnumeration *result = NULL;
1535 if(U_FAILURE(*status)) {
1538 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1539 /* Null pointer test */
1540 if (result == NULL) {
1541 *status = U_MEMORY_ALLOCATION_ERROR;
1544 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1545 myContext = uprv_malloc(sizeof(UKeywordsContext));
1546 if (myContext == NULL) {
1547 *status = U_MEMORY_ALLOCATION_ERROR;
1551 myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1552 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1553 myContext->keywords[keywordListSize] = 0;
1554 myContext->current = myContext->keywords;
1555 result->context = myContext;
1559 U_CAPI UEnumeration* U_EXPORT2
1560 uloc_openKeywords(const char* localeID,
1565 int32_t keywordsCapacity = 256;
1566 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1567 const char* tmpLocaleID;
1569 if(status==NULL || U_FAILURE(*status)) {
1573 if (_hasBCP47Extension(localeID)) {
1574 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1576 if (localeID==NULL) {
1577 localeID=uloc_getDefault();
1579 tmpLocaleID=localeID;
1582 /* Skip the language */
1583 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1584 if(_isIDSeparator(*tmpLocaleID)) {
1585 const char *scriptID;
1586 /* Skip the script if available */
1587 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1588 if(scriptID != tmpLocaleID+1) {
1589 /* Found optional script */
1590 tmpLocaleID = scriptID;
1592 /* Skip the Country */
1593 if (_isIDSeparator(*tmpLocaleID)) {
1594 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1595 if(_isIDSeparator(*tmpLocaleID)) {
1596 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
1601 /* keywords are located after '@' */
1602 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1603 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1607 return uloc_openKeywordList(keywords, i, status);
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE if any bit of 'mask' is set in 'options'.  Both arguments are
 * parenthesized so that compound expressions (e.g. A | B) are tested as a
 * whole. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The special ID "i-default"; deliberately not NUL-terminated, always
 * used together with I_DEFAULT_LENGTH. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
1624 * Canonicalize the given localeID, to level 1 or to level 2,
1625 * depending on the options. To specify level 1, pass in options=0.
1626 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1628 * This is the code underlying uloc_getName and uloc_canonicalize.
1631 _canonicalize(const char* localeID,
1633 int32_t resultCapacity,
1636 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1637 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1638 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1639 const char* origLocaleID;
1640 const char* tmpLocaleID;
1641 const char* keywordAssign = NULL;
1642 const char* separatorIndicator = NULL;
1643 const char* addKeyword = NULL;
1644 const char* addValue = NULL;
1646 char* variant = NULL; /* pointer into name, or NULL */
1648 if (U_FAILURE(*err)) {
1652 if (_hasBCP47Extension(localeID)) {
1653 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1655 if (localeID==NULL) {
1656 localeID=uloc_getDefault();
1658 tmpLocaleID=localeID;
1661 origLocaleID=tmpLocaleID;
1663 /* if we are doing a full canonicalization, then put results in
1664 localeBuffer, if necessary; otherwise send them to result. */
1665 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1666 (result == NULL || resultCapacity < sizeof(localeBuffer))) {
1667 name = localeBuffer;
1668 nameCapacity = sizeof(localeBuffer);
1671 nameCapacity = resultCapacity;
1674 /* get all pieces, one after another, and separate with '_' */
1675 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
1677 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1678 const char *d = uloc_getDefault();
1680 len = (int32_t)uprv_strlen(d);
1683 uprv_strncpy(name, d, len);
1685 } else if(_isIDSeparator(*tmpLocaleID)) {
1686 const char *scriptID;
1689 if(len<nameCapacity) {
1694 scriptSize=ulocimp_getScript(tmpLocaleID+1, name+len, nameCapacity-len, &scriptID);
1695 if(scriptSize > 0) {
1696 /* Found optional script */
1697 tmpLocaleID = scriptID;
1700 if (_isIDSeparator(*tmpLocaleID)) {
1701 /* If there is something else, then we add the _ */
1702 if(len<nameCapacity) {
1709 if (_isIDSeparator(*tmpLocaleID)) {
1710 const char *cntryID;
1711 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1, name+len, nameCapacity-len, &cntryID);
1712 if (cntrySize > 0) {
1713 /* Found optional country */
1714 tmpLocaleID = cntryID;
1717 if(_isIDSeparator(*tmpLocaleID)) {
1718 /* If there is something else, then we add the _ if we found country before.*/
1719 if (cntrySize > 0) {
1721 if(len<nameCapacity) {
1727 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID, name+len, nameCapacity-len);
1728 if (variantSize > 0) {
1731 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1737 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1738 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1741 char c = *tmpLocaleID;
1748 if (len<nameCapacity) {
1758 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1759 After this, tmpLocaleID either points to '@' or is NULL */
1760 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1761 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1762 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1765 /* Copy POSIX-style variant, if any [mr@FOO] */
1766 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1767 tmpLocaleID != NULL && keywordAssign == NULL) {
1769 char c = *tmpLocaleID;
1773 if (len<nameCapacity) {
1781 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1782 /* Handle @FOO variant if @ is present and not followed by = */
1783 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1784 int32_t posixVariantSize;
1785 /* Add missing '_' if needed */
1786 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1788 if(len<nameCapacity) {
1793 } while(fieldCount<2);
1795 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
1796 (UBool)(variantSize > 0));
1797 if (posixVariantSize > 0) {
1798 if (variant == NULL) {
1801 len += posixVariantSize;
1802 variantSize += posixVariantSize;
1806 /* Handle generic variants first */
1808 for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
1809 const char* variantToCompare = VARIANT_MAP[j].variant;
1810 int32_t n = (int32_t)uprv_strlen(variantToCompare);
1811 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1813 if (variantLen > 0) {
1814 if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
1817 addKeyword = VARIANT_MAP[j].keyword;
1818 addValue = VARIANT_MAP[j].value;
1822 if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
1827 /* Look up the ID in the canonicalization map */
1828 for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1829 const char* id = CANONICALIZE_MAP[j].id;
1830 int32_t n = (int32_t)uprv_strlen(id);
1831 if (len == n && uprv_strncmp(name, id, n) == 0) {
1832 if (n == 0 && tmpLocaleID != NULL) {
1833 break; /* Don't remap "" if keywords present */
1835 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1836 if (CANONICALIZE_MAP[j].keyword) {
1837 addKeyword = CANONICALIZE_MAP[j].keyword;
1838 addValue = CANONICALIZE_MAP[j].value;
1845 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1846 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1847 (!separatorIndicator || separatorIndicator > keywordAssign)) {
1848 if(len<nameCapacity) {
1853 len += _getKeywords(tmpLocaleID+1, '@', name+len, nameCapacity-len, NULL, 0, NULL, TRUE,
1854 addKeyword, addValue, err);
1855 } else if (addKeyword != NULL) {
1856 U_ASSERT(addValue != NULL);
1857 /* inelegant but works -- later make _getKeywords do this? */
1858 len += _copyCount(name+len, nameCapacity-len, "@");
1859 len += _copyCount(name+len, nameCapacity-len, addKeyword);
1860 len += _copyCount(name+len, nameCapacity-len, "=");
1861 len += _copyCount(name+len, nameCapacity-len, addValue);
1865 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1866 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1869 return u_terminateChars(result, resultCapacity, len, err);
1872 /* ### ID parsing API **************************************************/
1874 U_CAPI int32_t U_EXPORT2
1875 uloc_getParent(const char* localeID,
1877 int32_t parentCapacity,
1880 const char *lastUnderscore;
1883 if (U_FAILURE(*err))
1886 if (localeID == NULL)
1887 localeID = uloc_getDefault();
1889 lastUnderscore=uprv_strrchr(localeID, '_');
1890 if(lastUnderscore!=NULL) {
1891 i=(int32_t)(lastUnderscore-localeID);
1896 if(i>0 && parent != localeID) {
1897 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1899 return u_terminateChars(parent, parentCapacity, i, err);
1902 U_CAPI int32_t U_EXPORT2
1903 uloc_getLanguage(const char* localeID,
1905 int32_t languageCapacity,
1908 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1911 if (err==NULL || U_FAILURE(*err)) {
1915 if(localeID==NULL) {
1916 localeID=uloc_getDefault();
1919 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
1920 return u_terminateChars(language, languageCapacity, i, err);
1923 U_CAPI int32_t U_EXPORT2
1924 uloc_getScript(const char* localeID,
1926 int32_t scriptCapacity,
1931 if(err==NULL || U_FAILURE(*err)) {
1935 if(localeID==NULL) {
1936 localeID=uloc_getDefault();
1939 /* skip the language */
1940 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1941 if(_isIDSeparator(*localeID)) {
1942 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
1944 return u_terminateChars(script, scriptCapacity, i, err);
1947 U_CAPI int32_t U_EXPORT2
1948 uloc_getCountry(const char* localeID,
1950 int32_t countryCapacity,
1955 if(err==NULL || U_FAILURE(*err)) {
1959 if(localeID==NULL) {
1960 localeID=uloc_getDefault();
1963 /* Skip the language */
1964 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1965 if(_isIDSeparator(*localeID)) {
1966 const char *scriptID;
1967 /* Skip the script if available */
1968 ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
1969 if(scriptID != localeID+1) {
1970 /* Found optional script */
1971 localeID = scriptID;
1973 if(_isIDSeparator(*localeID)) {
1974 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
1977 return u_terminateChars(country, countryCapacity, i, err);
1980 U_CAPI int32_t U_EXPORT2
1981 uloc_getVariant(const char* localeID,
1983 int32_t variantCapacity,
1986 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1987 const char* tmpLocaleID;
1990 if(err==NULL || U_FAILURE(*err)) {
1994 if (_hasBCP47Extension(localeID)) {
1995 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1997 if (localeID==NULL) {
1998 localeID=uloc_getDefault();
2000 tmpLocaleID=localeID;
2003 /* Skip the language */
2004 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
2005 if(_isIDSeparator(*tmpLocaleID)) {
2006 const char *scriptID;
2007 /* Skip the script if available */
2008 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
2009 if(scriptID != tmpLocaleID+1) {
2010 /* Found optional script */
2011 tmpLocaleID = scriptID;
2013 /* Skip the Country */
2014 if (_isIDSeparator(*tmpLocaleID)) {
2015 const char *cntryID;
2016 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
2017 if (cntryID != tmpLocaleID+1) {
2018 /* Found optional country */
2019 tmpLocaleID = cntryID;
2021 if(_isIDSeparator(*tmpLocaleID)) {
2022 /* If there was no country ID, skip a possible extra IDSeparator */
2023 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
2026 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
2031 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2032 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2034 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2035 i=_getVariant(localeID+1, '@', variant, variantCapacity);
2038 return u_terminateChars(variant, variantCapacity, i, err);
2041 U_CAPI int32_t U_EXPORT2
2042 uloc_getName(const char* localeID,
2044 int32_t nameCapacity,
2047 return _canonicalize(localeID, name, nameCapacity, 0, err);
2050 U_CAPI int32_t U_EXPORT2
2051 uloc_getBaseName(const char* localeID,
2053 int32_t nameCapacity,
2056 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
2059 U_CAPI int32_t U_EXPORT2
2060 uloc_canonicalize(const char* localeID,
2062 int32_t nameCapacity,
2065 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
2068 U_CAPI const char* U_EXPORT2
2069 uloc_getISO3Language(const char* localeID)
2072 char lang[ULOC_LANG_CAPACITY];
2073 UErrorCode err = U_ZERO_ERROR;
2075 if (localeID == NULL)
2077 localeID = uloc_getDefault();
2079 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
2082 offset = _findIndex(LANGUAGES, lang);
2085 return LANGUAGES_3[offset];
2088 U_CAPI const char* U_EXPORT2
2089 uloc_getISO3Country(const char* localeID)
2092 char cntry[ULOC_LANG_CAPACITY];
2093 UErrorCode err = U_ZERO_ERROR;
2095 if (localeID == NULL)
2097 localeID = uloc_getDefault();
2099 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
2102 offset = _findIndex(COUNTRIES, cntry);
2106 return COUNTRIES_3[offset];
2109 U_CAPI uint32_t U_EXPORT2
2110 uloc_getLCID(const char* localeID)
2112 UErrorCode status = U_ZERO_ERROR;
2113 char langID[ULOC_FULLNAME_CAPACITY];
2115 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2116 if (U_FAILURE(status)) {
2120 return uprv_convertToLCID(langID, localeID, &status);
2123 U_CAPI int32_t U_EXPORT2
2124 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2128 const char *posix = uprv_convertToPosix(hostid, status);
2129 if (U_FAILURE(*status) || posix == NULL) {
2132 length = (int32_t)uprv_strlen(posix);
2133 if (length+1 > localeCapacity) {
2134 *status = U_BUFFER_OVERFLOW_ERROR;
2137 uprv_strcpy(locale, posix);
2142 /* ### Default locale **************************************************/
2144 U_CAPI const char* U_EXPORT2
2147 return locale_get_default();
/*
 * Sets the default locale by forwarding newDefaultLocale to
 * locale_set_default(), which is declared near the top of this file under
 * "Locale stuff from locid.cpp".
 * The incoming error code is tested with U_FAILURE first.
 * NOTE(review): the statement guarded by that test is not visible in this
 * listing; presumably the function returns without changing anything when
 * *err already indicates failure - confirm.
 */
2150 U_CAPI void U_EXPORT2
2151 uloc_setDefault(const char* newDefaultLocale,
2154 if (U_FAILURE(*err))
2156 /* the error code isn't currently used for anything by this function*/
2158 /* propagate change to C++ */
2159 locale_set_default(newDefaultLocale);
2163 * Returns a list of all language codes defined in ISO 639. This is a pointer
2164 * to an array of pointers to arrays of char. All of these pointers are owned
2165 * by ICU-- do not delete them, and do not write through them. The array is
2166 * terminated with a null pointer.
/*
 * Takes no arguments and returns a pointer to an array of constant C strings.
 * NOTE(review): the function body is not visible in this listing; presumably
 * it returns the LANGUAGES table, which the comments at the top of the file
 * say is returned directly to the user by some API - confirm.
 */
2168 U_CAPI const char* const* U_EXPORT2
2169 uloc_getISOLanguages()
2175 * Returns a list of all 2-letter country codes defined in ISO 3166. This is a
2176 * pointer to an array of pointers to arrays of char. All of these pointers are
2177 * owned by ICU-- do not delete them, and do not write through them. The array is
2178 * terminated with a null pointer.
/*
 * Takes no arguments and returns a pointer to an array of constant C strings.
 * NOTE(review): the function body is not visible in this listing; presumably
 * it returns the COUNTRIES table used by uloc_getISO3Country() - confirm.
 */
2180 U_CAPI const char* const* U_EXPORT2
2181 uloc_getISOCountries()
2187 /* this function to be moved into cstring.c later */
/* File-scope cache for the C runtime's decimal-separator character. It starts
   at 0. NOTE(review): the code that fills it in is not visible in this
   listing - presumably it is taken from the sprintf() probe below. */
2188 static char gDecimal = 0;
/*
 * strtod() variant for input that always uses '.' as the decimal separator,
 * even when the C runtime expects a different separator character.
 *
 * start: text to parse.
 * end:   receives a pointer into start just past the parsed number.
 * Returns the value produced by uprv_strtod().
 *
 * NOTE(review): on the translated path only the first 29 characters of start
 * are copied into the scratch buffer, so longer numeric text is cut off
 * before parsing.
 */
2193 _uloc_strtod(const char *start, char **end) {
2200 /* For machines that decide to change the decimal on you,
2201 and try to be too smart with localization.
2202 This normally should be just a '.'. */
/* Probe: formatting 1.0 with "%+1.1f" yields sign, digit, separator, digit,
   so the runtime's separator character lands at index 2 of rep. */
2203 sprintf(rep, "%+1.1f", 1.0);
/* Fast path: the runtime already uses '.', so parse the caller's text as is. */
2207 if(gDecimal == '.') {
2208 return uprv_strtod(start, end); /* fall through to OS */
/* Otherwise work on a private copy in which the first '.' is replaced by the
   runtime's separator.
   NOTE(review): uprv_strncpy does not terminate buf when start has 29 or more
   characters; the terminating store is not visible here - confirm. */
2210 uprv_strncpy(buf, start, 29);
2212 decimal = uprv_strchr(buf, '.');
2214 *decimal = gDecimal;
2216 return uprv_strtod(start, end); /* no decimal point */
2218 rv = uprv_strtod(buf, &myEnd);
/* Translate the end position found in the copy back to the same offset in the
   caller's string.
   NOTE(review): uloc_acceptLanguageFromHTTP calls this function with
   end == NULL; whether this store is guarded by a NULL test is not visible in
   this listing - confirm. */
2220 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
/* Padding member. NOTE(review): this appears to belong to the _acceptLangItem
   struct used by the Accept-Language code below (fields q and locale are read
   there); the struct header and its other members are not visible in this
   listing - confirm. */
2228 int32_t dummy; /* to avoid uninitialized memory copy from qsort */
/*
 * Comparison callback handed to uprv_sortArray() by
 * uloc_acceptLanguageFromHTTP().
 * a and b point to _acceptLangItem elements; context is not referenced in the
 * visible lines.
 * Ordering is by the q field first. The visible branch comment marks
 * "A > B" as -1, i.e. the item with the larger q sorts earlier. Items are
 * further compared by a case-insensitive comparison of their locale strings.
 * NOTE(review): the first q comparison and the condition guarding the string
 * comparison are not visible in this listing; presumably the string
 * comparison is only the tie-breaker for equal q - confirm.
 */
2232 static int32_t U_CALLCONV
2233 uloc_acceptLanguageCompare(const void *context, const void *a, const void *b)
2235 const _acceptLangItem *aa = (const _acceptLangItem*)a;
2236 const _acceptLangItem *bb = (const _acceptLangItem*)b;
2240 rc = -1; /* A > B */
2241 } else if(bb->q > aa->q) {
/* Locale names are compared without regard to case. */
2248 rc = uprv_stricmp(aa->locale, bb->locale);
2251 #if defined(ULOC_DEBUG)
2252 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2262 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
/*
 * Picks the best available locale for an HTTP Accept-Language header value.
 *
 * The header is split at commas into items. Each item's language tag is
 * duplicated and canonicalized, and its quality value is parsed from the text
 * after the ';' (items without a ';' count as 1.0, per the comment below).
 * The items are sorted with uloc_acceptLanguageCompare, their locale strings
 * are collected into an array, and the actual matching is delegated to
 * uloc_acceptLanguage().
 *
 * result / resultAvailable: output buffer and its capacity.
 * outResult:                passed through to uloc_acceptLanguage().
 * httpAcceptLanguage:       the header value to parse.
 * availableLocales:         enumeration of candidate locales.
 * status:                   set to U_MEMORY_ALLOCATION_ERROR when an
 *                           allocation fails.
 *
 * Items are stored in a 30-element stack array first and moved to heap memory
 * once that array is full.
 */
2265 U_CAPI int32_t U_EXPORT2
2266 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2267 const char *httpAcceptLanguage,
2268 UEnumeration* availableLocales,
2272 _acceptLangItem smallBuffer[30];
2274 char tmp[ULOC_FULLNAME_CAPACITY +1];
2276 const char *itemEnd;
2277 const char *paramEnd;
/* NOTE(review): this initializer dereferences httpAcceptLanguage before the
   status check below and before the loop's own NULL test (s&&*s), so a NULL
   header reaches uprv_strlen - confirm callers never pass NULL. */
2282 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2284 char *tempstr; /* Use for null pointer check */
/* Capacity of the item array, initially that of the stack buffer. */
2287 jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2288 if(U_FAILURE(*status)) {
/* One iteration per comma-separated item of the header. */
2292 for(s=httpAcceptLanguage;s&&*s;) {
/* NOTE(review): isspace() is given a plain char here and below; for bytes
   with the high bit set that is undefined behaviour on platforms where char
   is signed - consider casting to unsigned char. */
2293 while(isspace(*s)) /* eat space at the beginning */
2295 itemEnd=uprv_strchr(s,',');
2296 paramEnd=uprv_strchr(s,';');
2298 itemEnd = httpAcceptLanguage+l; /* end of string */
2300 if(paramEnd && paramEnd<itemEnd) {
2301 /* semicolon (;) is closer than end (,) */
2306 while(isspace(*t)) {
2312 while(isspace(*t)) {
/* Parse the quality value; no end pointer is requested. */
2315 j[n].q = (float)_uloc_strtod(t,NULL);
2317 /* no semicolon - it's 1.0 */
2322 /* eat spaces prior to semi */
/* NOTE(review): the loop guard (paramEnd>s) does not change while t moves
   backwards, so for an item consisting only of whitespace t could step
   before s - confirm this cannot happen. */
2323 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2325 /* Check for null pointer from uprv_strndup */
2326 tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
2327 if (tempstr == NULL) {
2328 *status = U_MEMORY_ALLOCATION_ERROR;
2331 j[n].locale = tempstr;
/* Store the canonical form instead whenever it differs from the raw tag. */
2332 uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2333 if(strcmp(j[n].locale,tmp)) {
2334 uprv_free(j[n].locale);
/* NOTE(review): no NULL check on this uprv_strdup result is visible in this
   listing - confirm. */
2335 j[n].locale=uprv_strdup(tmp);
2337 #if defined(ULOC_DEBUG)
2338 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2342 while(*s==',') { /* eat duplicate commas */
/* Growth: the first overflow moves the items from the stack array to a heap
   block of twice the capacity; later overflows reallocate that heap block. */
2346 if(j==smallBuffer) { /* overflowed the small buffer. */
2347 j = uprv_malloc(sizeof(j[0])*(jSize*2));
2349 uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2351 #if defined(ULOC_DEBUG)
2352 fprintf(stderr,"malloced at size %d\n", jSize);
/* NOTE(review): assigning the realloc result straight back to j loses the old
   block if the reallocation fails. */
2355 j = uprv_realloc(j, sizeof(j[0])*jSize*2);
2356 #if defined(ULOC_DEBUG)
2357 fprintf(stderr,"re-alloced at size %d\n", jSize);
2362 *status = U_MEMORY_ALLOCATION_ERROR;
/* Order the n parsed items with uloc_acceptLanguageCompare. */
2367 uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2368 if(U_FAILURE(*status)) {
/* Only a heap-allocated item array may be freed. */
2369 if(j != smallBuffer) {
2370 #if defined(ULOC_DEBUG)
2371 fprintf(stderr,"freeing j %p\n", j);
/* Flat array of locale-string pointers, in sorted order, for
   uloc_acceptLanguage(). */
2377 strs = uprv_malloc((size_t)(sizeof(strs[0])*n));
2378 /* Check for null pointer */
/* NOTE(review): unlike the other cleanup paths, no j != smallBuffer test is
   visible before this free; if j still points at the stack array this would
   free non-heap memory. The per-item locale strings also do not appear to be
   released on this path - confirm. */
2380 uprv_free(j); /* Free to avoid memory leak */
2381 *status = U_MEMORY_ALLOCATION_ERROR;
2385 #if defined(ULOC_DEBUG)
2386 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2388 strs[i]=j[i].locale;
/* Delegate the matching against availableLocales. */
2390 res = uloc_acceptLanguage(result, resultAvailable, outResult,
2391 (const char**)strs, n, availableLocales, status);
/* Only a heap-allocated item array may be freed. */
2396 if(j != smallBuffer) {
2397 #if defined(ULOC_DEBUG)
2398 fprintf(stderr,"freeing j %p\n", j);
/*
 * Chooses, from availableLocales, the best match for an ordered list of
 * acceptable locale IDs.
 *
 * Pass 1: each acceptList entry is compared for exact equality against every
 * available locale. The first hit is copied into result, *outResult is set to
 * ULOC_ACCEPT_VALID and the value of u_terminateChars() is returned. For
 * entries without a hit, the parent locale (uloc_getParent) is stored in
 * fallbackList.
 * Pass 2: working from longer to shorter lengths, each stored fallback whose
 * length equals the current length is compared against every available
 * locale. A hit is reported as ULOC_ACCEPT_FALLBACK; otherwise the fallback is
 * replaced by its own parent and tried again at a shorter length.
 * If nothing matches, *outResult is set to ULOC_ACCEPT_FAILED.
 *
 * result / resultAvailable: output buffer and its capacity.
 * acceptList / acceptListCount: candidate IDs in preference order.
 * availableLocales: enumeration that is reset after each full scan.
 * status: set to U_MEMORY_ALLOCATION_ERROR if fallbackList cannot be allocated.
 *
 * NOTE(review): the writes to *outResult appear unconditional in the visible
 * lines; whether they are guarded by a NULL test is not visible - confirm.
 */
2406 U_CAPI int32_t U_EXPORT2
2407 uloc_acceptLanguage(char *result, int32_t resultAvailable,
2408 UAcceptResult *outResult, const char **acceptList,
2409 int32_t acceptListCount,
2410 UEnumeration* availableLocales,
2416 char tmp[ULOC_FULLNAME_CAPACITY+1];
2418 char **fallbackList;
2419 if(U_FAILURE(*status)) {
/* One fallback slot per accept-list entry; each slot owns a heap string. */
2422 fallbackList = uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount));
2423 if(fallbackList==NULL) {
2424 *status = U_MEMORY_ALLOCATION_ERROR;
/* Pass 1: exact matches, in the caller's preference order. */
2427 for(i=0;i<acceptListCount;i++) {
2428 #if defined(ULOC_DEBUG)
2429 fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2431 while((l=uenum_next(availableLocales, NULL, status))) {
2432 #if defined(ULOC_DEBUG)
2433 fprintf(stderr," %s\n", l);
2435 len = (int32_t)uprv_strlen(l);
2436 if(!uprv_strcmp(acceptList[i], l)) {
2438 *outResult = ULOC_ACCEPT_VALID;
2440 #if defined(ULOC_DEBUG)
2441 fprintf(stderr, "MATCH! %s\n", l);
/* Copies at most the buffer capacity and writes no terminator itself;
   termination and overflow reporting are left to u_terminateChars() below. */
2444 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
/* Release the fallbacks recorded so far. NOTE(review): the bounds of the
   enclosing loop are not visible in this listing. */
2447 uprv_free(fallbackList[j]);
2449 uprv_free(fallbackList);
2450 return u_terminateChars(result, resultAvailable, len, status);
/* Rewind the enumeration so the next entry scans it from the start. */
2456 uenum_reset(availableLocales, status);
2457 /* save off parent info */
2458 if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
/* NOTE(review): the uprv_strdup result is stored without a visible NULL check. */
2459 fallbackList[i] = uprv_strdup(tmp);
/* Pass 2: try the recorded parents, longest first. NOTE(review): the code
   that computes maxLen is not visible in this listing. */
2465 for(maxLen--;maxLen>0;maxLen--) {
2466 for(i=0;i<acceptListCount;i++) {
/* Only fallbacks whose length equals the current maxLen are tried this round. */
2467 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2468 #if defined(ULOC_DEBUG)
2469 fprintf(stderr,"Try: [%s]", fallbackList[i]);
2471 while((l=uenum_next(availableLocales, NULL, status))) {
2472 #if defined(ULOC_DEBUG)
2473 fprintf(stderr," %s\n", l);
2475 len = (int32_t)uprv_strlen(l);
2476 if(!uprv_strcmp(fallbackList[i], l)) {
2478 *outResult = ULOC_ACCEPT_FALLBACK;
2480 #if defined(ULOC_DEBUG)
2481 fprintf(stderr, "fallback MATCH! %s\n", l);
/* Same copy-then-terminate scheme as for an exact match. */
2484 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2486 for(j=0;j<acceptListCount;j++) {
2487 uprv_free(fallbackList[j]);
2489 uprv_free(fallbackList);
2490 return u_terminateChars(result, resultAvailable, len, status);
2493 uenum_reset(availableLocales, status);
/* No hit at this length: replace the fallback with its own parent so it can
   be retried at a shorter length, or release it when there is no parent. */
2495 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2496 uprv_free(fallbackList[i]);
/* NOTE(review): the uprv_strdup result is stored without a visible NULL check. */
2497 fallbackList[i] = uprv_strdup(tmp);
2499 uprv_free(fallbackList[i]);
2505 *outResult = ULOC_ACCEPT_FAILED;
/* Nothing matched: release every remaining fallback string and the list. */
2508 for(i=0;i<acceptListCount;i++) {
2509 uprv_free(fallbackList[i]);
2511 uprv_free(fallbackList);