/* Allocate room for n objects of `type` with GC_MALLOC and cast the result to
 * type*.
 * NOTE(review): (n)*sizeof(type) is computed without an overflow check. */
5 #define New_N(type,n) ((type*)GC_MALLOC((n)*sizeof(type)))
9 #ifdef HAVE_LANGINFO_CODESET
/* File-level locale hint. Starts at 0; wc_locale_to_ces() assigns one of the
 * WC_LOCALE_* values to it when it sees an "euc" codeset suffix. */
13 wc_locale WcLocale = 0;
/* Language-prefix -> default CES table.
 * wc_locale_to_ces() walks it until an entry with a null `lang` and compares
 * only the first two characters of the lowercased locale against `lang`.
 * The element type and the terminating entry are outside this excerpt.
 * NOTE(review): Hebrew appears only under the code "iw"; no "he" entry is
 * among the visible rows -- confirm whether that is intended. */
18 } lang_ces_table[] = {
19 { "cs", WC_CES_ISO_8859_2 }, /* cs_CZ */
20 { "el", WC_CES_ISO_8859_7 }, /* el_GR */
21 { "iw", WC_CES_ISO_8859_8 }, /* iw_IL */
22 { "ja", WC_CES_EUC_JP }, /* ja_JP */
23 { "ko", WC_CES_EUC_KR }, /* ko_KR */
24 { "hu", WC_CES_ISO_8859_2 }, /* hu_HU */
25 { "pl", WC_CES_ISO_8859_2 }, /* pl_PL */
26 { "ro", WC_CES_ISO_8859_2 }, /* ro_RO */
27 { "ru", WC_CES_ISO_8859_5 }, /* ru_SU */
28 { "sk", WC_CES_ISO_8859_2 }, /* sk_SK */
29 { "sl", WC_CES_ISO_8859_2 }, /* sl_CS */
30 { "tr", WC_CES_ISO_8859_9 }, /* tr_TR */
31 { "zh", WC_CES_EUC_CN }, /* zh_CN */
/*
 * wc_guess_charset: look up a charset name, falling back to a caller default.
 *   charset - charset name; tested for NULL and for the empty string.
 *   orig    - value returned when wc_charset_to_ces() yields 0.
 * The statement executed for a NULL/empty name is not visible in this excerpt
 * (presumably it returns orig -- confirm).
 */
36 wc_guess_charset(char *charset, wc_ces orig)
40 if (charset == NULL || *charset == '\0')
42 guess = wc_charset_to_ces(charset);
/* A zero lookup result is treated as "no match". */
43 return guess ? guess : orig;
/*
 * wc_guess_charset_short: same shape as wc_guess_charset(), but the lookup
 * goes through wc_charset_short_to_ces().
 *   charset - name to look up; tested for NULL and for the empty string.
 *   orig    - value returned when the lookup yields 0.
 * The statement executed for a NULL/empty name is not visible in this excerpt
 * (presumably it returns orig -- confirm).
 */
47 wc_guess_charset_short(char *charset, wc_ces orig)
51 if (charset == NULL || *charset == '\0')
53 guess = wc_charset_short_to_ces(charset);
/* A zero lookup result is treated as "no match". */
54 return guess ? guess : orig;
/*
 * wc_guess_locale_charset: derive a CES from a locale string, falling back to
 * a caller default.
 *   locale - locale string; tested for NULL and for the empty string.
 *   orig   - value returned when wc_locale_to_ces() yields 0.
 * The statement executed for a NULL/empty locale is not visible in this
 * excerpt (presumably it returns orig -- confirm).
 */
58 wc_guess_locale_charset(char *locale, wc_ces orig)
62 if (locale == NULL || *locale == '\0')
64 guess = wc_locale_to_ces(locale);
/* A zero lookup result is treated as "no match". */
65 return guess ? guess : orig;
/*
 * wc_charset_to_ces: translate a charset name into a WC_CES_* value.
 *
 * The name is first reduced to a short lowercase key (see the copy loop
 * below) and the key is then matched by prefix with strncmp().  The
 * declarations, the switch headers and the final fall-through return are
 * outside this excerpt; callers in this file treat a zero result as
 * "no match".
 *
 * NOTE(review): tolower() is handed *p directly.  If p is a plain char
 * pointer like `charset`, a byte above 0x7f is negative on signed-char
 * platforms, which <ctype.h> does not allow -- consider casting to
 * unsigned char, as the comparison in the copy loop already does.
 */
69 wc_charset_to_ces(char *charset)
/* Detects a leading "x-" (either case of the x); the action taken is on a
 * line not shown here. */
75 if (tolower(*p) == 'x' && *(p+1) == '-')
/* Build the key: stop after 15 stored bytes, skip every byte <= 0x20 as well
 * as '_' and '-', and lowercase whatever is kept. */
77 for (n = 0; *p && n < 15; p++) {
78 if ((unsigned char)*p > 0x20 && *p != '_' && *p != '-')
79 buf[n++] = tolower(*p);
/* From here on the tests read p; presumably p has been re-pointed at the
 * normalized key by now -- confirm against the full file. */
/* "euc...": a following j/c/t/k picks the EUC variant directly; the
 * WC_LOCALE_* cases pick one per locale instead (their switch expression is
 * not visible), and EUC-JP is the last resort. */
85 if (! strncmp(p, "euc", 3)) {
88 case 'j': return WC_CES_EUC_JP;
89 case 'c': return WC_CES_EUC_CN;
90 case 't': return WC_CES_EUC_TW;
91 case 'k': return WC_CES_EUC_KR;
94 case WC_LOCALE_JA_JP: return WC_CES_EUC_JP;
95 case WC_LOCALE_ZH_CN: return WC_CES_EUC_CN;
96 case WC_LOCALE_ZH_TW: return WC_CES_EUC_TW;
97 case WC_LOCALE_ZH_HK: return WC_CES_EUC_CN;
98 case WC_LOCALE_KO_KR: return WC_CES_EUC_KR;
100 return WC_CES_EUC_JP;
/* "iso2022...": "jp2" and "jp3" are tested before the plain ISO-2022-JP
 * return; ISO-2022-JP is also the result when no visible case applies. */
104 if (! strncmp(p, "iso2022", 7)) {
108 if (! strncmp(p, "jp2", 3))
109 return WC_CES_ISO_2022_JP_2;
110 if (! strncmp(p, "jp3", 3))
111 return WC_CES_ISO_2022_JP_3;
112 return WC_CES_ISO_2022_JP;
113 case 'c': return WC_CES_ISO_2022_CN;
114 case 'k': return WC_CES_ISO_2022_KR;
116 return WC_CES_ISO_2022_JP;
/* "iso8859...": a part number n in 1..16 other than 12 is OR-ed into
 * WC_CES_E_ISO_8859; any other n gives ISO-8859-1.  How n is obtained is not
 * visible here. */
117 } else if (! strncmp(p, "iso8859", 7)) {
119 if (n >= 1 && n <= 16 && n != 12)
120 return (WC_CES_E_ISO_8859 | n);
121 return WC_CES_ISO_8859_1;
125 if (! strncmp(p, "johab", 5))
127 if (! strncmp(p, "jis", 3))
128 return WC_CES_ISO_2022_JP;
/* Prefix matching: the longer "...x0213" spellings must be tested before the
 * plain Shift_JIS ones, which are prefixes of them. */
131 if (! strncmp(p, "shiftjisx0213", 13) ||
132 ! strncmp(p, "sjisx0213", 9))
133 return WC_CES_SHIFT_JISX0213;
134 if (! strncmp(p, "shiftjis", 8) ||
135 ! strncmp(p, "sjis", 4))
136 return WC_CES_SHIFT_JIS;
/* Same ordering concern: "gbk2k" is tested before its prefix "gbk". */
139 if (! strncmp(p, "gb18030", 7) ||
140 ! strncmp(p, "gbk2k", 5))
141 return WC_CES_GB18030;
142 if (! strncmp(p, "gbk", 3))
144 if (! strncmp(p, "gb2312", 6))
145 return WC_CES_EUC_CN;
/* "big5hkscs" is tested before its prefix "big5". */
148 if (! strncmp(p, "big5hkscs", 9))
150 if (! strncmp(p, "big5", 4))
154 if (! strncmp(p, "hz", 2))
155 return WC_CES_HZ_GB_2312;
156 if (! strncmp(p, "hkscs", 5))
160 if (! strncmp(p, "koi8r", 5))
161 return WC_CES_KOI8_R;
162 if (! strncmp(p, "koi8u", 5))
163 return WC_CES_KOI8_U;
/* Both KS X 1001 and KS C 5601 names resolve to EUC-KR. */
164 if (! strncmp(p, "ksx1001", 7))
165 return WC_CES_EUC_KR;
166 if (! strncmp(p, "ksc5601", 7))
167 return WC_CES_EUC_KR;
170 if (! strncmp(p, "tis620", 6))
171 return WC_CES_TIS_620;
172 if (! strncmp(p, "tcvn", 4))
173 return WC_CES_TCVN_5712;
176 if (! strncmp(p, "next", 4))
177 return WC_CES_NEXTSTEP;
/* A "viet" prefix is followed by a second test for "tcvn"; the lines that
 * advance p between the two tests are not shown. */
180 if (! strncmp(p, "viet", 4)) {
182 if (! strncmp(p, "tcvn", 4))
183 return WC_CES_TCVN_5712;
185 if (! strncmp(p, "viscii", 6))
186 return WC_CES_VISCII_11;
187 if (! strncmp(p, "vps", 3))
192 if (! strncmp(p, "utf8", 4))
194 if (! strncmp(p, "utf7", 4))
197 if (! strncmp(p, "uhc", 3))
199 if (! strncmp(p, "ujis", 4))
200 return WC_CES_EUC_JP;
201 if (! strncmp(p, "usascii", 7))
202 return WC_CES_US_ASCII;
205 if (! strncmp(p, "ascii", 5))
206 return WC_CES_US_ASCII;
209 if (! strncmp(p, "cngb", 4))
210 return WC_CES_EUC_CN;
/* Numeric code-page dispatch; the switch expression (presumably the number
 * following a "cp"-style prefix -- confirm) is outside this excerpt. */
215 case 437: return WC_CES_CP437;
216 case 737: return WC_CES_CP737;
217 case 775: return WC_CES_CP775;
218 case 850: return WC_CES_CP850;
219 case 852: return WC_CES_CP852;
220 case 855: return WC_CES_CP855;
221 case 856: return WC_CES_CP856;
222 case 857: return WC_CES_CP857;
223 case 860: return WC_CES_CP860;
224 case 861: return WC_CES_CP861;
225 case 862: return WC_CES_CP862;
226 case 863: return WC_CES_CP863;
227 case 864: return WC_CES_CP864;
228 case 865: return WC_CES_CP865;
229 case 866: return WC_CES_CP866;
230 case 869: return WC_CES_CP869;
231 case 874: return WC_CES_CP874;
232 case 932: return WC_CES_CP932; /* CP932 = Shift_JIS */
233 case 936: return WC_CES_CP936; /* CP936 = GBK > EUC_CN */
234 case 949: return WC_CES_CP949; /* CP949 = UHC > EUC_KR */
235 case 950: return WC_CES_CP950; /* CP950 = Big5 */
236 case 1006: return WC_CES_CP1006;
237 case 1250: return WC_CES_CP1250;
238 case 1251: return WC_CES_CP1251;
239 case 1252: return WC_CES_CP1252;
240 case 1253: return WC_CES_CP1253;
241 case 1254: return WC_CES_CP1254;
242 case 1255: return WC_CES_CP1255;
243 case 1256: return WC_CES_CP1256;
244 case 1257: return WC_CES_CP1257;
245 case 1258: return WC_CES_CP1258;
/* Note the missing '!': this condition is true when the key does NOT start
 * with "windows".  The statement it guards is not shown. */
249 if (strncmp(p, "windows", 7))
251 if (! strncmp(p, "31j", 3))
/* Windows code pages 1250..1258 map to the same CES values as in the
 * code-page dispatch above. */
255 case 1250: return WC_CES_CP1250;
256 case 1251: return WC_CES_CP1251;
257 case 1252: return WC_CES_CP1252;
258 case 1253: return WC_CES_CP1253;
259 case 1254: return WC_CES_CP1254;
260 case 1255: return WC_CES_CP1255;
261 case 1256: return WC_CES_CP1256;
262 case 1257: return WC_CES_CP1257;
263 case 1258: return WC_CES_CP1258;
/*
 * wc_charset_short_to_ces: translate a charset name, including abbreviated
 * spellings, into a WC_CES_* value.
 *
 * The full-name lookup wc_charset_to_ces() is called first; how its result is
 * used is on a line outside this excerpt.  The name is then reduced to a
 * lowercase key with the same rules as in wc_charset_to_ces().  Almost all of
 * the conditions that select the returns below are missing from this excerpt,
 * so they are left unannotated.
 *
 * NOTE(review): tolower() is handed *p directly.  If p is a plain char
 * pointer like `charset`, a byte above 0x7f is negative on signed-char
 * platforms, which <ctype.h> does not allow -- consider casting to
 * unsigned char.
 */
271 wc_charset_short_to_ces(char *charset)
278 ces = wc_charset_to_ces(charset);
/* Build the key: stop after 15 stored bytes, skip every byte <= 0x20 as well
 * as '_' and '-', and lowercase whatever is kept. */
282 for (n = 0; *p && n < 15; p++) {
283 if ((unsigned char)*p > 0x20 && *p != '_' && *p != '-')
284 buf[n++] = tolower(*p);
291 case 'j': return WC_CES_EUC_JP;
292 case 'c': return WC_CES_EUC_CN;
293 case 't': return WC_CES_EUC_TW;
294 case 'k': return WC_CES_EUC_KR;
296 return WC_CES_EUC_JP;
304 return WC_CES_ISO_2022_JP_2;
306 return WC_CES_ISO_2022_JP_3;
307 return WC_CES_ISO_2022_JP;
309 return WC_CES_SHIFT_JIS;
311 return WC_CES_EUC_CN;
317 return WC_CES_HZ_GB_2312;
320 return WC_CES_KOI8_R;
321 return WC_CES_ISO_2022_KR;
/* A part number n in 1..16 other than 12 is OR-ed into WC_CES_E_ISO_8859;
 * any other n gives ISO-8859-1. */
324 if (n >= 1 && n <= 16 && n != 12)
325 return (WC_CES_E_ISO_8859 | n);
326 return WC_CES_ISO_8859_1;
329 return WC_CES_TCVN_5712;
330 return WC_CES_TIS_620;
332 return WC_CES_NEXTSTEP;
336 return WC_CES_VISCII_11;
344 return WC_CES_US_ASCII;
346 return WC_CES_ISO_2022_CN;
/* Code pages 1250..1258; the switch expression is not visible here. */
350 case 1250: return WC_CES_CP1250;
351 case 1251: return WC_CES_CP1251;
352 case 1252: return WC_CES_CP1252;
353 case 1253: return WC_CES_CP1253;
354 case 1254: return WC_CES_CP1254;
355 case 1255: return WC_CES_CP1255;
356 case 1256: return WC_CES_CP1256;
357 case 1257: return WC_CES_CP1257;
358 case 1258: return WC_CES_CP1258;
/*
 * wc_locale_to_ces: choose a WC_CES_* value for a locale string.
 *
 * Order of decisions visible in this excerpt:
 *   1. the exact locale "C" gives US-ASCII;
 *   2. with HAVE_LANGINFO_CODESET, a non-NULL nl_langinfo(CODESET) result
 *      other than "US-ASCII" is passed straight to wc_charset_to_ces();
 *   3. otherwise the part of the locale before '.' is lowercased into buf, an
 *      "euc" codeset records a WC_LOCALE_* hint in WcLocale, and the codeset
 *      text is resolved with wc_charset_to_ces();
 *   4. failing that, buf is matched against a few names and lang_ces_table,
 *      with ISO-8859-1 as the final default.
 *
 * NOTE(review): step 2 returns before the locale string is parsed, so on that
 * path the WcLocale assignments in step 3 are not reached -- confirm that
 * this is intended.
 * NOTE(review): the first test reads *p with no NULL check in view;
 * wc_guess_locale_charset() checks for NULL before calling, other callers
 * should be verified.
 */
368 wc_locale_to_ces(char *locale)
374 if (*p == 'C' && *(p+1) == '\0')
375 return WC_CES_US_ASCII;
376 #ifdef HAVE_LANGINFO_CODESET
378 char *cs = nl_langinfo(CODESET);
/* strcmp() is non-zero when the codeset differs from "US-ASCII". */
379 if (cs && strcmp(cs, "US-ASCII"))
380 return wc_charset_to_ces(cs);
/* Copy the language/territory part: stops at '.', at end of string, or after
 * 5 stored bytes; bytes <= 0x20 are skipped and the rest lowercased.
 * NOTE(review): tolower() receives *p as-is; if p is a plain char pointer a
 * byte above 0x7f is negative on signed-char platforms -- consider an
 * unsigned char cast. */
383 for (n = 0; *p && *p != '.' && n < 5; p++) {
384 if ((unsigned char)*p > 0x20)
385 buf[n++] = tolower(*p);
/* Case-insensitive whole-string comparison against "euc"; presumably p has
 * been advanced past the '.' by this point -- confirm. */
390 if (! strcasecmp(p, "euc")) {
393 WcLocale = WC_LOCALE_JA_JP;
396 WcLocale = WC_LOCALE_KO_KR;
399 if (!strcmp(buf, "zh_tw"))
400 WcLocale = WC_LOCALE_ZH_TW;
401 else if (!strcmp(buf, "zh_hk"))
402 WcLocale = WC_LOCALE_ZH_HK;
404 WcLocale = WC_LOCALE_ZH_CN;
411 return wc_charset_to_ces(p);
/* NOTE(review): the copy loop above stores at most 5 characters, so unless
 * buf is refilled on a line not shown, it can never equal the 8-character
 * string "japanese"; such a locale would instead reach the table scan below
 * and match "ja". */
414 if (!strcmp(buf, "japanese"))
415 return WC_CES_SHIFT_JIS;
416 if (!strcmp(buf, "zh_tw") ||
417 !strcmp(buf, "zh_hk"))
/* Table scan: only the first two characters of buf are compared, and the
 * scan ends at the entry whose lang is null. */
419 for (n = 0; lang_ces_table[n].lang; n++) {
420 if (!strncmp(buf, lang_ces_table[n].lang, 2))
421 return lang_ces_table[n].ces;
423 return WC_CES_ISO_8859_1;
/*
 * wc_ces_to_charset: return the `name` field of the WcCesInfo entry selected
 * by WC_CES_INDEX(ces).
 * WC_CES_WTF is special-cased; the value returned for it is on a line outside
 * this excerpt.
 * NOTE(review): the index is used without a range check in the visible code;
 * presumably callers validate ces first (see wc_check_ces) -- confirm.
 */
427 wc_ces_to_charset(wc_ces ces)
429 if (ces == WC_CES_WTF)
431 return WcCesInfo[WC_CES_INDEX(ces)].name;
/*
 * wc_ces_to_charset_desc: return a description string for a CES.
 * WC_CES_WTF yields the fixed text "W3M Transfer Format"; every other value
 * yields the `desc` field of the WcCesInfo entry at WC_CES_INDEX(ces).
 * NOTE(review): the index is used without a range check in the visible code;
 * presumably callers validate ces first (see wc_check_ces) -- confirm.
 */
435 wc_ces_to_charset_desc(wc_ces ces)
437 if (ces == WC_CES_WTF)
438 return "W3M Transfer Format";
439 return WcCesInfo[WC_CES_INDEX(ces)].desc;
/*
 * wc_guess_8bit_charset: map selected CES values to another CES.
 * Visible mappings:
 *   ISO-2022-JP, -JP-2, -JP-3  -> EUC-JP
 *   ISO-2022-KR                -> EUC-KR
 *   ISO-2022-CN, HZ-GB-2312    -> EUC-CN
 *   US-ASCII                   -> ISO-8859-1
 * The switch header and the result for any other value are outside this
 * excerpt (presumably orig is returned unchanged -- confirm).
 */
443 wc_guess_8bit_charset(wc_ces orig)
446 case WC_CES_ISO_2022_JP:
447 case WC_CES_ISO_2022_JP_2:
448 case WC_CES_ISO_2022_JP_3:
449 return WC_CES_EUC_JP;
450 case WC_CES_ISO_2022_KR:
451 return WC_CES_EUC_KR;
452 case WC_CES_ISO_2022_CN:
453 case WC_CES_HZ_GB_2312:
454 return WC_CES_EUC_CN;
455 case WC_CES_US_ASCII:
456 return WC_CES_ISO_8859_1;
/*
 * wc_check_ces: report whether ces names an entry of WcCesInfo.
 * Returns non-zero only when WC_CES_INDEX(ces) is at most WC_CES_END and the
 * entry stored at that index carries exactly this id.
 * NOTE(review): the bound is inclusive; confirm that WC_CES_END is the last
 * valid index of WcCesInfo rather than its element count.
 */
462 wc_check_ces(wc_ces ces)
464 size_t i = WC_CES_INDEX(ces);
/* && short-circuits, so the table is only read once the bound has passed. */
466 return (i <= WC_CES_END && WcCesInfo[i].id == ces);
/*
 * wc_ces_list_cmp: qsort() comparator for wc_ces_list elements.
 * Orders two entries by their `desc` strings, ignoring case (strcasecmp).
 * a, b - pointers to the two wc_ces_list elements being compared.
 */
470 wc_ces_list_cmp(const void *a, const void *b)
472 return strcasecmp(((wc_ces_list *)a)->desc, ((wc_ces_list *)b)->desc);
/* Storage for the array built by wc_get_ces_list(); NULL until it is built. */
475 static wc_ces_list *list = NULL;
/*
 * wc_get_ces_list: build an array describing the named entries of WcCesInfo,
 * sorted by description.
 *
 * Visible steps: walk WcCesInfo until an entry whose id is 0, considering
 * only entries with a non-NULL name; allocate n + 1 elements with New_N;
 * copy id, name and desc of each named entry; sort the first n elements with
 * wc_ces_list_cmp.  The counter updates, the use of the extra element
 * (presumably a terminator -- confirm), any reuse of an already built list
 * and the return statement are outside this excerpt.
 */
478 wc_get_ces_list(void)
/* First pass: examines the named entries (the counting statement itself is
 * not shown). */
485 for (info = WcCesInfo, n = 0; info->id; info++) {
486 if (info->name != NULL)
489 list = New_N(wc_ces_list, n + 1);
/* Second pass: copy the fields of every named entry. */
490 for (info = WcCesInfo, n = 0; info->id; info++) {
491 if (info->name != NULL) {
492 list[n].id = info->id;
493 list[n].name = info->name;
494 list[n].desc = info->desc;
/* Only the n copied elements take part in the sort. */
501 qsort(list, n, sizeof(wc_ces_list), wc_ces_list_cmp);