/*
 * Width value for every possible first byte of a WTF sequence.
 * wtf_width() returns WTF_WIDTH_MAP[*p] unchanged.
 * Lead bytes 0x90-0x9b start out as 0; code that tests
 * !WTF_WIDTH_MAP[...] treats those sequences as zero-width
 * (presumably combining characters -- confirm).
 * wtf_init() writes to entries in the range 0xa1-0xff at run time,
 * which is why the table is not const.
 */
18 wc_uint8 WTF_WIDTH_MAP[ 0x100 ] = {
/* 0x00 - 0x7f */
19 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
20 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
21 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
22 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
/* 0x80 - 0x9f: lead bytes; 0x90 - 0x9b are the zero-width ones */
24 1,2,1,2,1,1,1,2, 1,2,1,2,1,1,1,1, 0,0,0,0,0,0,0,0, 0,0,0,0,1,1,1,1,
/* 0xa0 - 0xff */
25 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
26 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
27 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
/*
 * Length in bytes of a WTF sequence, indexed by its first byte.
 * wtf_len1() returns WTF_LEN_MAP[*p], and the string walkers advance
 * their pointer by this amount.
 * Initial values: every byte maps to 1 except lead bytes 0x80-0x8b and
 * 0x90-0x9b, which map to 3, 4 or 6.
 */
30 wc_uint8 WTF_LEN_MAP[ 0x100 ] = {
/* 0x00 - 0x7f */
31 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
32 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
33 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
34 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
/* 0x80 - 0x9f: the second half (0x90..) repeats the lengths of the first */
36 3,4,3,4,3,3,3,4, 4,4,6,6,1,1,1,1, 3,4,3,4,3,3,3,4, 4,4,6,6,1,1,1,1,
/* 0xa0 - 0xff */
37 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
38 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
39 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
/*
 * Type value for every possible first byte of a WTF sequence.
 * wtf_type() returns WTF_TYPE_MAP[*p] unchanged.
 * wtf_init() overwrites entries 0xa1-0xff with WTF_TYPE_WCHAR1W or
 * WTF_TYPE_WCHAR1 depending on whether wtf_gr_ccs is wide.
 * NOTE(review): the numeric values are presumably the WTF_TYPE_*
 * constants from the header -- confirm before changing any entry.
 */
42 wc_uint8 WTF_TYPE_MAP[ 0x100 ] = {
/* 0x00 - 0x1f */
43 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
/* 0x20 - 0x7f (0x7f is the only non-zero entry) */
44 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
45 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
46 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1,
/* 0x80 - 0x8f */
48 2, 0xA,2, 0xA, 2, 0x12,2, 0xA, 2, 0xA,2, 0xA, 0x20,0x20,0x20,0x20,
/* 0x90 - 0x9f */
49 4, 0xC,4, 0xC, 4, 0x20,4, 0xC, 4, 0xC,4, 0xC, 0x20,0x20,0x20,0x20,
/* 0xa0 - 0xff */
50 0x20,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
51 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
52 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
/*
 * Maps a WTF lead byte to the upper part of a wc_ccs value.
 * wtf_parse1() indexes this table with (lead byte - 0x80) and shifts
 * the entry left by 8 to rebuild cc.ccs, which is why every entry is
 * stored as WC_CCS_A_* >> 8 (the values fit in 16 bits that way).
 * The second group holds the *_C counterparts of the first group, in
 * the same order; the slot matching WC_CCS_A_UNKNOWN has no *_C form
 * and is 0.
 */
55 static wc_uint16 CCS_MAP[ 33 ] = {
56 WC_CCS_A_CS94 >> 8, WC_CCS_A_CS94W >> 8,
57 WC_CCS_A_CS96 >> 8, WC_CCS_A_CS96W >> 8,
58 WC_CCS_A_CS942 >> 8, WC_CCS_A_UNKNOWN >> 8,
59 WC_CCS_A_PCS >> 8, WC_CCS_A_PCSW >> 8,
60 WC_CCS_A_WCS16 >> 8, WC_CCS_A_WCS16W >> 8,
61 WC_CCS_A_WCS32 >> 8, WC_CCS_A_WCS32W >> 8,
/* *_C variants, same ordering as above */
64 WC_CCS_A_CS94_C >> 8, WC_CCS_A_CS94W_C >> 8,
65 WC_CCS_A_CS96_C >> 8, WC_CCS_A_CS96W_C >> 8,
66 WC_CCS_A_CS942_C >> 8, 0,
67 WC_CCS_A_PCS_C >> 8, WC_CCS_A_PCSW_C >> 8,
68 WC_CCS_A_WCS16_C >> 8, WC_CCS_A_WCS16W_C >> 8,
69 WC_CCS_A_WCS32_C >> 8, WC_CCS_A_WCS32W_C >> 8,
/*
 * Character set whose characters are written without a lead byte
 * (high-bit bytes only); 0 until wtf_init() assigns gset[1].ccs.
 */
75 wc_ccs wtf_gr_ccs = 0;
/*
 * Encoding scheme consulted by wtf_push() when deciding on conversions;
 * wtf_conv_fit() saves and restores it around its own work.
 */
76 static wc_ces wtf_major_ces = WC_CES_US_ASCII;
/* Status object (re)initialised for wtf_major_ces via wc_output_init(). */
77 static wc_status wtf_major_st;
/*
 * Configure the module-level WTF state from two encoding schemes.
 *
 * ces2 is validated with wc_check_ces() (presumably to become
 * wtf_major_ces -- confirm).
 * ces1 is validated with wc_check_ces(); its gset[1].ccs is then checked
 * and stored in wtf_gr_ccs.  Afterwards the table entries for bytes
 * 0xa1-0xff are rewritten to match wtf_gr_ccs.
 */
80 wtf_init(wc_ces ces1, wc_ces ces2)
85 if (wc_check_ces(ces2))
88 if (! wc_check_ces(ces1))
90 gset = WcCesInfo[WC_CES_INDEX(ces1)].gset;
/* Guard: no gset, an empty second slot, or a WCS16/WCS32 set. */
91 if (gset == NULL || gset[1].ccs == 0 ||
92 gset[1].ccs & (WC_CCS_A_WCS16|WC_CCS_A_WCS32))
94 wtf_gr_ccs = gset[1].ccs;
/* Wide set: high-bit bytes are typed WTF_TYPE_WCHAR1W. */
96 if (WC_CCS_IS_WIDE(wtf_gr_ccs)) {
97 for (i = 0xa1; i <= 0xff; i++) {
100 WTF_TYPE_MAP[i] = WTF_TYPE_WCHAR1W;
/* Otherwise: width 1 and type WTF_TYPE_WCHAR1. */
103 for (i = 0xa1; i <= 0xff; i++) {
104 WTF_WIDTH_MAP[i] = 1;
106 WTF_TYPE_MAP[i] = WTF_TYPE_WCHAR1;
/*
 * Return the width value of the WTF sequence starting at p, looked up
 * from its first byte only.  p must point at a readable byte.
 */
113 wtf_width(wc_uchar *p)
115 return (int)WTF_WIDTH_MAP[*p];
/*
 * Walk the WTF string at p one sequence at a time, advancing by the
 * length recorded for each lead byte (presumably summing the widths of
 * the sequences on the way -- confirm).
 */
120 wtf_strwidth(wc_uchar *p)
126 p += WTF_LEN_MAP[*p];
/*
 * Return the byte length of the single WTF sequence starting at p,
 * looked up from its first byte only.
 */
133 wtf_len1(wc_uchar *p)
135 return (size_t)WTF_LEN_MAP[*p];
144 q += WTF_LEN_MAP[*q];
145 while (*q && ! WTF_WIDTH_MAP[*q])
146 q += WTF_LEN_MAP[*q];
/*
 * Return the type value of the WTF sequence starting at p, looked up
 * from its first byte only.
 */
152 wtf_type(wc_uchar *p)
154 return (int)WTF_TYPE_MAP[*p];
/*
 * Helpers that pack a 16/32-bit code into WTF payload bytes and back.
 *
 * Every payload byte carries 7 bits of the code and has its high bit
 * set, so no payload byte is ever below 0x80.  The first payload byte
 * holds only the top 2 bits (16-bit form) or top 4 bits (32-bit form);
 * callers OR additional information into its remaining bits after
 * packing, and the unpacking macros mask those bits away again.
 *
 * Each expansion is fully parenthesized so that it can be combined with
 * any operator (e.g. `wtf_to_wcs16(p) & mask`) without precedence
 * surprises.  Both arguments are evaluated several times: do not pass
 * expressions with side effects.
 */

/* Store code c into the 3 bytes p[0..2]; yields the value stored in p[2]. */
#define wcs16_to_wtf(c, p) \
    (((p)[0] = (((c) >> 14) & 0x03) | 0x80), \
     ((p)[1] = (((c) >>  7) & 0x7f) | 0x80), \
     ((p)[2] = ( (c)        & 0x7f) | 0x80))

/* Store code c into the 5 bytes p[0..4]; yields the value stored in p[4]. */
#define wcs32_to_wtf(c, p) \
    (((p)[0] = (((c) >> 28) & 0x0f) | 0x80), \
     ((p)[1] = (((c) >> 21) & 0x7f) | 0x80), \
     ((p)[2] = (((c) >> 14) & 0x7f) | 0x80), \
     ((p)[3] = (((c) >>  7) & 0x7f) | 0x80), \
     ((p)[4] = ( (c)        & 0x7f) | 0x80))

/* Rebuild the 16-bit code from the 3 bytes p[0..2] as a wc_uint32. */
#define wtf_to_wcs16(p) \
    ( ((wc_uint32)((p)[0] & 0x03) << 14) \
    | ((wc_uint32)((p)[1] & 0x7f) <<  7) \
    | ((wc_uint32)((p)[2] & 0x7f)      ))

/* Rebuild the 32-bit code from the 5 bytes p[0..4] as a wc_uint32. */
#define wtf_to_wcs32(p) \
    ( ((wc_uint32)((p)[0] & 0x0f) << 28) \
    | ((wc_uint32)((p)[1] & 0x7f) << 21) \
    | ((wc_uint32)((p)[2] & 0x7f) << 14) \
    | ((wc_uint32)((p)[3] & 0x7f) <<  7) \
    | ((wc_uint32)((p)[4] & 0x7f)      ))
/*
 * Append the WTF representation of one character (ccs, code) to os.
 *
 * A US-ASCII character is appended as a single 7-bit byte.
 * When WcOption.pre_conv is set and the set is not flagged unknown, the
 * character may first be converted: JOHAB <-> KS X 1001 depending on
 * wtf_major_ces, or, in the branch guarded by WcOption.ucs_conv, via
 * wc_any_to_any_ces() when wtf_major_ces does not provide the set.
 *
 * The character is then encoded into the local buffer s according to
 * WC_CCS_TYPE(cc.ccs) and appended with Strcat_charp_n().  A character
 * whose set equals wtf_gr_ccs is written as bare high-bit bytes; any
 * other carries WC_CCS_INDEX(cc.ccs) | 0x80 in s[1], followed by the
 * code bytes, each reduced to 7 bits with the high bit set.
 */
180 wtf_push(Str os, wc_ccs ccs, wc_uint32 code)
186 if (ccs == WC_CCS_US_ASCII) {
187 Strcat_char(os, (char)(code & 0x7f));
192 if (WcOption.pre_conv && !(cc.ccs & WC_CCS_A_UNKNOWN)) {
/* JOHAB input while the major scheme is EUC-KR / ISO-2022-KR. */
193 if ((ccs == WC_CCS_JOHAB || ccs == WC_CCS_JOHAB_1 ||
194 ccs == WC_CCS_JOHAB_2 || ccs == WC_CCS_JOHAB_3) &&
195 (wtf_major_ces == WC_CES_EUC_KR ||
196 wtf_major_ces == WC_CES_ISO_2022_KR)) {
197 cc2 = wc_johab_to_ksx1001(cc);
198 if (!WC_CCS_IS_UNKNOWN(cc2.ccs))
/* KS X 1001 input while the major scheme is JOHAB. */
200 } else if (ccs == WC_CCS_KS_X_1001 &&
201 wtf_major_ces == WC_CES_JOHAB) {
202 cc2 = wc_ksx1001_to_johab(cc);
203 if (!WC_CCS_IS_UNKNOWN(cc2.ccs))
207 else if (WcOption.ucs_conv) {
/* fix_width_conv is switched off for this conversion and restored below. */
208 wc_bool fix_width_conv = WcOption.fix_width_conv;
209 WcOption.fix_width_conv = WC_FALSE;
210 wc_output_init(wtf_major_ces, &wtf_major_st);
211 if (! wc_ces_has_ccs(WC_CCS_SET(ccs), &wtf_major_st)) {
212 cc2 = wc_any_to_any_ces(cc, &wtf_major_st);
213 if (cc2.ccs == WC_CCS_US_ASCII) {
214 Strcat_char(os, (char)(cc2.code & 0x7f));
/* Results in the CP1258_2 / TCVN_5712_3 sets are not accepted here. */
217 if (!WC_CCS_IS_UNKNOWN(cc2.ccs) &&
218 cc2.ccs != WC_CCS_CP1258_2 &&
219 cc2.ccs != WC_CCS_TCVN_5712_3)
222 WcOption.fix_width_conv = fix_width_conv;
/* Encode into s[] according to the kind of character set. */
227 switch (WC_CCS_TYPE(cc.ccs)) {
229 if (cc.ccs == wtf_gr_ccs) {
230 s[0] = (cc.code & 0x7f) | 0x80;
/* JIS X 0201 kana is re-pushed as JIS X 0208 unless use_jisx0201k is set. */
234 if (cc.ccs == WC_CCS_JIS_X_0201K && !WcOption.use_jisx0201k) {
235 cc2 = wc_jisx0201k_to_jisx0208(cc);
236 if (!WC_CCS_IS_UNKNOWN(cc2.ccs)) {
237 wtf_push(os, cc2.ccs, cc2.code);
242 s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
243 s[2] = (cc.code & 0x7f) | 0x80;
247 if (cc.ccs == wtf_gr_ccs) {
248 s[0] = ((cc.code >> 8) & 0x7f) | 0x80;
249 s[1] = ( cc.code & 0x7f) | 0x80;
254 s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
255 s[2] = ((cc.code >> 8) & 0x7f) | 0x80;
256 s[3] = ( cc.code & 0x7f) | 0x80;
260 if (WcOption.use_combining && wc_is_combining(cc))
/* The bare form is used only when the 7-bit value is above 0x20. */
262 else if (cc.ccs == wtf_gr_ccs && (cc.code & 0x7f) > 0x20) {
263 s[0] = (cc.code & 0x7f) | 0x80;
268 s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
269 s[2] = (cc.code & 0x7f) | 0x80;
273 if (cc.ccs == wtf_gr_ccs && ((cc.code >> 8) & 0x7f) > 0x20) {
274 s[0] = ((cc.code >> 8) & 0x7f) | 0x80;
275 s[1] = ( cc.code & 0x7f) | 0x80;
280 s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
281 s[2] = ((cc.code >> 8) & 0x7f) | 0x80;
282 s[3] = ( cc.code & 0x7f) | 0x80;
286 if (cc.ccs == wtf_gr_ccs) {
287 s[0] = (cc.code & 0x7f) | 0x80;
292 s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
293 s[2] = (cc.code & 0x7f) | 0x80;
297 if (WcOption.use_combining && wc_is_combining(cc))
299 else if (cc.ccs == wtf_gr_ccs && (cc.code & 0x7f) > 0x20) {
300 s[0] = (cc.code & 0x7f) | 0x80;
305 s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
306 s[2] = (cc.code & 0x7f) | 0x80;
/* Vendor double-byte sets are first remapped onto cs94w / cs128w form. */
311 case WC_CCS_SJIS_EXT:
312 cc = wc_sjis_ext_to_cs94w(cc);
315 cc = wc_gbk_to_cs128w(cc);
318 cc = wc_gbk_ext_to_cs128w(cc);
321 cc = wc_big5_to_cs94w(cc);
324 cc = wc_hkscs_to_cs128w(cc);
327 cc = wc_johab_to_cs128w(cc);
330 cc = wc_uhc_to_cs128w(cc);
333 if (cc.ccs == wtf_gr_ccs && ((cc.code >> 8) & 0x7f) > 0x20) {
334 s[0] = ((cc.code >> 8) & 0x7f) | 0x80;
335 s[1] = ( cc.code & 0x7f) | 0x80;
340 s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
341 s[2] = ((cc.code >> 8) & 0x7f) | 0x80;
342 s[3] = ( cc.code & 0x7f) | 0x80;
/* 16-bit code: lead byte, then 3 payload bytes; set index goes into s[1]. */
346 s[0] = (WC_CCS_IS_WIDE(cc.ccs) ? WTF_C_WCS16W : WTF_C_WCS16)
347 | (WC_CCS_IS_COMB(cc.ccs) ? WTF_C_COMB : 0);
348 wcs16_to_wtf(cc.code, s + 1);
349 s[1] |= (WC_CCS_INDEX(cc.ccs) << 2);
/* 32-bit code: lead byte, then 5 payload bytes; set index goes into s[1]. */
353 s[0] = (WC_CCS_IS_WIDE(cc.ccs) ? WTF_C_WCS32W : WTF_C_WCS32)
354 | (WC_CCS_IS_COMB(cc.ccs) ? WTF_C_COMB : 0);
355 wcs32_to_wtf(cc.code, s + 1);
356 s[1] |= (WC_CCS_INDEX(cc.ccs) << 4);
/* Unknown-set form: WTF_C_UNKNOWN lead byte, set index, one code byte. */
360 s[0] = WTF_C_UNKNOWN;
361 s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
362 s[2] = (cc.code & 0x7f) | 0x80;
/* n is the number of bytes placed in s by the branch taken above. */
366 Strcat_charp_n(os, (char *)s, n);
/*
 * Append raw bytes from p to os.  A byte is either pushed through
 * wtf_push() as a WC_CCS_UNKNOWN character or appended verbatim with
 * Strcat_char() (presumably high-bit bytes take the first route and
 * len bounds the number of bytes -- confirm).
 */
370 wtf_push_unknown(Str os, wc_uchar *p, size_t len)
374 wtf_push(os, WC_CCS_UNKNOWN, *p);
376 Strcat_char(os, (char)*p);
/*
 * Decode one WTF sequence read through *p and return it as a value
 * with .ccs and .code members.
 *
 * Three input shapes are distinguished by the first byte: plain
 * US-ASCII, a bare high-bit byte above 0xa0, and a lead byte whose set
 * type is looked up in CCS_MAP.  Codes that wtf_push() remapped onto
 * cs94w / cs128w form are converted back to their original set before
 * returning.
 * NOTE(review): presumably *p is advanced past the decoded sequence --
 * confirm against the callers.
 */
381 wtf_parse1(wc_uchar **p)
387 cc.ccs = WC_CCS_US_ASCII;
389 } else if (*q > 0xa0) {
/* Wide sets take two bytes for the code. */
391 if (WC_CCS_IS_WIDE(cc.ccs)) {
392 cc.code = ((wc_uint32)*q << 8) | *(q+1);
/* Lead byte: recover the set type, then consume the lead byte. */
397 cc.ccs = (wc_uint32)CCS_MAP[*(q++) - 0x80] << 8;
398 switch (WC_CCS_TYPE(cc.ccs)) {
403 case WC_CCS_A_UNKNOWN:
/* The next byte carries the set index in its low 7 bits. */
404 cc.ccs |= *(q++) & 0x7f;
410 cc.ccs |= *(q++) & 0x7f;
411 cc.code = ((wc_uint32)*q << 8) | *(q+1);
415 case WC_CCS_A_WCS16W:
/* Set index sits in bits 2-6 of the first payload byte. */
416 cc.ccs |= (*q & 0x7c) >> 2;
417 cc.code = wtf_to_wcs16(q);
421 case WC_CCS_A_WCS32W:
/* Set index sits in bits 4-6 of the first payload byte. */
422 cc.ccs |= (*q & 0x70) >> 4;
423 cc.code = wtf_to_wcs32(q);
/* Fallback result: an ASCII space. */
428 cc.ccs = WC_CCS_US_ASCII;
429 cc.code = (wc_uint32)' ';
/* Undo the cs94w / cs128w remapping applied when the character was pushed. */
436 case WC_CCS_SJIS_EXT_1:
437 case WC_CCS_SJIS_EXT_2:
438 return wc_cs94w_to_sjis_ext(cc);
441 return wc_cs128w_to_gbk(cc);
442 case WC_CCS_GBK_EXT_1:
443 case WC_CCS_GBK_EXT_2:
444 return wc_cs128w_to_gbk_ext(cc);
447 return wc_cs94w_to_big5(cc);
450 return wc_cs128w_to_hkscs(cc);
454 return wc_cs128w_to_johab(cc);
457 return wc_cs128w_to_uhc(cc);
/*
 * Decode one character read through *p and, when WcOption.use_combining
 * is enabled, try to merge following zero-width sequences into it.
 *
 * Three precomposition paths are attempted on the following sequence:
 * CP1258 (wc_cp1258_precompose), TCVN 5712 (wc_tcvn5712_precompose) and
 * Unicode (wc_ucs_precompose).  For Unicode, tagged code points are
 * unwrapped before composing and re-tagged afterwards.
 */
463 wtf_parse(wc_uchar **p)
470 cc.ccs = WC_CCS_US_ASCII;
/* Nothing to combine when combining is off or the next sequence has width. */
474 if ((! WcOption.use_combining) || WTF_WIDTH_MAP[**p])
478 cc2 = wtf_parse1(&q);
479 if ((cc.ccs == WC_CCS_US_ASCII || cc.ccs == WC_CCS_CP1258_1) &&
480 WC_CCS_SET(cc2.ccs) == WC_CCS_CP1258_1) {
481 cc2.code = wc_cp1258_precompose(cc.code, cc2.code);
483 cc2.ccs = WC_CCS_CP1258_2;
487 } else if ((cc.ccs == WC_CCS_US_ASCII || cc.ccs == WC_CCS_TCVN_5712_1) &&
488 WC_CCS_SET(cc2.ccs) == WC_CCS_TCVN_5712_1) {
489 cc2.code = wc_tcvn5712_precompose(cc.code, cc2.code);
491 cc2.ccs = WC_CCS_TCVN_5712_3;
497 else if ((cc.ccs == WC_CCS_US_ASCII || cc.ccs == WC_CCS_ISO_8859_1 ||
498 WC_CCS_IS_UNICODE(cc.ccs)) && WC_CCS_IS_UNICODE(cc2.ccs)) {
/* Strip the tag from tagged code points before composing. */
500 ucs = (WC_CCS_SET(cc.ccs) == WC_CCS_UCS_TAG)
501 ? wc_ucs_tag_to_ucs(cc.code) : cc.code;
502 ucs2 = (WC_CCS_SET(cc2.ccs) == WC_CCS_UCS_TAG)
503 ? wc_ucs_tag_to_ucs(cc2.code) : cc2.code;
504 ucs = wc_ucs_precompose(ucs, ucs2);
505 if (ucs == WC_C_UCS4_ERROR)
/* Keep the base character's tag on the composed result. */
507 if (WC_CCS_SET(cc.ccs) == WC_CCS_UCS_TAG)
508 cc.code = wc_ucs_to_ucs_tag(ucs, wc_ucs_tag_to_tag(cc.code));
510 cc.ccs = wc_ucs_to_ccs(ucs);
514 if (! WTF_WIDTH_MAP[*q])
516 cc2 = wtf_parse1(&q);
517 if (! WC_CCS_IS_UNICODE(cc2.ccs))
/*
 * Return the character set of the WTF sequence at p, as decoded by
 * wtf_parse1().  Only the local copy of p is handed to the parser, so
 * the caller's pointer is left unchanged.
 */
526 wtf_get_ccs(wc_uchar *p)
528 return wtf_parse1(&p).ccs;
/*
 * Return the code of the WTF sequence at p, as decoded by wtf_parse1().
 * Only the local copy of p is handed to the parser, so the caller's
 * pointer is left unchanged.
 */
532 wtf_get_code(wc_uchar *p)
534 return wtf_parse1(&p).code;
/*
 * Classify the WTF sequence at p as Hangul or not, without fully
 * decoding it; the decision is made from the lead byte and the set
 * identifier stored in the following byte.
 */
538 wtf_is_hangul(wc_uchar *p)
/* Answer depends only on the configured wtf_gr_ccs. */
541 return (wtf_gr_ccs == WC_CCS_KS_X_1001 || wtf_gr_ccs == WC_CCS_JOHAB_1);
542 else if (*p == WTF_C_CS94W)
543 return ((*(p + 1) & 0x7f) == WC_F_KS_X_1001);
544 else if (*p == WTF_C_PCSW) {
545 wc_uchar f = *(p + 1) & 0x7f;
546 return (f == WC_F_JOHAB_1 || f == WC_F_JOHAB_2 || f == WC_F_JOHAB_3 ||
547 f == WC_F_UHC_1 || f == WC_F_UHC_2);
550 else if (*p == WTF_C_WCS16W) {
/* Step onto the first payload byte; the set id is in its bits 2-6. */
551 wc_uchar f = (*(++p) & 0x7f) >> 2;
553 return wc_is_ucs_hangul(wtf_to_wcs16(p));
554 } else if (*p == WTF_C_WCS32W) {
/* Step onto the first payload byte; the set id is in its bits 4-6. */
555 wc_uchar f = (*(++p) & 0x7f) >> 4;
556 if (f == WC_F_UCS_TAG)
557 return wc_is_ucs_hangul(wc_ucs_tag_to_ucs(wtf_to_wcs32(p)));
/*
 * Re-encode the WTF string s by pushing each character through
 * wtf_push() with both WcOption.pre_conv and WcOption.ucs_conv forced
 * on.  wtf_major_ces and the two options are saved beforehand and
 * restored afterwards.
 *
 * WC_CES_WTF and WC_CES_US_ASCII are special-cased up front, and the
 * leading run of bytes below 0x80 is copied into the result verbatim
 * rather than being re-pushed.
 */
564 wtf_conv_fit(char *s, wc_ces ces)
570 wc_bool pre_conv, ucs_conv;
572 if (ces == WC_CES_WTF || ces == WC_CES_US_ASCII)
/* Find the end of the leading 7-bit run. */
575 for (p = (wc_uchar *)s; *p && *p < 0x80; p++)
/* The result buffer is created with the input length as its size hint. */
580 os = Strnew_size(strlen(s));
581 if (p > (wc_uchar *)s)
582 Strcopy_charp_n(os, s, (int)(p - (wc_uchar *)s));
/* Save the global state that is overridden below. */
584 major_ces = wtf_major_ces;
585 pre_conv = WcOption.pre_conv;
586 ucs_conv = WcOption.ucs_conv;
588 WcOption.pre_conv = WC_TRUE;
589 WcOption.ucs_conv = WC_TRUE;
592 wtf_push(os, cc.ccs, cc.code);
/* Restore the saved global state. */
594 wtf_major_ces = major_ces;
595 WcOption.pre_conv = pre_conv;
596 WcOption.ucs_conv = ucs_conv;