2 * String handling used internally by Anthy
3 * typedef struct xstr_ {
7 * Write code so that zero-length strings can be handled without
8 * depending on what malloc(0) means. free(0) is fine.
10 * With the default settings,
11 * cstr is an ordinary C string in EUC
13 * Copyright (C) 2000-2007 TABATA Yusuke
17 This library is free software; you can redistribute it and/or
18 modify it under the terms of the GNU Lesser General Public
19 License as published by the Free Software Foundation; either
20 version 2 of the License, or (at your option) any later version.
22 This library is distributed in the hope that it will be useful,
23 but WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25 Lesser General Public License for more details.
27 You should have received a copy of the GNU Lesser General Public
28 License along with this library; if not, write to the Free Software
29 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
36 /* for ANTHY_*_ENCODING */
37 #include <anthy/anthy.h>
39 #include <anthy/xstr.h>
40 #include <anthy/xchar.h>
41 #include "diclib_inner.h"
43 /* Encoding used when printing to the screen */
44 static int print_encoding;
46 #define MAX_BYTES_PER_XCHAR 10
54 /** Compute the length of the xstr corresponding to a C string
57 xlengthofcstr(const char *c)
62 for (i = 0; i < len; i++) {
72 anthy_utf8_to_ucs4_xchar(const char *s, xchar *res)
74 const unsigned char *str = (const unsigned char *)s;
80 } else if (str[0] < 0xe0) {
83 } else if (str[0] < 0xf0) {
86 } else if (str[0] < 0xf8) {
89 } else if (str[0] < 0xfc) {
97 for (i = 1; i < len; i++) {
99 cur |= (str[0] & 0x3f);
103 return (const char *)str;
107 utf8_to_ucs4_xstr(const char *s)
109 const unsigned char *str = (const unsigned char *)s;
111 res.str = (xchar *)alloca(sizeof(xchar) * strlen(s));
116 str = (const unsigned char *)anthy_utf8_to_ucs4_xchar((const char *)str,
118 res.str[res.len] = cur;
121 return anthy_xstr_dup(&res);
125 put_xchar_to_utf8_str(xchar xc, char *buf_)
128 unsigned char *buf = (unsigned char *)buf_;
132 } else if (xc < 0x800) {
135 } else if (xc < 0x10000) {
138 } else if (xc < 0x200000) {
141 } else if (xc < 0x400000) {
148 for (i = len - 1; i > 0; i--) {
149 buf[i] = (xc & 0x3f) | 0x80;
158 ucs4_xstr_to_utf8(xstr *xs)
160 char *buf = alloca(xs->len * 6 + 1);
163 for (i = 0; i < xs->len; i++) {
164 xchar xc = xs->str[i];
165 put_xchar_to_utf8_str(xc, &buf[t]);
171 /** Convert a C string into an xstr
174 anthy_cstr_to_xstr(const char *c, int encoding)
178 if (encoding == ANTHY_UTF8_ENCODING) {
179 return utf8_to_ucs4_xstr(c);
181 l = xlengthofcstr(c);
182 x = (xstr *)malloc(sizeof(struct xstr_));
187 x->str = malloc(sizeof(xchar)*l);
188 for (i = 0, j = 0; i < l; i++) {
193 unsigned char *p = (unsigned char *)&c[j];
194 x->str[i] = (p[1] | (p[0]<<8)) | 0x8080;
195 x->str[i] = anthy_euc_to_ucs(x->str[i]);
204 anthy_xstr_to_cstr(xstr *s, int encoding)
209 if (encoding == ANTHY_UTF8_ENCODING) {
210 return ucs4_xstr_to_utf8(s);
214 for (i = 0; i < s->len; i++) {
215 int ec = anthy_ucs_to_euc(s->str[i]);
220 p = (char *)malloc(l + 1);
223 for (i = 0; i < s->len; i++) {
224 int ec = anthy_ucs_to_euc(s->str[i]);
239 anthy_xstr_dup(xstr *s)
242 xstr *x = (xstr *)malloc(sizeof(xstr));
245 x->str = malloc(sizeof(xchar)*s->len);
249 for (i = 0; i < x->len; i++) {
250 x->str[i] = s->str[i];
256 anthy_xstr_dup_str(xstr *s)
261 c = malloc(sizeof(xchar)*s->len);
265 for (i = 0; i < s->len; i++) {
272 anthy_free_xstr(xstr *x)
283 anthy_free_xstr_str(xstr *x)
292 anthy_sputxchar(char *buf, xchar x, int encoding)
294 if (!xc_isprint(x)) {
298 if (encoding == ANTHY_UTF8_ENCODING) {
299 return put_xchar_to_utf8_str(x, buf);
301 x = anthy_ucs_to_euc(x);
308 buf[1] = 0x80 | (x & 255);
309 buf[0] = 0x80 | ((x>>8) & 255);
314 anthy_sputxstr(char *buf, xstr *x, int encoding)
316 char b[MAX_BYTES_PER_XCHAR];
318 for (i = 0; i < x->len; i++) {
319 anthy_sputxchar(b, x->str[i], encoding);
320 sprintf(&buf[l], "%s", b);
327 anthy_snputxstr(char *buf, int n, xstr *x, int encoding)
329 char b[MAX_BYTES_PER_XCHAR];
331 for (i = 0; i < x->len; i++) {
332 anthy_sputxchar(b, x->str[i], encoding);
333 if ((int)strlen(b) + l >= n) {
336 n -= sprintf(&buf[l], "%s", b);
343 anthy_putxchar(xchar x)
345 char buf[MAX_BYTES_PER_XCHAR];
346 if (!xc_isprint(x)) {
350 anthy_sputxchar(buf, x, print_encoding);
355 anthy_putxstr(xstr *x)
358 for (i = 0; i < x->len; i++) {
359 anthy_putxchar(x->str[i]);
364 anthy_putxstrln(xstr *x)
371 anthy_xstrcpy(xstr *dest, xstr *src)
375 dest->len = src->len;
376 for (i = 0; i < src->len; i++) {
377 dest->str[i] = src->str[i];
382 /* The sign of the return value is the same as strcmp */
384 anthy_xstrcmp(xstr *x1, xstr *x2)
387 if (x1->len < x2->len) {
392 for (i = 0 ; i < m ; i++) {
393 if (x1->str[i] < x2->str[i]) {
396 if (x1->str[i] > x2->str[i]) {
400 if (x1->len < x2->len) {
403 if (x1->len > x2->len) {
409 /* The sign of the return value is the same as strncmp */
411 anthy_xstrncmp(xstr *x1, xstr *x2, int n)
414 if (x1->len < x2->len) {
420 for (i = 0 ; i < m ; i++) {
421 if (x1->str[i] < x2->str[i]) {
424 if (x1->str[i] > x2->str[i]) {
428 if (x2->len <= n && x1->len < x2->len) {
431 if (x1->len <= n && x1->len > x2->len) {
439 anthy_xstrcat(xstr *s, xstr *a)
443 s = malloc(sizeof(xstr));
449 if (l < 1) { /* guard against a corrupted dictionary or learning data */
456 s->str = realloc(s->str, sizeof(xchar)*l);
457 for (i = 0; i < a->len; i ++) {
458 s->str[s->len+i] = a->str[i];
465 anthy_xstrappend(xstr *xs, xchar xc)
472 return anthy_xstrcat(xs, &p);
476 anthy_xstrtoll(xstr *x)
480 long long n = 0;/* the number */
481 if (!x->len || x->len > 16) {
484 if (!anthy_get_xstr_type(x) & (XCT_NUM | XCT_WIDENUM)) {
487 for (i = 0; i < x->len; i++) {
490 n += anthy_xchar_to_num(c);
495 /** Convert full-width digits to half-width
498 anthy_xstr_wide_num_to_num(xstr* src_xs)
502 dst_xs = anthy_xstr_dup(src_xs);
503 for (i = 0; i < src_xs->len; ++i) {
504 dst_xs->str[i] = anthy_xchar_wide_num_to_num(src_xs->str[i]);
509 /** Convert hiragana to katakana
512 anthy_xstr_hira_to_kata(xstr *src_xs)
516 dst_xs = anthy_xstr_dup(src_xs);
518 for (i = 0 ,j = 0; i < dst_xs->len; i++, j++) {
519 /* Check for hiragana U followed by a voiced sound mark (the VU sequence) */
520 if (i < dst_xs->len - 1 && dst_xs->str[i] == HK_U
521 && dst_xs->str[i+1] == HK_DDOT) {
522 dst_xs->str[j] = KK_VU;/* katakana VU */
527 dst_xs->str[j] = dst_xs->str[i];
528 if ((anthy_ucs_to_euc(dst_xs->str[j]) & 0xff00) == 0xa400) {
529 /* If it was hiragana, add 256 */
530 dst_xs->str[j] = anthy_ucs_to_euc(dst_xs->str[j]);
531 dst_xs->str[j] += 256;
532 dst_xs->str[j] = anthy_euc_to_ucs(dst_xs->str[j]);
540 anthy_xstr_hira_to_half_kata(xstr *src_xs)
542 int len = src_xs->len;
545 for (i = 0; i < src_xs->len; i++) {
546 const struct half_kana_table *tab = anthy_find_half_kana(src_xs->str[i]);
547 if (tab && tab->mod) {
551 xs = malloc(sizeof(xstr));
553 xs->str = malloc(sizeof(xchar) * len);
555 for (i = 0; i < src_xs->len; i++) {
556 const struct half_kana_table *tab = anthy_find_half_kana(src_xs->str[i]);
558 xs->str[j] = anthy_euc_to_ucs(tab->dst);
561 xs->str[j] = anthy_euc_to_ucs(tab->mod);
564 xs->str[j] = src_xs->str[i];
572 anthy_conv_half_wide(xstr *xs)
576 for (i = 0; i < xs->len; i++) {
577 if (!anthy_lookup_half_wide(xs->str[i])) {
581 res = anthy_xstr_dup(xs);
582 for (i = 0; i < xs->len; i++) {
583 res->str[i] = anthy_lookup_half_wide(xs->str[i]);
589 anthy_xstr_hash(xstr *xs)
593 for (i = 0 ;i < xs->len ;i++) {
605 conv_cstr(const char *s, int from, int to)
608 xstr *xs = anthy_cstr_to_xstr(s, from);
612 res = anthy_xstr_to_cstr(xs, to);
618 anthy_conv_euc_to_utf8(const char *s)
620 return conv_cstr(s, ANTHY_EUC_JP_ENCODING, ANTHY_UTF8_ENCODING);
624 anthy_conv_utf8_to_euc(const char *s)
626 return conv_cstr(s, ANTHY_UTF8_ENCODING, ANTHY_EUC_JP_ENCODING);
630 anthy_xstr_set_print_encoding(int encoding)
632 print_encoding = encoding;
636 anthy_init_xstr(void)
641 void anthy_quit_xstr(void)