2 * For queries on strings that are not in the dictionary, such as "123" or "ABC",
3 * all of the candidates are generated here.
4 * In addition to the above, this file also handles access to zip codes.
6 * Copyright (C) 2001-2005 TABATA Yusuke
7 * Copyright (C) 2004-2005 YOSHIDA Yuichi
11 This library is free software; you can redistribute it and/or
12 modify it under the terms of the GNU Lesser General Public
13 License as published by the Free Software Foundation; either
14 version 2 of the License, or (at your option) any later version.
16 This library is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
21 You should have received a copy of the GNU Lesser General Public
22 License along with this library; if not, write to the Free Software
23 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 #include <anthy/anthy.h> /* for ANTHY_*_ENCODING */
29 #include <anthy/conf.h>
30 #include <anthy/xstr.h>
31 #include <anthy/xchar.h>
36 static struct seq_ent unkseq_ent;/* unknown strings, e.g. strings of English letters */
37 static struct seq_ent num_ent;/* digits and the like */
38 static struct seq_ent sep_ent;/* separators and the like */
40 static wtype_t wt_num;
42 static xchar narrow_wide_tab[]= {WIDE_0, WIDE_1, WIDE_2,
43 WIDE_3, WIDE_4, WIDE_5,
44 WIDE_6, WIDE_7, WIDE_8, WIDE_9};
45 static int kj_num_tab[]={KJ_0, KJ_1, KJ_2, KJ_3, KJ_4,
46 KJ_5, KJ_6, KJ_7, KJ_8, KJ_9};
55 pushback_place_name(struct zipcode_line *zl, char *pn)
60 zl->strs = realloc(zl->strs, sizeof(xstr *) * (zl->nr + 1));
61 zl->strs[zl->nr] = anthy_cstr_to_xstr(pn, ANTHY_EUC_JP_ENCODING);
65 /* Parse the zip code dictionary and detect the space separators */
67 parse_zipcode_line(struct zipcode_line *zl, char *ln)
79 } else if (*ln == ' ') {
82 pushback_place_name(zl, buf);
90 pushback_place_name(zl, buf);
93 /* Search the zip code dictionary */
95 search_zipcode_dict(struct zipcode_line *zl, xstr* xs)
105 fp = fopen(anthy_conf_get_str("ZIPDICT_EUC"), "r");
110 /* Absorb the difference between half-width and full-width forms */
111 temp = anthy_xstr_wide_num_to_num(xs);
112 index = anthy_xstr_to_cstr(temp, 0);
116 while (fgets(buf, 1000, fp)) {
117 /* So that a 3-character zip code does not match the head of a 7-character zip code */
118 if (!strncmp(buf, index, len) && buf[len] == ' ') {
120 buf[strlen(buf) - 1] = 0;
121 parse_zipcode_line(zl, &buf[len + 1]);
124 anthy_free_xstr(temp); /* memory leak fix */
129 /* Free the information read from the zip code dictionary */
131 free_zipcode_line(struct zipcode_line *zl)
134 for (i = 0; i < zl->nr; i++) {
135 anthy_free_xstr(zl->strs[i]);
141 gen_zipcode(xstr* xs, xstr *dest, int nth)
143 struct zipcode_line zl;
145 /* Read the place names from the zip code dictionary */
146 search_zipcode_dict(&zl, xs);
150 dest->len = zl.strs[nth]->len;
151 dest->str = anthy_xstr_dup_str(zl.strs[nth]);
152 free_zipcode_line(&zl);
155 free_zipcode_line(&zl);
162 /* Get the full-width digit corresponding to a half-width digit */
164 narrow_num_to_wide_num(xchar xc)
166 if (xc > '9' || xc < '0') {
169 return narrow_wide_tab[(int)(xc - '0')];
172 /* Get the half-width digit corresponding to a full-width digit */
174 wide_num_to_narrow_num(xchar xc)
177 for (i = 0; i < 10; i++) {
178 if (xc == narrow_wide_tab[i]) {
185 * Convert a one-digit integer to a kanji numeral
190 if (n > 9 || n < 1) {
193 return kj_num_tab[n];
197 * Generate the kanji string for a 4-digit chunk of an integer
200 compose_num_component(xstr *xs, long long num)
203 int a[4] = { 0 , KJ_10, KJ_100, KJ_1000};
204 for (i = 0; i < 4; i++) {
205 n[i] = num-(num/10)*10;
208 /* the tens, hundreds and thousands places */
209 for (i = 3; i > 0; i--) {
212 anthy_xstrappend(xs, get_kj_num(n[i]));
214 anthy_xstrappend(xs, a[i]);
219 anthy_xstrappend(xs, get_kj_num(n[0]));
223 /** Build a string of kanji numerals */
225 gen_kanji_num(long long num, xstr *dest)
229 if (num < 1 || num >= 10000000000000000LL) {
232 /* Pack the number into array n, 4 digits at a time */
233 for (i = 0; i < 10; i ++) {
234 n[i] = num-(num/10000)*10000;
242 compose_num_component(dest, n[3]);
243 anthy_xstrappend(dest, KJ_1000000000000);
247 compose_num_component(dest, n[2]);
248 anthy_xstrappend(dest, KJ_100000000);
252 compose_num_component(dest, n[1]);
253 anthy_xstrappend(dest, KJ_10000);
256 compose_num_component(dest, n[0]);
261 get_nr_zipcode(xstr* xs)
263 struct zipcode_line zl;
265 if (xs->len != 3 && xs->len != 7) {
268 /* Read the place names from the zip code dictionary */
269 search_zipcode_dict(&zl, xs);
272 free_zipcode_line(&zl);
277 get_nr_num_ents(long long num)
279 if (num > 0 && num < 10000000000000000LL) {
281 /* Arabic numerals (as is), Arabic numerals (full-width/half-width toggled),
282 kanji numerals, 3-digit grouping (full-width, half-width) */
285 /* Arabic numerals (as is), full-width/half-width toggled, kanji numerals */
289 /* Arabic numerals (as is), full-width/half-width toggled */
296 * How many synthesized entries there are
299 anthy_get_nr_dic_ents_of_ext_ent(seq_ent_t se, xstr *xs)
301 if (se == &unkseq_ent) {
304 if (anthy_get_xstr_type(xs) & (XCT_NUM|XCT_WIDENUM)) {
305 long long num = anthy_xstrtoll(xs);
306 return get_nr_num_ents(num) + get_nr_zipcode(xs);
311 /* Swap full-width and half-width forms in a string */
313 toggle_wide_narrow(xstr *dest, xstr *src)
316 dest->len = src->len;
317 dest->str = anthy_xstr_dup_str(src);
318 f = anthy_get_xstr_type(src) & XCT_WIDENUM;
319 for (i = 0; i < dest->len; i++) {
321 dest->str[i] = wide_num_to_narrow_num(src->str[i]);
323 dest->str[i] = narrow_num_to_wide_num(src->str[i]);
328 /* Generate a number grouped into 3-digit units */
330 gen_separated_num(long long num, xstr *dest, int full)
332 int width = 0, dot_count;
341 for (tmp = num; tmp != 0; tmp /= 10) {
345 dot_count = (width - 1) / 3;
346 /* Prepare the string needed to store the result */
347 dest->len = dot_count + width;
348 dest->str = malloc(sizeof(xchar)*dest->len);
350 /* Fill in the digits in order, starting from the rightmost one */
351 for (i = 0, pos = dest->len - 1; i < width; i++, pos --) {
354 if (i > 0 && (i % 3) == 0) {
356 dest->str[pos] = WIDE_COMMA;
358 dest->str[pos] = ',';
364 dest->str[pos] = narrow_wide_tab[n];
367 dest->str[pos] = 48 + n;
375 * Extract the nth candidate
378 anthy_get_nth_dic_ent_str_of_ext_ent(seq_ent_t se, xstr *xs,
381 dest->str = NULL; /* fix for a bug that caused invalid memory accesses and double frees */
386 dest->str = anthy_xstr_dup_str(xs);
389 if (se == &unkseq_ent) {
392 /* Toggle between full-width and half-width */
396 if (anthy_get_xstr_type(xs) & (XCT_NUM|XCT_WIDENUM)) {
397 long long num = anthy_xstrtoll(xs);
398 const int base_ents = get_nr_num_ents(num); /* support for 3-digit zip codes */
399 /* Kanji numerals, Arabic numerals, full-width/half-width toggled */
402 /* The string with full-width and half-width swapped */
403 toggle_wide_narrow(dest, xs);
407 if (!gen_kanji_num(num, dest)) {
412 /* Number grouped into 3-digit units */
413 if (!gen_separated_num(num, dest, 0)) {
418 /* Number grouped into 3-digit units (full-width) */
419 if (!gen_separated_num(num, dest, 1)) {
425 if (base_ents <= nth) { /* support for 3-digit zip codes */
426 if (xs->len == 3 || xs->len == 7) {
427 if (!gen_zipcode(xs, dest, nth - base_ents)) { /* support for 3-digit zip codes */
440 anthy_get_ext_seq_ent_indep(struct seq_ent *se)
442 if (se == &num_ent || se == &unkseq_ent) {
450 anthy_get_ext_seq_ent_ct(struct seq_ent *se, int pos, int ct)
452 if (anthy_get_ext_seq_ent_pos(se, pos) && ct == CT_NONE) {
453 /* When the part of speech matches and there is no conjugation
454 (ext_ent does not conjugate) */
462 anthy_get_ext_seq_ent_pos(struct seq_ent *se, int pos)
464 /* ext_ent is nouns only */
465 if (se == &num_ent && pos == POS_NOUN) {
468 if ((se == &unkseq_ent) && pos == POS_NOUN) {
475 * Analyze a sequence that is not in the dictionary
478 anthy_get_ext_seq_ent_from_xstr(xstr *x, int is_reverse)
480 int t = anthy_get_xstr_type(x);
482 /* If it consists only of digits, it is num_ent */
483 if (t & (XCT_NUM | XCT_WIDENUM)) {
494 /* Do not create kanji candidates during reverse conversion */
500 /* If it is not in the dictionary and is one character long, it is a separator */
507 anthy_get_nth_dic_ent_wtype_of_ext_ent(xstr *xs, int nth,
512 type = anthy_get_xstr_type(xs);
513 if (type & (XCT_NUM | XCT_WIDENUM)) {
517 if (type & XCT_KATA) {
518 *wt = anthy_get_wtype(POS_NOUN, COS_NONE, SCOS_NONE, CC_NONE,
526 anthy_get_nth_dic_ent_freq_of_ext_ent(struct seq_ent *se, int nth)
534 anthy_get_ext_seq_ent_wtype(struct seq_ent *se, wtype_t w)
536 if (se == &num_ent) {
537 if (anthy_wtype_include(w, wt_num)) {
543 if (anthy_wtype_get_pos(w) == POS_NOUN &&
544 anthy_wtype_get_cos(w) == COS_NONE &&
545 anthy_wtype_get_scos(w) == SCOS_NONE) {
546 /* Matches a noun with no sub part of speech and no sub-sub part of speech */
553 anthy_init_ext_ent(void)
556 unkseq_ent.seq_type = 0;
557 unkseq_ent.nr_dic_ents = 0;
558 num_ent.seq_type = 0;
559 num_ent.nr_dic_ents = 0;
560 sep_ent.seq_type = 0;
561 sep_ent.nr_dic_ents = 0;
563 wt_num = anthy_init_wtype_by_name("¿ô»ì");