2 * Comments in this program are written in Japanese,
3 * because this program is a Japanese input method.
4 * (many Japanese grammatical terms will appear.)
6 * Kana-Kanji conversion engine Anthy.
7 * 仮名漢字変換エンジンAnthy(アンシー)
9 * Funded by IPA未踏ソフトウェア創造事業 2001 9/22
10 * Funded by IPA未踏ソフトウェア創造事業 2005
11 * Copyright (C) 2000-2007 TABATA Yusuke, UGAWA Tomoharu
12 * Copyright (C) 2004-2006 YOSHIDA Yuichi
13 * Copyright (C) 2000-2007 KMC(Kyoto University Micro Computer Club)
14 * Copyright (C) 2001-2002 TAKAI Kosuke, Nobuoka Takahiro
18 This library is free software; you can redistribute it and/or
19 modify it under the terms of the GNU Lesser General Public
20 License as published by the Free Software Foundation; either
21 version 2 of the License, or (at your option) any later version.
23 This library is distributed in the hope that it will be useful,
24 but WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 Lesser General Public License for more details.
28 You should have received a copy of the GNU Lesser General Public
29 License along with this library; if not, write to the Free Software
30 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
33 * Anthyの変換機能はライブラリとして構成されており、この
34 * ファイルにはライブラリの提供する関数(API)が記述されています。
36 * ライブラリの提供する関数は下記のようなものがあります
37 * (1)ライブラリ全体の初期化、終了、設定
38 * (2)変換コンテキストの作成、解放
39 * (3)変換コンテキストに対する文字列の設定、文節長の変更、候補の取得等
41 * インターフェイスに関しては doc/LIBを参照してください
42 * Anthyのコードを理解しようとする場合は
43 * doc/GLOSSARY で用語を把握することを勧めます
49 #include <anthy/dic.h>
50 #include <anthy/splitter.h>
51 #include <anthy/conf.h>
52 #include <anthy/ordering.h>
53 #include <anthy/logger.h>
54 #include <anthy/record.h>
55 #include <anthy/anthy.h>
56 #include <anthy/record.h>
57 #include <anthy/xchar.h> /* for KK_VU */
62 /** Anthyの初期化が完了したかどうかのフラグ */
63 static int is_init_ok;
64 /** コンテキスト生成時のエンコーディング */
65 static int default_encoding;
67 static char *history_file;
69 /** (API) 全体の初期化 */
75 /* 2度初期化しないように */
79 /* 各サブシステムを順に初期化する */
80 if (anthy_init_dic()) {
81 anthy_log(0, "Failed to initialize dictionary.\n");
85 if (anthy_init_splitter()) {
86 anthy_log(0, "Failed to init splitter.\n");
89 anthy_init_contexts();
90 anthy_init_personality();
91 anthy_infosort_init();
92 anthy_relation_init();
95 default_encoding = ANTHY_EUC_JP_ENCODING;
98 hfn = getenv("ANTHY_HISTORY_FILE");
100 history_file = strdup(hfn);
107 /** (API) 全データの解放 */
114 anthy_quit_contexts();
115 anthy_quit_personality();
116 anthy_quit_splitter();
117 /* 多くのデータ構造はここでallocatorによって解放される */
128 /** (API) 設定項目の上書き */
130 anthy_conf_override(const char *var, const char *val)
132 anthy_do_conf_override(var, val);
135 /** (API) personalityの設定 */
137 anthy_set_personality(const char *id)
139 return anthy_do_set_personality(id);
142 /** (API) 変換contextの作成 */
143 struct anthy_context *
144 anthy_create_context(void)
149 return anthy_do_create_context(default_encoding);
152 /** (API) 変換contextのリセット */
154 anthy_reset_context(struct anthy_context *ac)
156 anthy_do_reset_context(ac);
159 /** (API) 変換contextの解放 */
161 anthy_release_context(struct anthy_context *ac)
163 anthy_do_release_context(ac);
167 * 再変換が必要かどうかの判定
170 need_reconvert(struct anthy_context *ac, xstr *xs)
174 if (ac->reconversion_mode == ANTHY_RECONVERT_ALWAYS) {
177 if (ac->reconversion_mode == ANTHY_RECONVERT_DISABLE) {
181 for (i = 0; i < xs->len; ++i) {
182 xchar xc = xs->str[i];
183 int type = anthy_get_xchar_type(xc);
185 /* これらの文字種の場合は逆変換する
186 * 「ヴ」はフロントエンドが平仮名モードの文字列として送ってくるので、
187 * 逆変換の対象とはしない
189 if (!(type & (XCT_HIRA | XCT_SYMBOL | XCT_NUM |
190 XCT_WIDENUM | XCT_OPEN | XCT_CLOSE |
200 /** (API) 変換文字列の設定 */
202 anthy_set_string(struct anthy_context *ac, const char *s)
212 anthy_do_reset_context(ac);
214 /* 辞書セッションの開始 */
215 if (!ac->dic_session) {
216 ac->dic_session = anthy_dic_create_session();
217 if (!ac->dic_session) {
222 anthy_dic_activate_session(ac->dic_session);
223 /* 変換を開始する前に個人辞書をreloadする */
224 anthy_reload_record();
226 xs = anthy_cstr_to_xstr(s, ac->encoding);
228 if (!need_reconvert(ac, xs)) {
230 retval = anthy_do_context_set_str(ac, xs, 0);
232 /* 漢字やカタカナが混じっていたら再変換してみる */
233 struct anthy_conv_stat stat;
237 /* 与えられた文字列に変換をかける */
238 retval = anthy_do_context_set_str(ac, xs, 1);
240 /* 各文節の第一候補を取得して平仮名列を得る */
241 anthy_get_stat(ac, &stat);
243 for (i = 0; i < stat.nr_segment; ++i) {
244 seg = anthy_get_nth_segment(&ac->seg_list, i);
245 hira_xs = anthy_xstrcat(hira_xs, &seg->cands[0]->str);
247 /* 改めて変換を行なう */
248 anthy_release_segment_list(ac);
249 retval = anthy_do_context_set_str(ac, hira_xs, 0);
250 anthy_free_xstr(hira_xs);
257 /** (API) 文節長の変更 */
259 anthy_resize_segment(struct anthy_context *ac, int nth, int resize)
261 anthy_dic_activate_session(ac->dic_session);
262 anthy_do_resize_segment(ac, nth, resize);
265 /** (API) 変換の状態の取得 */
267 anthy_get_stat(struct anthy_context *ac, struct anthy_conv_stat *s)
269 s->nr_segment = ac->seg_list.nr_segments;
273 /** (API) 文節の状態の取得 */
275 anthy_get_segment_stat(struct anthy_context *ac, int n,
276 struct anthy_segment_stat *s)
279 seg = anthy_get_nth_segment(&ac->seg_list, n);
281 s->nr_candidate = seg->nr_cands;
282 s->seg_len = seg->str.len;
289 get_special_candidate_index(int nth, struct seg_ent *seg)
296 if (nth == NTH_UNCONVERTED_CANDIDATE ||
297 nth == NTH_HALFKANA_CANDIDATE) {
300 if (nth == NTH_KATAKANA_CANDIDATE) {
302 } else if (nth == NTH_HIRAGANA_CANDIDATE) {
305 for (i = 0; i < seg->nr_cands; i++) {
306 if (anthy_get_xstr_type(&seg->cands[i]->str) & mask) {
310 return NTH_UNCONVERTED_CANDIDATE;
313 /** (API) 文節の取得 */
315 anthy_get_segment(struct anthy_context *ac, int nth_seg,
316 int nth_cand, char *buf, int buflen)
323 if (nth_seg < 0 || nth_seg >= ac->seg_list.nr_segments) {
326 seg = anthy_get_nth_segment(&ac->seg_list, nth_seg);
328 /* 文節から候補を取り出す */
331 nth_cand = get_special_candidate_index(nth_cand, seg);
333 if (nth_cand == NTH_HALFKANA_CANDIDATE) {
334 xstr *xs = anthy_xstr_hira_to_half_kata(&seg->str);
335 p = anthy_xstr_to_cstr(xs, ac->encoding);
337 } else if (nth_cand == NTH_UNCONVERTED_CANDIDATE) {
338 /* 変換前の文字列を取得する */
339 p = anthy_xstr_to_cstr(&seg->str, ac->encoding);
340 } else if (nth_cand >= 0 && nth_cand < seg->nr_cands) {
341 p = anthy_xstr_to_cstr(&seg->cands[nth_cand]->str, ac->encoding);
347 /* バッファに書き込む */
353 if (len + 1 > buflen) {
354 /* バッファが足りません */
363 /* すべての文節がコミットされたかcheckする */
365 commit_all_segment_p(struct anthy_context *ac)
369 for (i = 0; i < ac->seg_list.nr_segments; i++) {
370 se = anthy_get_nth_segment(&ac->seg_list, i);
371 if (se->committed < 0) {
378 /** (API) 文節の確定 */
380 anthy_commit_segment(struct anthy_context *ac, int s, int c)
386 if (s < 0 || s >= ac->seg_list.nr_segments) {
389 if (commit_all_segment_p(ac)) {
390 /* すでに全てのセグメントがコミットされている */
394 anthy_dic_activate_session(ac->dic_session);
395 seg = anthy_get_nth_segment(&ac->seg_list, s);
397 c = get_special_candidate_index(c, seg);
399 if (c == NTH_UNCONVERTED_CANDIDATE) {
401 * 変換前の文字列がコミットされたので,それに対応する候補の番号を探す
404 for (i = 0; i < seg->nr_cands; i++) {
405 if (!anthy_xstrcmp(&seg->str, &seg->cands[i]->str)) {
410 if (c < 0 || c >= seg->nr_cands) {
415 if (commit_all_segment_p(ac)) {
416 /* 今、すべてのセグメントがコミットされた */
417 anthy_proc_commit(&ac->seg_list, &ac->split_info);
419 anthy_save_history(history_file, ac);
424 /** (API) 予測してほしい文字列の設定 */
426 anthy_set_prediction_string(struct anthy_context *ac, const char* s)
431 anthy_dic_activate_session(ac->dic_session);
432 /* 予測を開始する前に個人辞書をreloadする */
433 anthy_reload_record();
436 xs = anthy_cstr_to_xstr(s, ac->encoding);
438 retval = anthy_do_set_prediction_str(ac, xs);
445 /** (API) 予測変換の状態の取得 */
447 anthy_get_prediction_stat(struct anthy_context *ac, struct anthy_prediction_stat * ps)
449 ps->nr_prediction = ac->prediction.nr_prediction;
453 /** (API) 予測変換の候補の取得 */
455 anthy_get_prediction(struct anthy_context *ac, int nth, char* buf, int buflen)
457 struct prediction_cache* prediction = &ac->prediction;
458 int nr_prediction = prediction->nr_prediction;
462 if (nth < 0 || nr_prediction <= nth) {
466 p = anthy_xstr_to_cstr(prediction->predictions[nth].str, ac->encoding);
468 /* バッファに書き込む */
474 if (len + 1 > buflen) {
484 /** (API) 予測の結果を確定する
487 anthy_commit_prediction(struct anthy_context *ac, int nth)
489 struct prediction_cache* pc = &ac->prediction;
490 if (nth < 0 || nth >= pc->nr_prediction) {
493 anthy_do_commit_prediction(pc->predictions[nth].src_str,
494 pc->predictions[nth].str);
500 anthy_print_context(struct anthy_context *ac)
502 anthy_do_print_context(ac, default_encoding);
505 /** (API) Anthy ライブラリのバージョンを表す文字列を返す
506 * 共有ライブラリでは外部変数のエクスポートは好ましくないので関数にしてある
509 anthy_get_version_string (void)
513 #else /* just in case */
520 anthy_context_set_encoding(struct anthy_context *ac, int encoding)
523 return ANTHY_EUC_JP_ENCODING;
525 if (encoding == ANTHY_UTF8_ENCODING ||
526 encoding == ANTHY_EUC_JP_ENCODING) {
527 ac->encoding = encoding;
534 anthy_set_reconversion_mode(anthy_context_t ac, int mode)
537 return ANTHY_RECONVERT_AUTO;
539 if (mode == ANTHY_RECONVERT_AUTO ||
540 mode == ANTHY_RECONVERT_DISABLE ||
541 mode == ANTHY_RECONVERT_ALWAYS) {
542 ac->reconversion_mode = mode;
544 return ac->reconversion_mode;