2 * 変換や文節の伸縮などの操作が進行中の文字列や候補などを
3 * まとめて変換コンテキストと呼ぶ。
4 * Anthyのコンテキストに対する操作は全てここから呼ばれる。
5 * 各操作に対して変換パイプラインの必要なモジュールを順に呼びだす。
7 * personalityの管理もする。
9 * Funded by IPA未踏ソフトウェア創造事業 2001 10/29
10 * Copyright (C) 2000-2007 TABATA Yusuke
12 * $Id: context.c,v 1.26 2002/11/17 14:45:47 yusuke Exp $
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 2 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 #include <sys/types.h>
37 #include <anthy/anthy.h>
38 #include <anthy/alloc.h>
39 #include <anthy/record.h>
40 #include <anthy/ordering.h>
41 #include <anthy/splitter.h>
42 #include <anthy/xstr.h>
46 static allocator context_ator;
50 * 未設定のまま変換を開始した場合: "default"
53 static char *current_personality;
56 #define HISTORY_FILE_LIMIT 100000
61 anthy_do_reset_context((struct anthy_context *)p);
64 /** 現在のpersonalityを返す */
68 if (!current_personality) {
69 current_personality = strdup("default");
70 anthy_dic_set_personality(current_personality);
72 return current_personality;
76 release_segment(struct seg_ent *s)
80 for (i = 0; i < s->nr_cands; i++) {
81 anthy_release_cand_ent(s->cands[i]);
92 /** 文節リストの最後の要素を削除する */
94 pop_back_seg_ent(struct anthy_context *c)
97 s = c->seg_list.list_head.prev;
98 if (s == &c->seg_list.list_head) {
101 s->prev->next = s->next;
102 s->next->prev = s->prev;
104 c->seg_list.nr_segments --;
108 /** n番目の文節の文字のindexを求める */
110 get_nth_segment_index(struct anthy_context *c, int n)
113 for (i = 0, s = 0; i < c->str.len; i++) {
114 if (c->split_info.ce[i].seg_border) {
124 /** n番目の文節の長さを求める．
125 * segment_listが構成されていなくても計算できるようにする．
128 get_nth_segment_len(struct anthy_context *c, int sindex)
131 a = get_nth_segment_index(c, sindex);
136 for (i = a+1; !c->split_info.ce[i].seg_border; i++) {
142 /** metawordの配列を作る */
144 make_metaword_array(struct anthy_context *ac,
149 for (i = se->len; i > 0; i--) {
151 /* ºÇ¸å¤ËÂùÅÀ¤È¤«¤¬¤Ä¤¤¤Æ¤¿¤éľÁ°¤Îʸ»ú¤´¤ÈÍ */
153 anthy_get_xchar_type(se->str.str[i]) & XCT_PART) {
154 /* FIXME 濁点とかがありえない並びをしてたら */
159 se->nr_metaword = anthy_get_nr_metaword(&ac->split_info, se->from, i);
160 if (!se->nr_metaword) {
163 /* metawordを配列に取り込む */
164 se->mw_array = malloc(sizeof(struct meta_word*) * se->nr_metaword);
165 for (j = 0; j < se->nr_metaword; j++) {
166 se->mw_array[j] = anthy_get_nth_metaword(&ac->split_info, se->from, i, j);
172 static struct seg_ent*
173 create_segment(struct anthy_context *ac, int from, int len,
174 struct meta_word* best_mw)
177 s = (struct seg_ent *)malloc(sizeof(struct seg_ent));
178 s->str.str = &ac->str.str[from];
184 s->best_seg_class = ac->split_info.ce[from].best_seg_class;
185 s->best_mw = best_mw;
186 make_metaword_array(ac, s);
190 /** 変換コンテキストに文節を追加する */
192 push_back_segment(struct anthy_context *ac, struct seg_ent *se)
194 se->next = &ac->seg_list.list_head;
195 se->prev = ac->seg_list.list_head.prev;
196 ac->seg_list.list_head.prev->next = se;
197 ac->seg_list.list_head.prev = se;
198 ac->seg_list.nr_segments ++;
202 /** splitterによって配列中に付けられた文節境界のマークから、
203 * 文節のリストを構成する
206 create_segment_list(struct anthy_context *ac, int from, int to)
210 /* from の所までにいくつの文節があるか調べる */
213 i += get_nth_segment_len(ac, n);
217 for (i = from; i < to; i++) {
218 if (ac->split_info.ce[i].seg_border) {
219 int len = get_nth_segment_len(ac, n);
220 s = create_segment(ac, i, len, ac->split_info.ce[i].best_mw);
222 push_back_segment(ac, s);
228 /** コンテキストを作る */
229 struct anthy_context *
230 anthy_do_create_context(int encoding)
232 struct anthy_context *ac;
233 char *p = get_personality();
239 ac = (struct anthy_context *)anthy_smalloc(context_ator);
242 ac->seg_list.nr_segments = 0;
243 ac->seg_list.list_head.prev = &ac->seg_list.list_head;
244 ac->seg_list.list_head.next = &ac->seg_list.list_head;
245 ac->split_info.word_split_info = NULL;
246 ac->split_info.ce = NULL;
247 ac->ordering_info.oc = NULL;
248 ac->dic_session = NULL;
249 ac->prediction.str.str = NULL;
250 ac->prediction.str.len = 0;
251 ac->prediction.nr_prediction = 0;
252 ac->prediction.predictions = NULL;
253 ac->encoding = encoding;
254 ac->reconversion_mode = ANTHY_RECONVERT_AUTO;
259 /** コンテキストのアロケータを作る */
261 anthy_init_contexts(void)
263 context_ator = anthy_create_allocator(sizeof(struct anthy_context),
268 anthy_quit_contexts(void)
270 anthy_free_allocator(context_ator);
274 release_prediction(struct prediction_cache *pc)
281 if (pc->predictions) {
282 for (i = 0; i < pc->nr_prediction; ++i) {
283 anthy_free_xstr(pc->predictions[i].src_str);
284 anthy_free_xstr(pc->predictions[i].str);
286 free(pc->predictions);
287 pc->predictions = NULL;
292 anthy_release_segment_list(struct anthy_context *ac)
295 sc = ac->seg_list.nr_segments;
296 for (i = 0; i < sc; i++) {
297 pop_back_seg_ent(ac);
299 ac->seg_list.nr_segments = 0;
302 /* resetではcontextのために確保されたリソースを全て解放する */
304 anthy_do_reset_context(struct anthy_context *ac)
306 /* まず辞書セッションを解放 */
307 if (ac->dic_session) {
308 anthy_dic_release_session(ac->dic_session);
309 ac->dic_session = NULL;
312 /* 文字列が設定されていなければ解放すべき物はもう無い */
317 anthy_release_split_context(&ac->split_info);
318 anthy_release_segment_list(ac);
320 /* 予測された文字列の解放 */
321 release_prediction(&ac->prediction);
325 anthy_do_release_context(struct anthy_context *ac)
327 anthy_sfree(context_ator, ac);
331 make_candidates(struct anthy_context *ac, int from, int from2, int is_reverse)
334 int len = ac->str.len;
336 /* 文節の境界を設定 */
337 /* from と from2の間に境界を作ることを禁止する */
338 anthy_mark_border(&ac->split_info, from, from2, len);
339 create_segment_list(ac, from, len);
340 anthy_sort_metaword(&ac->seg_list);
343 for (i = 0; i < ac->seg_list.nr_segments; i++) {
344 anthy_do_make_candidates(&ac->split_info,
345 anthy_get_nth_segment(&ac->seg_list, i),
349 anthy_sort_candidate(&ac->seg_list, 0);
353 anthy_do_context_set_str(struct anthy_context *ac, xstr *s, int is_reverse)
357 /* 文字列をコピー(一文字分余計にして0をセット) */
358 ac->str.str = (xchar *)malloc(sizeof(xchar)*(s->len+1));
359 anthy_xstrcpy(&ac->str, s);
360 ac->str.str[s->len] = 0;
362 /* splitterの初期化*/
363 anthy_init_split_context(&ac->str, &ac->split_info, is_reverse);
366 make_candidates(ac, 0, 0, is_reverse);
368 /* 最初に設定した文節境界を覚えておく */
369 for (i = 0; i < ac->seg_list.nr_segments; i++) {
370 struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i);
371 ac->split_info.ce[s->from].initial_seg_len = s->len;
378 anthy_do_resize_segment(struct anthy_context *ac,
384 /* resizeが可能か検査する */
385 if (nth >= ac->seg_list.nr_segments) {
388 index = get_nth_segment_index(ac, nth);
389 len = get_nth_segment_len(ac, nth);
390 if (index + len + resize > ac->str.len) {
393 if (len + resize < 1) {
397 /* nth以降のseg_entを解放する */
398 sc = ac->seg_list.nr_segments;
399 for (i = nth; i < sc; i++) {
400 pop_back_seg_ent(ac);
403 /* resizeしたseg_borderをマークする */
404 /* 現在のマークを消して新しいマークをつける */
405 ac->split_info.ce[index+len].seg_border = 0;
406 ac->split_info.ce[ac->str.len].seg_border = 1;
407 for (i = index+len+resize+1; i < ac->str.len; i++) {
408 ac->split_info.ce[i].seg_border = 0;
410 ac->split_info.ce[index+len+resize].seg_border = 1;
411 for (i = index; i < ac->str.len; i++) {
412 ac->split_info.ce[i].best_mw = NULL;
416 make_candidates(ac, index, index+len+resize, 0);
420 * n番めの文節を取得する、無い場合にはNULLを返す
423 anthy_get_nth_segment(struct segment_list *sl, int n)
427 if (n >= sl->nr_segments ||
431 for (i = 0, se = sl->list_head.next; i < n; i++, se = se->next);
436 anthy_do_set_prediction_str(struct anthy_context *ac, xstr* xs)
438 struct prediction_cache* prediction = &ac->prediction;
441 /* まず辞書セッションを解放 */
442 if (ac->dic_session) {
443 anthy_dic_release_session(ac->dic_session);
444 ac->dic_session = NULL;
446 /* 予測された文字列の解放 */
447 release_prediction(&ac->prediction);
449 /* 辞書セッションの開始 */
450 if (!ac->dic_session) {
451 ac->dic_session = anthy_dic_create_session();
452 if (!ac->dic_session) {
457 prediction->str.str = (xchar*)malloc(sizeof(xchar*)*(xs->len+1));
458 anthy_xstrcpy(&prediction->str, xs);
459 prediction->str.str[xs->len]=0;
461 nr_prediction = anthy_traverse_record_for_prediction(xs, NULL);
462 prediction->nr_prediction = nr_prediction;
465 prediction->predictions = (struct prediction_t*)malloc(sizeof(struct prediction_t) *
467 anthy_traverse_record_for_prediction(xs, prediction->predictions);
473 get_change_state(struct anthy_context *ac)
475 int resize = 0, cand_change = 0;
477 for (i = 0; i < ac->seg_list.nr_segments; i++) {
478 struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i);
479 if (ac->split_info.ce[s->from].initial_seg_len != s->len) {
482 if (s->committed > 0) {
487 if (resize && cand_change) {
500 write_history(FILE *fp, struct anthy_context *ac)
505 for (i = 0; i < ac->seg_list.nr_segments; i++) {
506 struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i);
507 char *c = anthy_xstr_to_cstr(&s->str, ANTHY_EUC_JP_ENCODING);
508 fprintf(fp, "%s|", c);
513 for (i = 0; i < ac->seg_list.nr_segments; i++) {
514 struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i);
517 if (s->committed < 0) {
521 c = anthy_xstr_to_cstr(&s->cands[s->committed]->str,
522 ANTHY_EUC_JP_ENCODING);
523 fprintf(fp, "%s|", c);
529 anthy_save_history(const char *fn, struct anthy_context *ac)
541 st.st_size > HISTORY_FILE_LIMIT) {
546 fprintf(fp, "anthy-%s ", anthy_get_version_string());
547 fprintf(fp, "%s ", get_change_state(ac));
548 write_history(fp, ac);
552 chmod(fn, S_IREAD | S_IWRITE);
555 /** 候補を表示する */
557 anthy_print_candidate(struct cand_ent *ce)
559 int mod = (ce->score % 1000);
563 seg_score = ce->mw->score;
565 anthy_putxstr(&ce->str);
567 /*if (ce->nr_words == 1) {printf("%d,", ce->elm[0].id); }*/
568 if (ce->flag & CEF_OCHAIRE) {
571 if (ce->flag & CEF_SINGLEWORD) {
574 if (ce->flag & CEF_GUESS) {
577 if (ce->flag & (CEF_KATAKANA | CEF_HIRAGANA)) {
580 if (ce->flag & CEF_USEDICT) {
583 if (ce->flag & CEF_CONTEXT) {
586 printf(",%d,", seg_score);
590 printf("%s,%d", anthy_seg_class_sym(ce->mw->seg_class),
591 ce->mw->struct_score);
597 if (ce->score >= 1000) {
598 printf("%d,", ce->score/1000);
607 printf("%d ", ce->score);
611 /** 文節を表示する */
613 print_segment(struct seg_ent *e)
617 anthy_putxstr(&e->str);
619 for ( i = 0 ; i < e->nr_cands ; i++) {
620 anthy_print_candidate(e->cands[i]);
627 /** コンテキストを表示する */
629 anthy_do_print_context(struct anthy_context *ac, int encoding)
633 anthy_xstr_set_print_encoding(encoding);
635 ce = ac->split_info.ce;
637 printf("(invalid)\n");
640 /* 各文字を表示する */
641 for (i = 0, ce = ac->split_info.ce; i < ac->str.len; i++, ce++) {
642 if (ce->seg_border) {
645 anthy_putxchar(*(ce->c));
648 /* 各文節を表示する */
649 for (i = 0; i < ac->seg_list.nr_segments; i++) {
650 print_segment(anthy_get_nth_segment(&ac->seg_list, i));
656 anthy_release_cand_ent(struct cand_ent *ce)
662 anthy_free_xstr_str(&ce->str);
668 anthy_do_set_personality(const char *id)
670 if (current_personality) {
671 /* すでに設定されてる */
674 if (!id || strchr(id, '/')) {
677 current_personality = strdup(id);
678 anthy_dic_set_personality(current_personality);
683 anthy_init_personality(void)
685 current_personality = NULL;
689 anthy_quit_personality(void)
691 if (current_personality) {
692 free(current_personality);
693 current_personality = NULL;