2 * This program uses internal information of the conversion engine, so the
3 * layer violation is intentionally left in place.
10 #include <anthy/anthy.h>
11 #include <anthy/convdb.h>
12 #include <anthy/segment.h>
13 #include <anthy/feature_set.h>
15 #include "../src-main/main.h"
16 #include "../src-splitter/wordborder.h"
17 #include "../src-worddic/dic_ent.h"
20 /* Whether this is the independent-word part or the dependent-word part */
24 /* A word (independent word or dependent word) */
28 /* Hash of the dependent word (WORD_INDEP) or hash of the converted string (WORD_DEP) */
30 /* Hash of the reading string */
40 static struct cand_ent *
41 selected_candidate(struct seg_ent *seg)
43 if (seg->committed > -1) {
44 return seg->cands[seg->committed];
50 get_res(anthy_context_t ac, char *res_buf, int conv)
52 struct anthy_conv_stat acs;
55 anthy_get_stat(ac, &acs);
60 for (i = 0; i < acs.nr_segment; i++) {
63 anthy_get_segment(ac, i, 0, buf, 1024);
66 anthy_get_segment(ac, i, NTH_UNCONVERTED_CANDIDATE, buf, 1024);
73 static struct conv_res *
74 do_find_conv_res(struct res_db *db, const char *src, const char *res)
78 for (cr = db->res_list.next; cr; cr = cr->next) {
79 if (((!cr->res_str && !res) ||
80 !strcmp(cr->res_str, res)) &&
81 !strcmp(cr->src_str, src)) {
85 cr = (struct conv_res *)malloc(sizeof(struct conv_res));
86 cr->src_str = strdup(src);
88 cr->res_str = strdup(res);
93 cr->check = CHK_UNKNOWN;
95 cr->cand_check = NULL;
104 find_conv_res(struct res_db *db, anthy_context_t ac,
105 const char *src, int conv)
108 get_res(ac, res_buf, conv);
110 return do_find_conv_res(db, src, res_buf);
114 chomp_line(char *buf)
116 int len = strlen(buf);
117 if (buf[len-1] == '\n') {
127 db = malloc(sizeof(struct res_db));
128 db->res_list.next = NULL;
129 db->tail = &db->res_list;
134 db->res.dontcare = 0;
135 db->split.unknown = 0;
138 db->split.dontcare = 0;
144 strip_separator_vbar(char *buf, const char *str)
146 const char *src = str;
149 if (*src != '|' && *src != '~') {
159 parse_line(struct res_db *db, char *line)
161 char buf1[1024], buf2[1024], buf3[1024], buf4[1024];
167 if (line[0] == '#' || line[0] == 0) {
170 nr = sscanf(line, "%s %s %s", buf1, buf2, buf3);
172 cr = do_find_conv_res(db, buf1, NULL);
173 cr->check = CHK_UNKNOWN;
179 if (buf1[0] != '|') {
182 * plain-text segmented-text converted-text
183 * plain-text segmented-text check
198 strip_separator_vbar(buf4, buf1);
203 cr = do_find_conv_res(db, src, res);
204 if (nr == 2 && check[0] != '|') {
208 if (check[0] == 'O') {
210 } else if (check[0] == 'X') {
211 cr->check = CHK_MISS;
212 } else if (check[0] == '*') {
213 cr->check = CHK_DONTCARE;
214 } else if (check[0] == '|') {
215 cr->check = CHK_UNKNOWN;
216 cr->cand_str = strdup(check);
218 cr->check = CHK_UNKNOWN;
223 read_db(struct res_db *db, const char *fn)
235 while (fgets(line, 1024, fp)) {
236 parse_line(db, line);
241 fill_conv_info(struct word *w, struct cand_elm *elm)
243 /*w->conv_xs, w->wt*/
245 if (elm->nth == -1 ||
246 elm->nth >= elm->se->nr_dic_ents) {
251 if (!elm->se->dic_ents) {
257 de = elm->se->dic_ents[elm->nth];
258 w->conv_xs = anthy_xstr_dup(&de->str);
260 w->hash = anthy_xstr_hash(w->conv_xs);
264 init_word(struct word *w, int type)
273 free_word(struct word *w)
275 anthy_free_xstr(w->raw_xs);
276 anthy_free_xstr(w->conv_xs);
281 fill_indep_word(struct word *w, struct cand_elm *elm)
283 init_word(w, WORD_INDEP);
284 /* Get the reading as it was before conversion */
285 w->raw_xs = anthy_xstr_dup(&elm->str);
286 w->yomi_hash = anthy_xstr_hash(w->raw_xs);
289 fill_conv_info(w, elm);
294 fill_dep_word(struct word *w, struct cand_elm *elm)
296 init_word(w, WORD_DEP);
298 w->hash = anthy_xstr_hash(&elm->str);
299 w->yomi_hash = w->hash;
300 w->raw_xs = anthy_xstr_dup(&elm->str);
304 print_features(struct feature_list *fl)
310 nr = anthy_feature_list_nr(fl);
314 printf(" features=");
315 for (i = 0; i < nr; i++) {
319 printf("%d", anthy_feature_list_nth(fl, i));
324 print_word(const char *prefix, struct word *w, struct feature_list *fl)
326 printf("%s", prefix);
327 if (w->type == WORD_DEP) {
329 printf("dep_word hash=%d ", w->hash);
330 anthy_putxstrln(w->raw_xs);
334 printf("indep_word hash=%d", w->hash);
341 printf(" %s", w->wt);
348 anthy_putxstr(w->conv_xs);
353 anthy_putxstrln(w->raw_xs);
356 /** Return the segment class of seg.
357 * If seg is NULL, use cl as the class.
360 get_seg_class(struct seg_ent *seg, int cl)
366 ce = selected_candidate(seg);
368 return ce->mw->seg_class;
374 set_features(struct feature_list *fl,
375 struct seg_ent *prev_seg,
376 struct seg_ent *cur_seg)
379 cl = get_seg_class(cur_seg, SEG_TAIL);
380 pc = get_seg_class(prev_seg, SEG_HEAD);
382 anthy_feature_list_set_cur_class(fl, cl);
384 struct cand_ent *ce = selected_candidate(cur_seg);
385 anthy_feature_list_set_dep_word(fl, ce->dep_word_hash);
387 anthy_feature_list_set_dep_class(fl, ce->mw->dep_class);
388 anthy_feature_list_set_mw_features(fl, ce->mw->mw_features);
389 anthy_feature_list_set_noun_cos(fl, ce->mw->core_wt);
392 anthy_feature_list_set_class_trans(fl, pc, cl);
394 anthy_feature_list_sort(fl);
398 print_element(const char *prefix,
399 struct cand_elm *elm, struct feature_list *fl)
403 if (elm->str.len == 0) {
408 fill_indep_word(&w, elm);
409 print_word(prefix, &w, fl);
412 fill_dep_word(&w, elm);
413 print_word(prefix, &w, NULL);
419 print_unconverted(struct cand_ent *ce)
422 anthy_putxstrln(&ce->str);
426 print_eos(struct seg_ent *prev_seg)
428 struct feature_list fl;
429 anthy_feature_list_init(&fl);
430 set_features(&fl, prev_seg, NULL);
434 anthy_feature_list_free(&fl);
437 /* Mark candidate misses with '~' and segment-length misses with '!'.
438 * Mark the second and subsequent independent words within the same segment with '^'.
443 if (flag & CONV_INVALID) {
446 if (flag & CONV_SIZE_MISS) {
449 if (flag & CONV_CAND_MISS) {
456 print_segment_info(int is_negative,
457 struct seg_ent *prev_seg,
461 struct feature_list fl;
462 struct cand_ent *ce = selected_candidate(seg);
464 const char *prefix = get_prefix(is_negative);
466 anthy_feature_list_init(&fl);
467 set_features(&fl, prev_seg, seg);
468 for (i = 0; i < ce->nr_words; i++) {
469 struct cand_elm *elm = &ce->elm[i];
470 prefix = get_prefix(is_negative);
471 if (nr_indep > 0 && elm->id != -1) {
472 prefix = get_prefix(is_negative | CONV_INVALID);
475 print_element(prefix, elm, &fl);
481 anthy_feature_list_free(&fl);
485 print_size_miss_segment_info(anthy_context_t ac, int nth)
487 struct seg_ent *prev_seg = NULL;
488 struct seg_ent *seg = anthy_get_nth_segment(&ac->seg_list, nth);
490 prev_seg = anthy_get_nth_segment(&ac->seg_list, nth - 1);
492 print_segment_info(CONV_SIZE_MISS, prev_seg, seg);
496 print_cand_miss_segment_info(anthy_context_t ac, int nth)
498 struct seg_ent *prev_seg = NULL;
499 struct seg_ent *seg = anthy_get_nth_segment(&ac->seg_list, nth);
501 prev_seg = anthy_get_nth_segment(&ac->seg_list, nth - 1);
503 print_segment_info(CONV_CAND_MISS, prev_seg, seg);
507 print_context_info(anthy_context_t ac, struct conv_res *cr)
510 struct seg_ent *prev_seg = NULL;
512 printf("segments: %d\n", ac->seg_list.nr_segments);
514 for (i = 0; i < ac->seg_list.nr_segments; i++) {
515 struct seg_ent *seg = anthy_get_nth_segment(&ac->seg_list, i);
516 struct cand_ent *ce = selected_candidate(seg);
518 if (cr && cr->cand_check && cr->cand_check[i]) {
519 is_negative = CONV_CAND_MISS;
524 /* Segments that have no elements are printed as they are */
525 print_unconverted(ce);
527 /* If the candidate was changed, print that change */
528 if (seg->committed > 0) {
529 int tmp = seg->committed;
531 print_cand_miss_segment_info(ac, i);
532 seg->committed = tmp;
534 /* Print the composition of the segment */
535 print_segment_info(is_negative, prev_seg, seg);