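2 * Sorts the candidates for a segment.
3 * In the future it should also look at adjacent segments and evaluate candidates by how their words combine.
4 * Duplicate candidates are removed as well.
6 * Funded by the IPA Exploratory (Mitou) Software Creation Project, 2001 9/22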
7 * Copyright (C) 2000-2006 TABATA Yusuke
8 * Copyright (C) 2001 UGAWA Tomoharu
10 * $Id: candsort.c,v 1.27 2002/11/17 14:45:47 yusuke Exp $
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 2 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
32 #include <anthy/segment.h>
33 #include <anthy/splitter.h>
34 #include <anthy/ordering.h>
/*
 * Base scores given to candidates depending on how they were produced.
 * OCHAIRE_SCORE is not defined in this excerpt; presumably it comes from one
 * of the included anthy headers -- TODO confirm.
 */
37 /* Candidates produced by "ochaire" learning */
38 #define OCHAIRE_BASE OCHAIRE_SCORE
39 /* Score for hiragana/katakana candidates when the metaword looks sufficiently forced */
40 #define NOCONV_WITH_BIAS 900000
/* NOTE(review): no use of NORMAL_BASE is visible in this excerpt */
42 #define NORMAL_BASE 100
/* Score assigned to candidates flagged CEF_SINGLEWORD */
44 #define SINGLEWORD_BASE 10
/* Score assigned to candidates flagged CEF_COMPOUND */
46 #define COMPOUND_BASE (OCHAIRE_SCORE / 2)
47 /* A part of a compound word turned into a single segment */
48 #define COMPOUND_PART_BASE 2
/* NOTE(review): no use of DEPWORD_BASE is visible in this excerpt */
50 #define DEPWORD_BASE (OCHAIRE_SCORE / 2)
51 /* Default score for hiragana/katakana candidates (presumably NOCONV_BASE, whose definition is not visible here) */
54 /* Decide whether the candidate assignment for this segment looks forced */
/*
 * NOTE(review): only part of the body is visible in this excerpt; the
 * return statements and the assignment of mw are not shown.
 */
56 uncertain_segment_p(struct seg_ent *se)
/* Segment has no metaword at all */
59 if (se->nr_metaword == 0) {
/* True when mw covers at most 3/5 of the segment length (se->len >= mw->len * 5 / 3) */
66 if (se->len * 3 >= mw->len * 5) {
/*
 * Release the trailing candidates of a segment, starting at the first
 * candidate whose score is 0.
 * NOTE(review): whether se->nr_cands is adjusted afterwards is not visible
 * in this excerpt.
 */
73 release_redundant_candidate(struct seg_ent *se)
76 /* The array is sorted, so the candidates whose score is 0 are lined up at the end */
/* Empty-bodied loop: advances i to the first zero-score candidate (or to nr_cands) */
77 for (i = 0; i < se->nr_cands && se->cands[i]->score; i++);
78 /* Release the candidates from index i onward */
79 if (i < se->nr_cands) {
80 for (j = i; j < se->nr_cands; j++) {
81 anthy_release_cand_ent(se->cands[j]);
87 /* Candidate comparison function for qsort */
/*
 * p1 and p2 point at elements of an array of struct cand_ent pointers.
 * The result is positive when the second candidate has the larger score,
 * so qsort orders the array by descending score.
 * NOTE(review): the plain subtraction assumes the two scores cannot differ
 * by more than INT_MAX -- confirm the score range.
 */
89 candidate_compare_func(const void *p1, const void *p2)
91 const struct cand_ent *const *c1 = p1, *const *c2 = p2;
92 return (*c2)->score - (*c1)->score;
/*
 * Sort the candidate pointer array of a segment in place with qsort,
 * using candidate_compare_func (highest score first).
 */
96 sort_segment(struct seg_ent *se)
98 qsort(se->cands, se->nr_cands,
99 sizeof(struct cand_ent *),
100 candidate_compare_func);
/*
 * Demote katakana candidates that are not at the head of the list: every
 * candidate from index 1 onward that carries CEF_KATAKANA gets its score
 * set to NOCONV_BASE.
 * NOTE(review): what happens when se->cands is NULL or when the first
 * candidate is katakana is not visible in this excerpt.
 */
104 trim_kana_candidate(struct seg_ent *se)
107 if (NULL == se->cands) { /* Guard against a corrupted dictionary or corrupted learning data */
/* Special handling when the top candidate itself is katakana */
110 if (se->cands[0]->flag & CEF_KATAKANA) {
113 for (i = 1; i < se->nr_cands; i++) {
114 if (se->cands[i]->flag & CEF_KATAKANA) {
115 /* Lower it to the minimum score */
116 se->cands[i]->score = NOCONV_BASE;
/*
 * Mark duplicate candidates. For every pair (i, j) with i < j whose strings
 * compare equal (anthy_xstrcmp returns 0), the later candidate's score is
 * set to 0 and its flags are OR-ed into the earlier candidate.
 */
122 check_dupl_candidate(struct seg_ent *se)
125 for (i = 0; i < se->nr_cands - 1; i++) {
126 for (j = i + 1; j < se->nr_cands; j++) {
127 if (!anthy_xstrcmp(&se->cands[i]->str, &se->cands[j]->str)) {
128 /* Should instead do something like picking the one that matched the rules better */
129 se->cands[j]->score = 0;
130 se->cands[i]->flag |= se->cands[j]->flag;
136 /* Evaluate a candidate that was generated by part-of-speech assignment */
/*
 * NOTE(review): only part of the body is visible in this excerpt; how freq
 * is folded into score and where the result is stored are not shown.
 */
138 eval_candidate_by_metaword(struct cand_ent *ce)
143 /* First, add up the score derived from word frequency */
144 for (i = 0; i < ce->nr_words; i++) {
145 struct cand_elm *elm = &ce->elm[i];
150 /* Not subject to candidate assignment, so skip it */
153 pos = anthy_wtype_get_pos(elm->wt);
/* POS_PRE / POS_SUC (presumably prefix / suffix) are singled out here; the branch body is not visible */
154 if (pos == POS_PRE || pos == POS_SUC) {
/* Frequency of the dictionary entry chosen for this element (entry index elm->nth) */
158 freq = anthy_get_nth_dic_ent_freq(elm->se, elm->nth);
/* Scale the score by the metaword's struct_score */
163 score *= ce->mw->struct_score;
/*
 * Assign ce->score according to the candidate's flags. The visible else-if
 * chain checks, in order: CEF_OCHAIRE, CEF_SINGLEWORD, CEF_COMPOUND,
 * CEF_COMPOUND_PART, CEF_BEST, and finally the kana/guess flags.
 * `uncertain` is the value eval_segment obtains from uncertain_segment_p.
 * NOTE(review): several lines of this function are not visible in this
 * excerpt, including the opening condition and the tests around the
 * NOCONV_* assignments.
 */
171 eval_candidate(struct cand_ent *ce, int uncertain)
/* Flag mask used by the first condition (the rest of that condition is not visible here) */
174 (CEF_OCHAIRE | CEF_SINGLEWORD | CEF_HIRAGANA |
175 CEF_KATAKANA | CEF_GUESS | CEF_COMPOUND | CEF_COMPOUND_PART |
177 /* Candidate generated from the splitter's information (metaword) */
178 eval_candidate_by_metaword(ce);
179 } else if (ce->flag & CEF_OCHAIRE) {
180 ce->score = OCHAIRE_BASE;
181 } else if (ce->flag & CEF_SINGLEWORD) {
182 ce->score = SINGLEWORD_BASE;
183 } else if (ce->flag & CEF_COMPOUND) {
184 ce->score = COMPOUND_BASE;
185 } else if (ce->flag & CEF_COMPOUND_PART) {
186 ce->score = COMPOUND_PART_BASE;
187 } else if (ce->flag & CEF_BEST) {
188 ce->score = OCHAIRE_BASE;
189 } else if (ce->flag & (CEF_HIRAGANA | CEF_KATAKANA |
/*
 * NOTE(review): the next two lines look like the body of a block comment
 * whose opener is not visible, stored as mis-encoded Japanese. Decoded as
 * EUC-JP they read roughly: "This segment seems to be something like a
 * loanword, so it is better to offer the hiragana/katakana candidates
 * rather than the generated ones."
 */
193 * ¤³¤ÎʸÀá¤Ï³°Íè¸ì¤Ê¤É¤Î¤è¤¦¤Ê¤Î¤Ç¡¢À¸À®¤·¤¿¸õÊä¤è¤ê¤â
194 * ¤Ò¤é¤¬¤Ê¥«¥¿¥«¥Ê¤Î¸õÊä¤ò½Ð¤·¤¿Êý¤¬¤è¤¤
196 ce->score = NOCONV_WITH_BIAS;
197 if (CEF_KATAKANA & ce->flag) {
200 if (CEF_GUESS & ce->flag) {
/* Fallback kana score */
204 ce->score = NOCONV_BASE;
/*
 * Score every candidate of a segment. uncertain_segment_p is evaluated once
 * for the segment and its result is passed to eval_candidate for each
 * candidate.
 */
211 eval_segment(struct seg_ent *se)
214 int uncertain = uncertain_segment_p(se);
215 for (i = 0; i < se->nr_cands; i++) {
216 eval_candidate(se->cands[i], uncertain);
220 /* Adjust the candidate ranking using the contents of the learning history */
222 apply_learning(struct segment_list *sl, int nth)
/*
 * NOTE(review): the next line looks like the body of a block comment whose
 * opener is not visible, stored as mis-encoded Japanese. Decoded as EUC-JP
 * it reads roughly: "Apply in order, starting from the lowest priority."
 */
227 * Í¥Àè½ç°Ì¤ÎÄ㤤¤â¤Î¤«¤é½ç¤ËŬÍѤ¹¤ë
230 /* Reordering based on the usage-example dictionary */
231 anthy_reorder_candidates_by_relation(sl, nth);
/* Per-segment steps for every segment from index nth onward */
233 for (i = nth; i < sl->nr_segments; i++) {
234 struct seg_ent *seg = anthy_get_nth_segment(sl, i);
236 anthy_proc_swap_candidate(seg);
237 /* Reordering based on the history */
238 anthy_reorder_candidates_by_history(anthy_get_nth_segment(sl, i));
242 /** Entry point called from outside
243 * Operates on the segments from index @nth onward
246 anthy_sort_candidate(struct segment_list *sl, int nth)
249 for (i = nth; i < sl->nr_segments; i++) {
250 struct seg_ent *seg = anthy_get_nth_segment(sl, i);
253 /* Next, sort */
255 /* Give a score of 0 to the lower-scoring one of duplicated entries */
256 check_dupl_candidate(seg);
257 /* Sort once more */
259 /* Release the candidates whose evaluation is 0 */
260 release_redundant_candidate(seg);
263 /* Apply the learning history */
264 apply_learning(sl, nth);
/* Re-sort every segment (comes after apply_learning in the visible lines) */
267 for ( i = nth ; i < sl->nr_segments ; i++){
268 sort_segment(anthy_get_nth_segment(sl, i));
270 /* If a katakana candidate is not at the head, move it to the end */
271 for (i = nth; i < sl->nr_segments; i++) {
272 trim_kana_candidate(anthy_get_nth_segment(sl, i));
/* Final sort (comes after trim_kana_candidate in the visible lines) */
275 for ( i = nth ; i < sl->nr_segments ; i++){
276 sort_segment(anthy_get_nth_segment(sl, i));