2 * Follows the patterns of dependent words (particles, auxiliary verbs, etc.)
3 * that come after the independent-word part of a phrase (including prefixes and suffixes).
4 * The patterns are provided as a graph in a configuration file.
9 * |branch+--cond--+--transition--> node
10 * | | +--transition--> node
13 * |branch+--cond-----transition--> node
15 * |branch+--cond-----transition--> node
19 * Copyright (C) 2000-2007 TABATA Yusuke
20 * Copyright (C) 2006 YOSHIDA Yuichi
27 #include <anthy/anthy.h>
29 #include <anthy/conf.h>
30 #include <anthy/ruleparser.h>
31 #include <anthy/xstr.h>
32 #include <anthy/filemap.h>
33 #include <anthy/logger.h>
34 #include <anthy/segclass.h>
35 #include <anthy/splitter.h>
36 #include <anthy/wtype.h>
37 #include <anthy/diclib.h>
38 #include "wordborder.h"
41 static struct dep_dic ddic;
45 match_branch(struct splitter_context *sc,
46 struct word_list *tmpl,
47 xstr *xs, struct dep_branch *db);
49 match_nodes(struct splitter_context *sc,
51 xstr follow_str, int node);
55 anthy_xstrcmp_with_ondisk(xstr *xs,
59 int len = anthy_dic_ntohl(d[0]);
66 for (i = 0; i < len; i++) {
67 c = anthy_dic_ntohl(d[i]);
68 if (xs->str[i] != c) {
76 anthy_next_ondisk_xstr(ondisk_xstr *dxs)
79 int len = anthy_dic_ntohl(d[0]);
84 anthy_ondisk_xstr_len(ondisk_xstr *dxs)
87 return anthy_dic_ntohl(d[0]);
91 * Test the transition conditions at each node
93 * wl: word_list of the independent-word part
94 * follow_str: the string that follows the independent-word part
98 match_nodes(struct splitter_context *sc,
100 xstr follow_str, int node)
102 struct dep_node *dn = &ddic.nodes[node];
103 struct dep_branch *db;
107 for (i = 0; i < dn->nr_branch; i++) {
113 for (j = 0; j < db->nr_strs;
114 j++, dep_xs = anthy_next_ondisk_xstr(dep_xs)) {
116 /* The dependent-word string must be at least as long as the transition condition */
117 if (follow_str.len < anthy_ondisk_xstr_len(dep_xs)) {
120 /* Cut out the portion that corresponds to the transition condition */
121 cond_xs.str = follow_str.str;
122 cond_xs.len = anthy_ondisk_xstr_len(dep_xs);
124 /* Compare it against the transition condition */
125 if (!anthy_xstrcmp_with_ondisk(&cond_xs, dep_xs)) {
126 /* The transition condition matched */
127 struct word_list new_wl = *wl;
128 struct part_info *part = &new_wl.part[PART_DEPWORD];
131 part->len += cond_xs.len;
132 new_follow.str = &follow_str.str[cond_xs.len];
133 new_follow.len = follow_str.len - cond_xs.len;
135 match_branch(sc, &new_wl, &new_follow, db);
142 * Try executing each transition
144 * tmpl: the word_list built up so far
146 * db: the branch currently being examined
149 match_branch(struct splitter_context *sc,
150 struct word_list *tmpl,
151 xstr *xs, struct dep_branch *db)
153 struct part_info *part = &tmpl->part[PART_DEPWORD];
156 /* Try each transition destination in order */
157 for (i = 0; i < db->nr_transitions; i++) {
159 int head_pos = tmpl->head_pos; /* part-of-speech information */
160 int features = tmpl->mw_features;
161 enum dep_class dc = part->dc;
163 struct dep_transition *transition = &db->transition[i];
165 tmpl->tail_ct = anthy_dic_ntohl(transition->ct);
166 /* Conjugation form and part of speech of the transition */
167 if (anthy_dic_ntohl(transition->dc) != DEP_NONE) {
168 part->dc = anthy_dic_ntohl(transition->dc);
170 /* Overwrite the part-of-speech name, e.g. for verbs that become nominalized */
171 if (anthy_dic_ntohl(transition->head_pos) != POS_NONE) {
172 tmpl->head_pos = anthy_dic_ntohl(transition->head_pos);
174 if (transition->weak) {
175 tmpl->mw_features |= MW_FEATURE_WEAK_CONN;
179 if (anthy_dic_ntohl(transition->next_node)) {
181 match_nodes(sc, tmpl, *xs, anthy_dic_ntohl(transition->next_node));
183 struct word_list *wl;
186 * A terminal node has been reached,
187 * so commit it as a word_list
189 wl = anthy_alloc_word_list(sc);
191 wl->len += part->len;
194 anthy_commit_word_list(sc, wl);
198 tmpl->head_pos = head_pos;
199 tmpl->mw_features = features;
206 anthy_scan_node(struct splitter_context *sc,
207 struct word_list *tmpl,
208 xstr *follow, int node)
210 /* Start the search from the state where no dependent word is attached */
211 match_nodes(sc, tmpl, *follow, node);
218 read_xstr(struct dep_dic* ddic, int* offset)
220 int len = anthy_dic_ntohl(*(int*)&ddic->file_ptr[*offset]);
221 *offset += sizeof(int);
222 *offset += sizeof(xchar) * len;
226 read_branch(struct dep_dic* ddic, struct dep_branch* branch, int* offset)
230 /* Read the number of transition conditions */
231 branch->nr_strs = anthy_dic_ntohl(*(int*)&ddic->file_ptr[*offset]);
232 *offset += sizeof(int);
233 /* Read the transition-condition strings */
234 branch->xstrs = (ondisk_xstr *)&ddic->file_ptr[*offset];
236 for (i = 0; i < branch->nr_strs; ++i) {
237 read_xstr(ddic, offset);
240 branch->nr_transitions = anthy_dic_ntohl(*(int*)&ddic->file_ptr[*offset]);
241 *offset += sizeof(int);
242 branch->transition = (struct dep_transition*)&ddic->file_ptr[*offset];
243 *offset += sizeof(struct dep_transition) * branch->nr_transitions;
247 read_node(struct dep_dic* ddic, struct dep_node* node, int* offset)
250 node->nr_branch = anthy_dic_ntohl(*(int*)&ddic->file_ptr[*offset]);
251 *offset += sizeof(int);
253 node->branch = malloc(sizeof(struct dep_branch) * node->nr_branch);
254 for (i = 0; i < node->nr_branch; ++i) {
255 read_branch(ddic, &node->branch[i], offset);
266 ddic.file_ptr = anthy_file_dic_get_section("dep_dic");
268 /* The number of rules comes first */
269 ddic.nrRules = anthy_dic_ntohl(*(int*)&ddic.file_ptr[offset]);
270 offset += sizeof(int);
273 ddic.rules = (struct ondisk_wordseq_rule*)&ddic.file_ptr[offset];
274 offset += sizeof(struct ondisk_wordseq_rule) * ddic.nrRules;
276 ddic.nrNodes = anthy_dic_ntohl(*(int*)&ddic.file_ptr[offset]);
277 offset += sizeof(int);
279 /* Read in each node */
280 ddic.nodes = malloc(sizeof(struct dep_node) * ddic.nrNodes);
281 for (i = 0; i < ddic.nrNodes; ++i) {
282 read_node(&ddic, &ddic.nodes[i], &offset);
287 anthy_get_nr_dep_rule()
293 anthy_get_nth_dep_rule(int index, struct wordseq_rule *rule)
295 /* Extract the data from the on-file information */
296 struct ondisk_wordseq_rule *r = &ddic.rules[index];
297 rule->wt = anthy_get_wtype(r->wt[0], r->wt[1], r->wt[2],
298 r->wt[3], r->wt[4], r->wt[5]);
299 rule->node_id = anthy_dic_ntohl(r->node_id);
303 anthy_init_depword_tab()
310 anthy_quit_depword_tab(void)
313 for (i = 0; i < ddic.nrNodes; i++) {
314 struct dep_node* node = &ddic.nodes[i];