2 * Copyright (C) 2000-2007 TABATA Yusuke
3 * Copyright (C) 2004-2006 YOSHIDA Yuichi
6 * Converts the dependent-word graph into binary form.
8 * Initialization of pointers to nodes in the dependent-word table
11 This library is free software; you can redistribute it and/or
12 modify it under the terms of the GNU Lesser General Public
13 License as published by the Free Software Foundation; either
14 version 2 of the License, or (at your option) any later version.
16 This library is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
21 You should have received a copy of the GNU Lesser General Public
22 License along with this library; if not, write to the Free Software
23 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 #include <anthy/alloc.h>
31 #include <anthy/conf.h>
32 #include <anthy/ruleparser.h>
33 #include <anthy/xstr.h>
34 #include <anthy/logger.h>
35 #include <anthy/splitter.h>
36 #include <anthy/anthy.h>
37 #include <anthy/depgraph.h>
38 #include <anthy/diclib.h>
/* Table of dependency-graph nodes; grown one element at a time with
 * realloc() in get_node_id_by_name(). */
46 static struct dep_node* gNodes;
/* Names of the nodes, indexed the same way as gNodes (entry i is filled
 * together with gNodes[i]). */
47 static char** gNodeNames;
/* Table of word-sequence rules; grown one element at a time with
 * realloc() in parse_indep(). */
51 static struct wordseq_rule *gRules;
/* Look up a node by name, registering a new node when the name is unknown.
 *
 * name: NUL-terminated node name; it is compared with strcmp() and copied
 *       with strdup() when a new node is created.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the index of the matching / newly created node is returned.
 */
55 get_node_id_by_name(const char *name)
58 /* Search among the nodes that are already registered */
59 for (i = 0; i < nrNodes; i++) {
60 if (!strcmp(name,gNodeNames[i])) {
64 /* Not found, so create a new node */
/* NOTE(review): the realloc()/strdup() results are used without a NULL
 * check in the visible lines. */
65 gNodes = realloc(gNodes, sizeof(struct dep_node)*(nrNodes+1));
66 gNodeNames = realloc(gNodeNames, sizeof(char*)*(nrNodes+1));
/* The new node starts out with no branches. */
67 gNodes[nrNodes].nr_branch = 0;
68 gNodes[nrNodes].branch = 0;
69 gNodeNames[nrNodes] = strdup(name);
75 /* Find the branch of a node from its transition condition.
 *
 * node:    node whose branch array is searched and, if needed, extended.
 * strs:    array of nr_strs xstr pointers forming the transition condition.
 * nr_strs: number of entries in strs.
 *
 * A branch matches when it has the same number of strings and every string
 * compares equal with anthy_xstrcmp().  Otherwise a new branch is appended
 * to node->branch.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the matching or newly created branch is returned.
 */
76 static struct dep_branch *
77 find_branch(struct dep_node *node, xstr **strs, int nr_strs)
79 struct dep_branch *db;
81 /* Look for a branch with the same transition condition */
82 for (i = 0; i < node->nr_branch; i++) {
83 db = &node->branch[i];
84 if (nr_strs != db->nr_strs) {
87 for (j = 0; j < nr_strs; j++) {
88 if (anthy_xstrcmp(db->str[j], strs[j])) {
96 /* Allocate a new branch */
97 node->branch = realloc(node->branch,
98 sizeof(struct dep_branch)*(node->nr_branch+1));
99 db = &node->branch[node->nr_branch];
/* Only the xstr pointers are copied, not the strings they point to. */
101 db->str = malloc(sizeof(xstr*)*nr_strs);
102 for (i = 0; i < nr_strs; i++) {
103 db->str[i] = strs[i];
105 db->nr_strs = nr_strs;
106 db->nr_transitions = 0;
/* Parse one transition token and fill in *tr.
 *
 * token: text of the transition; attribute letters are consumed until a
 *        '@' character is reached, and the text from '@' on is used as the
 *        name of the destination node.
 * tr:    transition record to fill in.
 *
 * NOTE(review): the switch statements that select between the three groups
 * of case labels below are not visible in this listing.
 */
116 parse_transition(char *token, struct dep_transition *tr)
120 enum dep_class dc = DEP_NONE;
122 tr->head_pos = POS_NONE;
124 /* Parse the attributes of the transition */
125 while (*token != '@') {
/* Letter -> conjugation-type constant (CT_*). */
134 case 'z': ct = CT_MIZEN; break;
135 case 'y': ct = CT_RENYOU; break;
136 case 's': ct = CT_SYUSI; break;
137 case 't': ct = CT_RENTAI; break;
138 case 'k': ct = CT_KATEI; break;
139 case 'm': ct = CT_MEIREI; break;
140 case 'g': ct = CT_HEAD; break;
/* Letter -> head part-of-speech constant (POS_*). */
147 case 'n': tr->head_pos = POS_NOUN; break;
148 case 'v': tr->head_pos = POS_V; break;
149 case 'j': tr->head_pos = POS_AJV; break;
/* Letter -> dependency class (DEP_*); judging by the diagnostic below
 * these letters follow an 'S' prefix and are read from token[1]. */
156 /* case 'n': sc = DEP_NO; break;*/
157 case 'f': dc = DEP_FUZOKUGO; break;
158 case 'k': dc = DEP_KAKUJOSHI; break;
159 case 'y': dc = DEP_RENYOU; break;
160 case 't': dc = DEP_RENTAI; break;
161 case 'e': dc = DEP_END; break;
162 case 'r': dc = DEP_RAW; break;
163 default: printf("unknown (S%c)\n", token[1]);
168 printf("Unknown (%c) %s\n", *token, str);
173 /* What follows from '@' on is the name of the node */
174 tr->next_node = get_node_id_by_name(token);
182 * node-name transition-condition+ transition-destination+
/* Parse one line of the dependent-word graph definition.
 *
 * tokens: the tokens of the line.
 * nr:     number of tokens.
 *
 * The line names a node, then lists zero or more quoted transition
 * conditions, then the transitions that are appended to the branch
 * selected by those conditions.
 */
185 parse_dep(char **tokens, int nr)
188 struct dep_branch *db;
/* At most nr condition strings can appear on the line, so nr slots are
 * reserved on the stack. */
191 xstr **strs = alloca(sizeof(xstr*) * nr);
193 /* Obtain the node and its id */
194 id = get_node_id_by_name(tokens[row]);
200 /* Build the array of dependent words that form the transition condition */
201 for (; row < nr && tokens[row][0] == '\"'; row++) {
/* Copy the token without its leading double quote. */
203 s = strdup(&tokens[row][1]);
205 strs[nr_strs] = anthy_cstr_to_xstr(s, ANTHY_EUC_JP_ENCODING);
210 /* If there is no transition condition, emit a warning and add an empty transition condition */
213 anthy_log(0, "node %s has a branch without any transition condition.\n",
216 strs[0] = anthy_cstr_to_xstr(s, ANTHY_EUC_JP_ENCODING);
221 /* Add the destination nodes to the branch */
222 db = find_branch(dn, strs, nr_strs);
223 for ( ; row < nr; row++){
224 struct dep_transition *tr;
/* Grow the transition array by one element and parse into the new slot. */
225 db->transition = realloc(db->transition,
226 sizeof(struct dep_transition)*
227 (db->nr_transitions+1));
228 tr = &db->transition[db->nr_transitions];
229 parse_transition(tokens[row], tr);
230 db->nr_transitions ++;
234 /* Check whether the grammar definition file contains any empty node */
/* Report every node that ended up without a branch.  The scan starts at
 * index 1, so node 0 is never reported. */
239 for (i = 1; i < nrNodes; i++) {
240 if (gNodes[i].nr_branch == 0) {
/* Pass the name of node i: gNodeNames itself is a char** and must not be
 * handed to a %s conversion. */
241 anthy_log(0, "node %s has no branch.\n", gNodeNames[i]);
/* Load the dependent-word graph: register the node "@" first, then read
 * the file named by the "DEPWORD" configuration entry and hand every line
 * to parse_dep(). */
248 init_depword_tab(void)
254 /* Assign id 0 to the empty node */
255 get_node_id_by_name("@");
258 fn = anthy_conf_get_str("DEPWORD");
260 anthy_log(0, "Dependent word dictionary is unspecified.\n");
263 if (anthy_open_file(fn) == -1) {
264 anthy_log(0, "Failed to open dep word dict (%s).\n", fn);
267 /* Read the dependent-word graph one line at a time */
268 while (!anthy_read_line(&tokens, &nr)) {
269 parse_dep(tokens, nr);
/* Parse one line of the independent-word definition file and append the
 * resulting rule to gRules.
 *
 * tokens: the tokens of the line; tokens[0] is a word-type name and
 *         tokens[1] is a node name.
 * nr:     number of tokens.
 */
279 parse_indep(char **tokens, int nr)
/* NOTE(review): "Syntex" in the message below looks like a typo for
 * "Syntax"; the string is left untouched here. */
282 printf("Syntex error in indepword defs"
283 " :%d.\n", anthy_get_line_number());
/* Grow the rule table by one element. */
286 gRules = realloc(gRules, sizeof(struct wordseq_rule)*(nrRules+1));
288 /* The line starts with the name of the part of speech */
289 gRules[nrRules].wt = anthy_init_wtype_by_name(tokens[0]);
291 /* Next comes the node name */
292 gRules[nrRules].node_id = get_node_id_by_name(tokens[1]);
295 printf("%d (%s)\n", nrRules, tokens[0]);
301 /** Transition table from independent words.
 *
 * Reads the file named by the "INDEPWORD" configuration entry and hands
 * every line to parse_indep().
 */
303 init_indep_word_seq_tab(void)
309 fn = anthy_conf_get_str("INDEPWORD");
311 printf("independent word dict unspecified.\n");
314 if (anthy_open_file(fn) == -1) {
315 printf("Failed to open indep word dict (%s).\n", fn);
318 /* Read the file one line at a time */
319 while (!anthy_read_line(&tokens, &nr)) {
320 parse_indep(tokens, nr);
329 Writes out 4 bytes in network byte order
/* Write one int to fp after converting it with anthy_dic_htonl().
 *
 * fp: output stream.
 * i:  value to write.
 */
332 write_nl(FILE* fp, int i)
334 i = anthy_dic_htonl(i);
/* sizeof(int) bytes are written; the fwrite() result is not checked. */
335 fwrite(&i, sizeof(int), 1, fp);
/* Write one transition record to fp as six consecutive write_nl() values,
 * in the order: next_node, pos, ct, dc, head_pos, weak. */
339 write_transition(FILE* fp, struct dep_transition* transition)
341 write_nl(fp, transition->next_node);
342 write_nl(fp, transition->pos);
343 write_nl(fp, transition->ct);
344 write_nl(fp, transition->dc);
345 write_nl(fp, transition->head_pos);
346 write_nl(fp, transition->weak);
/* Write an xstr to fp: first its length via write_nl(), then each
 * character converted with anthy_dic_htonl(). */
350 write_xstr(FILE* fp, xstr* str)
354 write_nl(fp, str->len);
356 for (i = 0; i < str->len; i++) {
357 c = anthy_dic_htonl(str->str[i]);
/* Each character occupies sizeof(xchar) bytes in the output. */
358 fwrite(&c, sizeof(xchar), 1, fp);
/* Write one branch to fp: the number of condition strings followed by the
 * strings, then the number of transitions followed by the transitions. */
363 write_branch(FILE* fp, struct dep_branch* branch)
367 write_nl(fp, branch->nr_strs);
368 for (i = 0; i < branch->nr_strs; ++i) {
369 write_xstr(fp, branch->str[i]);
372 write_nl(fp, branch->nr_transitions);
373 for (i = 0; i < branch->nr_transitions; ++i) {
374 write_transition(fp, &branch->transition[i]);
/* Write one node to fp: the branch count followed by every branch. */
379 write_node(FILE* fp, struct dep_node* node)
382 write_nl(fp, node->nr_branch);
383 for (i = 0; i < node->nr_branch; ++i) {
384 write_branch(fp, &node->branch[i]);
/* Write a word type to fp as six single bytes (one fputc() each), in the
 * order: pos, cos, scos, cc, ct, wf. */
389 write_wtype(FILE *fp, wtype_t wt)
391 fputc(anthy_wtype_get_pos(wt), fp);
392 fputc(anthy_wtype_get_cos(wt), fp);
393 fputc(anthy_wtype_get_scos(wt), fp);
394 fputc(anthy_wtype_get_cc(wt), fp);
395 fputc(anthy_wtype_get_ct(wt), fp);
396 fputc(anthy_wtype_get_wf(wt), fp);
/* Write the rule table and the node table to the file file_name.
 *
 * Layout: nrRules, then for each rule its word type and node id; then
 * nrNodes, then every node in index order.
 */
402 write_file(const char* file_name)
/* NOTE(review): the stream is opened in text mode ("w") although the
 * payload is binary, and no NULL check on fp is visible in this listing. */
405 FILE* fp = fopen(file_name, "w");
/* NOTE(review): node_offset is not referenced by any other visible line
 * of this function. */
406 int* node_offset = malloc(sizeof(int) * nrNodes); /* positions of gNodes in the file */
409 write_nl(fp, nrRules);
410 for (i = 0; i < nrRules; ++i) {
411 write_wtype(fp, gRules[i].wt);
412 write_nl(fp, gRules[i].node_id);
415 write_nl(fp, nrNodes);
417 for (i = 0; i < nrNodes; ++i) {
418 write_node(fp, &gNodes[i]);
/* Program entry point: set up the configuration, load the definition
 * tables and write them to "anthy.dep". */
426 main(int argc, char* argv[])
428 /* Read the dependent-word dictionary and write it out to a file */
/* Point the configuration at the source tree before it is initialized. */
429 anthy_conf_override("CONFFILE", "../anthy-conf");
430 anthy_conf_override("ANTHYDIR", SRCDIR "/../depgraph/");
433 anthy_do_conf_init();
436 /* Transition table from independent words */
437 init_indep_word_seq_tab();
439 write_file("anthy.dep");