2 * Writes into a file the data structure used to obtain word information from a reading (yomi)
5 * Separated from mkdic.c to make the data structure easier to change (2005/7/8)
7 * output_word_dict() is the function that gets called
9 * Copyright (C) 2000-2006 TABATA Yusuke
12 This library is free software; you can redistribute it and/or
13 modify it under the terms of the GNU Lesser General Public
14 License as published by the Free Software Foundation; either
15 version 2 of the License, or (at your option) any later version.
17 This library is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 Lesser General Public License for more details.
22 You should have received a copy of the GNU Lesser General Public
23 License along with this library; if not, write to the Free Software
24 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 #include <anthy/anthy.h>
30 #include <anthy/word_dic.h>
/* Output streams defined in another translation unit. In the code visible
 * here, yomi_entry_out and page_out receive data through fprintf(), fputc()
 * and fputs(), while page_index_out and yomi_entry_index_out are passed to
 * write_nl(). */
33 extern FILE *page_out, *page_index_out;
34 extern FILE *yomi_entry_index_out, *yomi_entry_out;
/* Write the string of one word (we->word_utf8) to yomi_entry_out.
 * When encoding is ANTHY_UTF8_ENCODING the UTF-8 string is written as it is;
 * the other visible path first converts it with anthy_conv_utf8_to_euc().
 * count receives fprintf()'s return value, i.e. the number of bytes written.
 * NOTE(review): the statements that release `s` and return from the function
 * are not part of this excerpt - confirm that the converted string is freed. */
37 write_word(struct word_entry *we, int encoding)
40 if (encoding == ANTHY_UTF8_ENCODING) {
41 count = fprintf(yomi_entry_out, "%s", we->word_utf8);
43 char *s = anthy_conv_utf8_to_euc(we->word_utf8);
44 count = fprintf(yomi_entry_out, "%s", s);
/* Write the scaled frequency of `we` to fp in the form "*<n>", where
 * n is we->freq / 100 (integer division), and add the number of bytes
 * written to count.
 * NOTE(review): any condition guarding the fprintf() and the return statement
 * are not visible in this excerpt - confirm when the suffix is emitted. */
51 write_freq(FILE *fp, struct word_entry *we)
54 int freq = we->freq / 100;
56 count += fprintf(fp, "*%d", freq);
/* Test whether two word entries differ in any of the attributes that are
 * written ahead of a word: the wt_name string (compared with strcmp), the
 * frequency divided by 100, or the feature value.
 * NOTE(review): the return statements are outside this excerpt; the caller
 * uses a true result to decide that wt_name and the frequency must be
 * written again - presumably non-zero means "different", confirm. */
62 compare_word_entry(struct word_entry *prev_we,
63 struct word_entry *we)
65 if (strcmp(prev_we->wt_name, we->wt_name) ||
66 (prev_we->freq / 100) != (we->freq / 100) ||
67 prev_we->feature != we->feature) {
73 /** Output the contents of the words for a single reading
74 * The return value is the number of bytes output
/* Write the word entries of the reading `ye` to yomi_entry_out, keeping a
 * running byte total in count.
 * NOTE(review): several lines of this function (declarations, some
 * conditions, closing braces and the return) are not part of this excerpt;
 * the comments below describe only the statements that are visible. */
77 output_word_entry_for_a_yomi(struct yomi_entry *ye, int encoding)
/* For UTF-8 output a single 'u' character is written first. */
85 if (encoding == ANTHY_UTF8_ENCODING) {
87 fputc('u', yomi_entry_out);
89 /* Output each word. */
90 for (i = 0; i < ye->nr_entries; i++) {
91 struct word_entry *we = &ye->entries[i];
92 struct word_entry *prev_we = NULL;
/* prev_we is the entry just before we; presumably only assigned when
 * i is not 0 (the guarding condition is not visible here). */
94 prev_we = &ye->entries[i-1];
101 /* The second and later entries start with a space. */
102 count += fprintf(yomi_entry_out, " ");
104 /* Output the part of speech and the frequency. */
106 compare_word_entry(prev_we, we)) {
107 count += fprintf(yomi_entry_out, "%s", we->wt_name);
/* A comma follows wt_name when the entry has a non-zero feature. */
108 if (we->feature != 0) {
109 count += fprintf(yomi_entry_out, ",");
111 count += write_freq(yomi_entry_out, we);
112 count += fprintf(yomi_entry_out, " ");
114 /* The position where the word is written is this word's id. */
/* That position is the bytes counted so far plus ye->offset. */
115 we->offset = count + ye->offset;
117 count += write_word(we, encoding);
/* A 0 byte is written to yomi_entry_out; presumably it terminates the
 * record of this reading after the loop - confirm. */
120 fputc(0, yomi_entry_out);
124 /* Compute the length of the part that two strings have in common. */
/* The visible loop compares s1->str[i] with s2->str[i] for every i below m
 * and reacts to the first mismatch.
 * NOTE(review): how m is derived from the two lengths and what is returned
 * are not visible in this excerpt; presumably m is the shorter length and
 * the result is the length of the common prefix - confirm. */
126 common_len(xstr *s1, xstr *s2)
132 if (s1->len < s2->len) {
137 for (i = 0; i < m; i++) {
138 if (s1->str[i] != s2->str[i]) {
146 * Output the difference between two strings
147 * Given the two strings AAA and ABBB,
148 * ABBB is treated as AAA with its last 2 characters removed and BBB appended,
149 * and is output as \0x2BBB.
/* Write to page_out the difference between the previous string p and the
 * current string c. m is the value returned by common_len(p, c).
 * First one byte is written with "%c": p->len - m + 1 when p is non-NULL and
 * longer than m, and 1 in the other visible branch. Then every character of
 * c from index m onward is encoded into buf by anthy_sputxchar() and written
 * with fputs(); len accumulates the values anthy_sputxchar() returns.
 * NOTE(review): p is tested for NULL here although it has already been
 * passed to common_len() - confirm that common_len() accepts NULL. The
 * return statement is not visible in this excerpt. */
152 output_diff(xstr *p, xstr *c, int encoding)
155 m = common_len(p, c);
156 if (p && p->len > m) {
157 fprintf(page_out, "%c", p->len - m + 1);
159 fprintf(page_out, "%c", 1);
161 for (i = m; i < c-> len; i++) {
163 len += anthy_sputxchar(buf, c->str[i], encoding);
164 fputs(buf, page_out);
/* Start a new page: hands i to write_nl() together with page_index_out.
 * NOTE(review): write_nl() is defined elsewhere; presumably it writes the
 * integer to the given stream - confirm. Other statements of this function
 * may be missing from this excerpt. */
170 begin_new_page(int i)
173 write_nl(page_index_out, i);
/* Record one entry index: hands i to write_nl() together with
 * yomi_entry_index_out.
 * NOTE(review): write_nl() is defined elsewhere; presumably it writes the
 * integer to the given stream - confirm. */
177 output_entry_index(int i)
179 write_nl(yomi_entry_index_out, i);
182 /* Build the table used to find a position (offset) in the file from a reading string
183 * Output goes to page_out, page_index_out and yomi_entry_index_out
/* Walk the valid entries of yl and, for each reading, emit its difference
 * from the previous reading (output_diff) and its offset
 * (output_entry_index). page_index is advanced by output_diff()'s return
 * value, and begin_new_page() is called whenever i is a non-zero multiple of
 * WORDS_PER_PAGE.
 * NOTE(review): the declarations and initial values of prev and page_index,
 * and anything else done at a page boundary, are not visible in this
 * excerpt. */
186 generate_yomi_to_offset_map(struct yomi_entry_list *yl)
189 struct yomi_entry *ye = NULL;
192 /* Build the data structure for computing a position (offset) from a reading. */
194 /* First, write out the index of the entry for the first reading. */
195 write_nl(page_index_out, page_index);
197 for (i = 0; i < yl->nr_valid_entries; i++) {
198 ye = yl->ye_array[i];
199 /* Start of a new page. */
200 if ((i % WORDS_PER_PAGE) == 0 && (i != 0)) {
203 begin_new_page(page_index);
206 /* Output the information that corresponds to the reading. */
207 page_index += output_diff(prev, ye->index_xstr, yl->index_encoding);
209 output_entry_index(ye->offset);
/* Remember this reading so the next one can be diffed against it. */
211 prev = ye->index_xstr;
215 /** Output the word dictionary.
216 * The offsets within the dictionary are also computed at this point. */
218 output_word_dict(struct yomi_entry_list *yl)
222 struct yomi_entry *ye = NULL;
224 /* Loop over each reading. */
225 for (i = 0; i < yl->nr_valid_entries; i++) {
226 /* Output the words and compute the position (offset) in the file. */
227 ye = yl->ye_array[i];
/* Each reading starts at the running total of the values returned by
 * output_word_entry_for_a_yomi() so far. */
228 ye->offset = entry_index;
229 entry_index += output_word_entry_for_a_yomi(ye, yl->body_encoding);
231 /* Build the table for finding the position (offset) in the file from a reading string. */
232 generate_yomi_to_offset_map(yl);
234 /* Terminate the last reading. */
/* NOTE(review): presumably this runs after the loop, so ye is the last
 * entry the loop handled (or NULL if the loop never ran) and that entry is
 * passed to output_word_entry_for_a_yomi() a second time - confirm this is
 * intended and that a NULL ye is tolerated. */
235 entry_index += output_word_entry_for_a_yomi(ye, yl->body_encoding);
236 write_nl(yomi_entry_index_out, entry_index);
237 write_nl(page_index_out, 0);
/* Print a summary to stdout. The format has three %d conversions; the line
 * carrying the second argument is not part of this excerpt. */
240 printf("Total %d indexes, %d words, (%d pages).\n",
241 yl->nr_valid_entries,
243 yl->nr_valid_entries / WORDS_PER_PAGE + 1);