2 * Core of the Anthy dictionary library
4 * Dictionary lookups are performed through anthy_get_seq_ent_from_xstr()
6 * Copyright (C) 2000-2007 TABATA Yusuke
7 * Copyright (C) 2005-2006 YOSHIDA Yuichi
11 This library is free software; you can redistribute it and/or
12 modify it under the terms of the GNU Lesser General Public
13 License as published by the Free Software Foundation; either
14 version 2 of the License, or (at your option) any later version.
16 This library is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
21 You should have received a copy of the GNU Lesser General Public
22 License along with this library; if not, write to the Free Software
23 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 #include <anthy/anthy.h>
29 #include <anthy/dic.h>
30 #include <anthy/conf.h>
31 #include <anthy/record.h>
32 #include <anthy/alloc.h>
33 #include <anthy/logger.h>
34 #include <anthy/xchar.h>
35 #include <anthy/feature_set.h>
36 #include <anthy/textdict.h>
38 #include <anthy/diclib.h>
41 #include "dic_personality.h"
/* Counter tested by the init and quit code of this file; presumably an
 * initialization reference count -- confirm against the full source. */
45 static int dic_init_count;
48 /* File dictionary shared by all personalities */
49 static struct word_dic *master_dic_file;
51 /* Per-personality dictionaries */
52 struct mem_dic *anthy_current_personal_dic_cache;/* cache */
/* Record state of the current personality; assigned by
 * anthy_dic_set_personality(). */
54 struct record_stat *anthy_current_record;
/* Checks a cached seq_ent: when it holds neither dictionary entries nor
 * compound entries it is released from the current personal cache.
 * (Return statements are not visible in this excerpt.) */
57 anthy_validate_seq_ent(struct seq_ent *seq, xstr *xs, int is_reverse)
62 if (seq->nr_dic_ents == 0 && seq->nr_compound_ents == 0) {
63 /* An invalid (empty) entry was created, so remove it from the cache */
64 anthy_mem_dic_release_seq_ent(anthy_current_personal_dic_cache,
/* Looks xs up in the current personal cache and, when it is missing,
 * allocates a new seq_ent for it in that cache. */
73 anthy_cache_get_seq_ent(xstr *xs, int is_reverse)
77 /* If it is already in the cache, return that entry */
78 seq = anthy_mem_dic_find_seq_ent_by_xstr(anthy_current_personal_dic_cache,
84 /* Not in the cache, so allocate a new entry */
85 return anthy_mem_dic_alloc_seq_ent_by_xstr(anthy_current_personal_dic_cache,
/* Forwards the (from, to) word-relation query to the shared master
 * dictionary and returns its result unchanged. */
90 anthy_dic_check_word_relation(int from, int to)
92 return anthy_word_dic_check_word_relation(master_dic_file, from, to);
/* Core lookup: obtains the seq_ent for xs from the cache, validates it, and
 * falls back to the "ext" entry lookup. The condition selecting between the
 * two results is not visible in this excerpt. */
96 do_get_seq_ent_from_xstr(xstr *xs, int is_reverse)
99 /* Fetch from the cache */
100 seq = anthy_cache_get_seq_ent(xs, is_reverse);
101 seq = anthy_validate_seq_ent(seq, xs, is_reverse);
103 /* Look up strings that are not in the dictionary, such as numbers */
104 return anthy_get_ext_seq_ent_from_xstr(xs, is_reverse);
/* Body fragment of the KK_VU rewriting helper (presumably convert_vu(); its
 * header line is not visible here). It allocates a new xstr that is v
 * characters longer than xs and copies xs into it, emitting HK_DDOT for
 * each KK_VU.
 * NOTE(review): both malloc() results are used without a visible NULL
 * check -- confirm against the full source. */
115 /* Count the occurrences of KK_VU ("vu") */
116 for (i = 0; i < xs->len; i++) {
117 if (xs->str[i] == KK_VU) {
122 xstr *nx = malloc(sizeof(xstr));
123 nx->len = xs->len + v;
124 nx->str = malloc(sizeof(xchar)*nx->len);
126 /* Copy while converting "vu" into "u" followed by a voiced-sound mark */
127 for (i = 0; i < xs->len; i++) {
128 if (xs->str[i] == KK_VU) {
131 nx->str[j] = HK_DDOT;
134 nx->str[j] = xs->str[i];
/* Public lookup entry point. The input is first passed through convert_vu();
 * the converted string is looked up as a forward conversion (is_reverse 0),
 * otherwise xs itself is looked up with the caller's is_reverse. */
144 anthy_get_seq_ent_from_xstr(xstr *xs, int is_reverse)
152 xstr *nx = convert_vu(xs);
153 /* For a forward conversion whose input contains "vu", rewrite it as
154 * "u" + voiced-sound mark before searching. Upper layers are expected to
155 * keep the user's string exactly as given, so the rewrite happens here.
158 se = do_get_seq_ent_from_xstr(nx, 0);
163 /* "vu" does not occur, or this is a reverse conversion */
164 return do_get_seq_ent_from_xstr(xs, is_reverse);
/* Destructor callback for a struct gang_elm (p is the element). The cleanup
 * statements themselves are not visible in this excerpt. */
168 gang_elm_dtor(void *p)
170 struct gang_elm *ge = p;
/* Searches the list hanging off head->tmp.next for an element whose key
 * equals the UTF-8 form of xs; when none matches, a new element is allocated
 * from ator and linked in at the front of the list. The caller adds the
 * return value to its element count (presumably 1 for a new element and
 * 0 for an existing one -- confirm). */
175 find_gang_elm(allocator ator, struct gang_elm *head, xstr *xs)
177 char *str = anthy_xstr_to_cstr(xs, ANTHY_UTF8_ENCODING);
179 for (ge = head->tmp.next; ge; ge = ge->tmp.next) {
180 if (!strcmp(ge->key, str)) {
185 ge = anthy_smalloc(ator);
188 ge->tmp.next = head->tmp.next;
/* qsort() comparator for an array of struct gang_elm pointers: orders the
 * elements by strcmp() of their key strings. */
194 gang_elm_compare_func(const void *p1, const void *p2)
196 const struct gang_elm * const *s1 = p1;
197 const struct gang_elm * const *s2 = p2;
198 return strcmp((*s1)->key, (*s2)->key);
/* State handed to gang_scan() while a text dictionary is scanned. Only the
 * array member is visible here; gang_scan() also reads members nth and nr. */
201 struct gang_scan_context {
204 struct gang_elm **array;
/* Predicate on a seq_ent (body not visible in this excerpt). load_word()
 * treats a true result the same way as a missing entry. */
210 is_ext_ent(struct seq_ent *seq)
/* For each of the nr elements in array: gets the cached seq_ent for the
 * element's string, copies words from the private dictionary into it, then
 * validates the entry. */
219 scan_misc_dic(struct gang_elm **array, int nr, int is_reverse)
222 for (i = 0; i < nr; i++) {
223 xstr *xs = &array[i]->xs;
225 seq = anthy_cache_get_seq_ent(xs, is_reverse);
226 /* Fetch from the private dictionaries (texttrie (old format) and the unknown-word dictionary) */
228 anthy_copy_words_from_private_dic(seq, xs, is_reverse);
229 anthy_validate_seq_ent(seq, xs, is_reverse);
/* Parses the word line n and appends the word to the seq_ent for xs. When
 * no usable entry exists (NULL or an ext entry) a new one is allocated in
 * the current personal cache. The temporary word xstr is freed afterwards.
 * NOTE(review): the visible lines never use is_reverse (the lookup passes a
 * literal 0) -- confirm this is intentional. */
235 load_word(xstr *xs, const char *n, int is_reverse)
237 struct seq_ent *seq = anthy_get_seq_ent_from_xstr(xs, 0);
241 if (!seq || is_ext_ent(seq)) {
242 seq = anthy_mem_dic_alloc_seq_ent_by_xstr(anthy_current_personal_dic_cache,
245 if (anthy_parse_word_line(n, &wl)) {
248 word_xs = anthy_cstr_to_xstr(wl.word, ANTHY_UTF8_ENCODING);
249 if (anthy_type_to_wtype(wl.wt, &wt)) {
250 anthy_mem_dic_push_back_dic_ent(seq, 0, word_xs, wt,
254 anthy_free_xstr(word_xs);
/* Callback given to anthy_textdict_scan(). p is the gang_scan_context. The
 * key of the current lookup element (array[nth]) is compared with the
 * dictionary key; load_word() is called for the element with the line n.
 * The branch conditions on r and the return values are not visible here;
 * the existing comments indicate that the scan advances either the
 * dictionary position or the lookup position. */
258 gang_scan(void *p, int offset, const char *key, const char *n)
260 struct gang_scan_context *gsc = p;
261 struct gang_elm *elm;
/* Guard for when nth has reached nr */
265 if (gsc->nth >= gsc->nr) {
268 elm = gsc->array[gsc->nth];
269 r = strcmp(elm->key, key);
272 load_word(&elm->xs, n, 0);
273 /* go next in dictionary */
276 /* go next in dictionary */
279 /* go next in lookup */
/* Scans the text dictionary td from offset 0 with gang_scan() as the
 * callback. Initialization of gsc from nr and array is not visible here. */
287 scan_dict(struct textdict *td, int nr, struct gang_elm **array)
289 struct gang_scan_context gsc;
293 anthy_textdict_scan(td, 0, &gsc, gang_scan);
/* Member of the scan argument structure (presumably struct scan_arg, whose
 * header line is not visible): the element array forwarded to scan_dict(). */
297 struct gang_elm **array;
/* Callback passed to anthy_ask_scan(): unpacks the struct scan_arg and runs
 * scan_dict() on the given text dictionary. */
302 request_scan(struct textdict *td, void *arg)
304 struct scan_arg *sarg = (struct scan_arg *)arg;
305 scan_dict(td, sarg->nr, sarg->array);
/* Bulk-loads dictionary data for a sentence. Every substring of length 1 to
 * 31 is registered through find_gang_elm(); the elements are put into an
 * array sorted by key, then filled from the master dictionary, from the misc
 * private dictionaries and via anthy_ask_scan(request_scan). The allocator
 * is freed at the end.
 * NOTE(review): the malloc() result for array has no visible NULL check and
 * no visible free() -- confirm against the full source. */
309 do_gang_load_dic(xstr *sentence, int is_reverse)
311 allocator ator = anthy_create_allocator(sizeof(struct gang_elm),
316 struct gang_elm head;
317 struct gang_elm **array, *cur;
318 struct scan_arg sarg;
319 head.tmp.next = NULL;
/* Enumerate the substrings starting at each position */
321 for (from = 0; from < sentence->len ; from ++) {
322 for (len = 1; len < 32 && from + len <= sentence->len; len ++) {
323 xs.str = &sentence->str[from];
325 nr += find_gang_elm(ator, &head, &xs);
328 array = malloc(sizeof(struct gang_elm *) * nr);
330 for (i = 0; i < nr; i++) {
/* Sort by key; gang_elm_compare_func compares with strcmp() */
334 qsort(array, nr, sizeof(struct gang_elm *), gang_elm_compare_func);
336 anthy_gang_fill_seq_ent(master_dic_file, array, nr, is_reverse);
338 scan_misc_dic(array, nr, is_reverse);
339 /* Read from the private dictionary */
342 anthy_ask_scan(request_scan, (void *)&sarg);
345 anthy_free_allocator(ator);
/* Public bulk-load entry point. For a forward conversion where convert_vu()
 * returns a converted copy, that copy is loaded; otherwise the sentence
 * itself is loaded. */
349 anthy_gang_load_dic(xstr *sentence, int is_reverse)
352 if (!is_reverse && (nx = convert_vu(sentence))) {
353 do_gang_load_dic(nx, is_reverse);
356 do_gang_load_dic(sentence, is_reverse);
362 ************************
363 * Accessors for the various pieces of information held in a seq_ent
/* Returns the number of dictionary entries of se: either nr_dic_ents alone,
 * or nr_dic_ents plus the count contributed by the ext entry for xs. The
 * condition choosing between the two is not visible in this excerpt. */
366 anthy_get_nr_dic_ents(seq_ent_t se, xstr *xs)
368 struct seq_ent *s = se;
373 return s->nr_dic_ents;
375 return s->nr_dic_ents + anthy_get_nr_dic_ents_of_ext_ent(se, xs);
/* Stores the string of the n-th entry of se into x. Indexes at or beyond
 * nr_dic_ents are delegated to the ext entry with the index rebased. For a
 * regular entry x->str receives a duplicate made by anthy_xstr_dup_str()
 * (presumably owned by the caller -- confirm). */
379 anthy_get_nth_dic_ent_str(seq_ent_t se, xstr *orig,
382 if (!se || (n < 0)) { /* Fix for an invalid memory access when the swap target chosen by INDEPPAIR learning could not be found (the so-called "ichioku" issue) */
383 x->str = NULL; /* Fix for a bug causing invalid memory access or a double free */
387 if (n >= se->nr_dic_ents) {
388 return anthy_get_nth_dic_ent_str_of_ext_ent(se, orig,
389 n - se->nr_dic_ents, x);
391 x->len = se->dic_ents[n]->str.len;
392 x->str = anthy_xstr_dup_str(&se->dic_ents[n]->str);
/* Returns the is_compound flag of the nth dictionary entry of se. Indexes at
 * or beyond nr_dic_ents take a separate branch whose result is not visible
 * in this excerpt. */
397 anthy_get_nth_dic_ent_is_compound(seq_ent_t se, int nth)
402 if (nth >= se->nr_dic_ents) {
405 return se->dic_ents[nth]->is_compound;
/* Returns the frequency of the nth entry of se. Regular entries yield
 * dic_ents[nth]->freq; indexes at or beyond nr_dic_ents are delegated to the
 * ext entry with the index rebased. The condition guarding the first
 * ext-entry return is not visible in this excerpt. */
409 anthy_get_nth_dic_ent_freq(seq_ent_t se, int nth)
411 struct seq_ent *s = se;
416 return anthy_get_nth_dic_ent_freq_of_ext_ent(se, nth);
418 if (s->nr_dic_ents <= nth) {
419 return anthy_get_nth_dic_ent_freq_of_ext_ent(se, nth - se->nr_dic_ents);
421 return s->dic_ents[nth]->freq;
/* Stores the word type of the n-th entry of se into *w. Indexes at or
 * beyond nr_dic_ents are resolved through the ext entry for xs with the
 * index rebased. Return values are not visible in this excerpt. */
425 anthy_get_nth_dic_ent_wtype(seq_ent_t se, xstr *xs,
428 struct seq_ent *s = se;
433 if (s->nr_dic_ents <= n) {
435 r = anthy_get_nth_dic_ent_wtype_of_ext_ent(xs, n - s->nr_dic_ents, w);
441 *w = s->dic_ents[n]->type;
/* Sums into v the frequencies of all entries of se whose word type has
 * part of speech pos. An entry without dictionary entries is delegated to
 * the ext-entry variant. The final return is not visible in this excerpt. */
446 anthy_get_seq_ent_pos(seq_ent_t se, int pos)
449 struct seq_ent *s = se;
453 if (s->nr_dic_ents == 0) {
454 return anthy_get_ext_seq_ent_pos(se, pos);
456 for (i = 0; i < s->nr_dic_ents; i++) {
457 if (anthy_wtype_get_pos(s->dic_ents[i]->type) == pos) {
458 v += s->dic_ents[i]->freq;
/* Sums into v the frequencies of all entries of se whose word type matches
 * both pos and ct. An entry without dictionary entries is delegated to the
 * ext-entry variant. The final return is not visible in this excerpt. */
468 anthy_get_seq_ent_ct(seq_ent_t se, int pos, int ct)
471 struct seq_ent *s = se;
475 if (s->nr_dic_ents == 0) {
476 return anthy_get_ext_seq_ent_ct(s, pos, ct);
478 for (i = 0; i < s->nr_dic_ents; i++) {
479 if (anthy_wtype_get_pos(s->dic_ents[i]->type)== pos &&
480 anthy_wtype_get_ct(s->dic_ents[i]->type)==ct) {
481 v += s->dic_ents[i]->freq;
491 * Return the highest frequency among the words whose part of speech matches wt
/* Tracks in f the largest freq among entries of seq that have order == 0
 * and whose type satisfies anthy_wtype_include(wt, type). An entry without
 * dictionary entries is delegated to anthy_get_ext_seq_ent_wtype(). The
 * final return is not visible in this excerpt. */
494 anthy_get_seq_ent_wtype_freq(seq_ent_t seq, wtype_t wt)
502 if (seq->nr_dic_ents == 0) {
503 return anthy_get_ext_seq_ent_wtype(seq, wt);
508 for (i = 0; i < seq->nr_dic_ents; i++) {
509 if (seq->dic_ents[i]->order == 0 &&
510 anthy_wtype_include(wt, seq->dic_ents[i]->type)) {
511 if (f < seq->dic_ents[i]->freq) {
512 f = seq->dic_ents[i]->freq;
520 * Return the highest frequency among the compound words whose part of speech matches wt
/* Tracks in f the largest freq among the entries of se whose type satisfies
 * anthy_wtype_include(wt, type). Entries that are not compounds take a
 * separate branch (presumably skipped -- the branch body is not visible).
 * The final return is not visible in this excerpt. */
523 anthy_get_seq_ent_wtype_compound_freq(seq_ent_t se, wtype_t wt)
526 struct seq_ent *s = se;
532 for (i = 0; i < s->nr_dic_ents; i++) {
533 if (!anthy_get_nth_dic_ent_is_compound(se, i)) {
536 if (anthy_wtype_include(wt, s->dic_ents[i]->type)) {
537 if (f < s->dic_ents[i]->freq) {
538 f = s->dic_ents[i]->freq;
/* Tests the entries of se with anthy_wtype_get_indep(); an entry without
 * dictionary entries is delegated to the ext-entry variant. The values
 * returned from the loop are not visible in this excerpt. */
546 anthy_get_seq_ent_indep(seq_ent_t se)
549 struct seq_ent *s = se;
553 if (s->nr_dic_ents == 0) {
554 return anthy_get_ext_seq_ent_indep(s);
556 for (i = 0; i < s->nr_dic_ents; i++) {
557 if (anthy_wtype_get_indep(s->dic_ents[i]->type)) {
/* Returns the number of compound entries of se (non-zero means it has
 * some). Any guard preceding the return is not visible in this excerpt. */
565 anthy_has_compound_ents(seq_ent_t se)
570 return se->nr_compound_ents;
573 /* Whether the entry has any non-compound candidates */
/* Returns nr_dic_ents minus nr_compound_ents. The branch taken when
 * nr_dic_ents is 0 is not visible in this excerpt. */
575 anthy_has_non_compound_ents(seq_ent_t se)
580 if (se->nr_dic_ents == 0) {
583 return se->nr_dic_ents - se->nr_compound_ents;
/* Returns dic_ents[nth] of se when nth lies in [0, nr_dic_ents).
 * NOTE(review): the bound is nr_dic_ents, not nr_compound_ents, and the
 * visible lines do not check that the entry is a compound -- confirm that
 * callers rely on this. */
587 anthy_get_nth_compound_ent(seq_ent_t se, int nth)
592 if (nth >= 0 && nth < se->nr_dic_ents) {
593 return se->dic_ents[nth];
/* One element of a compound entry as filled in by get_nth_elm_compound(),
 * which writes the members len and str (members not visible here). */
598 struct elm_compound {
603 /* Return the length of the reading that corresponds to an element */
/* Decodes a length character: 'a'..'z' map to 10..35. The digit branch
 * covers '1'..'9' only ('0' is excluded by the strict '>' comparison); its
 * return value and the fall-through return are not visible here. */
605 get_element_len(xchar xc)
607 if (xc > '0' && xc <= '9') {
610 if (xc >= 'a' && xc <= 'z') {
611 return xc - 'a' + 10;
/* Locates the nth element inside the string of compound entry ce and fills
 * elm with it. An element starts at '_' followed by a length character that
 * get_element_len() maps to a positive value; elm->str starts two characters
 * after the '_'. The loop over j looks for the next '_' (presumably to trim
 * elm->str.len -- body not visible).
 * NOTE(review): ce->str.str[off+1] is read in the while condition before
 * the visible bounds check on off + 1 -- confirm it cannot read past the
 * end of the string. */
616 static struct elm_compound *
617 get_nth_elm_compound(compound_ent_t ce, struct elm_compound *elm, int nth)
621 for (i = 0; i <= nth; i++) {
622 /* Move to the head of the nth element */
623 while (!(ce->str.str[off] == '_' &&
624 get_element_len(ce->str.str[off+1]) > 0)) {
626 if (off + 1 >= ce->str.len) {
630 /* Load the information into the structure */
631 elm->len = get_element_len(ce->str.str[off+1]);
632 elm->str.str = &ce->str.str[off+2];
633 elm->str.len = ce->str.len - off - 2;
634 for (j = 0; j < elm->str.len; j++) {
635 if (elm->str.str[j] == '_') {
/* Counts the segments of compound entry ce by advancing i until
 * get_nth_elm_compound() reports no i-th element (the loop body is empty). */
646 anthy_compound_get_nr_segments(compound_ent_t ce)
648 struct elm_compound elm;
653 for (i = 0; get_nth_elm_compound(ce, &elm, i); i++);
/* Looks up the nth segment of ce; presumably returns its reading length
 * (elm.len) when found -- the return statements are not visible here. */
658 anthy_compound_get_nth_segment_len(compound_ent_t ce, int nth)
660 struct elm_compound elm;
661 if (get_nth_elm_compound(ce, &elm, nth)) {
/* Looks up the nth segment of ce; presumably stores its string into xs when
 * found -- the assignments and return values are not visible here. */
668 anthy_compound_get_nth_segment_xstr(compound_ent_t ce, int nth, xstr *xs)
670 struct elm_compound elm;
671 if (get_nth_elm_compound(ce, &elm, nth)) {
/* Accessor for the word type of compound entry ce, written through w
 * (body not visible in this excerpt). */
681 anthy_compound_get_wtype(compound_ent_t ce, wtype_t *w)
/* Accessor for the frequency of compound entry ce (body not visible in this
 * excerpt). */
688 anthy_compound_get_freq(compound_ent_t ce)
693 /* Called from the front end */
/* Body of the lock routine (header line not visible; presumably the
 * counterpart of anthy_unlock_dic()): takes the private-dictionary lock and
 * then updates the private dictionary. */
697 anthy_priv_dic_lock();
698 anthy_priv_dic_update();
701 /* Called from the front end */
/* Releases the private-dictionary lock. */
703 anthy_unlock_dic(void)
705 anthy_priv_dic_unlock();
/* Creates a dictionary session: a fresh memory dictionary obtained from
 * anthy_create_mem_dic(). */
710 anthy_dic_create_session(void)
712 return anthy_create_mem_dic();
/* Makes session d the current personal dictionary cache used by the lookup
 * functions in this file. */
716 anthy_dic_activate_session(dic_session_t d)
718 anthy_current_personal_dic_cache = d;
/* Releases the memory dictionary backing session d. */
722 anthy_dic_release_session(dic_session_t d)
724 anthy_release_mem_dic(d);
/* Switches to personality id: creates its record, installs a fresh memory
 * dictionary as the current personal cache, and initializes the private
 * dictionary for id.
 * NOTE(review): the previous record and cache pointers are overwritten
 * without a visible release -- confirm ownership in the full source. */
728 anthy_dic_set_personality(const char *id)
730 anthy_current_record = anthy_create_record(id);
731 anthy_current_personal_dic_cache = anthy_create_mem_dic();
732 anthy_init_private_dic(id);
736 /** Initialize the dictionary subsystem
/* Body of the dictionary-subsystem initializer (header line not visible;
 * presumably anthy_init_dic()). It tests dic_init_count first, then
 * initializes diclib, the memory dictionary, ext entries, features and the
 * word dictionary, and finally creates the shared master dictionary. A
 * failure to create it is logged. */
741 if (dic_init_count) {
745 if (anthy_init_diclib() == -1) {
750 anthy_init_mem_dic();
752 anthy_init_ext_ent();
753 anthy_init_features();
755 anthy_init_word_dic();
756 master_dic_file = anthy_create_word_dic();
757 if (!master_dic_file) {
758 anthy_log(0, "Failed to create file dic.\n");
765 /** Release the entire dictionary subsystem
/* Body of the dictionary-subsystem teardown (header line not visible;
 * presumably anthy_quit_dic()). It tests dic_init_count first, then releases
 * the current record if there is one, releases the private dictionary,
 * clears the record pointer and shuts down the memory dictionary. */
771 if (dic_init_count) {
774 if (anthy_current_record) {
775 anthy_release_record(anthy_current_record);
777 anthy_release_private_dic();
778 anthy_current_record = NULL;
779 anthy_quit_mem_dic();