2 #include "e_kbd_dict.h"
// Normalisation table: indexed by glyph value below MAXLATIN, each entry is
// the byte that glyph is folded to.
9 static unsigned char _e_kbd_normalise_base[MAXLATIN];
// Set to 1 the first time _e_kbd_normalise_init() runs so the table is
// filled only once.
10 static unsigned char _e_kbd_normalise_ready = 0;
// Fill _e_kbd_normalise_base once. Entries 0..127 become tolower() of their
// index; for the remaining entries up to MAXLATIN the rows of table[] are
// consulted and the first byte of a row replacement string is stored.
// Calls after the first return immediately.
// NOTE(review): the condition that selects which table row applies to entry
// i is not among the lines shown here - presumably a match between the
// decoded glyph and i; confirm.
13 _e_kbd_normalise_init(void)
16 const char *table[][2] =
// One-shot guard: only the first call does any work.
83 if (_e_kbd_normalise_ready) return;
84 _e_kbd_normalise_ready = 1;
// ASCII range: normalising is just lower-casing.
85 for (i = 0; i < 128; i++)
86 _e_kbd_normalise_base[i] = tolower(i);
// Remaining entries; i carries on from 128.
87 for (;i < MAXLATIN; i++)
// NOTE(review): 63 is presumably the number of rows in table[] - confirm.
91 for (j = 0; j < 63; j++)
// Decode the first glyph of the row source string.
93 evas_string_char_next_get(table[j][0], 0, &glyph);
// Store the first byte of the row replacement string.
96 _e_kbd_normalise_base[i] = *table[j][1];
// Map a glyph to its normalised form: a table lookup for glyphs below
// MAXLATIN, otherwise tolower() of the glyph masked to its low 7 bits.
104 _e_kbd_dict_letter_normalise(int glyph)
106 // FIXME: ö -> o, ä -> a, Ó -> o etc. - ie normalise to latin-1
// NOTE(review): this check has no lower bound, so a negative glyph would
// index before the start of the table - confirm callers never pass one.
107 if (glyph < MAXLATIN) return _e_kbd_normalise_base[glyph];
108 return tolower(glyph) & 0x7f;
// Compare a and b ignoring case. A negative len compares the whole strings
// with strcasecmp(); otherwise at most len bytes are compared with
// strncasecmp(). Despite the name, no glyph normalisation is applied yet
// (see the FIXME below).
112 _e_kbd_dict_normalized_strncmp(const char *a, const char *b, int len)
114 // FIXME: normalise 2 strings and then compare
115 if (len < 0) return strcasecmp(a, b);
116 return strncasecmp(a, b, len);
// Whole-string comparison: forwards to _e_kbd_dict_normalized_strncmp()
// with len = -1, which selects the unbounded compare.
120 _e_kbd_dict_normalized_strcmp(const char *a, const char *b)
122 return _e_kbd_dict_normalized_strncmp(a, b, -1);
// Copy src into dst one byte at a time, passing every byte through
// _e_kbd_dict_letter_normalise(). dst must have room for the result.
126 _e_kbd_dict_normalized_strcpy(char *dst, const char *src)
131 for (p = src, d = dst; *p; p++, d++)
// NOTE(review): *p is read from a plain char string, so bytes of 0x80 and
// above may reach the normaliser as negative values where char is signed -
// confirm.
132 *d = _e_kbd_dict_letter_normalise(*p);
// Comparator over E_Kbd_Dict_Word entries keyed on usage: returns 1 when the
// first entry has the lower usage and -1 when it has the higher one. With an
// ascending sort this puts the highest usage first.
// NOTE(review): kw1/kw2 are presumably d1/d2 - the assignments are not among
// the lines shown here.
137 _e_kbd_dict_matches_lookup_cb_sort(const void *d1, const void *d2)
139 const E_Kbd_Dict_Word *kw1, *kw2;
143 if (kw1->usage < kw2->usage) return 1;
144 else if (kw1->usage > kw2->usage) return -1;
// Comparator over E_Kbd_Dict_Word entries that orders them by
// _e_kbd_dict_normalized_strcmp() of their word strings.
149 _e_kbd_dict_writes_cb_sort(const void *d1, const void *d2)
151 const E_Kbd_Dict_Word *kw1, *kw2;
155 return _e_kbd_dict_normalized_strcmp(kw1->word, kw2->word);
// Scan forward from p, bounded by the end of the mapped dictionary, and
// return the address just past the first newline found.
// NOTE(review): the value returned when no newline is found is not among the
// lines shown here.
160 _e_kbd_dict_line_next(E_Kbd_Dict *kd, const char *p)
// e is one past the last mapped byte.
164 e = kd->file.dict + kd->file.size;
165 for (pp = p; pp < e; pp++)
166 if (*pp == '\n') return pp + 1;
// Extract the word that starts at p (everything up to the first whitespace
// character) into a freshly malloc-allocated buffer. Returns NULL when the
// allocation fails. When the word is followed directly by a newline, *usage
// is set to 0.
171 _e_kbd_dict_line_parse(E_Kbd_Dict *kd __UNUSED__, const char *p, int *usage)
// Find the end of the word.
// NOTE(review): this scan has no bound against the end of the mapped data -
// confirm the input always contains whitespace after p.
176 for (ps = p; !isspace(*ps); ps++);
// Room for the word plus one extra byte.
177 wd = malloc(ps - p + 1);
178 if (!wd) return NULL;
// strncpy() copies exactly the word bytes and adds no terminator here.
// NOTE(review): the terminator is presumably written on a line not shown.
179 strncpy(wd, p, ps - p);
// A word with nothing after it on the line has no usage count.
181 if (*ps == '\n') *usage = 0;
// Copy the text between p and eol into a stack buffer and decode its first
// two glyphs into glyphs[0] and glyphs[1]. The second glyph is only decoded
// when the first decode returned a positive position and a positive glyph.
191 _e_kbd_dict_lookup_build_line(E_Kbd_Dict *kd __UNUSED__, const char *p, const char *eol, int *glyphs)
// Stack copy of the line, sized for the text plus one extra byte.
196 s = alloca(eol - p + 1);
197 strncpy(s, p, eol - p);
199 p2 = evas_string_char_next_get(s, 0, &(glyphs[0]));
200 if ((p2 > 0) && (glyphs[0] > 0))
201 evas_string_char_next_get(s, p2, &(glyphs[1]));
// Build the quick-lookup table: for each pair of normalised leading glyphs
// (v1, v2), kd->lookup.tuples[v1][v2] is set to the first dictionary line
// seen that starts with that pair. Existing entries are never overwritten.
205 _e_kbd_dict_lookup_build(E_Kbd_Dict *kd)
207 const char *p, *e, *eol;
208 int glyphs[2], pglyphs[2];
// e is file.size bytes past p.
211 e = p + kd->file.size;
212 pglyphs[0] = pglyphs[1] = 0;
// NOTE(review): strchr() stops at a NUL byte, not at e - confirm the mapped
// data cannot be scanned past its end here.
215 eol = strchr(p, '\n');
219 glyphs[0] = glyphs[1] = 0;
220 _e_kbd_dict_lookup_build_line(kd, p, eol, glyphs);
// Only lines whose leading glyph pair differs from the previous pair matter.
221 if ((glyphs[1] != pglyphs[1]) || (glyphs[0] != pglyphs[0]))
// Whitespace as the first or second glyph is special-cased.
// NOTE(review): the bodies of these branches are not fully shown here.
225 if (isspace(glyphs[0]))
230 else if (isspace(glyphs[1]))
234 pglyphs[0] = pglyphs[1] = 0;
// Normalise both glyphs and remember the first line seen for that pair.
238 v1 = _e_kbd_dict_letter_normalise(glyphs[0]);
239 v2 = _e_kbd_dict_letter_normalise(glyphs[1]);
240 if (!kd->lookup.tuples[v1][v2])
241 kd->lookup.tuples[v1][v2] = p;
// Open kd->file.file read-only, take its size from fstat() and map the whole
// file with PROT_READ / MAP_SHARED into kd->file.dict. Returns 0 when the
// open fails.
// NOTE(review): the handling of fstat() and mmap() failures is not among the
// lines shown here.
251 _e_kbd_dict_open(E_Kbd_Dict *kd)
255 kd->file.fd = open(kd->file.file, O_RDONLY);
256 if (kd->file.fd < 0) return 0;
257 if (fstat(kd->file.fd, &st) < 0)
262 kd->file.size = st.st_size;
263 kd->file.dict = mmap(NULL, kd->file.size, PROT_READ, MAP_SHARED,
// Both MAP_FAILED and a NULL mapping are treated as failure.
265 if ((kd->file.dict== MAP_FAILED) || (!kd->file.dict))
// Release the mapped dictionary. Does nothing when the descriptor is
// negative. Otherwise the lookup table is zeroed (its entries point into the
// mapping), the mapping is removed and kd->file.dict is reset to NULL.
274 _e_kbd_dict_close(E_Kbd_Dict *kd)
276 if (kd->file.fd < 0) return;
277 memset(kd->lookup.tuples, 0, sizeof(kd->lookup.tuples));
278 munmap((void *)kd->file.dict, kd->file.size);
281 kd->file.dict = NULL;
// Create a dictionary object for file: make sure the normalisation table is
// initialised, allocate the E_Kbd_Dict, keep a stringshared copy of the file
// name, open and map the file, then build the quick-lookup table.
// Returns NULL when the allocation fails; when the open fails the
// stringshared file name is released again.
286 e_kbd_dict_new(const char *file)
288 // alloc and load new dict - build quick-lookup table. words MUST be sorted
291 _e_kbd_normalise_init();
292 kd = E_NEW(E_Kbd_Dict, 1);
293 if (!kd) return NULL;
294 kd->file.file = eina_stringshare_add(file);
301 if (!_e_kbd_dict_open(kd))
303 eina_stringshare_del(kd->file.file);
307 _e_kbd_dict_lookup_build(kd);
// Tear down a dictionary: clear the current word and match state, then
// close the mapped dictionary file.
312 e_kbd_dict_free(E_Kbd_Dict *kd)
314 // free dict and anything in it
315 e_kbd_dict_word_letter_clear(kd);
317 _e_kbd_dict_close(kd);
// Walk the pending-writes list (kd->changed.writes) and return the entry
// whose word is byte-for-byte equal to word (strcmp, so case-sensitive).
321 static E_Kbd_Dict_Word *
322 _e_kbd_dict_changed_write_find(E_Kbd_Dict *kd, const char *word)
326 for (l = kd->changed.writes; l; l = l->next)
331 if (!strcmp(kw->word, word)) return kw;
// Write the pending changes back to the dictionary file. The pending list is
// sorted by normalised word and merged with the lines of the existing
// dictionary into a newly created file of the same name; afterwards the old
// mapping is dropped, the new file is opened and the lookup table rebuilt.
// Every pending entry is released as it is consumed.
337 e_kbd_dict_save(E_Kbd_Dict *kd)
341 // save any changes (new words added, usage adjustments).
342 // all words MUST be sorted
// Nothing queued: nothing to do.
343 if (!kd->changed.writes) return;
// A save is happening now, so a scheduled flush is no longer needed.
344 if (kd->changed.flush_timer)
346 ecore_timer_del(kd->changed.flush_timer);
347 kd->changed.flush_timer = NULL;
// The old file is unlinked and a new one created under the same name; the
// existing mapping in kd->file.dict is still read further down.
349 ecore_file_unlink(kd->file.file);
// NOTE(review): no check of the fopen() result is visible before f is used -
// confirm.
350 f = fopen(kd->file.file, "w");
// Sort pending entries so they can be merged in order with existing lines.
351 kd->changed.writes = eina_list_sort(kd->changed.writes,
352 eina_list_count(kd->changed.writes),
353 _e_kbd_dict_writes_cb_sort);
// pn is the start of the line after p.
364 pn = _e_kbd_dict_line_next(kd, p);
// wd is the word on the current existing line, usage its stored count.
370 wd = _e_kbd_dict_line_parse(kd, p, &usage);
371 if ((wd) && (strlen(wd) > 0))
373 if (kd->changed.writes)
// Consume pending entries from the head of the list, comparing each with
// the current existing word.
377 while (kd->changed.writes)
382 kw = kd->changed.writes->data;
383 cmp = _e_kbd_dict_normalized_strcmp(kw->word, wd);
// Emit the pending entry itself, then release it.
386 fprintf(f, "%s %i\n", kw->word, kw->usage);
388 eina_stringshare_del(kw->word);
390 kd->changed.writes = eina_list_remove_list(kd->changed.writes, kd->changed.writes);
// Emit the existing word with the usage count of the pending entry.
394 fprintf(f, "%s %i\n", wd, kw->usage);
// Exact (case-sensitive) match between the pending and the existing word.
395 if (!strcmp(kw->word, wd))
399 eina_stringshare_del(kw->word);
401 kd->changed.writes = eina_list_remove_list(kd->changed.writes, kd->changed.writes);
// Emit the existing line with its original usage.
411 fprintf(f, "%s %i\n", wd, usage);
414 fprintf(f, "%s %i\n", wd, usage);
// Stop at the end of the mapped data.
418 if (p >= (kd->file.dict + kd->file.size)) break;
// Flush whatever pending entries remain after the existing lines.
420 while (kd->changed.writes)
424 kw = kd->changed.writes->data;
425 fprintf(f, "%s %i\n", kw->word, kw->usage);
426 eina_stringshare_del(kw->word);
428 kd->changed.writes = eina_list_remove_list(kd->changed.writes, kd->changed.writes);
// Drop the old mapping, reopen the freshly written file and rebuild the
// lookup table.
432 _e_kbd_dict_close(kd);
433 if (_e_kbd_dict_open(kd)) _e_kbd_dict_lookup_build(kd);
// Timer callback used for the delayed flush of pending writes (it is the
// callback handed to ecore_timer_add() together with the dictionary).
// NOTE(review): the action taken when lookup state is present, and the call
// that performs the save, are not among the lines shown here.
437 _e_kbd_dict_cb_save_flush(void *data)
// Tests whether match results, entered letters or dead-end data exist.
442 if ((kd->matches.list) || (kd->word.letters) || (kd->matches.deadends) ||
// Forget the timer handle.
445 kd->changed.flush_timer = NULL;
// Queue a pending write for word: allocate an entry holding a stringshared
// copy of the word and prepend it to kd->changed.writes. A list longer than
// 64 entries takes a separate branch; otherwise the 5 second flush timer is
// (re)armed.
// NOTE(review): the branch taken above 64 entries is not shown - presumably
// an immediate save; confirm.
451 _e_kbd_dict_changed_write_add(E_Kbd_Dict *kd, const char *word, int usage)
// NOTE(review): no check of the E_NEW result is visible before kw is used.
455 kw = E_NEW(E_Kbd_Dict_Word, 1);
456 kw->word = eina_stringshare_add(word);
458 kd->changed.writes = eina_list_prepend(kd->changed.writes, kw);
459 if (eina_list_count(kd->changed.writes) > 64)
// Replace any timer already running so the delay restarts from now.
463 if (kd->changed.flush_timer)
464 ecore_timer_del(kd->changed.flush_timer);
465 kd->changed.flush_timer =
466 ecore_timer_add(5.0, _e_kbd_dict_cb_save_flush, kd);
// Starting at dictionary line p, step line by line looking for one that
// matches word over len bytes under the normalised comparison. Returns NULL
// when there is no following line; the walk also ends at the end of the
// mapped data.
// NOTE(review): len is presumably the length of word, and the branch bodies
// of the two comparisons are not among the lines shown here.
471 _e_kbd_dict_find_pointer(E_Kbd_Dict *kd, const char *p, int baselen, const char *word)
480 pn = _e_kbd_dict_line_next(kd, p);
481 if (!pn) return NULL;
// Zero means the line matches word over len bytes.
484 if (!_e_kbd_dict_normalized_strncmp(p, word, len))
// Non-zero means the line no longer shares the first baselen bytes.
487 if (_e_kbd_dict_normalized_strncmp(p, word, baselen))
490 if (p >= (kd->file.dict + kd->file.size)) break;
// Locate the first dictionary line that may match word. The leads cache
// (kd->matches.leads) is consulted first with a normalised copy of word,
// stepping back one glyph at a time and looking again; a hit is refined with
// _e_kbd_dict_find_pointer(). Failing that, the quick-lookup table is
// indexed with the first two normalised glyphs of word, and after that the
// second index is scanned over 0..127 for the same first glyph.
// NOTE(review): the truncation of tword between cache lookups is not among
// the lines shown here.
496 _e_kbd_dict_find(E_Kbd_Dict *kd, const char *word)
500 int glyphs[2], p2, v1, v2, i;
502 /* work backwards in leads. i.e.:
509 tword = alloca(strlen(word) + 1);
510 _e_kbd_dict_normalized_strcpy(tword, word);
511 p = eina_hash_find(kd->matches.leads, tword);
516 p2 = evas_string_char_prev_get(tword, p2, &i);
519 p = eina_hash_find(kd->matches.leads, tword);
521 return _e_kbd_dict_find_pointer(kd, p, p2, word);
523 /* looking at leads going back letters didn't work */
// A dictionary consisting of a lone newline has nothing to find.
525 if ((p[0] == '\n') && (kd->file.size <= 1)) return NULL;
526 glyphs[0] = glyphs[1] = 0;
// Decode the first two glyphs of word; p2 tracks the position returned by
// the decoder.
527 p2 = evas_string_char_next_get(word, 0, &(glyphs[0]));
528 if ((p2 > 0) && (glyphs[0] > 0))
529 p2 = evas_string_char_next_get(word, p2, &(glyphs[1]));
530 v1 = _e_kbd_dict_letter_normalise(glyphs[0]);
533 v2 = _e_kbd_dict_letter_normalise(glyphs[1]);
534 p = kd->lookup.tuples[v1][v2];
// Fallback: try every second index 0..127 for the same first glyph.
538 for (i = 0; i < 128; i++)
540 p = kd->lookup.tuples[v1][i];
544 return _e_kbd_dict_find_pointer(kd, p, p2, word);
// Like _e_kbd_dict_find(), but the line is only returned when the character
// at offset len is whitespace, i.e. the dictionary entry ends there.
// NOTE(review): len is presumably the length of word - the assignment is not
// among the lines shown here.
548 _e_kbd_dict_find_full(E_Kbd_Dict *kd, const char *word)
553 p = _e_kbd_dict_find(kd, word);
556 if (isspace(p[len])) return p;
// Adjust the usage count of word. The pending-writes list is checked first;
// otherwise the word is looked up as a complete entry in the mapped file,
// its current usage is parsed from that line and a pending write is queued.
// NOTE(review): the statements that apply adjust to the count are not among
// the lines shown here.
561 e_kbd_dict_word_usage_adjust(E_Kbd_Dict *kd, const char *word, int adjust)
563 // add "adjust" to word usage count
566 kw = _e_kbd_dict_changed_write_find(kd, word);
// Restart the flush timer so the save happens 5 seconds after this change.
570 if (kd->changed.flush_timer)
571 ecore_timer_del(kd->changed.flush_timer);
572 kd->changed.flush_timer = ecore_timer_add(5.0, _e_kbd_dict_cb_save_flush, kd);
// Look for the word as a complete entry in the mapped file.
579 line = _e_kbd_dict_find_full(kd, word);
584 // FIXME: we need to find an EXACT line match - case and all
585 wd = _e_kbd_dict_line_parse(kd, line, &usage);
589 _e_kbd_dict_changed_write_add(kd, word, usage);
// Mark word for deletion. The pending-writes list is checked first; if the
// word exists as a complete entry in the mapped file, a pending write with
// usage -1 is queued for it.
// NOTE(review): how an existing pending entry is updated, and how -1 is
// treated when saving, are not among the lines shown here.
594 e_kbd_dict_word_delete(E_Kbd_Dict *kd, const char *word)
596 // delete a word from the dictionary
599 kw = _e_kbd_dict_changed_write_find(kd, word);
604 if (_e_kbd_dict_find_full(kd, word))
605 _e_kbd_dict_changed_write_add(kd, word, -1);
// Reset all per-word state: remove every entered letter position, free the
// dead-end and leads hashes, and release every entry of the match list.
610 e_kbd_dict_word_letter_clear(E_Kbd_Dict *kd)
612 // clear the current word buffer
// Delete letter positions one at a time until none remain.
613 while (kd->word.letters)
614 e_kbd_dict_word_letter_delete(kd);
615 if (kd->matches.deadends)
617 eina_hash_free(kd->matches.deadends);
618 kd->matches.deadends = NULL;
620 if (kd->matches.leads)
622 eina_hash_free(kd->matches.leads);
623 kd->matches.leads = NULL;
// Pop match entries from the head of the list, releasing each word string.
625 while (kd->matches.list)
629 kw = kd->matches.list->data;
630 eina_stringshare_del(kw->word);
632 kd->matches.list = eina_list_remove_list(kd->matches.list, kd->matches.list);
// Add letter as one more option for the current letter position: a new
// E_Kbd_Dict_Letter with a stringshared copy of the text is appended to the
// option list.
// NOTE(review): where dist is stored, and how list is read from and written
// back to the position node, are not among the lines shown here.
637 e_kbd_dict_word_letter_add(E_Kbd_Dict *kd, const char *letter, int dist)
639 // add a letter with a distance (0 == closest) as an option for the current
640 // letter position - advance starts a new letter position
642 E_Kbd_Dict_Letter *kl;
// The current position is the last node of kd->word.letters.
644 l = eina_list_last(kd->word.letters);
647 kl = E_NEW(E_Kbd_Dict_Letter, 1);
649 kl->letter = eina_stringshare_add(letter);
651 list = eina_list_append(list, kl);
// Begin a new letter position by appending a node with NULL data to
// kd->word.letters.
656 e_kbd_dict_word_letter_advance(E_Kbd_Dict *kd)
658 // start a new letter in the word
659 kd->word.letters = eina_list_append(kd->word.letters, NULL);
// Remove the current (last) letter position, releasing the letter string of
// every option stored in it before unlinking the position node.
663 e_kbd_dict_word_letter_delete(E_Kbd_Dict *kd)
665 // delete the current letter completely
668 l = eina_list_last(kd->word.letters);
673 E_Kbd_Dict_Letter *kl;
676 eina_stringshare_del(kl->letter);
// Pop the head of the option list.
678 list = eina_list_remove_list(list, list);
// Finally unlink the position node itself.
680 kd->word.letters = eina_list_remove_list(kd->word.letters, l);
// Recursive worker for e_kbd_dict_matches_lookup(). word holds the letters
// chosen so far and more the remaining letter positions (the first call
// passes word = NULL and more = kd->word.letters). For every candidate
// letter at the current position the prefix plus that letter is looked up
// in the dictionary; at the last position matching words are appended to
// kd->matches.list with a score, otherwise the function recurses.
// NOTE(review): list is presumably the option list of the current position
// (more->data) - the assignment is not among the lines shown here.
684 _e_kbd_dict_matches_lookup_iter(E_Kbd_Dict *kd, Eina_List *word,
689 char *base, *buf, *wd, *bufapp;
690 E_Kbd_Dict_Letter *kl;
691 int len = 0, dist = 0, d, baselen, maxdist = 0, md;
// NOTE(review): static counter shared by all calls, presumably the recursion
// depth - confirm.
692 static int level = 0;
// Total byte length of the chosen letters and the largest distance among
// them.
695 for (l = word; l; l = l->next)
698 len += strlen(kl->letter);
700 if (kl->dist > maxdist) maxdist = kl->dist;
// maxdist is never allowed below 1.
702 if (maxdist < 1) maxdist = 1;
703 buf = alloca(len + 20); // 20 - just padding enough for 1 more utf8 char
704 base = alloca(len + 20);
// Concatenate the chosen letters into base.
706 for (l = word; l; l = l->next)
709 strcat(base, kl->letter);
711 baselen = strlen(base);
// bufapp is where the candidate letter is written, right after the prefix.
713 bufapp = buf + baselen;
// Fold the distances of the candidate letters into maxdist as well.
715 for (l = list; l; l = l->next)
718 if (kl->dist > maxdist) maxdist = kl->dist;
// Try every candidate letter for this position.
720 for (l = list; l; l = l->next)
723 strcpy(bufapp, kl->letter);
// Prefixes already recorded as dead ends are recognised here.
724 if ((kd->matches.deadends) && eina_hash_find(kd->matches.deadends, buf))
// Use a cached line pointer for this prefix if there is one, else search.
726 p = eina_hash_find(kd->matches.leads, buf);
727 if (p) p = _e_kbd_dict_find_pointer(kd, p, baselen, buf);
728 else p = _e_kbd_dict_find(kd, buf);
// Dead-end bookkeeping (presumably when no line was found - confirm). kd is
// stored as the value; the lookup above only tests for presence.
731 if (!kd->matches.deadends)
732 kd->matches.deadends = eina_hash_string_superfast_new(NULL);
733 eina_hash_add(kd->matches.deadends, buf, kd);
// Cache the line pointer found for this prefix.
738 if (!kd->matches.leads)
739 kd->matches.leads = eina_hash_string_superfast_new(NULL);
740 eina_hash_add(kd->matches.leads, buf, p);
// Last letter position: there is no further node, or the next node has no
// data yet.
742 if ((!more->next) || (!more->next->data))
751 wd = _e_kbd_dict_line_parse(kd, p, &usage);
// Non-zero means wd differs from buf under the normalised comparison.
753 if (_e_kbd_dict_normalized_strcmp(wd, buf))
758 kw = E_NEW(E_Kbd_Dict_Word, 1);
762 int w, b, w2, b2, wc, bc;
764 // match any capitalisation
765 for (w = 0, b = 0; wd[w] && buf[b];)
767 b2 = evas_string_char_next_get(buf, b, &bc);
768 w2 = evas_string_char_next_get(wd, w, &wc);
// Where the typed glyph is upper case, upper-case the dictionary word at
// that position.
769 if (isupper(bc)) wd[w] = toupper(wc);
773 kw->word = eina_stringshare_add(wd);
774 // FIXME: magic combination of distance metric and
775 // frequency of usage. this is simple now, but could
777 wc = eina_list_count(word);
780 // basically a metric to see how far away the keys that
781 // were actually pressed are away from the letters of
782 // this word in a physical on-screen sense
783 accuracy = md - (d / (wc + 1));
784 // usage is the frequency of usage in the dictionary.
785 // if it is < 1 time, it's assumed to be 1.
786 if (usage < 1) usage = 1;
787 // multiply usage by a factor of 100 for better detailed
788 // sorting. 10 == 1/10th factor
789 usage = 100 + ((usage - 1) * 10);
790 // and well just multiply and lets see. maybe this can
791 // do with multiplication factors etc. but simple for
793 kw->usage = (usage * accuracy) / md;
794 kd->matches.list = eina_list_append(kd->matches.list, kw);
// Advance to the next dictionary line, stopping at the end of mapped data.
797 p = _e_kbd_dict_line_next(kd, p);
798 if (p >= (kd->file.dict + kd->file.size)) break;
// Not the last position: push this letter, recurse on the remaining
// positions, then pop it again.
804 word = eina_list_append(word, kl);
805 _e_kbd_dict_matches_lookup_iter(kd, word, more->next);
806 word = eina_list_remove_list(word, eina_list_last(word));
// Rebuild kd->matches.list for the letters entered so far: previous results
// are released, the recursive search is run when at least one letter
// position exists, and the result is sorted with
// _e_kbd_dict_matches_lookup_cb_sort.
813 e_kbd_dict_matches_lookup(E_Kbd_Dict *kd)
815 // find all matches and sort them
// Discard the results of any previous lookup.
816 while (kd->matches.list)
820 kw = kd->matches.list->data;
821 eina_stringshare_del(kw->word);
823 kd->matches.list = eina_list_remove_list(kd->matches.list, kd->matches.list);
825 if (kd->word.letters)
826 _e_kbd_dict_matches_lookup_iter(kd, NULL, kd->word.letters);
827 kd->matches.list = eina_list_sort(kd->matches.list,
828 eina_list_count(kd->matches.list),
829 _e_kbd_dict_matches_lookup_cb_sort);
// Point the match cursor (kd->matches.list_ptr) at the head of the match
// list.
833 e_kbd_dict_matches_first(E_Kbd_Dict *kd)
835 // jump to first match
836 kd->matches.list_ptr = kd->matches.list;
// Advance the match cursor to the following list node.
840 e_kbd_dict_matches_next(E_Kbd_Dict *kd)
842 // jump to next match
// NOTE(review): list_ptr is dereferenced on this line with no NULL check
// visible - confirm callers never advance past the end.
843 kd->matches.list_ptr = kd->matches.list_ptr->next;
847 e_kbd_dict_matches_match_get(E_Kbd_Dict *kd, int *pri_ret)
849 // return the word (string utf-8) for the current match
850 if (kd->matches.list_ptr)
854 kw = kd->matches.list_ptr->data;
857 *pri_ret = kw->usage;