2 #include "e_kbd_dict.h"
// Glyph normalisation table, indexed by glyph value below MAXLATIN. Filled by
// _e_kbd_normalise_init() and read by _e_kbd_dict_letter_normalise().
9 static unsigned char _e_kbd_normalise_base[MAXLATIN];
// Non-zero once the table above has been set up, so the setup runs only once.
10 static unsigned char _e_kbd_normalise_ready = 0;
// One-time setup of _e_kbd_normalise_base.
// Only part of this function is visible in this listing (the rows of `table`
// and several statements are missing); the notes cover the visible lines only.
13 _e_kbd_normalise_init(void)
// Rows of string pairs: the loop further down decodes a glyph from [0] and
// takes the first byte of [1] as the value stored in the table.
16 const char *table[][2] =
// Already initialised: nothing to do.
83 if (_e_kbd_normalise_ready) return;
84 _e_kbd_normalise_ready = 1;
// Entries 0..127 map to their lower-case form.
85 for (i = 0; i < 128; i++)
86 _e_kbd_normalise_base[i] = tolower(i);
// Entries 128..MAXLATIN-1; part of the per-entry work is on lines not shown.
87 for (;i < MAXLATIN; i++)
// NOTE(review): 63 is a hard-coded row count - confirm it matches the number
// of rows actually present in `table`.
91 for (j = 0; j < 63; j++)
// Decode the first glyph of this row's source string.
93 evas_string_char_next_get(table[j][0], 0, &glyph);
// Store the first byte of this row's replacement string for entry i
// (the condition guarding this store is not visible here).
96 _e_kbd_normalise_base[i] = *table[j][1];
// Map a glyph value to its normalised form.
// Glyphs below MAXLATIN are looked up in _e_kbd_normalise_base; anything else
// is lower-cased with tolower() and masked to 7 bits.
// NOTE(review): the visible check has no lower bound, so a negative `glyph`
// would index before the start of the table - confirm callers never pass one
// (e.g. a plain `char` byte >= 0x80 on a platform where char is signed).
104 _e_kbd_dict_letter_normalise(int glyph)
106 // FIXME: ö -> o, ä -> a, Ó -> o etc. - ie normalise to latin-1
107 if (glyph < MAXLATIN) return _e_kbd_normalise_base[glyph];
108 return tolower(glyph) & 0x7f;
// Compare two strings ignoring case.
// A negative `len` compares the whole strings (strcasecmp); otherwise at most
// `len` bytes are compared (strncasecmp). The result is whatever those libc
// functions return.
// Despite the name, no glyph normalisation is applied here yet (see FIXME).
112 _e_kbd_dict_normalized_strncmp(const char *a, const char *b, int len)
114 // FIXME: normalise 2 strings and then compare
115 if (len < 0) return strcasecmp(a, b);
116 return strncasecmp(a, b, len);
// Whole-string variant of _e_kbd_dict_normalized_strncmp(): passes -1 as the
// length so the full strings are compared.
120 _e_kbd_dict_normalized_strcmp(const char *a, const char *b)
122 return _e_kbd_dict_normalized_strncmp(a, b, -1);
// Copy `src` into `dst`, passing each byte through
// _e_kbd_dict_letter_normalise(). `dst` must have room for the copy.
// The walk is byte-by-byte, not UTF-8 glyph-by-glyph.
// NOTE(review): writing the terminating NUL is not visible in this listing -
// presumably it follows the loop; confirm. Also confirm how bytes >= 0x80 are
// handled if `p` points to plain (possibly signed) char.
126 _e_kbd_dict_normalized_strcpy(char *dst, const char *src)
131 for (p = src, d = dst; *p; p++, d++)
132 *d = _e_kbd_dict_letter_normalise(*p);
// List-sort comparison for match entries: orders by `usage`, highest first.
// The assignments of kw1/kw2 and the equal-usage result are on lines not
// visible in this listing.
137 _e_kbd_dict_matches_lookup_cb_sort(const void *d1, const void *d2)
139 const E_Kbd_Dict_Word *kw1, *kw2;
// Lower usage sorts after higher usage.
143 if (kw1->usage < kw2->usage) return 1;
144 else if (kw1->usage > kw2->usage) return -1;
// List-sort comparison for pending write entries: orders by word using
// _e_kbd_dict_normalized_strcmp(). The assignments of kw1/kw2 are on lines
// not visible in this listing.
149 _e_kbd_dict_writes_cb_sort(const void *d1, const void *d2)
151 const E_Kbd_Dict_Word *kw1, *kw2;
155 return _e_kbd_dict_normalized_strcmp(kw1->word, kw2->word);
// Return a pointer to the start of the line following `p` in the dictionary
// data, i.e. one past the next '\n' found before kd->file.dict + kd->file.size.
// The result when no newline is found is on a line not visible here
// (presumably NULL, since a caller tests the result for NULL - confirm).
160 _e_kbd_dict_line_next(E_Kbd_Dict *kd, const char *p)
// End of the dictionary data.
164 e = kd->file.dict + kd->file.size;
165 for (pp = p; pp < e; pp++)
166 if (*pp == '\n') return pp + 1;
// Extract the word at the start of the dictionary line `p` into a freshly
// malloc()ed buffer and report the line's usage count through `usage`.
// Returns NULL if the allocation fails; `kd` is unused.
// Parts of this function (terminating the copy, the non-newline usage case,
// the final return) are on lines not visible in this listing.
171 _e_kbd_dict_line_parse(E_Kbd_Dict *kd __UNUSED__, const char *p, int *usage)
// Find the first whitespace character after the word.
// NOTE(review): the visible scan has no bound against the end of the data and
// hands a plain char to isspace() - confirm both are safe for the input.
176 for (ps = p; !isspace(*ps); ps++);
// Room for the word plus one extra byte.
177 wd = malloc(ps - p + 1);
178 if (!wd) return NULL;
// strncpy copies exactly the word bytes and adds no terminator here.
179 strncpy(wd, p, ps - p);
// A word immediately followed by end-of-line has no count: usage is 0.
181 if (*ps == '\n') *usage = 0;
// Decode the first two glyphs of the line [p, eol) into glyphs[0] and
// glyphs[1]. `kd` is unused.
// The second glyph is decoded only if the first decode advanced and produced a
// positive glyph; otherwise glyphs[1] is left as the caller set it.
191 _e_kbd_dict_lookup_build_line(E_Kbd_Dict *kd __UNUSED__, const char *p, const char *eol, int *glyphs)
// Stack copy of the line with one extra byte (the terminator is presumably
// written on a line not visible here - confirm).
196 s = alloca(eol - p + 1);
197 strncpy(s, p, eol - p);
// p2 is the index returned by the first decode; it is the start of the second.
199 p2 = evas_string_char_next_get(s, 0, &(glyphs[0]));
200 if ((p2 > 0) && (glyphs[0] > 0))
201 evas_string_char_next_get(s, p2, &(glyphs[1]));
// Build kd->lookup.tuples: for each pair of normalised leading glyphs, remember
// the first dictionary line that starts with that pair.
// Only part of the loop is visible in this listing; the loop construct, the
// whitespace-glyph handling and the line advance are on missing lines.
205 _e_kbd_dict_lookup_build(E_Kbd_Dict *kd)
207 const char *p, *e, *eol;
// Glyph pair of the current line and of the previously recorded line.
208 int glyphs[2], pglyphs[2];
// End of the dictionary data.
211 e = p + kd->file.size;
212 pglyphs[0] = pglyphs[1] = 0;
// NOTE(review): strchr() stops only at a NUL byte - confirm the data is
// guaranteed to contain a '\n' or NUL before the end of the mapping.
215 eol = strchr(p, '\n');
219 glyphs[0] = glyphs[1] = 0;
220 _e_kbd_dict_lookup_build_line(kd, p, eol, glyphs);
// Only act when the leading pair differs from the previous one.
221 if ((glyphs[1] != pglyphs[1]) || (glyphs[0] != pglyphs[0]))
// Whitespace as first or second glyph is special-cased (bodies not visible).
225 if (isspace(glyphs[0]))
230 else if (isspace(glyphs[1]))
234 pglyphs[0] = pglyphs[1] = 0;
// Normalise both glyphs and use them as table indices.
238 v1 = _e_kbd_dict_letter_normalise(glyphs[0]);
239 v2 = _e_kbd_dict_letter_normalise(glyphs[1]);
// Keep the earliest line for this pair.
240 if (!kd->lookup.tuples[v1][v2])
241 kd->lookup.tuples[v1][v2] = p;
// Open the dictionary file named by kd->file.file read-only and map it into
// memory as kd->file.dict (kd->file.size bytes).
// Returns 0 if open() fails; the other return paths and the failure cleanup
// are on lines not visible in this listing.
251 _e_kbd_dict_open(E_Kbd_Dict *kd)
255 kd->file.fd = open(kd->file.file, O_RDONLY);
256 if (kd->file.fd < 0) return 0;
// Query the file to learn its size.
257 if (fstat(kd->file.fd, &st) < 0)
262 kd->file.size = st.st_size;
// Turn on Eina's mmap safety handling before mapping.
264 eina_mmap_safety_enabled_set(EINA_TRUE);
// Read-only shared mapping of the whole file.
266 kd->file.dict = mmap(NULL, kd->file.size, PROT_READ, MAP_SHARED,
// Treat both MAP_FAILED and a NULL mapping as failure.
268 if ((kd->file.dict== MAP_FAILED) || (!kd->file.dict))
// Undo _e_kbd_dict_open(): clear the lookup table (its entries point into the
// mapping), unmap the dictionary data and reset the pointer.
// Does nothing if no file is open (fd < 0). Other cleanup steps may be on
// lines not visible in this listing.
277 _e_kbd_dict_close(E_Kbd_Dict *kd)
279 if (kd->file.fd < 0) return;
280 memset(kd->lookup.tuples, 0, sizeof(kd->lookup.tuples));
281 munmap((void *)kd->file.dict, kd->file.size);
284 kd->file.dict = NULL;
// Create a dictionary object for `file`: allocate it, open and map the file,
// and build the quick-lookup table.
// Returns NULL if the allocation fails; the remaining return paths are on
// lines not visible in this listing.
289 e_kbd_dict_new(const char *file)
291 // alloc and load new dict - build quick-lookup table. words MUST be sorted
// Make sure the glyph normalisation table exists before any lookup is built.
294 _e_kbd_normalise_init();
295 kd = E_NEW(E_Kbd_Dict, 1);
296 if (!kd) return NULL;
// Keep a stringshared copy of the file name.
297 kd->file.file = eina_stringshare_add(file);
// Open failed: release the file name (further cleanup is not visible here).
304 if (!_e_kbd_dict_open(kd))
306 eina_stringshare_del(kd->file.file);
310 _e_kbd_dict_lookup_build(kd);
// Tear down a dictionary: clear the current word/match state and close the
// mapped file. Further release steps may be on lines not visible here.
315 e_kbd_dict_free(E_Kbd_Dict *kd)
317 // free dict and anything in it
318 e_kbd_dict_word_letter_clear(kd);
320 _e_kbd_dict_close(kd);
// Look through the pending writes (kd->changed.writes) for an entry whose word
// equals `word` exactly (case-sensitive strcmp) and return it.
// The not-found result is on a line not visible in this listing.
324 static E_Kbd_Dict_Word *
325 _e_kbd_dict_changed_write_find(E_Kbd_Dict *kd, const char *word)
329 for (l = kd->changed.writes; l; l = l->next)
334 if (!strcmp(kw->word, word)) return kw;
// Write pending changes (kd->changed.writes) out to the dictionary file, then
// re-open the file and rebuild the lookup table.
// Only part of this function is visible in this listing; the branch conditions
// inside the merge loop are on missing lines, so those notes are hedged.
340 e_kbd_dict_save(E_Kbd_Dict *kd)
344 // save any changes (new words added, usage adjustments).
345 // all words MUST be sorted
// Nothing queued: nothing to write.
346 if (!kd->changed.writes) return;
// Cancel a pending delayed flush.
347 if (kd->changed.flush_timer)
349 ecore_timer_del(kd->changed.flush_timer);
350 kd->changed.flush_timer = NULL;
// Unlink the file by name and create it anew for writing.
// NOTE(review): no NULL check of `f` is visible in this listing - confirm one
// exists before the fprintf() calls below.
352 ecore_file_unlink(kd->file.file);
353 f = fopen(kd->file.file, "w");
// Sort pending writes by word, using the same comparison as the merge below.
354 kd->changed.writes = eina_list_sort(kd->changed.writes,
355 eina_list_count(kd->changed.writes),
356 _e_kbd_dict_writes_cb_sort);
// Start of the line after the current one.
367 pn = _e_kbd_dict_line_next(kd, p);
// Word and usage count of the current line.
373 wd = _e_kbd_dict_line_parse(kd, p, &usage);
374 if ((wd) && (strlen(wd) > 0))
376 if (kd->changed.writes)
// Compare the head of the pending list against the current word and emit
// entries accordingly.
380 while (kd->changed.writes)
385 kw = kd->changed.writes->data;
386 cmp = _e_kbd_dict_normalized_strcmp(kw->word, wd);
// Emit the pending entry itself, then release it and pop it off the list
// (presumably the case where it sorts before the current word - confirm).
389 fprintf(f, "%s %i\n", kw->word, kw->usage);
391 eina_stringshare_del(kw->word);
393 kd->changed.writes = eina_list_remove_list(kd->changed.writes, kd->changed.writes);
// Emit the current word with the pending entry's usage; when the spelling
// also matches exactly, the pending entry is released and popped.
397 fprintf(f, "%s %i\n", wd, kw->usage);
398 if (!strcmp(kw->word, wd))
402 eina_stringshare_del(kw->word);
404 kd->changed.writes = eina_list_remove_list(kd->changed.writes, kd->changed.writes);
// Emit the current line with its own usage count.
414 fprintf(f, "%s %i\n", wd, usage);
417 fprintf(f, "%s %i\n", wd, usage);
// Stop at the end of the dictionary data.
421 if (p >= (kd->file.dict + kd->file.size)) break;
// Emit whatever pending entries are left once the existing lines are done.
423 while (kd->changed.writes)
427 kw = kd->changed.writes->data;
428 fprintf(f, "%s %i\n", kw->word, kw->usage);
429 eina_stringshare_del(kw->word);
431 kd->changed.writes = eina_list_remove_list(kd->changed.writes, kd->changed.writes);
// Drop the old mapping, map the file again and rebuild the lookup table.
435 _e_kbd_dict_close(kd);
436 if (_e_kbd_dict_open(kd)) _e_kbd_dict_lookup_build(kd);
// Timer callback registered via ecore_timer_add() for the delayed flush.
// Tests whether match/word state is currently held (the rest of the condition
// and what happens in that case are on lines not visible in this listing) and
// clears the stored timer handle.
440 _e_kbd_dict_cb_save_flush(void *data)
445 if ((kd->matches.list) || (kd->word.letters) || (kd->matches.deadends) ||
448 kd->changed.flush_timer = NULL;
// Queue a pending write for `word` and (re)start the 5 second flush timer.
// Storing `usage` into the entry and the action taken above 64 entries are on
// lines not visible in this listing.
454 _e_kbd_dict_changed_write_add(E_Kbd_Dict *kd, const char *word, int usage)
// NOTE(review): no NULL check of `kw` is visible before it is used - confirm.
458 kw = E_NEW(E_Kbd_Dict_Word, 1);
459 kw->word = eina_stringshare_add(word);
461 kd->changed.writes = eina_list_prepend(kd->changed.writes, kw);
// More than 64 queued entries is handled specially.
462 if (eina_list_count(kd->changed.writes) > 64)
// Restart the delayed flush: drop any running timer and arm a new one.
466 if (kd->changed.flush_timer)
467 ecore_timer_del(kd->changed.flush_timer);
468 kd->changed.flush_timer =
469 ecore_timer_add(5.0, _e_kbd_dict_cb_save_flush, kd);
// Scan dictionary lines starting at `p` for one that begins with `word`.
// Only part of the loop is visible in this listing; `len` and the actions
// taken on each comparison are on missing lines.
474 _e_kbd_dict_find_pointer(E_Kbd_Dict *kd, const char *p, int baselen, const char *word)
// No following line: give up.
483 pn = _e_kbd_dict_line_next(kd, p);
484 if (!pn) return NULL;
// Case-insensitive match of the first `len` bytes of the line against `word`.
487 if (!_e_kbd_dict_normalized_strncmp(p, word, len))
// The line no longer shares the first `baselen` bytes with `word`.
490 if (_e_kbd_dict_normalized_strncmp(p, word, baselen))
// Stop at the end of the dictionary data.
493 if (p >= (kd->file.dict + kd->file.size)) break;
// Locate the first dictionary line matching `word`, using the cached leads
// hash and, failing that, the two-glyph lookup table.
// Only part of this function is visible in this listing. The block comment
// opened on original line 505 is not closed on any visible line, so no notes
// are added until after the next visible comment terminator.
499 _e_kbd_dict_find(E_Kbd_Dict *kd, const char *word)
503 int glyphs[2], p2, v1, v2, i;
505 /* work backwards in leads. i.e.:
512 tword = alloca(strlen(word) + 1);
513 _e_kbd_dict_normalized_strcpy(tword, word);
514 p = eina_hash_find(kd->matches.leads, tword);
519 p2 = evas_string_char_prev_get(tword, p2, &i);
522 p = eina_hash_find(kd->matches.leads, tword);
524 return _e_kbd_dict_find_pointer(kd, p, p2, word);
526 /* looking at leads going back letters didn't work */
// Data consisting of a single newline holds no words.
528 if ((p[0] == '\n') && (kd->file.size <= 1)) return NULL;
// Decode the first two glyphs of `word`; the second only if the first decode
// advanced and produced a positive glyph.
529 glyphs[0] = glyphs[1] = 0;
530 p2 = evas_string_char_next_get(word, 0, &(glyphs[0]));
531 if ((p2 > 0) && (glyphs[0] > 0))
532 p2 = evas_string_char_next_get(word, p2, &(glyphs[1]));
// Normalise both glyphs and look up the starting line for that pair.
533 v1 = _e_kbd_dict_letter_normalise(glyphs[0]);
536 v2 = _e_kbd_dict_letter_normalise(glyphs[1]);
537 p = kd->lookup.tuples[v1][v2];
// Walk the second index over 0..127 for the same first glyph (the condition
// for doing so and for stopping is on lines not visible here).
541 for (i = 0; i < 128; i++)
543 p = kd->lookup.tuples[v1][i];
// Scan forward from the starting line for the actual match.
547 return _e_kbd_dict_find_pointer(kd, p, p2, word);
// Like _e_kbd_dict_find(), but accept the line only when whitespace follows
// the matched prefix, i.e. the line's word ends there.
// The computation of `len` and the failure return are on lines not visible in
// this listing.
551 _e_kbd_dict_find_full(E_Kbd_Dict *kd, const char *word)
556 p = _e_kbd_dict_find(kd, word);
559 if (isspace(p[len])) return p;
// Add `adjust` to the usage count of `word`.
// Only part of this function is visible in this listing; where the adjustment
// is applied and how a word missing from the dictionary is handled are on
// missing lines.
564 e_kbd_dict_word_usage_adjust(E_Kbd_Dict *kd, const char *word, int adjust)
566 // add "adjust" to word usage count
// Is there already a pending write for this word?
569 kw = _e_kbd_dict_changed_write_find(kd, word);
// Restart the 5 second delayed flush.
573 if (kd->changed.flush_timer)
574 ecore_timer_del(kd->changed.flush_timer);
575 kd->changed.flush_timer = ecore_timer_add(5.0, _e_kbd_dict_cb_save_flush, kd);
// Look the word up in the mapped dictionary as a complete word.
582 line = _e_kbd_dict_find_full(kd, word);
587 // FIXME: we need to find an EXACT line match - case and all
// Read the usage count from the matched line.
588 wd = _e_kbd_dict_line_parse(kd, line, &usage);
// Queue a pending write for the word.
592 _e_kbd_dict_changed_write_add(kd, word, usage);
// Delete `word` from the dictionary.
// If the word exists in the mapped dictionary, a pending write with usage -1
// is queued (presumably -1 marks the word as deleted - confirm against the
// save path). Handling of an already-pending entry (`kw`) is on lines not
// visible in this listing.
597 e_kbd_dict_word_delete(E_Kbd_Dict *kd, const char *word)
599 // delete a word from the dictionary
602 kw = _e_kbd_dict_changed_write_find(kd, word);
607 if (_e_kbd_dict_find_full(kd, word))
608 _e_kbd_dict_changed_write_add(kd, word, -1);
// Reset all per-word state: typed letters, the deadends and leads caches, and
// the current match list.
613 e_kbd_dict_word_letter_clear(E_Kbd_Dict *kd)
615 // clear the current word buffer
// Remove letter positions one at a time until none remain.
616 while (kd->word.letters)
617 e_kbd_dict_word_letter_delete(kd);
// Drop both lookup caches.
618 if (kd->matches.deadends)
620 eina_hash_free(kd->matches.deadends);
621 kd->matches.deadends = NULL;
623 if (kd->matches.leads)
625 eina_hash_free(kd->matches.leads);
626 kd->matches.leads = NULL;
// Release each match's word and pop it off the list (freeing the entry itself
// is presumably on a line not visible here - confirm).
628 while (kd->matches.list)
632 kw = kd->matches.list->data;
633 eina_stringshare_del(kw->word);
635 kd->matches.list = eina_list_remove_list(kd->matches.list, kd->matches.list);
// Add `letter` as a candidate for the current (last) letter position.
// Storing `dist` and writing the updated candidate list back are on lines not
// visible in this listing.
640 e_kbd_dict_word_letter_add(E_Kbd_Dict *kd, const char *letter, int dist)
642 // add a letter with a distance (0 == closest) as an option for the current
643 // letter position - advance starts a new letter position
645 E_Kbd_Dict_Letter *kl;
// The current position is the last node of the letters list.
647 l = eina_list_last(kd->word.letters);
650 kl = E_NEW(E_Kbd_Dict_Letter, 1);
// Keep a stringshared copy of the letter.
652 kl->letter = eina_stringshare_add(letter);
654 list = eina_list_append(list, kl);
// Start a new letter position by appending a node with NULL data to
// kd->word.letters.
659 e_kbd_dict_word_letter_advance(E_Kbd_Dict *kd)
661 // start a new letter in the word
662 kd->word.letters = eina_list_append(kd->word.letters, NULL);
// Remove the current (last) letter position together with its candidates.
// The loop around the candidate release and the freeing of each candidate are
// on lines not visible in this listing.
666 e_kbd_dict_word_letter_delete(E_Kbd_Dict *kd)
668 // delete the current letter completely
671 l = eina_list_last(kd->word.letters);
676 E_Kbd_Dict_Letter *kl;
// Release a candidate's letter string and pop it off the candidate list.
679 eina_stringshare_del(kl->letter);
681 list = eina_list_remove_list(list, list);
// Finally drop the position's node from the letters list.
683 kd->word.letters = eina_list_remove_list(kd->word.letters, l);
// Recursive search step: `word` holds the letters chosen so far, and the
// remaining letter positions are walked via `more` (declared on a line not
// visible in this listing). Each candidate of the next position is tried; at
// the last position, matching dictionary words are appended to
// kd->matches.list.
// Only part of this function is visible; several assignments and branch
// conditions are on missing lines, so the notes below are hedged.
687 _e_kbd_dict_matches_lookup_iter(E_Kbd_Dict *kd, Eina_List *word,
692 char *base, *buf, *wd, *bufapp;
693 E_Kbd_Dict_Letter *kl;
694 int len = 0, dist = 0, d, baselen, maxdist = 0, md;
695 static int level = 0;
// Total byte length and largest distance of the letters chosen so far.
698 for (l = word; l; l = l->next)
701 len += strlen(kl->letter);
703 if (kl->dist > maxdist) maxdist = kl->dist;
// Never let the maximum distance drop below 1.
705 if (maxdist < 1) maxdist = 1;
706 buf = alloca(len + 20); // 20 - just padding enough for 1 more utf8 char
707 base = alloca(len + 20);
// Concatenate the chosen letters into `base`.
709 for (l = word; l; l = l->next)
712 strcat(base, kl->letter);
714 baselen = strlen(base);
// Position in `buf` where each candidate letter is written.
716 bufapp = buf + baselen;
// Fold the candidates' distances into the maximum as well.
718 for (l = list; l; l = l->next)
721 if (kl->dist > maxdist) maxdist = kl->dist;
// Try every candidate of the next letter position.
723 for (l = list; l; l = l->next)
726 strcpy(bufapp, kl->letter);
// Prefix already recorded as a dead end (the action is not visible here).
727 if ((kd->matches.deadends) && eina_hash_find(kd->matches.deadends, buf))
// Use a cached starting line for this prefix if there is one, otherwise do a
// full lookup.
729 p = eina_hash_find(kd->matches.leads, buf);
730 if (p) p = _e_kbd_dict_find_pointer(kd, p, baselen, buf);
731 else p = _e_kbd_dict_find(kd, buf);
// Record the prefix as a dead end (presumably when nothing was found).
734 if (!kd->matches.deadends)
735 kd->matches.deadends = eina_hash_string_superfast_new(NULL);
736 eina_hash_add(kd->matches.deadends, buf, kd);
// Cache the line found for this prefix as a lead.
741 if (!kd->matches.leads)
742 kd->matches.leads = eina_hash_string_superfast_new(NULL);
743 eina_hash_add(kd->matches.leads, buf, p);
// No further letter position follows: collect matches for this prefix.
745 if ((!more->next) || (!more->next->data))
// Word and usage count of the current dictionary line.
754 wd = _e_kbd_dict_line_parse(kd, p, &usage);
756 if (_e_kbd_dict_normalized_strcmp(wd, buf))
761 kw = E_NEW(E_Kbd_Dict_Word, 1);
765 int w, b, w2, b2, wc, bc;
767 // match any capitalisation
// Walk `wd` and `buf` together; where the typed glyph is upper case, store the
// upper-cased dictionary glyph into wd[w].
768 for (w = 0, b = 0; wd[w] && buf[b];)
770 b2 = evas_string_char_next_get(buf, b, &bc);
771 w2 = evas_string_char_next_get(wd, w, &wc);
772 if (isupper(bc)) wd[w] = toupper(wc);
776 kw->word = eina_stringshare_add(wd);
777 // FIXME: magic combination of distance metric and
778 // frequency of usage. this is simple now, but could
// Number of letters chosen so far.
780 wc = eina_list_count(word);
783 // basically a metric to see how far away the keys that
784 // were actually pressed are away from the letters of
785 // this word in a physical on-screen sense
786 accuracy = md - (d / (wc + 1));
787 // usage is the frequency of usage in the dictionary.
788 // if it is < 1 time, it's assumed to be 1.
789 if (usage < 1) usage = 1;
790 // multiply usage by a factor of 100 for better detailed
791 // sorting. 10 == 1/10th factor
792 usage = 100 + ((usage - 1) * 10);
793 // and well just multiply and lets see. maybe this can
794 // do with multiplication factors etc. but simple for
// Final score stored on the match entry.
796 kw->usage = (usage * accuracy) / md;
797 kd->matches.list = eina_list_append(kd->matches.list, kw);
// Move to the next dictionary line; stop at the end of the data.
800 p = _e_kbd_dict_line_next(kd, p);
801 if (p >= (kd->file.dict + kd->file.size)) break;
// More positions follow: choose this candidate, recurse on the rest, then undo
// the choice.
807 word = eina_list_append(word, kl);
808 _e_kbd_dict_matches_lookup_iter(kd, word, more->next);
809 word = eina_list_remove_list(word, eina_list_last(word));
// Rebuild kd->matches.list for the letters typed so far and sort it with
// _e_kbd_dict_matches_lookup_cb_sort (highest usage first).
816 e_kbd_dict_matches_lookup(E_Kbd_Dict *kd)
818 // find all matches and sort them
// Discard the previous matches (freeing each entry is presumably on a line not
// visible in this listing - confirm).
819 while (kd->matches.list)
823 kw = kd->matches.list->data;
824 eina_stringshare_del(kw->word);
826 kd->matches.list = eina_list_remove_list(kd->matches.list, kd->matches.list);
// Search only when at least one letter position exists.
828 if (kd->word.letters)
829 _e_kbd_dict_matches_lookup_iter(kd, NULL, kd->word.letters);
830 kd->matches.list = eina_list_sort(kd->matches.list,
831 eina_list_count(kd->matches.list),
832 _e_kbd_dict_matches_lookup_cb_sort);
// Point the match cursor (kd->matches.list_ptr) at the head of the match list.
836 e_kbd_dict_matches_first(E_Kbd_Dict *kd)
838 // jump to first match
839 kd->matches.list_ptr = kd->matches.list;
// Advance the match cursor to the following list node.
// NOTE(review): list_ptr is dereferenced with no NULL check visible in this
// listing - confirm a guard exists or that callers never advance past the end.
843 e_kbd_dict_matches_next(E_Kbd_Dict *kd)
845 // jump to next match
846 kd->matches.list_ptr = kd->matches.list_ptr->next;
// Read the match under the cursor: when the cursor is set, the entry's usage
// score is stored through `pri_ret`.
// The return statements (and any guard on `pri_ret`) are on lines not visible
// in this listing, which ends inside this function.
850 e_kbd_dict_matches_match_get(E_Kbd_Dict *kd, int *pri_ret)
852 // return the word (string utf-8) for the current match
853 if (kd->matches.list_ptr)
857 kw = kd->matches.list_ptr->data;
860 *pri_ret = kw->usage;