1 // This file is part of The New Aspell Copyright (C)
2 // 2002,2003,2004,2011 by Kevin Atkinson under the GNU LGPL license
3 // version 2.0 or 2.1. You should have received a copy of the LGPL
4 // license along with this library if you did not you can find it at
5 // http://www.gnu.org/.
// NOTE: This program currently uses a very ugly mix of the internal
// API and the external C interface.  The eventual goal is to use only
// the external C++ interface; however, the external C++ interface is
// currently incomplete.  The C interface is used in some places
// because without it the strings will not get converted properly when
// the encoding is not the same as the internal encoding used by
// Aspell.
24 #ifdef HAVE_LANGINFO_CODESET
25 # include <langinfo.h>
31 # include <sys/types.h>
32 # include <sys/stat.h>
37 #include "asc_ctype.hpp"
38 #include "check_funs.hpp"
40 #include "convert.hpp"
41 #include "document_checker.hpp"
42 #include "enumeration.hpp"
44 #include "file_util.hpp"
45 #include "fstream.hpp"
47 #include "iostream.hpp"
48 #include "posib_err.hpp"
49 #include "speller.hpp"
50 #include "stack_ptr.hpp"
51 #include "string_enumeration.hpp"
52 #include "string_map.hpp"
53 #include "word_list.hpp"
55 #include "string_list.hpp"
56 #include "speller_impl.hpp"
60 #include "hash_fun.hpp"
64 using namespace acommon;
68 // action functions declarations
71 void print_help(bool verbose = false);
95 void print_error(ParmString msg)
97 CERR.printf(_("Error: %s\n"), msg.str());
100 void print_error(ParmString msg, ParmString str)
102 CERR.put(_("Error: "));
103 CERR.printf(msg.str(), str.str());
107 #define EXIT_ON_ERR(command) \
108 do{PosibErrBase pe(command);\
109 if(pe.has_err()){print_error(pe.get_err()->mesg); exit(1);}\
111 #define EXIT_ON_ERR_SET(command, type, var)\
113 do{PosibErr< type > pe(command);\
114 if(pe.has_err()){print_error(pe.get_err()->mesg); exit(1);}\
117 #define BREAK_ON_ERR(command) \
118 do{PosibErrBase pe(command);\
119 if(pe.has_err()){print_error(pe.get_err()->mesg); break;}\
121 #define BREAK_ON_ERR_SET(command, type, var)\
123 do{PosibErr< type > pe(command);\
124 if(pe.has_err()){print_error(pe.get_err()->mesg); break;}\
129 /////////////////////////////////////////////////////////
131 // Command line options functions and classes
135 typedef Vector<String> Args;
136 typedef Config Options;
137 enum Action {do_create, do_merge, do_dump, do_test, do_other};
140 StackPtr<Options> options;
141 Action action = do_other;
143 struct PossibleOption {
150 #define OPTION(name,abrv,num) {name,abrv,num,false}
151 #define COMMAND(name,abrv,num) {name,abrv,num,true}
152 #define ISPELL_COMP(abrv,num) {"",abrv,num,false}
154 const PossibleOption possible_options[] = {
155 OPTION("master", 'd', 1),
156 OPTION("personal", 'p', 1),
157 OPTION("ignore", 'W', 1),
158 OPTION("lang", 'l', 1),
159 OPTION("backup", 'b', 0),
160 OPTION("dont-backup", 'x', 0),
161 OPTION("run-together", 'C', 0),
162 OPTION("dont-run-together",'B', 0),
163 OPTION("guess", 'm', 0),
164 OPTION("dont-guess", 'P', 0),
166 COMMAND("usage", '?', 0),
167 COMMAND("help", '\0', 0),
168 COMMAND("version", 'v', 0),
169 COMMAND("config", '\0', 0),
170 COMMAND("dicts", '\0', 0),
171 COMMAND("check", 'c', 0),
172 COMMAND("pipe", 'a', 0),
173 COMMAND("list", '\0', 0),
174 COMMAND("conv", '\0', 2),
175 COMMAND("norm", '\0', 1),
176 COMMAND("filter", '\0', 0),
177 COMMAND("soundslike",'\0', 0),
178 COMMAND("munch", '\0', 0),
179 COMMAND("expand", '\0', 0),
180 COMMAND("combine", '\0', 0),
181 COMMAND("munch-list",'\0', 0),
182 COMMAND("clean", '\0', 0),
183 COMMAND("filters", '\0', 0),
184 COMMAND("modes", '\0', 0),
186 COMMAND("dump", '\0', 1),
187 COMMAND("create", '\0', 1),
188 COMMAND("merge", '\0', 1),
190 ISPELL_COMP('S',0), ISPELL_COMP('w',1), ISPELL_COMP('T',1),
195 const PossibleOption * possible_options_end = possible_options + sizeof(possible_options)/sizeof(PossibleOption) - 2;
202 static const ModeAbrv mode_abrvs[] = {
203 {'e', "mode=email", N_("enter Email mode.")},
204 {'H', "mode=html", N_("enter HTML mode.")},
205 {'t', "mode=tex", N_("enter TeX mode.")},
206 {'n', "mode=nroff", N_("enter Nroff mode.")}
209 static const ModeAbrv * mode_abrvs_end = mode_abrvs + 4;
211 const PossibleOption * find_option(char c) {
212 const PossibleOption * i = possible_options;
213 while (i != possible_options_end && i->abrv != c)
218 static inline bool str_equal(const char * begin, const char * end,
221 while(begin != end && *begin == *other)
223 return (begin == end && *other == '\0');
226 static const PossibleOption * find_option(const char * begin, const char * end) {
227 const PossibleOption * i = possible_options;
228 while (i != possible_options_end
229 && !str_equal(begin, end, i->name))
234 static const PossibleOption * find_option(const char * str) {
235 const PossibleOption * i = possible_options;
236 while (i != possible_options_end
237 && !strcmp(str, i->name) == 0)
242 static void line_buffer() {
244 // set up stdin and stdout to be line buffered
245 assert(setvbuf(stdin, 0, _IOLBF, 0) == 0);
246 assert(setvbuf(stdout, 0, _IOLBF, 0) == 0);
253 int main (int argc, const char *argv[])
255 options = new_config(); // this needs to be here becuase of a bug
// with static initializers on Darwin.
258 setlocale (LC_ALL, "");
260 aspell_gettext_init();
262 options->set_committed_state(false);
264 if (argc == 1) {print_help(); return 0;}
267 const PossibleOption * o;
271 // process command line options by setting the appropriate options
272 // in "options" and/or pushing non-options onto "argv"
274 PossibleOption other_opt = OPTION("",'\0',0);
277 if (argv[i][0] == '-') {
278 bool have_parm = false;
279 if (argv[i][1] == '-') {
281 const char * c = argv[i] + 2;
282 while(*c != '=' && *c != '\0') ++c;
283 o = find_option(argv[i] + 2, c);
284 if (o == possible_options_end) {
285 option_name.assign(argv[i] + 2, c - argv[i] - 2);
286 other_opt.name = option_name.c_str();
287 other_opt.num_arg = -1;
290 if (*c == '=') {have_parm = true; ++c;}
294 const ModeAbrv * j = mode_abrvs;
295 while (j != mode_abrvs_end && j->abrv != argv[i][1]) ++j;
296 if (j == mode_abrvs_end) {
297 o = find_option(argv[i][1]);
298 if (argv[i][1] == 'v' && argv[i][2] == 'v')
303 } else { // mode option
304 other_opt.name = "mode";
305 other_opt.num_arg = 1;
309 if (*parm) have_parm = true;
311 if (o == possible_options_end) {
312 print_error(_("Invalid Option: %s"), argv[i]);
316 if (o->num_arg == 0) {
318 if (parm[0] != '\0') {
319 print_error(_(" does not take any parameters."),
320 String(argv[i], parm - argv[i]));
324 } else if (have_parm) {
327 } else if (i + 1 == argc || argv[i+1][0] == '-') {
328 if (o->num_arg == -1) {
332 print_error(_("You must specify a parameter for \"%s\"."), argv[i]);
336 num_parms = o->num_arg;
341 args.push_back(o->name);
343 args.push_back(parm);
344 } else if (o->name[0] != '\0') {
345 Config::Entry * entry = new Config::Entry;
346 entry->key = o->name;
348 entry->need_conv = true;
349 if (num_parms == -1) {
350 entry->place_holder = args.size();
351 args.push_back(parm);
356 args.push_back(argv[i]);
361 options->read_in_settings();
363 const char * codeset = 0;
364 #ifdef HAVE_LANGINFO_CODESET
365 codeset = nl_langinfo(CODESET);
366 if (ascii_encoding(*options, codeset)) codeset = 0;
370 // if (!options->have("encoding") && codeset)
371 // EXIT_ON_ERR(options->replace("encoding", codeset));
374 Vector<int> to_remove;
375 EXIT_ON_ERR(options->commit_all(&to_remove, codeset));
376 for (int i = to_remove.size() - 1; i >= 0; --i) {
377 args.erase(args.begin() + to_remove[i]);
381 print_error(_("You must specify an action"));
385 String action_str = args.front();
387 const PossibleOption * action_opt = find_option(action_str.str());
388 if (!action_opt->is_command) {
389 print_error(_("Unknown Action: %s"), action_str);
391 } else if (action_opt->num_arg == 1 && args.empty()) {
392 print_error(_("You must specify a parameter for \"%s\"."), action_str);
394 } else if (action_opt->num_arg > (int)args.size()) {
395 CERR.printf(_("Error: You must specify at least %d parameters for \"%s\".\n"),
396 action_opt->num_arg, action_str.str());
401 // perform the requested action
403 if (action_str == "usage")
405 else if (action_str == "help")
407 else if (action_str == "version")
409 else if (action_str == "config")
411 else if (action_str == "dicts")
413 else if (action_str == "check")
415 else if (action_str == "pipe")
417 else if (action_str == "list")
419 else if (action_str == "conv")
421 else if (action_str == "norm")
423 else if (action_str == "filter")
425 else if (action_str == "soundslike")
427 else if (action_str == "munch")
429 else if (action_str == "expand")
431 else if (action_str == "combine")
433 else if (action_str == "munch-list")
435 else if (action_str == "clean")
437 else if (action_str == "filters")
439 else if (action_str == "modes")
441 else if (action_str == "dump")
443 else if (action_str == "create")
445 else if (action_str == "merge")
448 abort(); // this should not happen
450 if (action != do_other) {
452 print_error(_("Unknown Action: %s"), action_str);
455 String what_str = args.front();
457 if (what_str == "config")
459 else if (what_str == "dicts")
461 else if (what_str == "filters")
463 else if (what_str == "modes")
465 else if (what_str == "master")
467 else if (what_str == "personal")
469 else if (what_str == "repl")
471 else if (what_str == "affix")
474 print_error(_("Unknown Action: %s"),
475 String(action_str + " " + what_str));
485 /////////////////////////////////////////////////////////
492 static Convert * setup_conv(const aspeller::Language * lang,
495 if (config->retrieve("encoding") != "none") {
496 PosibErr<Convert *> pe = new_convert_if_needed(*config,
498 config->retrieve("encoding"),
500 if (pe.has_err()) {print_error(pe.get_err()->mesg); exit(1);}
507 static Convert * setup_conv(Config * config,
508 const aspeller::Language * lang)
510 if (config->retrieve("encoding") != "none") {
511 PosibErr<Convert *> pe = new_convert_if_needed(*config,
512 config->retrieve("encoding"),
515 if (pe.has_err()) {print_error(pe.get_err()->mesg); exit(1);}
522 void setup_display_conv()
524 const char * gettext_enc = 0;
525 const char * env_enc = 0;
526 String doc_enc = options->retrieve("encoding");
529 gettext_enc = bind_textdomain_codeset("aspell", 0);
530 if (ascii_encoding(*options,gettext_enc)) gettext_enc = 0;
532 #ifdef HAVE_LANGINFO_CODESET
533 env_enc = nl_langinfo(CODESET);
534 if (ascii_encoding(*options, env_enc)) env_enc = 0;
536 if (gettext_enc && env_enc && strcmp(gettext_enc,env_enc) != 0)
538 fputs(("Error: bind_textdomain_codeset != nl_langinfo(CODESET)\n"), stderr);
548 EXIT_ON_ERR(dconv.setup(*options, doc_enc, enc, NormNone));
549 EXIT_ON_ERR(uiconv.setup(*options, enc, doc_enc, NormNone));
553 ///////////////////////////
560 if (args.size() == 0) {
561 load_all_filters(options);
562 options->write_to_stream(COUT);
564 EXIT_ON_ERR_SET(options->retrieve_any(args[0]), String, value);
565 COUT << value << "\n";
569 ///////////////////////////
576 const DictInfoList * dlist = get_dict_info_list(options);
578 StackPtr<DictInfoEnumeration> dels(dlist->elements());
580 const DictInfo * entry;
582 while ( (entry = dels->next()) != 0)
586 ///////////////////////////
588 // list available (filters/filter modes)
591 void list_available(PosibErr<StringPairEnumeration *> (*fun)(Config *))
593 EXIT_ON_ERR_SET(fun(options), StringPairEnumeration *, els);
595 while (!els->at_end()) {
597 printf("%-14s %s\n", sp.first, gt_(sp.second));
604 load_all_filters(options);
605 list_available(available_filters);
610 list_available(available_filter_modes);
613 ///////////////////////////
618 // precond: strlen(str) > 0
619 char * trim_wspace (char * str)
621 int last = strlen(str) - 1;
622 while (asc_isspace(str[0])) {
626 while (last > 0 && asc_isspace(str[last])) {
629 str[last + 1] = '\0';
633 bool get_word_pair(char * line, char * & w1, char * & w2)
635 w2 = strchr(line, ',');
637 print_error(_("Invalid Input"));
642 w1 = trim_wspace(line);
643 w2 = trim_wspace(w2);
647 void print_elements(const AspellWordList * wl) {
648 AspellStringEnumeration * els = aspell_word_list_elements(wl);
652 while ( (w = aspell_string_enumeration_next(els)) != 0 ) {
657 line.resize(line.size() - 2);
658 COUT.printf("%u: %s\n", count, line.c_str());
663 aspeller::SpellerImpl * real_speller;
666 StatusFunInf(Convert * c) : oconv(c) {}
669 void status_fun(void * d, Token, int correct)
671 StatusFunInf * p = static_cast<StatusFunInf *>(d);
672 if (p->verbose && correct) {
673 const CheckInfo * ci = p->real_speller->check_info();
676 else if (ci->pre_flag || ci->suf_flag)
677 COUT.printf("+ %s\n", p->oconv(ci->word.str()));
683 DocumentChecker * new_checker(AspellSpeller * speller,
684 StatusFunInf & status_fun_inf)
686 EXIT_ON_ERR_SET(new_document_checker(reinterpret_cast<Speller *>(speller)),
687 StackPtr<DocumentChecker>, checker);
688 checker->set_status_fun(status_fun, &status_fun_inf);
689 return checker.release();
692 #define BREAK_ON_SPELLER_ERR\
693 do {if (aspell_speller_error(speller)) {\
694 print_error(aspell_speller_error_message(speller)); break;\
701 bool terse_mode = true;
702 bool do_time = options->retrieve_bool("time");
703 bool suggest = options->retrieve_bool("suggest");
704 bool include_guesses = options->retrieve_bool("guess");
705 clock_t start,finish;
707 if (!options->have("mode") && !options->have("filter")) {
708 PosibErrBase err(options->replace("mode", "nroff"));
710 CERR.printf(_("WARNING: Unable to enter Nroff mode: %s\n"),
711 err.get_err()->mesg);
716 AspellCanHaveError * ret
717 = new_aspell_speller(reinterpret_cast<AspellConfig *>(options.get()));
718 if (aspell_error(ret)) {
719 print_error(aspell_error_message(ret));
722 AspellSpeller * speller = to_aspell_speller(ret);
723 aspeller::SpellerImpl * real_speller = reinterpret_cast<aspeller::SpellerImpl *>(speller);
724 Config * config = real_speller->config();
725 Conv iconv(setup_conv(config, &real_speller->lang()));
726 Conv oconv(setup_conv(&real_speller->lang(), config));
728 if (!config->retrieve_bool("byte-offsets"))
729 mb_len.setup(*config, config->retrieve("encoding"));
731 COUT << _("Time to load word list: ")
732 << (clock() - start)/(double)CLOCKS_PER_SEC << "\n";
733 StatusFunInf status_fun_inf(setup_conv(&real_speller->lang(), config));
734 status_fun_inf.real_speller = real_speller;
735 bool & print_star = status_fun_inf.verbose;
737 StackPtr<DocumentChecker> checker(new_checker(speller, status_fun_inf));
753 while (c = getchar(), c != '\n' && c != EOF)
754 buf.push_back(static_cast<char>(c));
755 buf.push_back('\n'); // always add new line so strlen > 0
761 if (c != EOF) continue;
764 word = trim_wspace(line + 1);
765 aspell_speller_add_to_personal(speller, word, -1);
766 BREAK_ON_SPELLER_ERR;
769 word = trim_wspace(line + 1);
770 aspell_speller_add_to_personal
772 real_speller->to_lower(word), -1);
773 BREAK_ON_SPELLER_ERR;
776 word = trim_wspace(line + 1);
777 aspell_speller_add_to_session(speller, word, -1);
778 BREAK_ON_SPELLER_ERR;
781 aspell_speller_save_all_word_lists(speller);
782 BREAK_ON_SPELLER_ERR;
785 word = trim_wspace(line + 1);
786 err = config->replace("mode", word);
788 config->replace("mode", "tex");
789 reload_filters(real_speller);
791 checker = new_checker(speller, status_fun_inf);
794 config->remove("filter");
795 reload_filters(real_speller);
797 checker = new_checker(speller, status_fun_inf);
810 if (line[1] == '$') {
815 if (get_word_pair(line + 4, word, word2))
816 aspell_speller_store_replacement(speller, word, -1, word2, -1);
823 if (get_word_pair(line + 4, word, word2))
824 BREAK_ON_ERR(err = config->replace(word, word2));
825 if (strcmp(word,"suggest") == 0)
826 suggest = config->retrieve_bool("suggest");
827 else if (strcmp(word,"time") == 0)
828 do_time = config->retrieve_bool("time");
829 else if (strcmp(word,"guess") == 0)
830 include_guesses = config->retrieve_bool("guess");
833 word = trim_wspace(line + 4);
834 BREAK_ON_ERR_SET(config->retrieve(word), String, ret);
842 print_elements(aspell_speller_personal_word_list(speller));
845 print_elements(aspell_speller_session_word_list(speller));
850 COUT.printl(config->retrieve("lang"));
855 // continue on (no break)
862 checker->process(line, strlen(line));
863 while (Token token = checker->next_misspelling()) {
864 word = line + token.offset;
865 word[token.len] = '\0';
866 const char * cword = iconv(word);
867 String guesses, guess;
868 const CheckInfo * ci = real_speller->check_info();
869 aspeller::CasePattern casep
870 = real_speller->lang().case_pattern(cword);
873 if (ci->pre_add && ci->pre_add[0])
874 guess.append(ci->pre_add, ci->pre_add_len).append('+');
875 guess.append(ci->word);
876 if (ci->pre_strip_len > 0)
877 guess.append('-').append(ci->word.str(), ci->pre_strip_len);
878 if (ci->suf_strip_len > 0)
879 guess.append('-').append(ci->word.str() + ci->word.size() - ci->suf_strip_len,
881 if (ci->suf_add && ci->suf_add[0])
882 guess.append('+').append(ci->suf_add, ci->suf_add_len);
883 real_speller->lang().fix_case(casep, guess.data(), guess.data());
884 guesses << ", " << oconv(guess.str());
888 const AspellWordList * suggestions = 0;
890 suggestions = aspell_speller_suggest(speller, word, -1);
892 unsigned offset = mb_len(line0, token.offset + ignore);
893 if (suggestions && !aspell_word_list_empty(suggestions))
895 COUT.printf("& %s %u %u:", word,
896 aspell_word_list_size(suggestions), offset);
897 AspellStringEnumeration * els
898 = aspell_word_list_elements(suggestions);
899 if (options->retrieve_bool("reverse")) {
901 sugs.reserve(aspell_word_list_size(suggestions));
902 while ( ( w = aspell_string_enumeration_next(els)) != 0)
904 Vector<String>::reverse_iterator i = sugs.rbegin();
906 COUT.printf(" %s", i->c_str());
908 if (i == sugs.rend()) break;
912 while ( ( w = aspell_string_enumeration_next(els)) != 0) {
913 COUT.printf(" %s%s", w,
914 aspell_string_enumeration_at_end(els) ? "" : ",");
917 delete_aspell_string_enumeration(els);
923 COUT.printf("# %s %u\n", word, offset);
925 COUT.printf("? %s 0 %u: %s\n", word, offset,
926 guesses.c_str() + 2);
929 COUT.printf(_("Suggestion Time: %f\n"),
930 (finish-start)/(double)CLOCKS_PER_SEC);
937 delete_aspell_speller(speller);
940 ///////////////////////////
945 enum UserChoice {None, Ignore, IgnoreAll, Replace, ReplaceAll,
946 Add, AddLower, Exit, Abort};
950 UserChoice reverse[256];
953 char & operator[] (UserChoice c) {return primary[c];}
954 UserChoice & operator[] (char c)
955 {return reverse[static_cast<unsigned char>(c)];}
960 void setup_display_conv();
969 bool changed = false;
971 if (args.size() == 0) {
972 print_error(_("You must specify a file name."));
974 } else if (args.size() > 1) {
975 print_error(_("Only one file name may be specified."));
980 new_name = file_name;
983 in = fopen(file_name.c_str(), "r");
985 print_error(_("Could not open the file \"%s\" for reading"), file_name);
989 if (!options->have("mode"))
990 EXIT_ON_ERR(set_mode_from_extension(options, file_name));
992 String m = options->retrieve("keymapping");
995 else if (m == "ispell")
998 print_error(_("Invalid keymapping: %s"), m);
1002 AspellCanHaveError * ret
1003 = new_aspell_speller(reinterpret_cast<AspellConfig *>(options.get()));
1004 if (aspell_error(ret)) {
1005 print_error(aspell_error_message(ret));
1012 fstat(fileno(in), &st);
1013 int fd = open(new_name.c_str(), O_WRONLY | O_CREAT | O_TRUNC, st.st_mode);
1014 if (fd >= 0) out = fdopen(fd, "w");
1017 out = fopen(new_name.c_str(), "w");
1020 print_error(_("Could not open the file \"%s\" for writing. File not saved."), file_name);
1024 setup_display_conv();
1026 AspellSpeller * speller = to_aspell_speller(ret);
1028 state = new CheckerString(speller,in,out,64);
1030 word_choices = new Choices;
1032 menu_choices = new Choices;
1033 menu_choices->push_back(Choice(mapping[Ignore], _("Ignore")));
1034 menu_choices->push_back(Choice(mapping[IgnoreAll], _("Ignore all")));
1035 menu_choices->push_back(Choice(mapping[Replace], _("Replace")));
1036 menu_choices->push_back(Choice(mapping[ReplaceAll], _("Replace all")));
1037 menu_choices->push_back(Choice(mapping[Add], _("Add")));
1038 menu_choices->push_back(Choice(mapping[AddLower], _("Add Lower")));
1039 menu_choices->push_back(Choice(mapping[Abort], _("Abort")));
1040 menu_choices->push_back(Choice(mapping[Exit], _("Exit")));
1042 String word0, new_word;
1043 Vector<String> sug_con;
1044 StackPtr<StringMap> replace_list(new_string_map());
1049 while (state->next_misspelling()) {
1051 char * word = state->get_real_word(word0);
1054 // check if it is in the replace list
1057 if ((w = replace_list->lookup(word)) != 0) {
1063 // print the line with the misspelled word highlighted;
1066 display_misspelled_word();
1069 // print the suggestions and menu choices
1072 const AspellWordList * suggestions = aspell_speller_suggest(speller, word, -1);
1073 AspellStringEnumeration * els = aspell_word_list_elements(suggestions);
1075 while (sug_con.size() != 10
1076 && (w = aspell_string_enumeration_next(els)) != 0)
1077 sug_con.push_back(w);
1078 delete_aspell_string_enumeration(els);
1081 unsigned int suggestions_size = sug_con.size();
1082 unsigned int suggestions_mid = suggestions_size / 2;
1083 if (suggestions_size % 2) suggestions_mid++; // if odd
1084 word_choices->resize(0);
1085 for (unsigned int j = 0; j != suggestions_mid; ++j) {
1086 word_choices->push_back(Choice('0' + j+1, sug_con[j]));
1087 if (j + suggestions_mid != suggestions_size)
1089 ->push_back(Choice(j+suggestions_mid+1 == 10
1091 : '0' + j+suggestions_mid+1,
1092 sug_con[j+suggestions_mid]));
1104 // Handle the users choice
1110 if (choice == '0') choice = '9' + 1;
1112 switch (mapping[choice]) {
1116 prompt(_("Are you sure you want to abort (y/n)? "));
1118 /* TRANSLATORS: The user may input any of these characters to say "yes".
1119 MUST ONLY CONSIST OF ASCII CHARACTERS. */
1120 const char * yes_characters = _("Yy");
1121 if (strchr(yes_characters, choice) != 0)
1128 aspell_speller_add_to_session(speller, word, -1);
1131 aspell_speller_add_to_personal(speller, word, -1);
// Emulate the C function add_to_personal, but add an extra step to
// convert the word to lowercase.  Yeah, it's a bit of a hack.
1137 Speller * sp = reinterpret_cast<Speller *>(speller);
1138 sp->temp_str_0.clear();
1139 sp->to_internal_->convert(word, -1, sp->temp_str_0);
1140 char * lower = sp->to_lower(sp->temp_str_0.mstr());
1141 PosibErr<void> ret = sp->add_to_personal(MutableString(lower));
1142 sp->err_.reset(ret.release_err());
1147 // the string new_word is in the encoding of the document
1148 prompt(_("With: "));
1150 if (new_word.size() == 0)
1152 if (new_word[0] >= '1' && new_word[0] < (char)suggestions_size + '1')
1153 new_word = sug_con[new_word[0]-'1'];
1154 state->replace(new_word);
1156 if (mapping[choice] == ReplaceAll && (strcmp(word,new_word.str()) != 0))
1157 replace_list->replace(word, new_word);
// the replacements are in the encoding of the document
1161 if (choice >= '1' && choice < (char)suggestions_size + '1') {
1162 state->replace(sug_con[choice-'1']);
1165 error(_("Sorry that is an invalid choice!"));
1172 aspell_speller_save_all_word_lists(speller);
1173 state.del(); // to close the file handles
1174 delete_aspell_speller(speller);
1178 bool keep_backup = options->retrieve_bool("backup");
1180 String backup_name = file_name;
1181 backup_name += ".bak";
1182 rename_file(file_name, backup_name);
1184 rename_file(new_name, file_name);
1188 remove_file(new_name);
1198 state.del(); // to close the file handles
1199 delete_aspell_speller(speller);
1201 remove_file(new_name);
1207 #define U (unsigned char)
1209 void Mapping::to_aspell()
1211 memset(this, 0, sizeof(Mapping));
1212 primary[Ignore ] = 'i';
1213 reverse[U'i'] = Ignore;
1214 reverse[U' '] = Ignore;
1215 reverse[U'\n'] = Ignore;
1217 primary[IgnoreAll ] = 'I';
1218 reverse[U'I'] = IgnoreAll;
1220 primary[Replace ] = 'r';
1221 reverse[U'r'] = Replace;
1223 primary[ReplaceAll] = 'R';
1224 reverse[U'R'] = ReplaceAll;
1226 primary[Add ] = 'a';
1227 reverse[U'A'] = Add;
1228 reverse[U'a'] = Add;
1230 primary[AddLower ] = 'l';
1231 reverse[U'L'] = AddLower;
1232 reverse[U'l'] = AddLower;
1234 primary[Abort ] = 'b';
1235 reverse[U'b'] = Abort;
1236 reverse[U'B'] = Abort;
1237 reverse[control('c')] = Abort;
1239 primary[Exit ] = 'x';
1240 reverse[U'x'] = Exit;
1241 reverse[U'X'] = Exit;
1244 void Mapping::to_ispell()
1246 memset(this, 0, sizeof(Mapping));
1247 primary[Ignore ] = ' ';
1248 reverse[U' '] = Ignore;
1249 reverse[U'\n'] = Ignore;
1251 primary[IgnoreAll ] = 'A';
1252 reverse[U'A'] = IgnoreAll;
1253 reverse[U'a'] = IgnoreAll;
1255 primary[Replace ] = 'R';
1256 reverse[U'R'] = ReplaceAll;
1257 reverse[U'r'] = Replace;
1259 primary[ReplaceAll] = 'E';
1260 reverse[U'E'] = ReplaceAll;
1261 reverse[U'e'] = Replace;
1263 primary[Add ] = 'I';
1264 reverse[U'I'] = Add;
1265 reverse[U'i'] = Add;
1267 primary[AddLower ] = 'U';
1268 reverse[U'U'] = AddLower;
1269 reverse[U'u'] = AddLower;
1271 primary[Abort ] = 'Q';
1272 reverse[U'Q'] = Abort;
1273 reverse[U'q'] = Abort;
1274 reverse[control('c')] = Abort;
1276 primary[Exit ] = 'X';
1277 reverse[U'X'] = Exit;
1278 reverse[U'x'] = Exit;
1282 ///////////////////////////
1289 AspellCanHaveError * ret
1290 = new_aspell_speller(reinterpret_cast<AspellConfig *>(options.get()));
1291 if (aspell_error(ret)) {
1292 print_error(aspell_error_message(ret));
1295 AspellSpeller * speller = to_aspell_speller(ret);
1297 state = new CheckerString(speller,stdin,0,64);
1301 while (state->next_misspelling()) {
1303 state->get_real_word(word);
1308 state.del(); // to close the file handles
1309 delete_aspell_speller(speller);
1312 ///////////////////////////
1321 const char * from = fix_encoding_str(args[0], buf1);
1322 const char * to = fix_encoding_str(args[1], buf2);
1323 Normalize norm = NormNone;
1324 if (strcmp(from, "utf-8") == 0 && strcmp(to, "utf-8") != 0)
1326 else if (strcmp(from, "utf-8") != 0 && strcmp(to, "utf-8") == 0)
1328 if (args.size() > 2) {
1329 for (String::iterator i = args[2].begin(); i != args[2].end(); ++i)
1330 *i = asc_tolower(*i);
1331 options->replace("normalize", "true");
1332 if (args[2] == "none")
1333 options->replace("normalize", "false");
1334 else if (args[2] == "internal")
1335 options->replace("norm-strict", "false");
1336 else if (args[2] == "strict")
1337 options->replace("norm-strict", "true");
1339 EXIT_ON_ERR(options->replace("norm-form", args[2]));
1341 EXIT_ON_ERR(conv.setup(*options, args[0], args[1], norm));
1343 while (CIN.getline(line))
1344 COUT.printl(conv(line));
1349 options->replace("normalize", "true");
1350 const char * from = args.size() < 3 ? "utf-8" : args[0].str();
1351 const char * to = args.size() < 3 ? "utf-8" : args[2].str();
1352 const char * intr = args.size() < 3 ? args[0].str() : args[1].str();
1353 String * form = (args.size() == 2 ? &args[1]
1354 : args.size() == 4 ? &args[3]
1356 Normalize decode_norm = NormTo;
1358 for (String::iterator i = form->begin(); i != form->end(); ++i)
1359 *i = asc_tolower(*i);
1360 if (*form == "internal") {
1361 options->replace("norm-strict", "false");
1362 decode_norm = NormNone;
1363 } else if (*form == "strict") {
1364 options->replace("norm-strict", "true");
1365 decode_norm = NormNone;
1367 if (decode_norm == NormTo) EXIT_ON_ERR(options->replace("norm-form", *form));
1370 EXIT_ON_ERR(encode.setup(*options, from, intr, NormFrom));
1371 EXIT_ON_ERR(decode.setup(*options, intr, to, decode_norm));
1373 while (CIN.getline(line))
1374 COUT.printl(decode(encode(line)));
1377 ///////////////////////////
1384 //assert(setvbuf(stdin, 0, _IOLBF, 0) == 0);
1385 //assert(setvbuf(stdout, 0, _IOLBF, 0) == 0);
1386 CERR << _("Sorry \"filter\" is currently unimplemented.\n");
1391 ///////////////////////////
1397 COUT.put("@(#) International Ispell Version 3.1.20 "
1398 "(but really Aspell " VERSION ")\n");
1401 ///////////////////////////////////////////////////////////////////////
1403 // These functions use implementation details of the default speller
1407 class IstreamEnumeration : public StringEnumeration {
1411 IstreamEnumeration(FStream & i) : in(&i) {}
1412 IstreamEnumeration * clone() const {
1413 return new IstreamEnumeration(*this);
1415 void assign (const StringEnumeration * other) {
1416 *this = *static_cast<const IstreamEnumeration *>(other);
1419 if (!in->getline(data)) return 0;
1420 else return data.c_str();
1422 bool at_end() const {return *in;}
1425 ///////////////////////////
1432 using namespace aspeller;
1434 bool strict = args.size() != 0 && args[0] == "strict";
1436 Config * config = options;
1438 CachePtr<Language> lang;
1439 find_language(*config);
1440 PosibErr<Language *> res = new_language(*config);
1441 if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
1442 lang.reset(res.data);
1443 IstreamEnumeration in(CIN);
1444 WordListIterator wl_itr(&in, lang, &CERR);
1445 config->replace("validate-words", "true");
1446 config->replace("validate-affixes", "true");
1448 config->replace("clean-words", "true");
1449 config->replace("clean-affixes", "true");
1450 config->replace("skip-invalid-words", "true");
1451 wl_itr.init(*config);
1453 if (config->have("encoding")) {
1454 EXIT_ON_ERR(oconv.setup(*config, lang->charmap(), config->retrieve("encoding"), NormTo));
1455 oconv2.setup(*config, lang->charmap(), config->retrieve("encoding"), NormTo);
1457 EXIT_ON_ERR(oconv.setup(*config, lang->charmap(), lang->data_encoding(), NormTo));
1458 oconv2.setup(*config, lang->charmap(), lang->data_encoding(), NormTo);
1460 while (wl_itr.adv()) {
1461 if (*wl_itr->aff.str)
1462 COUT.printf("%s/%s\n", oconv(wl_itr->word), oconv2(wl_itr->aff));
1464 COUT.printl(oconv(wl_itr->word));
1468 ///////////////////////////
1473 void dump (aspeller::Dict * lws, Convert * conv)
1475 using namespace aspeller;
1477 switch (lws->basic_type) {
1478 case Dict::basic_dict:
1480 Dictionary * ws = static_cast<Dictionary *>(lws);
1481 StackPtr<WordEntryEnumeration> els(ws->detailed_elements());
1483 while (wi = els->next(), wi) {
1484 wi->write(COUT,*ws->lang(), conv);
1489 case Dict::multi_dict:
1491 StackPtr<DictsEnumeration> els(lws->dictionaries());
1493 while (ws = els->next(), ws)
1503 using namespace aspeller;
1505 if (args.size() != 0) {
1506 options->replace("master", args[0].c_str());
1509 Config * config = options;
1511 if (action == do_create) {
1513 find_language(*config);
1514 EXIT_ON_ERR(create_default_readonly_dict
1515 (new IstreamEnumeration(CIN),
1518 } else if (action == do_merge) {
1520 print_error(_("Can't merge a master word list yet. Sorry."));
1523 } else if (action == do_dump) {
1525 EXIT_ON_ERR_SET(add_data_set(config->retrieve("master-path"), *config), Dict *, d);
1526 StackPtr<Convert> conv(setup_conv(d->lang(), config));
1531 ///////////////////////////
1537 using namespace aspeller;
1539 if (args.size() != 0) {
1540 EXIT_ON_ERR(options->replace("personal", args[0]));
1542 options->replace("module", "aspeller");
1543 if (action == do_create || action == do_merge) {
1544 CERR << _("Sorry \"create/merge personal\" is currently unimplemented.\n");
1549 StackPtr<Speller> speller(new_speller(options));
1551 if (action == do_create) {
1552 if (file_exists(speller->config()->retrieve("personal-path"))) {
1553 print_error(_("Sorry I won't overwrite \"%s\""),
1554 speller->config()->retrieve("personal-path"));
1557 speller->personal_word_list().data->clear();
1562 speller->add_to_personal(word);
1564 speller->save_all_word_lists();
1567 } else { // action == do_dump
1569 // FIXME: This is currently broken
1571 Config * config = options;
1572 Dictionary * per = new_default_writable_dict();
1573 per->load(config->retrieve("personal-path"), *config);
1574 StackPtr<WordEntryEnumeration> els(per->detailed_elements());
1575 StackPtr<Convert> conv(setup_conv(per->lang(), config));
1578 while (wi = els->next(), wi) {
1579 wi->write(COUT,*(per->lang()), conv);
1586 ///////////////////////////
1592 using namespace aspeller;
1594 if (args.size() != 0) {
1595 options->replace("repl", args[0].c_str());
1598 if (action == do_create || action == do_merge) {
1600 CERR << _("Sorry \"create/merge repl\" is currently unimplemented.\n");
1605 SpellerImpl speller(options);
1607 if (action == do_create) {
1608 if (file_exists(speller->config()->retrieve("repl-path"))) {
1609 print_error(_("Sorry I won't overwrite \"%s\""),
1610 speller->config()->retrieve("repl-path"));
1613 speller->personal_repl().clear();
1620 get_word_pair(word,repl,':');
1621 EXIT_ON_ERR(speller->store_repl(word,repl,false));
1624 } catch (bad_cin) {}
1626 EXIT_ON_ERR(speller->personal_repl().synchronize());
1630 } else if (action == do_dump) {
1632 // FIXME: This is currently broken
1634 ReplacementDict * repl = new_default_replacement_dict();
1635 repl->load(options->retrieve("repl-path"), *options);
1636 StackPtr<WordEntryEnumeration> els(repl->detailed_elements());
1640 Conv conv(setup_conv(repl->lang(), options));
1641 while ((rl = els->next())) {
1642 repl->repl_lookup(*rl, words);
1644 COUT << conv(rl->word) << ": " << conv(words.word) << "\n";
1645 } while (words.adv());
1651 //////////////////////////
1657 using namespace aspeller;
1658 CachePtr<Language> lang;
1659 find_language(*options);
1660 PosibErr<Language *> res = new_language(*options);
1661 if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
1662 lang.reset(res.data);
1663 Conv iconv(setup_conv(options, lang));
1664 Conv oconv(setup_conv(lang, options));
1668 while (CIN.getline(word)) {
1669 const char * w = iconv(word);
1670 lang->LangImpl::to_soundslike(sl, w);
1671 printf("%s\t%s\n", word.str(), oconv(sl));
1675 //////////////////////////
1682 using namespace aspeller;
1683 CachePtr<Language> lang;
1684 find_language(*options);
1685 PosibErr<Language *> res = new_language(*options);
1686 if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
1687 lang.reset(res.data);
1688 Conv iconv(setup_conv(options, lang));
1689 Conv oconv(setup_conv(lang, options));
1693 while (CIN.getline(word)) {
1694 lang->munch(iconv(word), &gi);
1696 for (const aspeller::CheckInfo * ci = gi.head; ci; ci = ci->next)
1698 COUT << ' ' << oconv(ci->word) << '/';
1699 if (ci->pre_flag != 0) COUT << oconv(static_cast<char>(ci->pre_flag));
1700 if (ci->suf_flag != 0) COUT << oconv(static_cast<char>(ci->suf_flag));
1706 //////////////////////////
1714 if (args.size() > 0)
1715 level = atoi(args[0].c_str()); //FIXME: More verbose
1716 int limit = INT_MAX;
1717 if (args.size() > 1)
1718 limit = atoi(args[1].c_str());
1720 using namespace aspeller;
1721 CachePtr<Language> lang;
1722 find_language(*options);
1723 PosibErr<Language *> res = new_language(*options);
1724 if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
1725 lang.reset(res.data);
1726 Conv iconv(setup_conv(options, lang));
1727 Conv oconv(setup_conv(lang, options));
1732 while (CIN.getline(word)) {
1734 char * w = iconv(buf.mstr(), buf.size());
1735 char * af = strchr(w, '/');
1745 exp_list = lang->expand(w, af, exp_buf, limit);
1748 COUT << word << ' ';
1749 WordAff * p = exp_list;
1751 COUT << oconv(p->word);
1752 if (limit < INT_MAX && p->aff[0]) COUT << '/' << oconv((const char *)p->aff);
1757 } else if (level >= 3) {
1760 for (WordAff * p = exp_list; p; p = p->next)
1761 ratio += p->word.size;
1762 ratio /= exp_list->word.size; // it is assumed the first
1763 // expansion is just the root
1765 for (WordAff * p = exp_list; p; p = p->next) {
1766 COUT << word << ' ' << oconv(p->word);
1767 if (limit < INT_MAX && p->aff[0]) COUT << '/' << oconv((const char *)p->aff);
1768 if (level >= 4) COUT.printf(" %f\n", ratio);
1775 //////////////////////////
// Appends to `aff` every character of the NUL-terminated string `app`
// that `aff` does not already contain, so no flag is added twice.
1780 static void combine_aff(String & aff, const char * app)
1782 for (; *app; ++app) {
// memchr scans the current contents of aff, which includes characters
// appended by earlier iterations of this loop.
1783 if (!memchr(aff.c_str(),*app,aff.size()))
1784 aff.push_back(*app);
// Writes one word-list entry to COUT: the base word converted through
// `oconv`, and the affix string after a '/'.  Nothing is written when
// `base` is empty.
// NOTE(review): the condition that decides whether the "/affs" part is
// printed is not visible in this excerpt -- confirm before relying on it.
1788 static void print_wordaff(const String & base, const String & affs, Conv & oconv)
1790 if (base.empty()) return;
1791 COUT << oconv(base);
1795 COUT.printf("/%s\n", oconv(affs));
// Returns true when `a` and `b` have the same length, their first
// characters are equal after l->to_lower(), and every remaining byte is
// identical (i.e. the words differ at most in the case of the first
// letter).  Two empty strings compare equal.
1798 static bool lower_equal(aspeller::Language * l, ParmString a, ParmString b)
1800 if (a.size() != b.size()) return false;
1801 if (l->to_lower(a[0]) != l->to_lower(b[0])) return false;
// Guard the empty case: with size() == 0 the expression a.size() - 1
// would turn into a huge length once converted to memcmp's size_t
// argument and memcmp would read far past both buffers.
1802 return a.size() == 0 || memcmp(a + 1, b + 1, a.size() - 1) == 0;
1807 using namespace aspeller;
1808 CachePtr<Language> lang;
1809 find_language(*options);
1810 PosibErr<Language *> res = new_language(*options);
1811 if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
1812 lang.reset(res.data);
1813 Conv iconv(setup_conv(options, lang));
1814 Conv oconv(setup_conv(lang, options));
1819 while (CIN.getline(word)) {
1822 CharVector buf; buf.append(word.c_str(), word.size() + 1);
1823 char * w = buf.data();
1824 char * af = strchr(w, '/');
1834 if (lower_equal(lang, base, w)) {
1835 if (lang->is_lower(base.str())) {
1836 combine_aff(affs, af);
1839 combine_aff(affs, af);
1842 print_wordaff(base, affs, oconv);
1848 print_wordaff(base, affs, oconv);
1851 //////////////////////////
1856 void munch_list_simple();
1857 void munch_list_complete(bool, bool);
1861 bool simple = false;
1863 bool simplify = true;
1865 for (unsigned i = 0; i < args.size(); ++i) {
1866 if (args[i] == "simple") simple = true;
1867 else if (args[i] == "single") multi = false;
1868 else if (args[i] == "multi") multi = true;
1869 else if (args[i] == "keep") simplify = false;
1872 print_error(_("\"%s\" is not a valid flag for the \"munch-list\" command."),
1878 munch_list_simple();
1880 munch_list_complete(multi, simplify);
1884 // munch list (simple version)
1887 // This version works the same way as the myspell "munch" program.
1888 // However, because the results depend on the hash table used and the
1889 // order of the word list it won't produce identical results.
// Value type stored in SML_Table (the hash table used by the simple
// munch-list algorithm).  The constructor stores the word pointer as
// given (it does not copy the string) and starts with no affix string
// and keep == false.
1891 struct SML_WordEntry {
1894 bool keep; // boolean; initialised to false by the constructor
1895 SML_WordEntry(const char * w = 0) : word(w), aff(0), keep(false) {}
// Parameter members handed to HashTable<> for the simple munch-list
// table: entries are SML_WordEntry values keyed by their `word` C string.
1899 typedef SML_WordEntry Value;
1900 typedef const char * Key;
1901 static const bool is_multi = false;
1902 acommon::hash<const char *> hash;
// Keys are compared by string content, not by pointer.
1903 bool equal(Key x, Key y) {return strcmp(x,y) == 0;}
1904 Key key(const Value & v) {return v.word;}
1907 typedef HashTable<SML_Parms> SML_Table;
// Appends the flag character `aff` to the entry's NUL-terminated affix
// string unless the string already contains it.
1909 static inline void add_affix(SML_Table::iterator b, char aff)
// Already present: nothing to do.
1912 if (p) {while (*p) {if (*p == aff) return; ++p;}}
// Grow by one flag plus the terminator.
// NOTE(review): the realloc result is stored and written through without
// a NULL check.
1914 b->aff = (char *)realloc(b->aff, s + 2);
1915 b->aff[s + 0] = aff;
1916 b->aff[s + 1] = '\0';
// Simple affix compression of a word list read from CIN.
//
// Phases visible in this listing:
//   1. load the language (exits with status 1 on error) and build the
//      input/output converters;
//   2. expand every input line and insert each expansion into `table`;
//   3. munch every table entry and, among the candidate (base, flags)
//      pairs whose full expansion is contained in the table, remember the
//      one with the shortest base word and add its flags to that base;
//   4. print the entries, then free the affix strings.
1919 void munch_list_simple()
1921 using namespace aspeller;
1922 CachePtr<Language> lang;
1923 find_language(*options);
1924 PosibErr<Language *> res = new_language(*options);
1925 if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
1926 lang.reset(res.data);
// iconv: option encoding -> language encoding; oconv: the reverse
// (judging by the argument order of setup_conv).
1927 Conv iconv(setup_conv(options, lang));
1928 Conv oconv(setup_conv(lang, options));
1936 // add words to dictionary
1937 while (CIN.getline(word)) {
1939 char * w = iconv(buf.mstr(), buf.size());
// `af` points at the '/' separating the word from its affix flags, or is
// NULL when the line has no flags.
1940 char * af = strchr(w, '/');
1950 exp_list = lang->expand(w, af, exp_buf);
1951 for (WordAff * q = exp_list; q; q = q->next) {
// table_buf.dup() gives the table its own copy of the word.
1952 table.insert(SML_WordEntry(table_buf.dup(q->word)));
1956 // now try to munch each word in the dictionary
1957 SML_Table::iterator p = table.begin();
1958 SML_Table::iterator end = table.end();
1960 for (; p != end; ++p)
1962 const aspeller::CheckInfo * best = 0;
1963 unsigned min_base_size = INT_MAX;
1964 lang->munch(p->word, &gi);
1965 const aspeller::CheckInfo * ci = gi.head;
1968 // check if the base word is in the dictionary
1969 SML_Table::iterator b = table.find(ci->word);
1970 if (b == table.end()) goto cont;
1972 // check if all the words once expanded are in the dictionary
1973 // this includes the existing flags due to pre-suf cross products
1974 if (b->aff) flags = b->aff;
1976 if (ci->pre_flag != 0) flags += ci->pre_flag;
1977 if (ci->suf_flag != 0) flags += ci->suf_flag;
1979 exp_list = lang->expand(ci->word, flags, exp_buf);
1980 for (WordAff * q = exp_list; q; q = q->next) {
// A single expansion missing from the table disqualifies this candidate.
1981 if (!table.have(q->word)) goto cont;
1984 // the base word and flags are valid, now keep the one with the
1985 // smallest base word
1986 if (ci->word.size() < min_base_size) {
1987 min_base_size = ci->word.size();
1994 // now add the base to the keep list if one exists
1995 // otherwise just keep the original word
1997 SML_Table::iterator b = table.find(best->word);
1998 assert(b != table.end());
1999 if (best->pre_flag) add_affix(b, best->pre_flag);
2000 if (best->suf_flag) add_affix(b, best->suf_flag);
2007 // Print the entries in the table marked as "to keep"
2009 for (; p != end; ++p)
2012 COUT << oconv(p->word);
2014 COUT << '/' << oconv(p->aff);
// Release the affix strings allocated by add_affix().
2021 for (; p != end; ++p)
2023 if (p->aff) free(p->aff);
2029 // munch list (complete version)
2032 // This version will produce a smaller list than the simple version.
2033 // It is very close to the optimum result.
2037 // Hash table to store the words
// Stores the word pointer as given; the affix string and the parent/next
// links start out null and rank starts at 0.
2046 CML_Entry(const char * w = 0) : word(w), aff(0), parent(0), next(0), rank(0) {}
// Parameter members handed to HashTable<> for the complete munch-list
// table: CML_Entry values keyed by their `word` C string.  Unlike the
// simple version's parameters, is_multi is true here.
2050 typedef CML_Entry Value;
2051 typedef const char * Key;
2052 static const bool is_multi = true;
2053 acommon::hash<const char *> hash;
// Keys are compared by string content, not by pointer.
2054 bool equal(Key x, Key y) {return strcmp(x,y) == 0;}
2055 Key key(const Value & v) {return v.word;}
2058 typedef HashTable<CML_Parms> CML_Table;
2061 // add an affix to a word but keep the prefixes and suffixes separate
// Adds flag `aff` to the entry's affix string, which keeps prefix flags
// before a '/' and suffix flags after it.  `prefix` selects the side.
// The string is rebuilt in a freshly malloc'ed buffer and the old one is
// freed.
2064 static void add_affix(CML_Table::iterator b, char aff, bool prefix)
// Flag already present: leave the string untouched.
2070 if (*p == aff) return;
// New size: old length plus the new flag plus the terminating NUL.
2073 s = (p - b->aff) + 2;
// NOTE(review): the malloc result is not checked in the visible lines.
2075 char * tmp = (char *)malloc(s);
// Copy the existing prefix flags (everything before the '/') ...
2078 if (p) {while (*p != '/') *q++ = *p++;}
// ... and append a new prefix flag at the end of the prefix part.
2079 if (prefix) *q++ = aff;
// Skip the old '/' and copy the existing suffix flags ...
2081 if (p) {p++; while (*p != '\0') *q++ = *p++;}
// ... and append a new suffix flag at the very end.
2082 if (!prefix) *q++ = aff;
// Sanity check: exactly `s` bytes must have been written.
2084 assert(q - tmp == s);
2085 if (b->aff) free(b->aff);
2090 // Standard disjoint set algo with union by rank and path compression
// Union step of the disjoint-set structure: compares the ranks of the two
// set representatives `x` and `y` and bumps y's rank when both are equal.
// NOTE(review): the parent-pointer assignments are not visible in this
// excerpt; presumably the lower-ranked root is attached under the other.
2093 static void link(CML_Entry * x, CML_Entry * y)
2096 if (x->rank > y->rank) {
2100 if (x->rank == y->rank) y->rank++;
// Returns the representative of the set containing `x`.
2104 static CML_Entry * find_set (CML_Entry * x)
// Recurse towards the root and store the result back into x->parent, so
// later lookups from x reach the representative directly.
2107 return x->parent = find_set(x->parent);
2113 // Stuff to manage prefix-suffix combinations
// Indexed access to the pair: index 0 selects `pre`, any other index
// selects `suf`.  This lets callers address "this side" / "the other
// side" as get(which) / get(!which).
2119 String & get(int i) {return i == 0 ? pre : suf;}
2120 const String & get(int i) const {return i == 0 ? pre : suf;}
// A new node is not linked to anything.
2121 PreSuf() : next(0) {}
// Singly linked list of PreSuf nodes; starts empty.
2128 PreSufList() : head(0) {}
// Links `to_add` in front of the current head.
// NOTE(review): the statement updating `head` is not visible here.
2129 void add(PreSuf * to_add) {
2130 to_add->next = head;
2135 PreSuf * tmp = head;
// NOTE(review): body not visible in this excerpt; the name suggests it
// moves the nodes into `other` -- confirm against the full source.
2140 void transfer(PreSufList & other) {
2151 // Example of usage:
2152 // combine(in, res, 0)
2153 // Pre: in = [(ab, c) (ab, d) (c, de) (c, ef)]
2154 // Post: res = [(ab, cd), (c, def)]
// Groups the pairs of `in` by the side selected with `which` (0 = pre,
// 1 = suf) and, for each distinct value of that side, collects the
// characters of the other side into one newly allocated PreSuf.
2155 static void combine(const PreSufList & in, PreSufList & res, int which)
2157 const PreSuf * i = in.head;
2159 const String & s = i->get(which);
// Skip `i` if an earlier node already has the same key: that group was
// handled when the earlier node was visited.
2160 for (const PreSuf * j = in.head; j != i; j = j->next) {
2161 if (j->get(which) == s) goto cont;
2163 PreSuf * tmp = new PreSuf;
// `b` accumulates the merged "other side" for this key.
2166 String & b = tmp->get(!which);
2167 for (const PreSuf * j = i->next; j; j = j->next) {
2168 if (j->get(which) != s) continue;
2169 const String & a = j->get(!which);
2170 for (String::const_iterator x = a.begin(); x != a.end(); ++x) {
// Do not add a character that `b` already holds.
2171 if (memchr(b.data(), *x, b.size())) continue;
2182 // Stuff used when pruning the list of base words
2187 char * aff; // modifying this will modify the affix entry in the hash table
2188 std::vector<bool> exp;
2189 std::vector<bool> orig_exp;
2192 // static void dump(const Vector<Expansion *> & working,
2193 // const Vector<CML_Table::iterator> & entries)
2195 // for (unsigned i = 0; i != working.size(); ++i) {
2196 // if (!working[i]) continue;
2197 // CERR.printf("%s/%s ", working[i]->word, working[i]->aff);
2198 // for (unsigned j = 0; j != working[i]->exp.size(); ++j) {
2199 // if (working[i]->exp[j])
2200 // CERR.printf("%s ", entries[j]->word);
2207 // standard set algorithms on a bit vector
// Tests whether the set bits of `smaller` are all set in `larger`.
// Both vectors must have the same size (asserted).  Returns false as
// soon as a bit is found that is set in `smaller` but not in `larger`.
2209 static bool subset(const std::vector<bool> & smaller,
2210 const std::vector<bool> & larger)
2212 assert(smaller.size() == larger.size());
2213 unsigned s = larger.size();
2214 for (unsigned i = 0; i != s; ++i) {
2215 if (smaller[i] && !larger[i]) return false;
// Set union in place: every bit set in `y` becomes set in `x`.
// Both vectors must have the same size (asserted).
2220 static void merge(std::vector<bool> & x, const std::vector<bool> & y)
2222 assert(x.size() == y.size());
2223 unsigned s = x.size();
2224 for (unsigned i = 0; i != s; ++i) {
2225 if (y[i]) x[i] = true;
// Set difference in place: every bit set in `y` is cleared in `x`.
// Both vectors must have the same size (asserted).
2229 static void purge(std::vector<bool> & x, const std::vector<bool> & y)
2231 assert(x.size() == y.size());
2232 unsigned s = x.size();
2233 for (unsigned i = 0; i != s; ++i) {
2234 if (y[i]) x[i] = false;
// Walks every element of the bit vector `x` and returns an unsigned
// tally.  NOTE(review): the loop body is not visible here; WorkingLt uses
// the result as "number of expansions", i.e. presumably the set bits.
2238 static inline unsigned count(const std::vector<bool> & x) {
2240 for (unsigned i = 0; i != x.size(); ++i) {
// Ordering predicate passed to std::sort for lists of Expansion pointers.
// Returns true when `x` should come before `y`.  Criteria, in order:
// more expansions first, then shorter base word, then longer affix
// string, then the base word and finally the affix string compared with
// strcmp.  Both pointers are dereferenced, so neither may be null.
2249 bool operator() (Expansion * x, Expansion * y) {
2251 // LARGEST number of expansions
2252 unsigned x_s = count(x->exp);
2253 unsigned y_s = count(y->exp);
2254 if (x_s != y_s) return x_s > y_s;
2256 // SMALLEST base word
2257 x_s = strlen(x->word);
2258 y_s = strlen(y->word);
2259 if (x_s != y_s) return x_s < y_s;
2261 // LARGEST affix string
2262 x_s = strlen(x->aff);
2263 y_s = strlen(y->aff);
2264 if (x_s != y_s) return x_s > y_s;
// Remaining ties are broken by string content.
2267 int cmp = strcmp(x->word, y->word);
2268 if (cmp != 0) return cmp < 0;
2271 cmp = strcmp(x->aff, y->aff);
2277 // Finally the function that does the real work
// Complete affix compression of a word list read from CIN.
//
// multi:    when a base word ends up with several prefix/suffix
//           combinations, keep all of them (true) or only the one with
//           the most expansions (false).
// simplify: passed by the "munch-list" command, which clears it for the
//           "keep" argument.  NOTE(review): its use inside this function
//           is not visible in this excerpt.
//
// This first part loads the language (exiting with status 1 on error),
// builds the converters and fills `table` with every expansion of every
// input line.
2280 void munch_list_complete(bool multi, bool simplify)
2282 using namespace aspeller;
2283 CachePtr<Language> lang;
2284 find_language(*options);
2285 PosibErr<Language *> res = new_language(*options);
2286 if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
2287 lang.reset(res.data);
2288 Conv iconv(setup_conv(options, lang));
2289 Conv oconv(setup_conv(lang, options));
2297 // add words to dictionary
2298 while (CIN.getline(word)) {
2300 char * w = iconv(buf.mstr(), buf.size());
// `af` points at the '/' introducing the affix flags, or is NULL.
2301 char * af = strchr(w, '/');
2311 exp_list = lang->expand(w, af, exp_buf);
2312 for (WordAff * q = exp_list; q; q = q->next) {
// The table allows duplicate keys, so uniqueness is enforced here.
2313 if (!table.have(q->word)) // since it is a multi hash table
// The `.first` of the insert result is evaluated but not used.
2314 table.insert(CML_Entry(table_buf.dup(q->word))).first;
2318 // Now try to munch each word in the dictionary. This will also
2319 // group the base words into disjoint sets based on their expansion.
2320 CML_Table::iterator p = table.begin();
2321 CML_Table::iterator end = table.end();
2323 for (; p != end; ++p)
2325 lang->munch(p->word, &gi, false);
2326 const aspeller::CheckInfo * ci = gi.head;
2329 // check if the base word is in the dictionary
2330 CML_Table::iterator b = table.find(ci->word);
2331 if (b == table.end()) goto cont;
2333 // check if all the words once expanded are in the dictionary
// Each candidate carries exactly one flag: a prefix or a suffix, never
// both.
2335 assert(!(ci->pre_flag && ci->suf_flag));
2336 if (ci->pre_flag != 0) flags[0] = ci->pre_flag;
2337 else if (ci->suf_flag != 0) flags[0] = ci->suf_flag;
2340 exp_list = lang->expand(ci->word, flags, exp_buf);
2341 for (WordAff * q = exp_list; q; q = q->next) {
2342 if (!table.have(q->word)) goto cont;
2345 // all the expansions are in the dictionary now add the affix to
2346 // the base word and figure out which disjoint set it belongs to
2347 add_affix(b, flags[0], ci->pre_flag != 0);
2348 CML_Entry * bs = find_set(&*b);
2349 for (WordAff * q = exp_list; q; q = q->next) {
2350 CML_Table::iterator w = table.find(q->word);
// Every expansion was checked with table.have() above, so this lookup
// must succeed.  Assert on `w`, the iterator that is dereferenced on the
// next line (the previous code re-checked `b` by mistake).
2351 assert(w != table.end());
2352 CML_Entry * ws = find_set(&*w);
2361 // If a base word has both prefixes and suffixes try to combine them.
2362 // This can lead to multiple entries for the same base word. If "multi"
2363 // is true then include all the entries. Otherwise, only include the
2364 // one with the largest number of expansions. This is a greedy choice
2365 // that may not be optimal, but is close to it.
// Head of a linked list of CML_Entry objects allocated with `new` below.
2368 CML_Entry * extras = 0;
2369 for (; p != end; ++p)
2371 pre.clear(); suf.clear();
2372 if (!p->aff) continue;
// Split the "prefixes/suffixes" affix string built by add_affix().
2374 while (*s != '/') pre += *s++;
2376 while (*s != '\0') suf += *s++;
// Only one kind of flag present: rewrite the affix string in place
// without the '/' separator (the result is shorter than the original).
2379 strcpy(p->aff, suf.str());
2381 } else if (suf.empty()) {
2383 strcpy(p->aff, pre.str());
2387 // Try all possible combinations and keep the ones which expand
2390 PreSufList cross,tmp1,tmp2;
2393 for (String::iterator pi = pre.begin(); pi != pre.end(); ++pi) {
2394 String::iterator si = suf.begin();
2395 while (si != suf.end()) { {
// One prefix flag combined with one suffix flag.
2396 char flags[3] = {*pi, *si, '\0'};
2398 exp_list = lang->expand(p->word, flags, exp_buf);
2399 for (WordAff * q = exp_list; q; q = q->next) {
// The pair is only legal when every cross-product word is in the table.
2400 if (!table.have(q->word)) goto cont2;
2411 // Now combine the legal cross pairs with other ones when
2414 // final res = [ (pre, []) ([],suf),
2415 // (cross | combine first | combine second)
2416 // (cross | combine second | combine first)
2420 // combine first [(ab, c) (ab, d) (c, de) (c, ef)]
2421 // = [(ab, cd), (c, def)]
2423 combine(cross, tmp1, 0);
2424 combine(tmp1, tmp2, 1);
2427 combine(cross, tmp1, 1);
2428 combine(tmp1, tmp2, 0);
2440 combine(tmp2, tmp1, 0);
2441 combine(tmp1, cross, 1);
2445 // It is OK to have multiple entries with the same base word
// Write "<pre><suf>" into the existing buffer; the + 1 copies the NUL.
2450 memcpy(p->aff, ps->pre.data(), ps->pre.size());
2451 memcpy(p->aff + ps->pre.size(), ps->suf.str(), ps->suf.size() + 1);
2454 CML_Entry * bs = find_set(&*p);
// Each remaining combination gets its own heap-allocated entry that
// shares the base word pointer but owns a separate affix string.
2455 for (; ps; ps = ps->next) {
2457 CML_Entry * tmp = new CML_Entry;
2458 tmp->word = p->word;
2459 tmp->aff = (char *)malloc(ps->pre.size() + ps->suf.size() + 1);
2460 memcpy(tmp->aff, ps->pre.data(), ps->pre.size());
2461 memcpy(tmp->aff + ps->pre.size(), ps->suf.str(), ps->suf.size() + 1);
2471 // choose the one which has the largest number of expansions
2477 for (ps = cross.head; ps; ps = ps->next) {
2481 exp_list = lang->expand(p->word, flags, exp_buf);
// `c` counts the expansions produced by this combination.
2483 for (WordAff * q = exp_list; q; q = q->next) ++c;
2484 if (c > max_exp) {max_exp = c; best = ps;}
2487 memcpy(p->aff, best->pre.data(), best->pre.size());
2488 memcpy(p->aff + best->pre.size(), best->suf.str(), best->suf.size() + 1);
// Walk the `extras` list, detaching one node per step.
2494 CML_Entry * tmp = extras;
2495 extras = extras->next;
2501 // Create a linked list for each disjoint set
2503 for (; p != end; ++p)
2506 CML_Entry * bs = find_set(&*p);
2513 // Now process each disjoint set independently
2515 for (; p != end; ++p)
// Entries with a parent are not set representatives; each set is handled
// once, starting from its root.
2517 if (p->parent) continue;
// Per-set working data:
//   entries      table iterators of the unique words, indexed by number
//   expansions   one Expansion per list element (owns the bit vectors)
//   to_keep      the elements selected for output
//   to_keep_exp  union of the expansions of everything in to_keep
//   working      candidates still under consideration
//   to_remove    indices into `working` collected by step (2)
2519 Vector<CML_Table::iterator> entries;
2520 Vector<Expansion> expansions;
2521 Vector<Expansion *> to_keep;
2522 std::vector<bool> to_keep_exp;
2523 Vector<Expansion *> working;
2524 Vector<unsigned> to_remove;
2526 // First assign numbers to each unique word. The rank field is
2527 // no longer used so use it to store the number.
2528 for (CML_Entry * q = &*p; q; q = q->next) {
2529 CML_Table::iterator e = table.find(q->word);
// -1 marks a word that has not been numbered yet.
2530 if (e->rank == -1) {
2531 e->rank = entries.size();
2532 q->rank = entries.size();
2533 entries.push_back(e);
2541 expansions.push_back(tmp);
2545 to_keep_exp.resize(entries.size());
2546 //for (int i = 0; i != to_keep_exp.size(); ++i) {
2547 // printf(">>> %d %d\n", i, (int)to_keep_exp[i]);
2550 // Store the expansion of each base word in a bit vector and
2551 // add it to the working set
2552 for (Vector<Expansion>::iterator q = expansions.begin();
2553 q != expansions.end();
2556 q->exp.resize(entries.size());
2558 exp_list = lang->expand(q->word, q->aff, exp_buf);
2559 for (WordAff * i = exp_list; i; i = i->next) {
2560 CML_Table::iterator e = table.find(i->word);
// Every expansion must have received a number in the loop above.
2561 assert(0 <= e->rank && e->rank < (int)entries.size());
2562 q->exp[e->rank] = true;
// orig_exp keeps the untrimmed expansion; `exp` is modified by step (5).
2564 q->orig_exp = q->exp;
// `working` stores pointers into `expansions`.
2565 working.push_back(&*q);
2568 unsigned prev_working_size = INT_MAX;
2570 // This loop will repeat until the working set is empty. This
2571 // will produce optimum results in most cases. Non optimum
2572 // results may be possible if step (4) is necessary, but in
2573 // practice this step is rarely necessary.
// Remember the size at the start of the pass so step (4) can detect a
// pass that removed nothing.
2575 prev_working_size = working.size();
2577 // Sort the list based on WorkingLt. This is necessary every
2578 // time since the expansion list can change.
2579 std::sort(working.begin(), working.end(), WorkingLt());
2581 // (1) Eliminate any elements which are a subset of others
2582 for (unsigned i = 0; i != working.size(); ++i) {
2583 if (!working[i]) continue;
// Only later elements are tested against working[i]; after the sort they
// have at most as many expansions as working[i].
2584 for (unsigned j = i + 1; j != working.size(); ++j) {
2585 if (!working[j]) continue;
2586 if (subset(working[j]->exp, working[i]->exp)) {
2592 // (2) Move any elements which expand to unique entries
2593 // into the to_keep list
2595 for (unsigned i = 0; i != entries.size(); ++i) {
2597 for (unsigned j = 0; j != working.size(); ++j) {
2598 if (working[j] && working[j]->exp[i]) {
// A non-negative n is the index of an element to move to to_keep.
2603 if (n >= 0) to_remove.push_back(n);
2605 for (unsigned i = 0; i != to_remove.size(); ++i) {
2606 unsigned n = to_remove[i];
// The same index can be listed more than once; skip slots already
// emptied.
2607 if (!working[n]) continue;
2608 to_keep.push_back(working[n]);
2609 merge(to_keep_exp, working[n]->exp);
2613 // (3) Eliminate any elements which are a subset of all the
2614 // elements in the to_keep list
2615 for (unsigned i = 0; i != working.size(); ++i) {
2616 if (working[i] && subset(working[i]->exp, to_keep_exp)) {
2621 // Compact the working list
2624 while (j != (int)working.size()) {
2626 working[i] = working[j];
2634 // (4) If none of the entries in working have been removed via
2635 // the above methods then make a greedy choice and move the
2636 // first element into the to_keep list.
2637 if (working.size() > 0 && working.size() == prev_working_size)
2639 to_keep.push_back(working[0]);
2640 //CERR.printf("Making greedy choice! Chosing %s/%s.\n",
2641 // working[0]->word, working[0]->aff);
2642 merge(to_keep_exp, working[0]->exp);
2643 working.erase(working.begin(), working.begin() + 1);
2646 // (5) Trim the expansion list for any elements left in the
2647 // working set by removing the expansions that already exist in
2649 for (unsigned i = 0; i != working.size(); ++i) {
2650 purge(working[i]->exp, to_keep_exp);
2653 } while (working.size() > 0);
2657 // Remove unnecessary flags. A flag is unnecessary if it
2658 // does not expand to any new words, that is words that are not
2659 // already covered by earlier entries in the list.
2661 for (unsigned i = 0; i != to_keep.size(); ++i) {
// Undo the trimming done by step (5) of the pruning loop.
2662 to_keep[i]->exp = to_keep[i]->orig_exp;
2665 std::sort(to_keep.begin(), to_keep.end(), WorkingLt());
// tally:   union of the expansions of the entries processed so far
// working: expansion produced by the candidate flag string being tested
// NOTE(review): this `working` is a bit vector and is distinct from the
// Vector<Expansion *> of the same name used by the pruning loop.
2667 std::vector<bool> tally(entries.size());
2668 std::vector<bool> backup(entries.size());
2669 std::vector<bool> working(entries.size());
2672 for (unsigned i = 0; i != to_keep.size(); ++i) {
2676 merge(tally, to_keep[i]->exp);
2678 String flags_to_keep = to_keep[i]->aff;
2679 bool something_changed;
2681 something_changed = false;
2682 for (unsigned j = 0; j != flags_to_keep.size(); ++j) {
// Candidate: flags_to_keep with the j-th flag left out.
2683 flags.assign(flags_to_keep.data(), j);
2684 flags.append(flags_to_keep.data(j+1),
2685 flags_to_keep.size() - (j+1));
2688 exp_list = lang->expand(to_keep[i]->word, flags, exp_buf);
2689 for (WordAff * q = exp_list; q; q = q->next) {
2690 CML_Table::iterator e = table.find(q->word);
2691 working[e->rank] = true;
// Coverage is unchanged without flag j, so the flag can be dropped.
2693 if (working == tally) {
2694 flags_to_keep = flags;
2695 something_changed = true;
2699 } while (something_changed);
2701 if (flags_to_keep != to_keep[i]->aff) {
// flags_to_keep only ever loses flags, so it fits in the existing
// buffer; the + 1 copies the terminating NUL.
2702 memcpy(to_keep[i]->aff, flags_to_keep.str(), flags_to_keep.size() + 1);
2708 // Finally print the resulting list
2710 //printf("XXX %d %d\n", to_keep.size(), to_keep_exp.size());
2711 //for (int i = 0; i != to_keep_exp.size(); ++i) {
2712 // printf(">>> %d %d\n", i, (int)to_keep_exp[i]);
// Selected base words, with their flags when the affix string is not
// empty.
2715 for (unsigned i = 0; i != to_keep.size(); ++i) {
2716 COUT << oconv(to_keep[i]->word);
2717 if (to_keep[i]->aff[0]) {
2719 COUT << oconv(to_keep[i]->aff);
// Words not covered by any kept expansion are printed on their own; such
// a word is expected to carry no affix string.
2723 for (unsigned i = 0; i != to_keep_exp.size(); ++i) {
2724 if (!to_keep_exp[i]) {
2725 assert(!entries[i]->aff);
2726 COUT.printf("%s\n", oconv(entries[i]->word));
// Release the affix strings owned by the table entries.
2732 for (; p != end; ++p)
2734 if (p->aff) free(p->aff);
2740 //////////////////////////
2748 EXIT_ON_ERR(aspeller::open_affix_file(*options, in));
2751 while (in.getline(line))
2752 COUT << line << '\n';
2757 ///////////////////////////////////////////////////////////////////////
2760 ///////////////////////////
// Prints one option line of the help output: a left-aligned command
// column built from the option's abbreviations, name and value
// placeholder, followed by the translated description.
//
// abrv / dont_abrv: abbreviation characters; '\0' for dont_abrv means the
//                   option has no "dont-" abbreviation.
// name:             option name.
// type:             decides the decorations added to the command column.
// desc:             untranslated description; passed through _().
// no_dont:          suppresses the "[dont-]" marker for boolean options.
2765 void print_help_line(char abrv, char dont_abrv, const char * name,
2766 KeyInfoType type, const char * desc, bool no_dont = false)
2772 if (dont_abrv != '\0') {
2775 command += dont_abrv;
// Boolean options can be negated, list options take an add/rem prefix.
2780 if (type == KeyInfoBool && !no_dont) command += "[dont-]";
2781 if (type == KeyInfoList) command += "add|rem-";
// Value placeholder, depending on the option type.
2783 if (type == KeyInfoString || type == KeyInfoList)
2784 command += "=<str>";
2785 if (type == KeyInfoInt)
2786 command += "=<int>";
2787 const char * tdesc = _(desc);
2788 printf(" %-27s %s\n", command.c_str(), tdesc); // FIXME: consider word wrapping
2792 PosibErr<ConfigModule *> get_dynamic_filter(Config * config, ParmStr value);
2795 static const char * usage_text[] =
2797 /* TRANSLATORS: These should all be formated to fit in 80 column or
2799 N_("Usage: aspell [options] <command>"),
2800 N_("<command> is one of:"),
2801 N_(" -?|usage display a brief usage message"),
2802 N_(" help display a detailed help message"),
2803 N_(" -c|check <file> to check a file"),
2804 N_(" -a|pipe \"ispell -a\" compatibility mode"),
2805 N_(" [dump] config dumps the current configuration to stdout"),
2806 N_(" config <key> prints the current value of an option"),
2807 N_(" [dump] dicts | filters | modes"),
2808 N_(" lists available dictionaries / filters / filter modes"),
2809 N_("[options] is any of the following:")
2811 static const unsigned usage_text_size = sizeof(usage_text)/sizeof(const char *);
2813 static const char * help_text[] =
2822 N_(" list produce a list of misspelled words from standard input"),
2825 N_(" soundslike returns the sounds like equivalent for each word entered"),
2826 N_(" munch generate possible root words and affixes"),
2827 N_(" expand [1-4] expands affix flags"),
2828 N_(" clean [strict] cleans a word list so that every line is a valid word"),
2829 //N_(" filter passes standard input through filters"),
2830 N_(" -v|version prints a version line"),
2831 N_(" munch-list [simple] [single|multi] [keep]"),
2832 N_(" reduce the size of a word list via affix compression"),
2833 N_(" conv <from> <to> [<norm-form>]"),
2834 N_(" converts from one encoding to another"),
2835 N_(" norm (<norm-map> | <from> <norm-map> <to>) [<norm-form>]"),
2836 N_(" perform Unicode normalization"),
2839 N_(" dump|create|merge master|personal|repl [<name>]"),
2840 N_(" dumps, creates or merges a master, personal, or replacement dictionary."),
2842 /* TRANSLATORS: "none", "internal" and "strict" are literal values
2843 and should not be translated. */
2844 N_(" <norm-form> normalization form to use, either none, internal, or strict"),
2849 static const unsigned help_text_size = sizeof(help_text)/sizeof(const char *);
2851 void print_help (bool verbose) {
2852 load_all_filters(options);
2855 "Aspell %s. Copyright 2000-2011 by Kevin Atkinson.\n"
2857 for (unsigned i = 0; i < help_text_size; ++i)
2858 puts(gt_(help_text[i]));
2860 for (unsigned i = 0; i < usage_text_size; ++i)
2861 puts(gt_(usage_text[i]));
2863 StackPtr<KeyInfoEnumeration> els(options->possible_elements(true,false));
2865 while (k = els->next(), k) {
2866 if (k->desc == 0 || k->flags & KEYINFO_HIDDEN) continue;
2867 if (!verbose && !(k->flags & KEYINFO_COMMON)) continue;
2868 const PossibleOption * o = find_option(k->name);
2869 const char * name = k->name;
2870 print_help_line(o->abrv,
2871 strncmp((o+1)->name, "dont-", 5) == 0 ? (o+1)->abrv : '\0',
2872 name, k->type, k->desc);
2873 if (verbose && strcmp(name, "mode") == 0) {
2874 for (const ModeAbrv * j = mode_abrvs;
2875 j != mode_abrvs_end;
2878 print_help_line(j->abrv, '\0', j->mode, KeyInfoBool, j->desc, true);
2888 _("Available Dictionaries:\n"
2889 " Dictionaries can be selected directly via the \"-d\" or \"master\"\n"
2890 " option. They can also be selected indirectly via the \"lang\",\n"
2891 " \"variety\", and \"size\" options.\n"));
2893 const DictInfoList * dlist = get_dict_info_list(options);
2895 StackPtr<DictInfoEnumeration> dels(dlist->elements());
2897 const DictInfo * entry;
2899 while ( (entry = dels->next()) != 0)
2901 printf(" %s\n", entry->name);
2909 _("Available Filters (and associated options):\n"
2910 " Filters can be added or removed via the \"filter\" option.\n"),
2912 for (Vector<ConfigModule>::const_iterator m = options->filter_modules.begin();
2913 m != options->filter_modules.end();
2916 printf(_("\n %s filter: %s\n"), m->name, gt_(m->desc));
2917 for (k = m->begin; k != m->end; ++k) {
2918 const PossibleOption * o = find_option(k->name);
2919 const char * name = k->name;
2920 const KeyInfo * ok = options->keyinfo(name + 2);
2921 if (k == ok) name += 2;
2922 print_help_line(o->abrv,
2923 strncmp((o+1)->name, "dont-", 5) == 0 ? (o+1)->abrv : '\0',
2924 name, k->type, k->desc);
2932 /* TRANSLATORS: This should be formated to fit in 80 column or less */
2933 _("Available Filter Modes:\n"
2934 " Filter Modes are reconfigured combinations of filters optimized for\n"
2935 " files of a specific type. A mode is selected via the \"mode\" option.\n"
2936 " This will happen implicitly if Aspell is able to identify the file\n"
2937 " type from the extension, and possibility the contents, of the file.\n"));
2939 EXIT_ON_ERR_SET(available_filter_modes(options), StringPairEnumeration *, els);
2941 while (!els->at_end()) {
2943 printf(" %-14s %s\n", sp.first, gt_(sp.second));