prog/aspell.cpp

   1 // This file is part of The New Aspell Copyright (C)
   2 // 2002,2003,2004,2011 by Kevin Atkinson under the GNU LGPL license
   3 // version 2.0 or 2.1.  You should have received a copy of the LGPL
   4 // license along with this library if you did not you can find it at
   5 // http://www.gnu.org/.
   6
   7 //
   8 // NOTE: This program currently uses a very ugly mix of the internal
   9 //       API and the external C interface.  The eventual goal is to
  10 //       use only the external C++ interface, however, the external
  11 //       C++ interface is currently incomplete.  The C interface is
//       used in some places because without it the strings will not get
  13 //       converted properly when the encoding is not the same as the
  14 //       internal encoding used by Aspell.
  15 //
  16
  17 #include <ctype.h>
  18 #include "settings.h"
  19
  20 #ifdef USE_LOCALE
  21 # include <locale.h>
  22 #endif
  23
  24 #ifdef HAVE_LANGINFO_CODESET
  25 # include <langinfo.h>
  26 #endif
  27
  28 #include "aspell.h"
  29
  30 #ifdef USE_FILE_INO
  31 # include <sys/types.h>
  32 # include <sys/stat.h>
  33 # include <unistd.h>
  34 # include <fcntl.h>
  35 #endif
  36
  37 #include "asc_ctype.hpp"
  38 #include "check_funs.hpp"
  39 #include "config.hpp"
  40 #include "convert.hpp"
  41 #include "document_checker.hpp"
  42 #include "enumeration.hpp"
  43 #include "errors.hpp"
  44 #include "file_util.hpp"
  45 #include "fstream.hpp"
  46 #include "info.hpp"
  47 #include "iostream.hpp"
  48 #include "posib_err.hpp"
  49 #include "speller.hpp"
  50 #include "stack_ptr.hpp"
  51 #include "string_enumeration.hpp"
  52 #include "string_map.hpp"
  53 #include "word_list.hpp"
  54
  55 #include "string_list.hpp"
  56 #include "speller_impl.hpp"
  57 #include "data.hpp"
  58
  59 #include "hash-t.hpp"
  60 #include "hash_fun.hpp"
  61
  62 #include "gettext.h"
  63
  64 using namespace acommon;
  65
  66 using aspeller::Conv;
  67
// action functions declarations
//
// Forward declarations of the functions main() dispatches to.  Each one
// reads its inputs from the file-level "args" and "options" globals rather
// than taking parameters.

// Called for the "version", "usage" and "help" actions; "help" passes
// verbose = true.
void print_ver();
void print_help(bool verbose = false);
void config();

// Called directly for the actions "check", "pipe", "conv", "norm",
// "filter", "list", "dicts", "modes" and "filters" respectively.
void check();
void pipe();
void convt();
void normlz();
void filter();
void list();
void dicts();
void modes();
void filters();

// clean/soundslike/munch/expand/combine/munch_list are called for the actions
// of the same name ("munch-list" for munch_list).  master/personal/repl/
// dump_affix are reached through the second word of a "dump", "create" or
// "merge" action ("master", "personal", "repl", "affix").
void clean();
void master();
void personal();
void repl();
void soundslike();
void munch();
void expand();
void combine();
void munch_list();
void dump_affix();
  94
  95 void print_error(ParmString msg)
  96 {
  97   CERR.printf(_("Error: %s\n"), msg.str());
  98 }
  99
 100 void print_error(ParmString msg, ParmString str)
 101 {
 102   CERR.put(_("Error: "));
 103   CERR.printf(msg.str(), str.str());
 104   CERR.put('\n');
 105 }
 106
// Error-handling helper macros.  Each one evaluates `command` into a
// PosibErr object and, if it reports an error, prints the error message
// with print_error().

// Evaluate `command`; on error print the message and terminate the program
// with exit(1).
#define EXIT_ON_ERR(command) \
  do{PosibErrBase pe(command);\
  if(pe.has_err()){print_error(pe.get_err()->mesg); exit(1);}\
  } while(false)
// Declare `type var;` in the enclosing scope, then evaluate `command` as a
// PosibErr<type>.  On error print the message and exit(1); otherwise copy
// the result (pe.data) into var.  Because it expands to a declaration plus
// a statement it cannot be used as the body of an unbraced if/else.
#define EXIT_ON_ERR_SET(command, type, var)\
  type var;\
  do{PosibErr< type > pe(command);\
  if(pe.has_err()){print_error(pe.get_err()->mesg); exit(1);}\
  else {var=pe.data;}\
  } while(false)
// Evaluate `command`; on error print the message and `break`.
// NOTE: the `break` is lexically inside this macro's own do/while(false),
// so it only leaves that loop -- execution continues with the statement
// following the macro invocation, not after the caller's loop or switch.
#define BREAK_ON_ERR(command) \
  do{PosibErrBase pe(command);\
  if(pe.has_err()){print_error(pe.get_err()->mesg); break;}\
  } while(false)
// Like EXIT_ON_ERR_SET, but on error only prints the message; var is then
// left default-constructed.  The same caveat about `break` applies: it
// leaves only the macro's own do/while(false).
#define BREAK_ON_ERR_SET(command, type, var)\
  type var;\
  do{PosibErr< type > pe(command);\
  if(pe.has_err()){print_error(pe.get_err()->mesg); break;}\
  else {var=pe.data;}\
  } while(false)
 127
 128
 129 /////////////////////////////////////////////////////////
 130 //
 131 // Command line options functions and classes
 132 // (including main)
 133 //
 134
// Args holds the non-option command line words (action name, its
// parameters, file names); Options is the Aspell configuration object.
typedef Vector<String> Args;
typedef Config         Options;
// Sub-mode selected by the "dump", "create" and "merge" actions in main();
// do_other means none of them was requested.  do_test is not assigned
// anywhere in the visible part of this file.
enum Action {do_create, do_merge, do_dump, do_test, do_other};

// File-level state shared by main() and all action functions.
Args              args;     // filled by main() while parsing argv
StackPtr<Options> options;  // created at the top of main() via new_config()
Action            action  = do_other;
 142
// Description of one command line option or command known to main().
struct PossibleOption {
  const char * name;       // long name, matched against "--name"; "" for
                           // entries that only have a short form
  char         abrv;       // single-letter form ("-x"), or '\0' if none
  int          num_arg;    // number of parameters; main() also uses -1 for
                           // unknown long options, meaning "optional"
  bool         is_command; // true: pushed onto args as an action;
                           // false: stored as a configuration entry
};
 149
// Aggregate initializers for PossibleOption table rows:
//   OPTION      - a configuration option (is_command = false)
//   COMMAND     - an action (is_command = true)
//   ISPELL_COMP - a short-only option with an empty name; main() accepts
//                 such an option but neither stores nor forwards it
#define OPTION(name,abrv,num)         {name,abrv,num,false}
#define COMMAND(name,abrv,num)        {name,abrv,num,true}
#define ISPELL_COMP(abrv,num)         {"",abrv,num,false}
 153
// Table of every option and command main() recognises by name or by
// single-letter abbreviation.  Long options that are not listed here are
// still accepted by main() and handed to the configuration as-is.
const PossibleOption possible_options[] = {
  OPTION("master",           'd', 1),
  OPTION("personal",         'p', 1),
  OPTION("ignore",           'W', 1),
  OPTION("lang",             'l', 1),
  OPTION("backup",           'b', 0),
  OPTION("dont-backup",      'x', 0),
  OPTION("run-together",     'C', 0),
  OPTION("dont-run-together",'B', 0),
  OPTION("guess",            'm', 0),
  OPTION("dont-guess",       'P', 0),

  COMMAND("usage",     '?',  0),
  COMMAND("help",      '\0', 0),
  COMMAND("version",   'v',  0),
  COMMAND("config",    '\0', 0),
  COMMAND("dicts",     '\0', 0),
  COMMAND("check",     'c',  0),
  COMMAND("pipe",      'a',  0),
  COMMAND("list",      '\0', 0),
  COMMAND("conv",      '\0', 2),
  COMMAND("norm",      '\0', 1),
  COMMAND("filter",    '\0', 0),
  COMMAND("soundslike",'\0', 0),
  COMMAND("munch",     '\0', 0),
  COMMAND("expand",    '\0', 0),
  COMMAND("combine",   '\0', 0),
  COMMAND("munch-list",'\0', 0),
  COMMAND("clean",     '\0', 0),
  COMMAND("filters",   '\0', 0),
  COMMAND("modes",     '\0', 0),

  COMMAND("dump",   '\0', 1),
  COMMAND("create", '\0', 1),
  COMMAND("merge",  '\0', 1),

  // options accepted without a long name (see ISPELL_COMP)
  ISPELL_COMP('S',0), ISPELL_COMP('w',1), ISPELL_COMP('T',1),

  // Two trailing sentinel rows.  possible_options_end points at the first
  // of them, so the "not found" result of find_option() can still be
  // dereferenced: its name is "", and num_arg / is_command are
  // zero-initialized.
  {"",'\0'}, {"",'\0'}
};

// One past the last real entry (i.e. the first sentinel row).
const PossibleOption * possible_options_end = possible_options + sizeof(possible_options)/sizeof(PossibleOption) - 2;
 196
 197 struct ModeAbrv {
 198   char abrv;
 199   const char * mode;
 200   const char * desc;
 201 };
 202 static const ModeAbrv mode_abrvs[] = {
 203   {'e', "mode=email", N_("enter Email mode.")},
 204   {'H', "mode=html",  N_("enter HTML mode.")},
 205   {'t', "mode=tex",   N_("enter TeX mode.")},
 206   {'n', "mode=nroff", N_("enter Nroff mode.")}
 207 };
 208
 209 static const ModeAbrv *  mode_abrvs_end = mode_abrvs + 4;
 210
 211 const PossibleOption * find_option(char c) {
 212   const PossibleOption * i = possible_options;
 213   while (i != possible_options_end && i->abrv != c)
 214     ++i;
 215   return i;
 216 }
 217
 218 static inline bool str_equal(const char * begin, const char * end,
 219                              const char * other)
 220 {
 221   while(begin != end && *begin == *other)
 222     ++begin, ++other;
 223   return (begin == end && *other == '\0');
 224 }
 225
 226 static const PossibleOption * find_option(const char * begin, const char * end) {
 227   const PossibleOption * i = possible_options;
 228   while (i != possible_options_end
 229          && !str_equal(begin, end, i->name))
 230     ++i;
 231   return i;
 232 }
 233
 234 static const PossibleOption * find_option(const char * str) {
 235   const PossibleOption * i = possible_options;
 236   while (i != possible_options_end
 237          && !strcmp(str, i->name) == 0)
 238     ++i;
 239   return i;
 240 }
 241
 242 static void line_buffer() {
 243 #ifndef WIN32
 244   // set up stdin and stdout to be line buffered
 245   assert(setvbuf(stdin, 0, _IOLBF, 0) == 0);
 246   assert(setvbuf(stdout, 0, _IOLBF, 0) == 0);
 247 #endif
 248 }
 249
// Converters configured by setup_display_conv(): dconv is set up from the
// document ("encoding" option) encoding to the display encoding, uiconv in
// the opposite direction.
Conv dconv;
Conv uiconv;
 252
 253 int main (int argc, const char *argv[])
 254 {
 255   options = new_config(); // this needs to be here becuase of a bug
 256                           // with static initlizers on Darwin.
 257 #ifdef USE_LOCALE
 258   setlocale (LC_ALL, "");
 259 #endif
 260   aspell_gettext_init();
 261
 262   options->set_committed_state(false);
 263
 264   if (argc == 1) {print_help(); return 0;}
 265
 266   int i = 1;
 267   const PossibleOption * o;
 268   const char           * parm;
 269
 270   //
 271   // process command line options by setting the appropriate options
 272   // in "options" and/or pushing non-options onto "argv"
 273   //
 274   PossibleOption other_opt = OPTION("",'\0',0);
 275   String option_name;
 276   while (i != argc) {
 277     if (argv[i][0] == '-') {
 278       bool have_parm = false;
 279       if (argv[i][1] == '-') {
 280         // a long arg
 281         const char * c = argv[i] + 2;
 282         while(*c != '=' && *c != '\0') ++c;
 283         o = find_option(argv[i] + 2, c);
 284         if (o == possible_options_end) {
 285           option_name.assign(argv[i] + 2, c - argv[i] - 2);
 286           other_opt.name    = option_name.c_str();
 287           other_opt.num_arg = -1;
 288           o = &other_opt;
 289         }
 290         if (*c == '=') {have_parm = true; ++c;}
 291         parm = c;
 292       } else {
 293         // a short arg
 294         const ModeAbrv * j = mode_abrvs;
 295         while (j != mode_abrvs_end && j->abrv != argv[i][1]) ++j;
 296         if (j == mode_abrvs_end) {
 297           o = find_option(argv[i][1]);
 298           if (argv[i][1] == 'v' && argv[i][2] == 'v')
 299             // Hack for -vv
 300             parm = argv[i] + 3;
 301           else
 302             parm = argv[i] + 2;
 303         } else { // mode option
 304           other_opt.name = "mode";
 305           other_opt.num_arg = 1;
 306           o = &other_opt;
 307           parm = j->mode + 5;
 308         }
 309         if (*parm) have_parm = true;
 310       }
 311       if (o == possible_options_end) {
 312         print_error(_("Invalid Option: %s"), argv[i]);
 313         return 1;
 314       }
 315       int num_parms;
 316       if (o->num_arg == 0) {
 317         num_parms = 0;
 318         if (parm[0] != '\0') {
 319           print_error(_(" does not take any parameters."),
 320                       String(argv[i], parm - argv[i]));
 321           return 1;
 322         }
 323         i += 1;
 324       } else if (have_parm) {
 325         num_parms = 1;
 326         i += 1;
 327       } else if (i + 1 == argc || argv[i+1][0] == '-') {
 328         if (o->num_arg == -1) {
 329           num_parms = 0;
 330           i += 1;
 331         } else {
 332           print_error(_("You must specify a parameter for \"%s\"."), argv[i]);
 333           return 1;
 334         }
 335       } else {
 336         num_parms = o->num_arg;
 337         parm = argv[i + 1];
 338         i += 2;
 339       }
 340       if (o->is_command) {
 341         args.push_back(o->name);
 342         if (o->num_arg == 1)
 343           args.push_back(parm);
 344       } else if (o->name[0] != '\0') {
 345         Config::Entry * entry = new Config::Entry;
 346         entry->key = o->name;
 347         entry->value = parm;
 348         entry->need_conv = true;
 349         if (num_parms == -1) {
 350           entry->place_holder = args.size();
 351           args.push_back(parm);
 352         }
 353         options->set(entry);
 354       }
 355     } else {
 356       args.push_back(argv[i]);
 357       i += 1;
 358     }
 359   }
 360
 361   options->read_in_settings();
 362
 363   const char * codeset = 0;
 364 #ifdef HAVE_LANGINFO_CODESET
 365   codeset = nl_langinfo(CODESET);
 366   if (ascii_encoding(*options, codeset)) codeset = 0;
 367 #endif
 368
 369 // #ifdef USE_LOCALE
 370 //   if (!options->have("encoding") && codeset)
 371 //     EXIT_ON_ERR(options->replace("encoding", codeset));
 372 // #endif
 373
 374   Vector<int> to_remove;
 375   EXIT_ON_ERR(options->commit_all(&to_remove, codeset));
 376   for (int i = to_remove.size() - 1; i >= 0; --i) {
 377     args.erase(args.begin() + to_remove[i]);
 378   }
 379
 380   if (args.empty()) {
 381     print_error(_("You must specify an action"));
 382     return 1;
 383   }
 384
 385   String action_str = args.front();
 386   args.pop_front();
 387   const PossibleOption * action_opt = find_option(action_str.str());
 388   if (!action_opt->is_command) {
 389     print_error(_("Unknown Action: %s"),  action_str);
 390     return 1;
 391   } else if (action_opt->num_arg == 1 && args.empty()) {
 392     print_error(_("You must specify a parameter for \"%s\"."), action_str);
 393     return 1;
 394   } else if (action_opt->num_arg > (int)args.size()) {
 395     CERR.printf(_("Error: You must specify at least %d parameters for \"%s\".\n"),
 396                 action_opt->num_arg, action_str.str());
 397     return 1;
 398   }
 399
 400   //
 401   // perform the requested action
 402   //
 403   if (action_str == "usage")
 404     print_help();
 405   else if (action_str == "help")
 406     print_help(true);
 407   else if (action_str == "version")
 408     print_ver();
 409   else if (action_str == "config")
 410     config();
 411   else if (action_str == "dicts")
 412     dicts();
 413   else if (action_str == "check")
 414     check();
 415   else if (action_str == "pipe")
 416     pipe();
 417   else if (action_str == "list")
 418     list();
 419   else if (action_str == "conv")
 420     convt();
 421   else if (action_str == "norm")
 422     normlz();
 423   else if (action_str == "filter")
 424     filter();
 425   else if (action_str == "soundslike")
 426     soundslike();
 427   else if (action_str == "munch")
 428     munch();
 429   else if (action_str == "expand")
 430     expand();
 431   else if (action_str == "combine")
 432     combine();
 433   else if (action_str == "munch-list")
 434     munch_list();
 435   else if (action_str == "clean")
 436     clean();
 437   else if (action_str == "filters")
 438     filters();
 439   else if (action_str == "modes")
 440     modes();
 441   else if (action_str == "dump")
 442     action = do_dump;
 443   else if (action_str == "create")
 444     action = do_create;
 445   else if (action_str == "merge")
 446     action = do_merge;
 447   else
 448     abort(); // this should not happen
 449
 450   if (action != do_other) {
 451     if (args.empty()) {
 452       print_error(_("Unknown Action: %s"),  action_str);
 453       return 1;
 454     }
 455     String what_str = args.front();
 456     args.pop_front();
 457     if (what_str == "config")
 458       config();
 459     else if (what_str == "dicts")
 460       dicts();
 461     else if (what_str == "filters")
 462       filters();
 463     else if (what_str == "modes")
 464       modes();
 465     else if (what_str == "master")
 466       master();
 467     else if (what_str == "personal")
 468       personal();
 469     else if (what_str == "repl")
 470       repl();
 471     else if (what_str == "affix")
 472       dump_affix();
 473     else {
 474       print_error(_("Unknown Action: %s"),
 475                   String(action_str + " " + what_str));
 476       return 1;
 477     }
 478   }
 479
 480   return 0;
 481
 482 }
 483
 484
 485 /////////////////////////////////////////////////////////
 486 //
 487 // Action Functions
 488 //
 489 //
 490
 491
 492 static Convert * setup_conv(const aspeller::Language * lang,
 493                                       Config * config)
 494 {
 495   if (config->retrieve("encoding") != "none") {
 496     PosibErr<Convert *> pe = new_convert_if_needed(*config,
 497                                                    lang->charmap(),
 498                                                    config->retrieve("encoding"),
 499                                                    NormTo);
 500     if (pe.has_err()) {print_error(pe.get_err()->mesg); exit(1);}
 501     return pe.data;
 502   } else {
 503     return 0;
 504   }
 505 }
 506
 507 static Convert * setup_conv(Config * config,
 508                             const aspeller::Language * lang)
 509 {
 510   if (config->retrieve("encoding") != "none") {
 511     PosibErr<Convert *> pe = new_convert_if_needed(*config,
 512                                                    config->retrieve("encoding"),
 513                                                    lang->charmap(),
 514                                                    NormFrom);
 515     if (pe.has_err()) {print_error(pe.get_err()->mesg); exit(1);}
 516     return pe.data;
 517   } else {
 518     return 0;
 519   }
 520 }
 521
 522 void setup_display_conv()
 523 {
 524   const char * gettext_enc = 0;
 525   const char * env_enc = 0;
 526   String doc_enc = options->retrieve("encoding");
 527   String enc;
 528 #ifdef ENABLE_NLS
 529   gettext_enc = bind_textdomain_codeset("aspell", 0);
 530   if (ascii_encoding(*options,gettext_enc)) gettext_enc = 0;
 531 #endif
 532 #ifdef HAVE_LANGINFO_CODESET
 533   env_enc = nl_langinfo(CODESET);
 534   if (ascii_encoding(*options, env_enc)) env_enc = 0;
 535 #endif
 536   if (gettext_enc && env_enc && strcmp(gettext_enc,env_enc) != 0)
 537   {
 538     fputs(("Error: bind_textdomain_codeset != nl_langinfo(CODESET)\n"), stderr);
 539     exit(-1);
 540   }
 541   if (gettext_enc)
 542     enc = gettext_enc;
 543   else if (env_enc)
 544     enc = env_enc;
 545   else
 546     enc = doc_enc;
 547
 548   EXIT_ON_ERR(dconv.setup(*options, doc_enc, enc, NormNone));
 549   EXIT_ON_ERR(uiconv.setup(*options, enc, doc_enc, NormNone));
 550 }
 551
 552
 553 ///////////////////////////
 554 //
 555 // config
 556 //
 557
 558 void config ()
 559 {
 560   if (args.size() == 0) {
 561     load_all_filters(options);
 562     options->write_to_stream(COUT);
 563   } else {
 564     EXIT_ON_ERR_SET(options->retrieve_any(args[0]), String, value);
 565     COUT << value << "\n";
 566   }
 567 }
 568
 569 ///////////////////////////
 570 //
 571 // dicts
 572 //
 573
 574 void dicts()
 575 {
 576   const DictInfoList * dlist = get_dict_info_list(options);
 577
 578   StackPtr<DictInfoEnumeration> dels(dlist->elements());
 579
 580   const DictInfo * entry;
 581
 582   while ( (entry = dels->next()) != 0)
 583     puts(entry->name);
 584 }
 585
 586 ///////////////////////////
 587 //
 588 // list available (filters/filter modes)
 589 //
 590
 591 void list_available(PosibErr<StringPairEnumeration *> (*fun)(Config *))
 592 {
 593   EXIT_ON_ERR_SET(fun(options), StringPairEnumeration *, els);
 594   StringPair sp;
 595   while (!els->at_end()) {
 596     sp = els->next();
 597     printf("%-14s %s\n", sp.first, gt_(sp.second));
 598   }
 599   delete els;
 600 }
 601
// "filters" action: load all filters, then list what available_filters
// reports via list_available().
void filters()
{
  load_all_filters(options);
  list_available(available_filters);
}
 607
// "modes" action: list what available_filter_modes reports via
// list_available().
void modes()
{
  list_available(available_filter_modes);
}
 612
 613 ///////////////////////////
 614 //
 615 // pipe
 616 //
 617
 618 // precond: strlen(str) > 0
 619 char * trim_wspace (char * str)
 620 {
 621   int last = strlen(str) - 1;
 622   while (asc_isspace(str[0])) {
 623     ++str;
 624     --last;
 625   }
 626   while (last > 0 && asc_isspace(str[last])) {
 627     --last;
 628   }
 629   str[last + 1] = '\0';
 630   return str;
 631 }
 632
 633 bool get_word_pair(char * line, char * & w1, char * & w2)
 634 {
 635   w2 = strchr(line, ',');
 636   if (!w2) {
 637     print_error(_("Invalid Input"));
 638     return false;
 639   }
 640   *w2 = '\0';
 641   ++w2;
 642   w1 = trim_wspace(line);
 643   w2 = trim_wspace(w2);
 644   return true;
 645 }
 646
 647 void print_elements(const AspellWordList * wl) {
 648   AspellStringEnumeration * els = aspell_word_list_elements(wl);
 649   int count = 0;
 650   const char * w;
 651   String line;
 652   while ( (w = aspell_string_enumeration_next(els)) != 0 ) {
 653     ++count;
 654     line += w;
 655     line += ", ";
 656   }
 657   line.resize(line.size() - 2);
 658   COUT.printf("%u: %s\n", count, line.c_str());
 659 }
 660
 661 struct StatusFunInf
 662 {
 663   aspeller::SpellerImpl * real_speller;
 664   Conv oconv;
 665   bool verbose;
 666   StatusFunInf(Convert * c) : oconv(c) {}
 667 };
 668
 669 void status_fun(void * d, Token, int correct)
 670 {
 671   StatusFunInf * p = static_cast<StatusFunInf *>(d);
 672   if (p->verbose && correct) {
 673     const CheckInfo * ci = p->real_speller->check_info();
 674     if (ci->compound)
 675       COUT.put("-\n");
 676     else if (ci->pre_flag || ci->suf_flag)
 677       COUT.printf("+ %s\n", p->oconv(ci->word.str()));
 678     else
 679       COUT.put("*\n");
 680   }
 681 }
 682
// Create a DocumentChecker for speller and register status_fun as its
// status callback, with status_fun_inf as the callback's data pointer
// (so status_fun_inf must outlive the checker).
// Exits with status 1 if the checker cannot be created.  Ownership of the
// returned checker passes to the caller.
DocumentChecker * new_checker(AspellSpeller * speller,
                              StatusFunInf & status_fun_inf)
{
  // declares "StackPtr<DocumentChecker> checker" and fills it in
  EXIT_ON_ERR_SET(new_document_checker(reinterpret_cast<Speller *>(speller)),
                  StackPtr<DocumentChecker>, checker);
  checker->set_status_fun(status_fun, &status_fun_inf);
  return checker.release();
}
 691
// If the variable "speller" (which must be in scope at the point of use)
// has a pending error, print its message.
// NOTE: the `break` is inside the macro's own do/while(false) and so only
// leaves that loop; callers that want to leave a switch or loop need their
// own `break` after the macro.
#define BREAK_ON_SPELLER_ERR\
  do {if (aspell_speller_error(speller)) {\
    print_error(aspell_speller_error_message(speller)); break;\
  } } while (false)
 696
 697 void pipe()
 698 {
 699   line_buffer();
 700
 701   bool terse_mode = true;
 702   bool do_time = options->retrieve_bool("time");
 703   bool suggest = options->retrieve_bool("suggest");
 704   bool include_guesses = options->retrieve_bool("guess");
 705   clock_t start,finish;
 706
 707   if (!options->have("mode") && !options->have("filter")) {
 708     PosibErrBase err(options->replace("mode", "nroff"));
 709     if (err.has_err())
 710       CERR.printf(_("WARNING: Unable to enter Nroff mode: %s\n"),
 711                   err.get_err()->mesg);
 712   }
 713
 714   start = clock();
 715
 716   AspellCanHaveError * ret
 717     = new_aspell_speller(reinterpret_cast<AspellConfig *>(options.get()));
 718   if (aspell_error(ret)) {
 719     print_error(aspell_error_message(ret));
 720     exit(1);
 721   }
 722   AspellSpeller * speller = to_aspell_speller(ret);
 723   aspeller::SpellerImpl * real_speller = reinterpret_cast<aspeller::SpellerImpl *>(speller);
 724   Config * config = real_speller->config();
 725   Conv iconv(setup_conv(config, &real_speller->lang()));
 726   Conv oconv(setup_conv(&real_speller->lang(), config));
 727   MBLen mb_len;
 728   if (!config->retrieve_bool("byte-offsets"))
 729     mb_len.setup(*config, config->retrieve("encoding"));
 730   if (do_time)
 731     COUT << _("Time to load word list: ")
 732          << (clock() - start)/(double)CLOCKS_PER_SEC << "\n";
 733   StatusFunInf status_fun_inf(setup_conv(&real_speller->lang(), config));
 734   status_fun_inf.real_speller = real_speller;
 735   bool & print_star = status_fun_inf.verbose;
 736   print_star = true;
 737   StackPtr<DocumentChecker> checker(new_checker(speller, status_fun_inf));
 738   int c;
 739   const char * w;
 740   CharVector buf;
 741   char * line;
 742   char * line0;
 743   char * word;
 744   char * word2;
 745   int    ignore;
 746   PosibErrBase err;
 747
 748   print_ver();
 749
 750   for (;;) {
 751     buf.clear();
 752     fflush(stdout);
 753     while (c = getchar(), c != '\n' && c != EOF)
 754       buf.push_back(static_cast<char>(c));
 755     buf.push_back('\n'); // always add new line so strlen > 0
 756     buf.push_back('\0');
 757     line = buf.data();
 758     ignore = 0;
 759     switch (line[0]) {
 760     case '\n':
 761       if (c != EOF) continue;
 762       else          break;
 763     case '*':
 764       word = trim_wspace(line + 1);
 765       aspell_speller_add_to_personal(speller, word, -1);
 766       BREAK_ON_SPELLER_ERR;
 767       break;
 768     case '&':
 769       word = trim_wspace(line + 1);
 770       aspell_speller_add_to_personal
 771         (speller,
 772          real_speller->to_lower(word), -1);
 773       BREAK_ON_SPELLER_ERR;
 774       break;
 775     case '@':
 776       word = trim_wspace(line + 1);
 777       aspell_speller_add_to_session(speller, word, -1);
 778       BREAK_ON_SPELLER_ERR;
 779       break;
 780     case '#':
 781       aspell_speller_save_all_word_lists(speller);
 782       BREAK_ON_SPELLER_ERR;
 783       break;
 784     case '+':
 785       word = trim_wspace(line + 1);
 786       err = config->replace("mode", word);
 787       if (err.get_err())
 788         config->replace("mode", "tex");
 789       reload_filters(real_speller);
 790       checker.del();
 791       checker = new_checker(speller, status_fun_inf);
 792       break;
 793     case '-':
 794       config->remove("filter");
 795       reload_filters(real_speller);
 796       checker.del();
 797       checker = new_checker(speller, status_fun_inf);
 798       break;
 799     case '~':
 800       break;
 801     case '!':
 802       terse_mode = true;
 803       print_star = false;
 804       break;
 805     case '%':
 806       terse_mode = false;
 807       print_star = true;
 808       break;
 809     case '$':
 810       if (line[1] == '$') {
 811         switch(line[2]) {
 812         case 'r':
 813           switch(line[3]) {
 814           case 'a':
 815             if (get_word_pair(line + 4, word, word2))
 816               aspell_speller_store_replacement(speller, word, -1, word2, -1);
 817             break;
 818           }
 819           break;
 820         case 'c':
 821           switch (line[3]) {
 822           case 's':
 823             if (get_word_pair(line + 4, word, word2))
 824               BREAK_ON_ERR(err = config->replace(word, word2));
 825             if (strcmp(word,"suggest") == 0)
 826               suggest = config->retrieve_bool("suggest");
 827             else if (strcmp(word,"time") == 0)
 828               do_time = config->retrieve_bool("time");
 829             else if (strcmp(word,"guess") == 0)
 830               include_guesses = config->retrieve_bool("guess");
 831             break;
 832           case 'r':
 833             word = trim_wspace(line + 4);
 834             BREAK_ON_ERR_SET(config->retrieve(word), String, ret);
 835             COUT.printl(ret);
 836             break;
 837           }
 838           break;
 839         case 'p':
 840           switch (line[3]) {
 841           case 'p':
 842             print_elements(aspell_speller_personal_word_list(speller));
 843             break;
 844           case 's':
 845             print_elements(aspell_speller_session_word_list(speller));
 846             break;
 847           }
 848           break;
 849         case 'l':
 850           COUT.printl(config->retrieve("lang"));
 851           break;
 852         }
 853         break;
 854       } else {
 855         // continue on (no break)
 856       }
 857     case '^':
 858       ignore = 1;
 859     default:
 860       line0 = line;
 861       line += ignore;
 862       checker->process(line, strlen(line));
 863       while (Token token = checker->next_misspelling()) {
 864         word = line + token.offset;
 865         word[token.len] = '\0';
 866         const char * cword = iconv(word);
 867         String guesses, guess;
 868         const CheckInfo * ci = real_speller->check_info();
 869         aspeller::CasePattern casep
 870           = real_speller->lang().case_pattern(cword);
 871         while (ci) {
 872           guess.clear();
 873           if (ci->pre_add && ci->pre_add[0])
 874             guess.append(ci->pre_add, ci->pre_add_len).append('+');
 875           guess.append(ci->word);
 876           if (ci->pre_strip_len > 0)
 877             guess.append('-').append(ci->word.str(), ci->pre_strip_len);
 878           if (ci->suf_strip_len > 0)
 879             guess.append('-').append(ci->word.str() + ci->word.size() - ci->suf_strip_len,
 880                                      ci->suf_strip_len);
 881           if (ci->suf_add && ci->suf_add[0])
 882             guess.append('+').append(ci->suf_add, ci->suf_add_len);
 883           real_speller->lang().fix_case(casep, guess.data(), guess.data());
 884           guesses << ", " << oconv(guess.str());
 885           ci = ci->next;
 886         }
 887         start = clock();
 888         const AspellWordList * suggestions = 0;
 889         if (suggest)
 890           suggestions = aspell_speller_suggest(speller, word, -1);
 891         finish = clock();
 892         unsigned offset = mb_len(line0, token.offset + ignore);
 893         if (suggestions && !aspell_word_list_empty(suggestions))
 894         {
 895           COUT.printf("& %s %u %u:", word,
 896                       aspell_word_list_size(suggestions), offset);
 897           AspellStringEnumeration * els
 898             = aspell_word_list_elements(suggestions);
 899           if (options->retrieve_bool("reverse")) {
 900             Vector<String> sugs;
 901             sugs.reserve(aspell_word_list_size(suggestions));
 902             while ( ( w = aspell_string_enumeration_next(els)) != 0)
 903               sugs.push_back(w);
 904             Vector<String>::reverse_iterator i = sugs.rbegin();
 905             while (true) {
 906               COUT.printf(" %s", i->c_str());
 907               ++i;
 908               if (i == sugs.rend()) break;
 909               COUT.put(',');
 910             }
 911           } else {
 912             while ( ( w = aspell_string_enumeration_next(els)) != 0) {
 913               COUT.printf(" %s%s", w,
 914                           aspell_string_enumeration_at_end(els) ? "" : ",");
 915             }
 916           }
 917           delete_aspell_string_enumeration(els);
 918           if (include_guesses)
 919             COUT.put(guesses);
 920           COUT.put('\n');
 921         } else {
 922           if (guesses.empty())
 923             COUT.printf("# %s %u\n", word, offset);
 924           else
 925             COUT.printf("? %s 0 %u: %s\n", word, offset,
 926                         guesses.c_str() + 2);
 927         }
 928         if (do_time)
 929           COUT.printf(_("Suggestion Time: %f\n"),
 930                       (finish-start)/(double)CLOCKS_PER_SEC);
 931       }
 932       COUT.put('\n');
 933     }
 934     if (c == EOF) break;
 935   }
 936
 937   delete_aspell_speller(speller);
 938 }
 939
 940 ///////////////////////////
 941 //
 942 // check
 943 //
 944
// The choices offered in the interactive check menu.  The enumerator
// values (0..8) are used as indexes into Mapping::primary.
enum UserChoice {None, Ignore, IgnoreAll, Replace, ReplaceAll,
                 Add, AddLower, Exit, Abort};
 947
// Two-way table between UserChoice values and the characters that select
// them.  check() fills it with to_aspell() or to_ispell() depending on the
// "keymapping" option; both are defined outside this part of the file.
struct Mapping {
  char primary[9];          // indexed by UserChoice (nine enumerators)
  UserChoice reverse[256];  // indexed by character code
  void to_aspell();
  void to_ispell();
  // choice -> character
  char & operator[] (UserChoice c) {return primary[c];}
  // character -> choice; the cast keeps the index non-negative when char
  // is a signed type
  UserChoice & operator[] (char c)
    {return reverse[static_cast<unsigned char>(c)];}
};
 957
// Defined outside this part of the file.
void abort_check();

// Sets up the global dconv / uiconv converters (defined earlier in this
// file); called by check().
void setup_display_conv();
 961
 962 void check()
 963 {
 964   String file_name;
 965   String new_name;
 966   FILE * in = 0;
 967   FILE * out = 0;
 968   Mapping mapping;
 969   bool changed = false;
 970
 971   if (args.size() == 0) {
 972     print_error(_("You must specify a file name."));
 973     exit(-1);
 974   } else if (args.size() > 1) {
 975     print_error(_("Only one file name may be specified."));
 976     exit(-1);
 977   }
 978
 979   file_name = args[0];
 980   new_name = file_name;
 981   new_name += ".new";
 982
 983   in = fopen(file_name.c_str(), "r");
 984   if (!in) {
 985     print_error(_("Could not open the file \"%s\" for reading"), file_name);
 986     exit(-1);
 987   }
 988
 989   if (!options->have("mode"))
 990     EXIT_ON_ERR(set_mode_from_extension(options, file_name));
 991
 992   String m = options->retrieve("keymapping");
 993   if (m == "aspell")
 994     mapping.to_aspell();
 995   else if (m == "ispell")
 996     mapping.to_ispell();
 997   else {
 998     print_error(_("Invalid keymapping: %s"), m);
 999     exit(-1);
1000   }
1001
1002   AspellCanHaveError * ret
1003     = new_aspell_speller(reinterpret_cast<AspellConfig *>(options.get()));
1004   if (aspell_error(ret)) {
1005     print_error(aspell_error_message(ret));
1006     exit(1);
1007   }
1008
1009 #ifdef USE_FILE_INO
1010   {
1011     struct stat st;
1012     fstat(fileno(in), &st);
1013     int fd = open(new_name.c_str(), O_WRONLY | O_CREAT | O_TRUNC, st.st_mode);
1014     if (fd >= 0) out = fdopen(fd, "w");
1015   }
1016 #else
1017   out = fopen(new_name.c_str(), "w");
1018 #endif
1019   if (!out) {
1020     print_error(_("Could not open the file \"%s\" for writing. File not saved."), file_name);
1021     exit(-1);
1022   }
1023
1024   setup_display_conv();
1025
1026   AspellSpeller * speller = to_aspell_speller(ret);
1027
1028   state = new CheckerString(speller,in,out,64);
1029
1030   word_choices = new Choices;
1031
1032   menu_choices = new Choices;
1033   menu_choices->push_back(Choice(mapping[Ignore],     _("Ignore")));
1034   menu_choices->push_back(Choice(mapping[IgnoreAll],  _("Ignore all")));
1035   menu_choices->push_back(Choice(mapping[Replace],    _("Replace")));
1036   menu_choices->push_back(Choice(mapping[ReplaceAll], _("Replace all")));
1037   menu_choices->push_back(Choice(mapping[Add],        _("Add")));
1038   menu_choices->push_back(Choice(mapping[AddLower],   _("Add Lower")));
1039   menu_choices->push_back(Choice(mapping[Abort],      _("Abort")));
1040   menu_choices->push_back(Choice(mapping[Exit],       _("Exit")));
1041
1042   String word0, new_word;
1043   Vector<String> sug_con;
1044   StackPtr<StringMap> replace_list(new_string_map());
1045   const char * w;
1046
1047   begin_check();
1048
1049   while (state->next_misspelling()) {
1050
1051     char * word = state->get_real_word(word0);
1052
1053     //
1054     // check if it is in the replace list
1055     //
1056
1057     if ((w = replace_list->lookup(word)) != 0) {
1058       state->replace(w);
1059       continue;
1060     }
1061
1062     //
1063     // print the line with the misspelled word highlighted;
1064     //
1065
1066     display_misspelled_word();
1067
1068     //
1069     // print the suggestions and menu choices
1070     //
1071
1072     const AspellWordList * suggestions = aspell_speller_suggest(speller, word, -1);
1073     AspellStringEnumeration * els = aspell_word_list_elements(suggestions);
1074     sug_con.resize(0);
1075     while (sug_con.size() != 10
1076            && (w = aspell_string_enumeration_next(els)) != 0)
1077       sug_con.push_back(w);
1078     delete_aspell_string_enumeration(els);
1079
1080     // disable suspend
1081     unsigned int suggestions_size = sug_con.size();
1082     unsigned int suggestions_mid = suggestions_size / 2;
1083     if (suggestions_size % 2) suggestions_mid++; // if odd
1084     word_choices->resize(0);
1085     for (unsigned int j = 0; j != suggestions_mid; ++j) {
1086       word_choices->push_back(Choice('0' + j+1, sug_con[j]));
1087       if (j + suggestions_mid != suggestions_size)
1088         word_choices
1089           ->push_back(Choice(j+suggestions_mid+1 == 10
1090                              ? '0'
1091                              : '0' + j+suggestions_mid+1,
1092                              sug_con[j+suggestions_mid]));
1093     }
1094     //enable suspend
1095     display_menu();
1096
1097   choice_prompt:
1098
1099     prompt("? ");
1100
1101   choice_loop:
1102
1103     //
1104     // Handle the users choice
1105     //
1106
1107     int choice;
1108     get_choice(choice);
1109
1110     if (choice == '0') choice = '9' + 1;
1111
1112     switch (mapping[choice]) {
1113     case Exit:
1114       goto exit_loop;
1115     case Abort: {
1116       prompt(_("Are you sure you want to abort (y/n)? "));
1117       get_choice(choice);
1118       /* TRANSLATORS: The user may input any of these characters to say "yes".
1119          MUST ONLY CONSIST OF ASCII CHARACTERS. */
1120       const char * yes_characters = _("Yy");
1121       if (strchr(yes_characters, choice) != 0)
1122         goto abort_loop;
1123       goto choice_prompt;
1124     }
1125     case Ignore:
1126       break;
1127     case IgnoreAll:
1128       aspell_speller_add_to_session(speller, word, -1);
1129       break;
1130     case Add:
1131       aspell_speller_add_to_personal(speller, word, -1);
1132       break;
1133     case AddLower:
1134     {
1135       // Emulate the c function add_to_personal, but add extra step to
1136       // convert word to lowercase.  Yeah its a bit of a hack.
1137       Speller * sp = reinterpret_cast<Speller *>(speller);
1138       sp->temp_str_0.clear();
1139       sp->to_internal_->convert(word, -1, sp->temp_str_0);
1140       char * lower = sp->to_lower(sp->temp_str_0.mstr());
1141       PosibErr<void> ret = sp->add_to_personal(MutableString(lower));
1142       sp->err_.reset(ret.release_err());
1143       break;
1144     }
1145     case Replace:
1146     case ReplaceAll:
1147       // the string new_word is in the encoding of the document
1148       prompt(_("With: "));
1149       get_line(new_word);
1150       if (new_word.size() == 0)
1151         goto choice_prompt;
1152       if (new_word[0] >= '1' && new_word[0] < (char)suggestions_size + '1')
1153         new_word = sug_con[new_word[0]-'1'];
1154       state->replace(new_word);
1155       changed = true;
1156       if (mapping[choice] == ReplaceAll && (strcmp(word,new_word.str()) != 0))
1157         replace_list->replace(word, new_word);
1158       break;
1159     default:
1160       // the replasments are in the encoding of the document
1161       if (choice >= '1' && choice < (char)suggestions_size + '1') {
1162         state->replace(sug_con[choice-'1']);
1163         changed = true;
1164       } else {
1165         error(_("Sorry that is an invalid choice!"));
1166         goto choice_loop;
1167       }
1168     }
1169   }
1170 exit_loop:
1171   {
1172     aspell_speller_save_all_word_lists(speller);
1173     state.del(); // to close the file handles
1174     delete_aspell_speller(speller);
1175
1176     if (changed) {
1177
1178       bool keep_backup = options->retrieve_bool("backup");
1179       if (keep_backup) {
1180         String backup_name = file_name;
1181         backup_name += ".bak";
1182         rename_file(file_name, backup_name);
1183       }
1184       rename_file(new_name, file_name);
1185
1186     } else {
1187
1188       remove_file(new_name);
1189
1190     }
1191
1192     //end_check();
1193
1194     return;
1195   }
1196 abort_loop:
1197   {
1198     state.del(); // to close the file handles
1199     delete_aspell_speller(speller);
1200
1201     remove_file(new_name);
1202
1203     return;
1204   }
1205 }
1206
1207 #define U (unsigned char)
1208
1209 void Mapping::to_aspell()
1210 {
1211   memset(this, 0, sizeof(Mapping));
1212   primary[Ignore    ] = 'i';
1213   reverse[U'i'] = Ignore;
1214   reverse[U' '] = Ignore;
1215   reverse[U'\n'] = Ignore;
1216
1217   primary[IgnoreAll ] = 'I';
1218   reverse[U'I'] = IgnoreAll;
1219
1220   primary[Replace   ] = 'r';
1221   reverse[U'r'] = Replace;
1222
1223   primary[ReplaceAll] = 'R';
1224   reverse[U'R'] = ReplaceAll;
1225
1226   primary[Add       ] = 'a';
1227   reverse[U'A'] = Add;
1228   reverse[U'a'] = Add;
1229
1230   primary[AddLower  ] = 'l';
1231   reverse[U'L'] = AddLower;
1232   reverse[U'l'] = AddLower;
1233
1234   primary[Abort     ] = 'b';
1235   reverse[U'b'] = Abort;
1236   reverse[U'B'] = Abort;
1237   reverse[control('c')] = Abort;
1238
1239   primary[Exit      ] = 'x';
1240   reverse[U'x'] = Exit;
1241   reverse[U'X'] = Exit;
1242 }
1243
1244 void Mapping::to_ispell()
1245 {
1246   memset(this, 0, sizeof(Mapping));
1247   primary[Ignore    ] = ' ';
1248   reverse[U' '] = Ignore;
1249   reverse[U'\n'] = Ignore;
1250
1251   primary[IgnoreAll ] = 'A';
1252   reverse[U'A'] = IgnoreAll;
1253   reverse[U'a'] = IgnoreAll;
1254
1255   primary[Replace   ] = 'R';
1256   reverse[U'R'] = ReplaceAll;
1257   reverse[U'r'] = Replace;
1258
1259   primary[ReplaceAll] = 'E';
1260   reverse[U'E'] = ReplaceAll;
1261   reverse[U'e'] = Replace;
1262
1263   primary[Add       ] = 'I';
1264   reverse[U'I'] = Add;
1265   reverse[U'i'] = Add;
1266
1267   primary[AddLower  ] = 'U';
1268   reverse[U'U'] = AddLower;
1269   reverse[U'u'] = AddLower;
1270
1271   primary[Abort     ] = 'Q';
1272   reverse[U'Q'] = Abort;
1273   reverse[U'q'] = Abort;
1274   reverse[control('c')] = Abort;
1275
1276   primary[Exit      ] = 'X';
1277   reverse[U'X'] = Exit;
1278   reverse[U'x'] = Exit;
1279 }
1280 #undef U
1281
1282 ///////////////////////////
1283 //
1284 // list
1285 //
1286
1287 void list()
1288 {
1289   AspellCanHaveError * ret
1290     = new_aspell_speller(reinterpret_cast<AspellConfig *>(options.get()));
1291   if (aspell_error(ret)) {
1292     print_error(aspell_error_message(ret));
1293     exit(1);
1294   }
1295   AspellSpeller * speller = to_aspell_speller(ret);
1296
1297   state = new CheckerString(speller,stdin,0,64);
1298
1299   String word;
1300
1301   while (state->next_misspelling()) {
1302
1303     state->get_real_word(word);
1304     COUT.printl(word);
1305
1306   }
1307
1308   state.del(); // to close the file handles
1309   delete_aspell_speller(speller);
1310 }
1311
1312 ///////////////////////////
1313 //
1314 // convt
1315 //
1316
1317 void convt()
1318 {
1319   Conv conv;
1320   String buf1, buf2;
1321   const char * from = fix_encoding_str(args[0], buf1);
1322   const char * to   = fix_encoding_str(args[1], buf2);
1323   Normalize norm = NormNone;
1324   if (strcmp(from, "utf-8") == 0 && strcmp(to, "utf-8") != 0)
1325     norm = NormFrom;
1326   else if (strcmp(from, "utf-8") != 0 && strcmp(to, "utf-8") == 0)
1327     norm = NormTo;
1328   if (args.size() > 2) {
1329     for (String::iterator i = args[2].begin(); i != args[2].end(); ++i)
1330       *i = asc_tolower(*i);
1331     options->replace("normalize", "true");
1332     if (args[2] == "none")
1333       options->replace("normalize", "false");
1334     else if (args[2] == "internal")
1335       options->replace("norm-strict", "false");
1336     else if (args[2] == "strict")
1337       options->replace("norm-strict", "true");
1338     else
1339       EXIT_ON_ERR(options->replace("norm-form", args[2]));
1340   }
1341   EXIT_ON_ERR(conv.setup(*options, args[0], args[1], norm));
1342   String line;
1343   while (CIN.getline(line))
1344     COUT.printl(conv(line));
1345 }
1346
1347 void normlz()
1348 {
1349   options->replace("normalize", "true");
1350   const char * from = args.size() < 3 ? "utf-8" : args[0].str();
1351   const char * to   = args.size() < 3 ? "utf-8" : args[2].str();
1352   const char * intr = args.size() < 3 ? args[0].str() : args[1].str();
1353   String * form = (args.size() == 2   ? &args[1]
1354                    : args.size() == 4 ? &args[3]
1355                    : 0);
1356   Normalize decode_norm = NormTo;
1357   if (form) {
1358     for (String::iterator i = form->begin(); i != form->end(); ++i)
1359       *i = asc_tolower(*i);
1360     if (*form == "internal") {
1361       options->replace("norm-strict", "false");
1362       decode_norm = NormNone;
1363     } else if (*form == "strict") {
1364       options->replace("norm-strict", "true");
1365       decode_norm = NormNone;
1366     }
1367     if (decode_norm == NormTo) EXIT_ON_ERR(options->replace("norm-form", *form));
1368   }
1369   Conv encode,decode;
1370   EXIT_ON_ERR(encode.setup(*options, from, intr, NormFrom));
1371   EXIT_ON_ERR(decode.setup(*options, intr, to, decode_norm));
1372   String line;
1373   while (CIN.getline(line))
1374     COUT.printl(decode(encode(line)));
1375 }
1376
1377 ///////////////////////////
1378 //
1379 // filter
1380 //
1381
1382 void filter()
1383 {
1384   //assert(setvbuf(stdin, 0, _IOLBF, 0) == 0);
1385   //assert(setvbuf(stdout, 0, _IOLBF, 0) == 0);
1386   CERR << _("Sorry \"filter\" is currently unimplemented.\n");
1387   exit(3);
1388 }
1389
1390
1391 ///////////////////////////
1392 //
1393 // print_ver
1394 //
1395
1396 void print_ver () {
1397   COUT.put("@(#) International Ispell Version 3.1.20 "
1398            "(but really Aspell " VERSION ")\n");
1399 }
1400
1401 ///////////////////////////////////////////////////////////////////////
1402 //
1403 // These functions use implementation details of the default speller
1404 // module
1405 //
1406
1407 class IstreamEnumeration : public StringEnumeration {
1408   FStream * in;
1409   String data;
1410 public:
1411   IstreamEnumeration(FStream & i) : in(&i) {}
1412   IstreamEnumeration * clone() const {
1413     return new IstreamEnumeration(*this);
1414   }
1415   void assign (const StringEnumeration * other) {
1416     *this = *static_cast<const IstreamEnumeration *>(other);
1417   }
1418   Value next() {
1419     if (!in->getline(data)) return 0;
1420     else return data.c_str();
1421   }
1422   bool at_end() const {return *in;}
1423 };
1424
1425 ///////////////////////////
1426 //
1427 // clean
1428 //
1429
1430 void clean()
1431 {
1432   using namespace aspeller;
1433
1434   bool strict = args.size() != 0 && args[0] == "strict";
1435
1436   Config * config = options;
1437
1438   CachePtr<Language> lang;
1439   find_language(*config);
1440   PosibErr<Language *> res = new_language(*config);
1441   if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
1442   lang.reset(res.data);
1443   IstreamEnumeration in(CIN);
1444   WordListIterator wl_itr(&in, lang, &CERR);
1445   config->replace("validate-words", "true");
1446   config->replace("validate-affixes", "true");
1447   if (!strict)
1448     config->replace("clean-words", "true");
1449   config->replace("clean-affixes", "true");
1450   config->replace("skip-invalid-words", "true");
1451   wl_itr.init(*config);
1452   Conv oconv, oconv2;
1453   if (config->have("encoding")) {
1454     EXIT_ON_ERR(oconv.setup(*config, lang->charmap(), config->retrieve("encoding"), NormTo));
1455     oconv2.setup(*config, lang->charmap(), config->retrieve("encoding"), NormTo);
1456   } else {
1457     EXIT_ON_ERR(oconv.setup(*config, lang->charmap(), lang->data_encoding(), NormTo));
1458     oconv2.setup(*config, lang->charmap(), lang->data_encoding(), NormTo);
1459   }
1460   while (wl_itr.adv()) {
1461     if (*wl_itr->aff.str)
1462       COUT.printf("%s/%s\n", oconv(wl_itr->word), oconv2(wl_itr->aff));
1463     else
1464       COUT.printl(oconv(wl_itr->word));
1465   }
1466 }
1467
1468 ///////////////////////////
1469 //
1470 // master
1471 //
1472
1473 void dump (aspeller::Dict * lws, Convert * conv)
1474 {
1475   using namespace aspeller;
1476
1477   switch (lws->basic_type) {
1478   case Dict::basic_dict:
1479     {
1480       Dictionary * ws = static_cast<Dictionary *>(lws);
1481       StackPtr<WordEntryEnumeration> els(ws->detailed_elements());
1482       WordEntry * wi;
1483       while (wi = els->next(), wi) {
1484         wi->write(COUT,*ws->lang(), conv);
1485         COUT << '\n';
1486       }
1487     }
1488     break;
1489   case Dict::multi_dict:
1490     {
1491       StackPtr<DictsEnumeration> els(lws->dictionaries());
1492       Dict * ws;
1493       while (ws = els->next(), ws)
1494         dump (ws, conv);
1495     }
1496     break;
1497   default:
1498     abort();
1499   }
1500 }
1501
1502 void master () {
1503   using namespace aspeller;
1504
1505   if (args.size() != 0) {
1506     options->replace("master", args[0].c_str());
1507   }
1508
1509   Config * config = options;
1510
1511   if (action == do_create) {
1512
1513     find_language(*config);
1514     EXIT_ON_ERR(create_default_readonly_dict
1515                 (new IstreamEnumeration(CIN),
1516                  *config));
1517
1518   } else if (action == do_merge) {
1519
1520     print_error(_("Can't merge a master word list yet. Sorry."));
1521     exit (1);
1522
1523   } else if (action == do_dump) {
1524
1525     EXIT_ON_ERR_SET(add_data_set(config->retrieve("master-path"), *config), Dict *, d);
1526     StackPtr<Convert> conv(setup_conv(d->lang(), config));
1527     dump(d, conv);
1528   }
1529 }
1530
1531 ///////////////////////////
1532 //
1533 // personal
1534 //
1535
// Implements the create/merge/dump actions for the personal word list.
// An optional first argument overrides the "personal" option.
//
// create/merge are currently unimplemented: an error is printed and the
// process exits with status 3 (the code after exit() is disabled with
// "#if 0").  Any other action is treated as dump and writes the personal
// dictionary to stdout, one entry per line.
void personal () {
  using namespace aspeller;

  if (args.size() != 0) {
    EXIT_ON_ERR(options->replace("personal", args[0]));
  }
  options->replace("module", "aspeller");
  if (action == do_create || action == do_merge) {
    CERR << _("Sorry \"create/merge personal\" is currently unimplemented.\n");
    exit(3);

    // FIXME
#if 0
    StackPtr<Speller> speller(new_speller(options));

    if (action == do_create) {
      if (file_exists(speller->config()->retrieve("personal-path"))) {
        print_error(_("Sorry I won't overwrite \"%s\""),
                    speller->config()->retrieve("personal-path"));
        exit (1);
      }
      speller->personal_word_list().data->clear();
    }

    String word;
    while (CIN >> word)
      speller->add_to_personal(word);

    speller->save_all_word_lists();
#endif

  } else { // action == do_dump

    // FIXME: This is currently broken

    Config * config = options;
    Dictionary * per = new_default_writable_dict();
    // NOTE(review): the result of load() is not checked here.
    per->load(config->retrieve("personal-path"), *config);
    StackPtr<WordEntryEnumeration> els(per->detailed_elements());
    StackPtr<Convert> conv(setup_conv(per->lang(), config));

    // write each entry followed by a newline
    WordEntry * wi;
    while (wi = els->next(), wi) {
      wi->write(COUT,*(per->lang()), conv);
      COUT.put('\n');
    }
    delete per;
  }
}
1585
1586 ///////////////////////////
1587 //
1588 // repl
1589 //
1590
// Implements the create/merge/dump actions for the personal replacement
// list.  An optional first argument overrides the "repl" option.
//
// create/merge are currently unimplemented: an error is printed and the
// process exits with status 3 (the code after exit() is disabled with
// "#if 0").  dump writes each stored pair to stdout as
// "misspelling: replacement".
void repl() {
  using namespace aspeller;

  if (args.size() != 0) {
    options->replace("repl", args[0].c_str());
  }

  if (action == do_create || action == do_merge) {

    CERR << _("Sorry \"create/merge repl\" is currently unimplemented.\n");
    exit(3);

    // FIXME
#if 0
    SpellerImpl speller(options);

    if (action == do_create) {
      if (file_exists(speller->config()->retrieve("repl-path"))) {
        print_error(_("Sorry I won't overwrite \"%s\""),
                    speller->config()->retrieve("repl-path"));
        exit (1);
      }
      speller->personal_repl().clear();
    }

    try {
      String word,repl;

      while (true) {
        get_word_pair(word,repl,':');
        EXIT_ON_ERR(speller->store_repl(word,repl,false));
      }

    } catch (bad_cin) {}

    EXIT_ON_ERR(speller->personal_repl().synchronize());

#endif

  } else if (action == do_dump) {

    // FIXME: This is currently broken

    // Note: this local `repl` shadows the enclosing function's name.
    ReplacementDict * repl = new_default_replacement_dict();
    // NOTE(review): the result of load() is not checked here.
    repl->load(options->retrieve("repl-path"), *options);
    StackPtr<WordEntryEnumeration> els(repl->detailed_elements());

    WordEntry * rl = 0;
    WordEntry words;
    Conv conv(setup_conv(repl->lang(), options));
    // for every misspelling, print one line per stored replacement
    while ((rl = els->next())) {
      repl->repl_lookup(*rl, words);
      do {
        COUT << conv(rl->word) << ": " << conv(words.word) << "\n";
      } while (words.adv());
    }
    delete repl;
  }
}
1650
1651 //////////////////////////
1652 //
1653 // soundslike
1654 //
1655
1656 void soundslike() {
1657   using namespace aspeller;
1658   CachePtr<Language> lang;
1659   find_language(*options);
1660   PosibErr<Language *> res = new_language(*options);
1661   if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
1662   lang.reset(res.data);
1663   Conv iconv(setup_conv(options, lang));
1664   Conv oconv(setup_conv(lang, options));
1665   String word;
1666   String sl;
1667   line_buffer();
1668   while (CIN.getline(word)) {
1669     const char * w = iconv(word);
1670     lang->LangImpl::to_soundslike(sl, w);
1671     printf("%s\t%s\n", word.str(), oconv(sl));
1672   }
1673 }
1674
1675 //////////////////////////
1676 //
1677 // munch
1678 //
1679
1680 void munch()
1681 {
1682   using namespace aspeller;
1683   CachePtr<Language> lang;
1684   find_language(*options);
1685   PosibErr<Language *> res = new_language(*options);
1686   if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
1687   lang.reset(res.data);
1688   Conv iconv(setup_conv(options, lang));
1689   Conv oconv(setup_conv(lang, options));
1690   String word;
1691   GuessInfo gi;
1692   line_buffer();
1693   while (CIN.getline(word)) {
1694     lang->munch(iconv(word), &gi);
1695     COUT << word;
1696     for (const aspeller::CheckInfo * ci = gi.head; ci; ci = ci->next)
1697     {
1698       COUT << ' ' << oconv(ci->word) << '/';
1699       if (ci->pre_flag != 0) COUT << oconv(static_cast<char>(ci->pre_flag));
1700       if (ci->suf_flag != 0) COUT << oconv(static_cast<char>(ci->suf_flag));
1701     }
1702     COUT << '\n';
1703   }
1704 }
1705
1706 //////////////////////////
1707 //
1708 // expand
1709 //
1710
1711 void expand()
1712 {
1713   int level = 1;
1714   if (args.size() > 0)
1715     level = atoi(args[0].c_str()); //FIXME: More verbose
1716   int limit = INT_MAX;
1717   if (args.size() > 1)
1718     limit = atoi(args[1].c_str());
1719
1720   using namespace aspeller;
1721   CachePtr<Language> lang;
1722   find_language(*options);
1723   PosibErr<Language *> res = new_language(*options);
1724   if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
1725   lang.reset(res.data);
1726   Conv iconv(setup_conv(options, lang));
1727   Conv oconv(setup_conv(lang, options));
1728   String word, buf;
1729   ObjStack exp_buf;
1730   WordAff * exp_list;
1731   line_buffer();
1732   while (CIN.getline(word)) {
1733     buf = word;
1734     char * w = iconv(buf.mstr(), buf.size());
1735     char * af = strchr(w, '/');
1736     size_t s;
1737     if (af != 0) {
1738       s = af - w;
1739       *af++ = '\0';
1740     } else {
1741       s = strlen(w);
1742       af = w + s;
1743     }
1744     exp_buf.reset();
1745     exp_list = lang->expand(w, af, exp_buf, limit);
1746     if (level <= 2) {
1747       if (level == 2)
1748         COUT << word << ' ';
1749       WordAff * p = exp_list;
1750       while (p) {
1751         COUT << oconv(p->word);
1752         if (limit < INT_MAX && p->aff[0]) COUT << '/' << oconv((const char *)p->aff);
1753         p = p->next;
1754         if (p) COUT << ' ';
1755       }
1756       COUT << '\n';
1757     } else if (level >= 3) {
1758       double ratio = 0;
1759       if (level >= 4) {
1760         for (WordAff * p = exp_list; p; p = p->next)
1761           ratio += p->word.size;
1762         ratio /= exp_list->word.size; // it is assumed the first
1763                                       // expansion is just the root
1764       }
1765       for (WordAff * p = exp_list; p; p = p->next) {
1766         COUT << word << ' ' << oconv(p->word);
1767         if (limit < INT_MAX && p->aff[0]) COUT << '/' << oconv((const char *)p->aff);
1768         if (level >= 4) COUT.printf(" %f\n", ratio);
1769         else COUT << '\n';
1770       }
1771     }
1772   }
1773 }
1774
1775 //////////////////////////
1776 //
1777 // combine
1778 //
1779
1780 static void combine_aff(String & aff, const char * app)
1781 {
1782   for (; *app; ++app) {
1783     if (!memchr(aff.c_str(),*app,aff.size()))
1784       aff.push_back(*app);
1785   }
1786 }
1787
1788 static void print_wordaff(const String & base, const String & affs, Conv & oconv)
1789 {
1790   if (base.empty()) return;
1791   COUT << oconv(base);
1792   if (affs.empty())
1793     COUT << '\n';
1794   else
1795     COUT.printf("/%s\n", oconv(affs));
1796 }
1797
1798 static bool lower_equal(aspeller::Language * l, ParmString a, ParmString b)
1799 {
1800   if (a.size() != b.size()) return false;
1801   if (l->to_lower(a[0]) != l->to_lower(b[0])) return false;
1802   return memcmp(a + 1, b + 1, a.size() - 1) == 0;
1803 }
1804
1805 void combine()
1806 {
1807   using namespace aspeller;
1808   CachePtr<Language> lang;
1809   find_language(*options);
1810   PosibErr<Language *> res = new_language(*options);
1811   if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
1812   lang.reset(res.data);
1813   Conv iconv(setup_conv(options, lang));
1814   Conv oconv(setup_conv(lang, options));
1815   String word;
1816   String base;
1817   String affs;
1818   line_buffer();
1819   while (CIN.getline(word)) {
1820     word = iconv(word);
1821
1822     CharVector buf; buf.append(word.c_str(), word.size() + 1);
1823     char * w = buf.data();
1824     char * af = strchr(w, '/');
1825     size_t s;
1826     if (af != 0) {
1827       s = af - w;
1828       *af++ = '\0';
1829     } else {
1830       s = strlen(w);
1831       af = w + s;
1832     }
1833
1834     if (lower_equal(lang, base, w)) {
1835       if (lang->is_lower(base.str())) {
1836         combine_aff(affs, af);
1837       } else {
1838         base = w;
1839         combine_aff(affs, af);
1840       }
1841     } else {
1842       print_wordaff(base, affs, oconv);
1843       base = w;
1844       affs = af;
1845     }
1846
1847   }
1848   print_wordaff(base, affs, oconv);
1849 }
1850
1851 //////////////////////////
1852 //
1853 // munch list
1854 //
1855
1856 void munch_list_simple();
1857 void munch_list_complete(bool, bool);
1858
1859 void munch_list()
1860 {
1861   bool simple = false;
1862   bool multi = false;
1863   bool simplify = true;
1864
1865   for (unsigned i = 0; i < args.size(); ++i) {
1866     if (args[i] == "simple")      simple = true;
1867     else if (args[i] == "single") multi = false;
1868     else if (args[i] == "multi")  multi = true;
1869     else if (args[i] == "keep")   simplify = false;
1870     else
1871     {
1872       print_error(_("\"%s\" is not a valid flag for the \"munch-list\" command."),
1873                   args[i]);
1874       exit(1);
1875     }
1876   }
1877   if (simple)
1878     munch_list_simple();
1879   else
1880     munch_list_complete(multi, simplify);
1881 }
1882
1883 //
1884 // munch list (simple version)
1885 //
1886
// This version works the same way as the myspell "munch" program.
// However, because the results depend on the hash table used and the
// order of the word list, it won't produce identical results.
1890
// One entry of the simple munch-list hash table.
struct SML_WordEntry {
  const char * word;  // the word itself; also the hash key
  char * aff;         // NUL-terminated affix flags, or 0 if none yet
  bool keep;          // true if the entry is to be printed

  SML_WordEntry(const char * w = 0)
    : word(w)
    , aff(0)
    , keep(false)
  {}
};
1897
1898 struct SML_Parms {
1899   typedef SML_WordEntry Value;
1900   typedef const char * Key;
1901   static const bool is_multi = false;
1902   acommon::hash<const char *> hash;
1903   bool equal(Key x, Key y) {return strcmp(x,y) == 0;}
1904   Key key(const Value & v) {return v.word;}
1905 };
1906
1907 typedef HashTable<SML_Parms> SML_Table;
1908
1909 static inline void add_affix(SML_Table::iterator b, char aff)
1910 {
1911   char * p = b->aff;
1912   if (p) {while (*p) {if (*p == aff) return; ++p;}}
1913   int s = p - b->aff;
1914   b->aff = (char *)realloc(b->aff, s + 2);
1915   b->aff[s + 0] = aff;
1916   b->aff[s + 1] = '\0';
1917 }
1918
// Simple munch-list algorithm.
//
// 1. Every "word/flags" line from stdin is expanded and all resulting
//    words are inserted into a hash table.
// 2. Each word in the table is munched; among the candidate base words
//    that are themselves in the table and whose full expansion (with the
//    candidate's flags added) stays inside the table, the shortest base
//    is chosen and receives the flags.  A word with no valid candidate is
//    kept as is.
// 3. All entries marked "keep" are printed as "word" or "word/flags".
void munch_list_simple()
{
  using namespace aspeller;
  CachePtr<Language> lang;
  find_language(*options);
  PosibErr<Language *> res = new_language(*options);
  if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
  lang.reset(res.data);
  Conv iconv(setup_conv(options, lang));
  Conv oconv(setup_conv(lang, options));
  String word, buf;
  ObjStack exp_buf;      // scratch space for expansions, reset before use
  WordAff * exp_list;
  GuessInfo gi;
  SML_Table table;
  ObjStack table_buf;    // owns the word strings stored in the table

  // add words to dictionary
  while (CIN.getline(word)) {
    buf = word;
    char * w = iconv(buf.mstr(), buf.size());
    // split "word/flags" in place; without a '/', the flags are empty
    char * af = strchr(w, '/');
    size_t s; // assigned below but not otherwise used
    if (af != 0) {
      s = af - w;
      *af++ = '\0';
    } else {
      s = strlen(w);
      af = w + s;
    }
    exp_buf.reset();
    exp_list = lang->expand(w, af, exp_buf);
    for (WordAff * q = exp_list; q; q = q->next) {
      table.insert(SML_WordEntry(table_buf.dup(q->word)));
    }
  }

  // now try to munch each word in the dictionary
  SML_Table::iterator p = table.begin();
  SML_Table::iterator end = table.end();
  String flags;
  for (; p != end; ++p)
  {
    const aspeller::CheckInfo * best = 0;
    unsigned min_base_size = INT_MAX;
    lang->munch(p->word, &gi);
    const aspeller::CheckInfo * ci = gi.head;
    // The extra brace pair scopes `b` so that "goto cont" does not jump
    // past an initialization that is still in scope at the label.
    while (ci)
    { {
      // check if the base word is in the dictionary
      SML_Table::iterator b = table.find(ci->word);
      if (b == table.end()) goto cont;

      // check if all the words once expanded are in the dictionary
      // this includes the existing flags due to pre-suf cross products
      if (b->aff) flags = b->aff;
      else        flags.clear();
      if (ci->pre_flag != 0) flags += ci->pre_flag;
      if (ci->suf_flag != 0) flags += ci->suf_flag;
      exp_buf.reset();
      exp_list = lang->expand(ci->word, flags, exp_buf);
      for (WordAff * q = exp_list; q; q = q->next) {
        if (!table.have(q->word)) goto cont;
      }

      // the base word and flags are valid, now keep the one with the
      // smallest base word
      if (ci->word.size() < min_base_size) {
        min_base_size = ci->word.size();
        best = ci;
      }

    } cont:
      ci = ci->next;
    }
    // now add the base to the keep list if one exists
    // otherwise just keep the original word
    if (best) {
      SML_Table::iterator b = table.find(best->word);
      assert(b != table.end());
      if (best->pre_flag) add_affix(b, best->pre_flag);
      if (best->suf_flag) add_affix(b, best->suf_flag);
      b->keep = true;
    } else {
      p->keep = true;
    }
  }

  // Print the entries in the table marked as "to keep"
  p = table.begin();
  for (; p != end; ++p)
  {
    if (p->keep) {
      COUT << oconv(p->word);
      if (p->aff) {
        COUT << '/' << oconv(p->aff);
      }
      COUT << '\n';
    }
  }

  // release the affix strings allocated by add_affix()
  p = table.begin();
  for (; p != end; ++p)
  {
    if (p->aff) free(p->aff);
    p->aff = 0;
  }
}
2027
2028 //
2029 // munch list (complete version)
2030 //
2031 //
2032 // This version will produce a smaller list than the simple version.
2033 // It is very close to the optimum result.
2034 //
2035
2036 //
2037 // Hash table to store the words
2038 //
2039
// One entry of the complete munch-list hash table.  `parent` and `rank`
// are the bookkeeping fields of the disjoint-set structure used by
// link() and find_set().
struct CML_Entry {
  const char * word;   // the word itself; also the hash key
  char * aff;          // affix flags, or 0 if none yet
  CML_Entry * parent;  // disjoint-set parent, 0 for a set root
  CML_Entry * next;
  int rank;            // disjoint-set rank

  CML_Entry(const char * w = 0)
    : word(w)
    , aff(0)
    , parent(0)
    , next(0)
    , rank(0)
  {}
};
2048
2049 struct CML_Parms {
2050   typedef CML_Entry Value;
2051   typedef const char * Key;
2052   static const bool is_multi = true;
2053   acommon::hash<const char *> hash;
2054   bool equal(Key x, Key y) {return strcmp(x,y) == 0;}
2055   Key key(const Value & v) {return v.word;}
2056 };
2057
2058 typedef HashTable<CML_Parms> CML_Table;
2059
2060 //
2061 // add an affix to a word but keep the prefixes and suffixes separate
2062 //
2063
2064 static void add_affix(CML_Table::iterator b, char aff, bool prefix)
2065 {
2066   char * p = b->aff;
2067   int s = 3;
2068   if (p) {
2069     while (*p) {
2070       if (*p == aff) return;
2071       ++p;
2072     }
2073     s = (p - b->aff) + 2;
2074   }
2075   char * tmp = (char *)malloc(s);
2076   p = b->aff;
2077   char * q = tmp;
2078   if (p) {while (*p != '/') *q++ = *p++;}
2079   if (prefix) *q++ = aff;
2080   *q++ = '/';
2081   if (p) {p++; while (*p != '\0') *q++ = *p++;}
2082   if (!prefix) *q++ = aff;
2083   *q++ = '\0';
2084   assert(q - tmp == s);
2085   if (b->aff) free(b->aff);
2086   b->aff = tmp;
2087 }
2088
2089 //
2090 // Standard disjoint set algo with union by rank and path compression
2091 //
2092
2093 static void link(CML_Entry * x, CML_Entry * y)
2094 {
2095   if (x == y) return;
2096   if (x->rank > y->rank) {
2097     y->parent = x;
2098   } else {
2099     x->parent = y;
2100     if (x->rank == y->rank) y->rank++;
2101   }
2102 }
2103
2104 static CML_Entry * find_set (CML_Entry * x)
2105 {
2106   if (x->parent)
2107     return x->parent = find_set(x->parent);
2108   else
2109     return x;
2110 }
2111
2112 //
2113 // Stuff to manage prefix-suffix combinations
2114 //
2115
2116 struct PreSuf {
2117   String pre;
2118   String suf;
2119   String & get(int i) {return i == 0 ? pre : suf;}
2120   const String & get(int i) const {return i == 0 ? pre : suf;}
2121   PreSuf() : next(0) {}
2122   PreSuf * next;
2123 };
2124
2125 class PreSufList {
2126 public:
2127   PreSuf * head;
2128   PreSufList() : head(0) {}
2129   void add(PreSuf * to_add) {
2130     to_add->next = head;
2131     head = to_add;
2132   }
2133   void clear() {
2134     while (head) {
2135       PreSuf * tmp = head;
2136       head = head->next;
2137       delete tmp;
2138     }
2139   }
2140   void transfer(PreSufList & other) {
2141     clear();
2142     head = other.head;
2143     other.head = 0;
2144   }
2145   ~PreSufList() {
2146     clear();
2147   }
2148 };
2149
2150
2151 // Example of usage:
2152 //   combine(in, res, 0)
2153 //   Pre:  in =  [(ab, c) (ab, d) (c, de) (c, ef)]
2154 //   Post: res = [(ab, cd), (c, def)]
2155 static void combine(const PreSufList & in, PreSufList & res, int which)
2156 {
2157   const PreSuf * i = in.head;
2158   while (i) { {
2159     const String & s = i->get(which);
2160     for (const PreSuf * j = in.head; j != i; j = j->next) {
2161       if (j->get(which) == s) goto cont;
2162     }
2163     PreSuf * tmp = new PreSuf;
2164     tmp->pre = i->pre;
2165     tmp->suf = i->suf;
2166     String & b = tmp->get(!which);
2167     for (const PreSuf * j = i->next; j; j = j->next) {
2168       if (j->get(which) != s) continue;
2169       const String & a = j->get(!which);
2170       for (String::const_iterator x = a.begin(); x != a.end(); ++x) {
2171         if (memchr(b.data(), *x, b.size())) continue;
2172         b += *x;
2173       }
2174     }
2175     res.add(tmp);
2176   } cont:
2177     i = i->next;
2178   }
2179 }
2180
2181 //
2182 // Stuff used when pruning the list of base words
2183 //
2184
// A candidate output entry (base word plus flags) together with the
// set of words it expands to, stored as one bit per word.
struct Expansion {
  const char * word;            // base word
  char * aff;                   // flag string; shared with the hash table
                                // entry, so writing through it changes
                                // that entry as well
  std::vector<bool> exp;        // expansion set currently in use
  std::vector<bool> orig_exp;   // second expansion set of the same shape
};
2191
2192 // static void dump(const Vector<Expansion *> & working,
2193 //                  const Vector<CML_Table::iterator> & entries)
2194 // {
2195 //   for (unsigned i = 0; i != working.size(); ++i) {
2196 //     if (!working[i]) continue;
2197 //     CERR.printf("%s/%s ", working[i]->word, working[i]->aff);
2198 //     for (unsigned j = 0; j != working[i]->exp.size(); ++j) {
2199 //       if (working[i]->exp[j])
2200 //         CERR.printf("%s ", entries[j]->word);
2201 //     }
2202 //     CERR.put('\n');
2203 //   }
2204 //   CERR.put('\n');
2205 // }
2206
2207 // standard set algorithms on a bit vector
2208
// True when every bit set in "smaller" is also set in "larger".  Both
// vectors must have the same size.
static bool subset(const std::vector<bool> & smaller,
                   const std::vector<bool> & larger)
{
  assert(smaller.size() == larger.size());
  const unsigned n = larger.size();
  unsigned pos = 0;
  while (pos != n) {
    // a bit of "smaller" missing from "larger" settles it
    if (!(larger[pos] || !smaller[pos])) return false;
    ++pos;
  }
  return true;
}
2219
// Set union in place: x becomes x | y.  Both vectors must have the
// same size.
static void merge(std::vector<bool> & x, const std::vector<bool> & y)
{
  assert(x.size() == y.size());
  const unsigned n = x.size();
  unsigned pos = 0;
  while (pos != n) {
    x[pos] = x[pos] || y[pos];
    ++pos;
  }
}
2228
// Set difference in place: x becomes x & ~y.  Both vectors must have
// the same size.
static void purge(std::vector<bool> & x, const std::vector<bool> & y)
{
  assert(x.size() == y.size());
  const unsigned n = x.size();
  unsigned pos = 0;
  while (pos != n) {
    x[pos] = x[pos] && !y[pos];
    ++pos;
  }
}
2237
// Number of bits set in x.
static inline unsigned count(const std::vector<bool> & x) {
  unsigned total = 0;
  for (std::vector<bool>::const_iterator bit = x.begin(); bit != x.end(); ++bit) {
    if (*bit) total += 1;
  }
  return total;
}
2245
2246 //
2247
2248 struct WorkingLt {
2249   bool operator() (Expansion * x, Expansion * y) {
2250
2251     // LARGEST number of expansions
2252     unsigned x_s = count(x->exp);
2253     unsigned y_s = count(y->exp);
2254     if (x_s != y_s) return x_s > y_s;
2255
2256     // SMALLEST base word
2257     x_s = strlen(x->word);
2258     y_s = strlen(y->word);
2259     if (x_s != y_s) return x_s < y_s;
2260
2261     // LARGEST affix string
2262     x_s = strlen(x->aff);
2263     y_s = strlen(y->aff);
2264     if (x_s != y_s) return x_s > y_s;
2265
2266     //
2267     int cmp = strcmp(x->word, y->word);
2268     if (cmp != 0) return cmp < 0;
2269
2270     //
2271     cmp = strcmp(x->aff, y->aff);
2272     return cmp < 0;
2273   }
2274 };
2275
2276 //
2277 // Finally the function that does the real work
2278 //
2279
2280 void munch_list_complete(bool multi, bool simplify)
2281 {
2282   using namespace aspeller;
2283   CachePtr<Language> lang;
2284   find_language(*options);
2285   PosibErr<Language *> res = new_language(*options);
2286   if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
2287   lang.reset(res.data);
2288   Conv iconv(setup_conv(options, lang));
2289   Conv oconv(setup_conv(lang, options));
2290   String word, buf;
2291   ObjStack exp_buf;
2292   WordAff * exp_list;
2293   GuessInfo gi;
2294   CML_Table table;
2295   ObjStack table_buf;
2296
2297   // add words to dictionary
2298   while (CIN.getline(word)) {
2299     buf = word;
2300     char * w = iconv(buf.mstr(), buf.size());
2301     char * af = strchr(w, '/');
2302     size_t s;
2303     if (af != 0) {
2304       s = af - w;
2305       *af++ = '\0';
2306     } else {
2307       s = strlen(w);
2308       af = w + s;
2309     }
2310     exp_buf.reset();
2311     exp_list = lang->expand(w, af, exp_buf);
2312     for (WordAff * q = exp_list; q; q = q->next) {
2313       if (!table.have(q->word)) // since it is a multi hash table
2314         table.insert(CML_Entry(table_buf.dup(q->word))).first;
2315     }
2316   }
2317
2318   // Now try to munch each word in the dictionary.  This will also
2319   // group the base words into disjoint sets based on there expansion.
2320   CML_Table::iterator p = table.begin();
2321   CML_Table::iterator end = table.end();
2322   String flags;
2323   for (; p != end; ++p)
2324   {
2325     lang->munch(p->word, &gi, false);
2326     const aspeller::CheckInfo * ci = gi.head;
2327     while (ci)
2328     { {
2329       // check if the base word is in the dictionary
2330       CML_Table::iterator b = table.find(ci->word);
2331       if (b == table.end()) goto cont;
2332
2333       // check if all the words once expanded are in the dictionary
2334       char flags[2];
2335       assert(!(ci->pre_flag && ci->suf_flag));
2336       if      (ci->pre_flag != 0) flags[0] = ci->pre_flag;
2337       else if (ci->suf_flag != 0) flags[0] = ci->suf_flag;
2338       flags[1] = '\0';
2339       exp_buf.reset();
2340       exp_list = lang->expand(ci->word, flags, exp_buf);
2341       for (WordAff * q = exp_list; q; q = q->next) {
2342         if (!table.have(q->word)) goto cont;
2343       }
2344
2345       // all the expansions are in the dictionary now add the affix to
2346       // the base word and figure out which disjoint set it belongs to
2347       add_affix(b, flags[0], ci->pre_flag != 0);
2348       CML_Entry * bs = find_set(&*b);
2349       for (WordAff * q = exp_list; q; q = q->next) {
2350         CML_Table::iterator w = table.find(q->word);
2351         assert(b != table.end());
2352         CML_Entry * ws = find_set(&*w);
2353         link(bs,ws);
2354       }
2355
2356     } cont:
2357       ci = ci->next;
2358     }
2359   }
2360
2361   // If a base word has both prefixes and suffixes try to combine them.
2362   // This can lead to multiple entries for the same base word.  If "multi"
2363   // is true than include all the entries.  Otherwise, only include the
2364   // one with the largest number of expansions.  This is a greedy choice
2365   // that may not be optimal, but is close to it.
2366   p = table.begin();
2367   String pre,suf;
2368   CML_Entry * extras = 0;
2369   for (; p != end; ++p)
2370   {
2371     pre.clear(); suf.clear();
2372     if (!p->aff) continue;
2373     char * s = p->aff;
2374     while (*s != '/') pre += *s++;
2375     ++s;
2376     while (*s != '\0') suf += *s++;
2377     if (pre.empty()) {
2378
2379       strcpy(p->aff, suf.str());
2380
2381     } else if (suf.empty()) {
2382
2383       strcpy(p->aff, pre.str());
2384
2385     } else {
2386
2387       // Try all possible combinations and keep the ones which expand
2388       // to legal words.
2389
2390       PreSufList cross,tmp1,tmp2;
2391       PreSuf * ps = 0;
2392
2393       for (String::iterator pi = pre.begin(); pi != pre.end(); ++pi) {
2394         String::iterator si = suf.begin();
2395         while (si != suf.end()) { {
2396           char flags[3] = {*pi, *si, '\0'};
2397           exp_buf.reset();
2398           exp_list = lang->expand(p->word, flags, exp_buf);
2399           for (WordAff * q = exp_list; q; q = q->next) {
2400             if (!table.have(q->word)) goto cont2;
2401           }
2402           ps = new PreSuf;
2403           ps->pre += *pi;
2404           ps->suf += *si;
2405           cross.add(ps);
2406         } cont2:
2407           ++si;
2408         }
2409       }
2410
2411       // Now combine the legal cross pairs with other ones when
2412       // possible.
2413
2414       // final res = [ (pre, []) ([],suf),
2415       //               (cross | combine first | combine second)
2416       //               (cross | combine second | combine first)
2417       //             | combine first
2418       //             | combine second
2419       //
2420       // combine first [(ab, c) (ab, d) (c, de) (c, ef)]
2421       //   =  [(ab, cd), (c, def)]
2422
2423       combine(cross, tmp1, 0);
2424       combine(tmp1,  tmp2, 1);
2425       tmp1.clear();
2426
2427       combine(cross, tmp1, 1);
2428       combine(tmp1,  tmp2, 0);
2429       tmp1.clear();
2430
2431       cross.clear();
2432
2433       ps = new PreSuf;
2434       ps->pre = pre;
2435       tmp2.add(ps);
2436       ps = new PreSuf;
2437       ps->suf = suf;
2438       tmp2.add(ps);
2439
2440       combine(tmp2, tmp1, 0);
2441       combine(tmp1, cross, 1);
2442
2443       if (multi) {
2444
2445         // It is OK to have multiple entries with the same base word
2446         // so use them all.
2447
2448         ps = cross.head;
2449         assert(ps);
2450         memcpy(p->aff, ps->pre.data(), ps->pre.size());
2451         memcpy(p->aff + ps->pre.size(), ps->suf.str(), ps->suf.size() + 1);
2452
2453         ps = ps->next;
2454         CML_Entry * bs = find_set(&*p);
2455         for (; ps; ps = ps->next) {
2456
2457           CML_Entry * tmp = new CML_Entry;
2458           tmp->word = p->word;
2459           tmp->aff = (char *)malloc(ps->pre.size() + ps->suf.size() + 1);
2460           memcpy(tmp->aff, ps->pre.data(), ps->pre.size());
2461           memcpy(tmp->aff + ps->pre.size(), ps->suf.str(), ps->suf.size() + 1);
2462
2463           tmp->parent = bs;
2464
2465           tmp->next = extras;
2466           extras = tmp;
2467         }
2468
2469       } else {
2470
2471         // chose the one which has the largest number of expansions
2472
2473         int max_exp = 0;
2474         PreSuf * best = 0;
2475         String flags;
2476
2477         for (ps = cross.head; ps; ps = ps->next) {
2478           flags  = ps->pre;
2479           flags += ps->suf;
2480           exp_buf.reset();
2481           exp_list = lang->expand(p->word, flags, exp_buf);
2482           int c = 0;
2483           for (WordAff * q = exp_list; q; q = q->next) ++c;
2484           if (c > max_exp) {max_exp = c; best = ps;}
2485         }
2486
2487         memcpy(p->aff, best->pre.data(), best->pre.size());
2488         memcpy(p->aff + best->pre.size(), best->suf.str(), best->suf.size() + 1);
2489       }
2490     }
2491   }
2492
2493   while (extras) {
2494     CML_Entry * tmp = extras;
2495     extras = extras->next;
2496     tmp->next = 0;
2497     table.insert(*tmp);
2498     delete tmp;
2499   }
2500
2501   // Create a linked list for each disjoint set
2502   p = table.begin();
2503   for (; p != end; ++p)
2504   {
2505     p->rank = -1;
2506     CML_Entry * bs = find_set(&*p);
2507     if (bs != &*p) {
2508       p->next = bs->next;
2509       bs->next = &*p;
2510     }
2511   }
2512
2513   // Now process each disjoint set independently
2514   p = table.begin();
2515   for (; p != end; ++p)
2516   {
2517     if (p->parent) continue;
2518
2519     Vector<CML_Table::iterator> entries;
2520     Vector<Expansion> expansions;
2521     Vector<Expansion *> to_keep;
2522     std::vector<bool> to_keep_exp;
2523     Vector<Expansion *> working;
2524     Vector<unsigned> to_remove;
2525
2526     // First assign numbers to each unique word.  The rank field is
2527     // no longer used so use it to store the number.
2528     for (CML_Entry * q = &*p; q; q = q->next) {
2529       CML_Table::iterator e = table.find(q->word);
2530       if (e->rank == -1) {
2531         e->rank = entries.size();
2532         q->rank = entries.size();
2533         entries.push_back(e);
2534       } else {
2535         q->rank = e->rank;
2536       }
2537       if (q->aff) {
2538         Expansion tmp;
2539         tmp.word = q->word;
2540         tmp.aff  = q->aff;
2541         expansions.push_back(tmp);
2542       }
2543     }
2544
2545     to_keep_exp.resize(entries.size());
2546     //for (int i = 0; i != to_keep_exp.size(); ++i) {
2547     //  printf(">>> %d %d\n", i, (int)to_keep_exp[i]);
2548     //}
2549
2550     // Store the expansion of each base word in a bit vector and
2551     // add it to the working set
2552     for (Vector<Expansion>::iterator q = expansions.begin();
2553          q != expansions.end();
2554          ++q)
2555     {
2556       q->exp.resize(entries.size());
2557       exp_buf.reset();
2558       exp_list = lang->expand(q->word, q->aff, exp_buf);
2559       for (WordAff * i = exp_list; i; i = i->next) {
2560         CML_Table::iterator e = table.find(i->word);
2561         assert(0 <= e->rank && e->rank < (int)entries.size());
2562         q->exp[e->rank] = true;
2563       }
2564       q->orig_exp = q->exp;
2565       working.push_back(&*q);
2566     }
2567
2568     unsigned prev_working_size = INT_MAX;
2569
2570     // This loop will repeat until the working set is empty.  This
2571     // will produce optimum results in most cases.  Non optimum
2572     // results may be possible if step (4) is necessary, but in
2573     // practice this step is rarly necessary.
2574     do {
2575       prev_working_size = working.size();
2576
2577       // Sort the list based on WorkingLt.  This is necessary every
2578       // time since the expansion list can change.
2579       std::sort(working.begin(), working.end(), WorkingLt());
2580
2581       // (1) Eliminate any elements which are a subset of others
2582       for (unsigned i = 0; i != working.size(); ++i) {
2583         if (!working[i]) continue;
2584         for (unsigned j = i + 1; j != working.size(); ++j) {
2585           if (!working[j]) continue;
2586           if (subset(working[j]->exp, working[i]->exp)) {
2587             working[j] = 0;
2588           }
2589         }
2590       }
2591
2592       // (2) Move any elements which expand to unique entree
2593       // into the to_keep list
2594       to_remove.clear();
2595       for (unsigned i = 0; i != entries.size(); ++i) {
2596         int n = -1;
2597         for (unsigned j = 0; j != working.size(); ++j) {
2598           if (working[j] && working[j]->exp[i]) {
2599             if (n == -1) n = j;
2600             else         n = -2;
2601           }
2602         }
2603         if (n >= 0) to_remove.push_back(n);
2604       }
2605       for (unsigned i = 0; i != to_remove.size(); ++i) {
2606         unsigned n = to_remove[i];
2607         if (!working[n]) continue;
2608         to_keep.push_back(working[n]);
2609         merge(to_keep_exp, working[n]->exp);
2610         working[n] = 0;
2611       }
2612
2613       // (3) Eliminate any elements which are a subset of all the
2614       // elements in the to_keep list
2615       for (unsigned i = 0; i != working.size(); ++i) {
2616         if (working[i] && subset(working[i]->exp, to_keep_exp)) {
2617           working[i] = 0;
2618         }
2619       }
2620
2621       // Compact the working list
2622       {
2623         int i = 0, j = 0;
2624         while (j != (int)working.size()) {
2625           if (working[j]) {
2626             working[i] = working[j];
2627             ++i;
2628           }
2629           ++j;
2630         }
2631         working.resize(i);
2632       }
2633
2634       // (4) If none of the entries in working have been removed via
2635       // the above methods then make a greedy choice and move the
2636       // first element into the to_keep list.
2637       if (working.size() > 0 && working.size() == prev_working_size)
2638       {
2639         to_keep.push_back(working[0]);
2640         //CERR.printf("Making greedy choice! Chosing %s/%s.\n",
2641         //            working[0]->word, working[0]->aff);
2642         merge(to_keep_exp, working[0]->exp);
2643         working.erase(working.begin(), working.begin() + 1);
2644       }
2645
2646       // (5) Trim the expansion list for any elements left in the
2647       // working set by removing the expansions that already exist in
2648       // the to_keep list
2649       for (unsigned i = 0; i != working.size(); ++i) {
2650         purge(working[i]->exp, to_keep_exp);
2651       }
2652
2653     } while (working.size() > 0);
2654
2655     if (simplify) {
2656
2657       // Remove unnecessary flags.  A flag is unnecessary if it does
2658       // does not expand to any new words, that is words that are not
2659       // already covered by an earlier entries in the list.
2660
2661       for (unsigned i = 0; i != to_keep.size(); ++i) {
2662         to_keep[i]->exp = to_keep[i]->orig_exp;
2663       }
2664
2665       std::sort(to_keep.begin(), to_keep.end(), WorkingLt());
2666
2667       std::vector<bool> tally(entries.size());
2668       std::vector<bool> backup(entries.size());
2669       std::vector<bool> working(entries.size());
2670       String flags;
2671
2672       for (unsigned i = 0; i != to_keep.size(); ++i) {
2673
2674         backup = tally;
2675
2676         merge(tally, to_keep[i]->exp);
2677
2678         String flags_to_keep = to_keep[i]->aff;
2679         bool something_changed;
2680         do {
2681           something_changed = false;
2682           for (unsigned j = 0; j != flags_to_keep.size(); ++j) {
2683             flags.assign(flags_to_keep.data(), j);
2684             flags.append(flags_to_keep.data(j+1),
2685                          flags_to_keep.size() - (j+1));
2686             working = backup;
2687             exp_buf.reset();
2688             exp_list = lang->expand(to_keep[i]->word, flags, exp_buf);
2689             for (WordAff * q = exp_list; q; q = q->next) {
2690               CML_Table::iterator e = table.find(q->word);
2691               working[e->rank] = true;
2692             }
2693             if (working == tally) {
2694               flags_to_keep = flags;
2695               something_changed = true;
2696               break;
2697             }
2698           }
2699         } while (something_changed);
2700
2701         if (flags_to_keep != to_keep[i]->aff) {
2702           memcpy(to_keep[i]->aff, flags_to_keep.str(), flags_to_keep.size() + 1);
2703         }
2704       }
2705
2706     }
2707
2708     // Finally print the resulting list
2709
2710     //printf("XXX %d %d\n", to_keep.size(), to_keep_exp.size());
2711     //for (int i = 0; i != to_keep_exp.size(); ++i) {
2712     //  printf(">>> %d %d\n", i, (int)to_keep_exp[i]);
2713     //}
2714
2715     for (unsigned i = 0; i != to_keep.size(); ++i) {
2716       COUT << oconv(to_keep[i]->word);
2717       if (to_keep[i]->aff[0]) {
2718         COUT << '/';
2719         COUT << oconv(to_keep[i]->aff);
2720       }
2721       COUT << '\n';
2722     }
2723     for (unsigned i = 0; i != to_keep_exp.size(); ++i) {
2724       if (!to_keep_exp[i]) {
2725         assert(!entries[i]->aff);
2726         COUT.printf("%s\n", oconv(entries[i]->word));
2727       }
2728     }
2729   }
2730
2731   p = table.begin();
2732   for (; p != end; ++p)
2733   {
2734     if (p->aff) free(p->aff);
2735     p->aff = 0;
2736   }
2737 }
2738
2739
2740 //////////////////////////
2741 //
2742 // dump affix
2743 //
2744
2745 void dump_affix()
2746 {
2747   FStream in;
2748   EXIT_ON_ERR(aspeller::open_affix_file(*options, in));
2749
2750   String line;
2751   while (in.getline(line))
2752     COUT << line << '\n';
2753 }
2754
2755
2756
2757 ///////////////////////////////////////////////////////////////////////
2758
2759
2760 ///////////////////////////
2761 //
2762 // print_help
2763 //
2764
2765 void print_help_line(char abrv, char dont_abrv, const char * name,
2766                      KeyInfoType type, const char * desc, bool no_dont = false)
2767 {
2768   String command;
2769   if (abrv != '\0') {
2770     command += '-';
2771     command += abrv;
2772     if (dont_abrv != '\0') {
2773       command += '|';
2774       command += '-';
2775       command += dont_abrv;
2776     }
2777     command += ',';
2778   }
2779   command += "--";
2780   if (type == KeyInfoBool && !no_dont) command += "[dont-]";
2781   if (type == KeyInfoList) command += "add|rem-";
2782   command += name;
2783   if (type == KeyInfoString || type == KeyInfoList)
2784     command += "=<str>";
2785   if (type == KeyInfoInt)
2786     command += "=<int>";
2787   const char * tdesc = _(desc);
2788   printf("  %-27s %s\n", command.c_str(), tdesc); // FIXME: consider word wrapping
2789 }
2790
// Declaration only; the definition is not in the part of the file shown
// here.  NOTE(review): presumably provided by the library's filter
// loading code -- confirm before relying on it.
namespace acommon {
  PosibErr<ConfigModule *> get_dynamic_filter(Config * config, ParmStr value);
}
2794
2795 static const char * usage_text[] =
2796 {
2797   /* TRANSLATORS: These should all be formated to fit in 80 column or
2798      less */
2799   N_("Usage: aspell [options] <command>"),
2800   N_("<command> is one of:"),
2801   N_("  -?|usage         display a brief usage message"),
2802   N_("  help             display a detailed help message"),
2803   N_("  -c|check <file>  to check a file"),
2804   N_("  -a|pipe          \"ispell -a\" compatibility mode"),
2805   N_("  [dump] config    dumps the current configuration to stdout"),
2806   N_("  config <key>     prints the current value of an option"),
2807   N_("  [dump] dicts | filters | modes"),
2808   N_("    lists available dictionaries / filters / filter modes"),
2809   N_("[options] is any of the following:")
2810 };
2811 static const unsigned usage_text_size = sizeof(usage_text)/sizeof(const char *);
2812
2813 static const char * help_text[] =
2814 {
2815   usage_text[0],
2816   "",
2817   usage_text[1],
2818   usage_text[2],
2819   usage_text[3],
2820   usage_text[4],
2821   usage_text[5],
2822   N_("  list             produce a list of misspelled words from standard input"),
2823   usage_text[6],
2824   usage_text[7],
2825   N_("  soundslike       returns the sounds like equivalent for each word entered"),
2826   N_("  munch            generate possible root words and affixes"),
2827   N_("  expand [1-4]     expands affix flags"),
2828   N_("  clean [strict]   cleans a word list so that every line is a valid word"),
2829   //N_("  filter           passes standard input through filters"),
2830   N_("  -v|version       prints a version line"),
2831   N_("  munch-list [simple] [single|multi] [keep]"),
2832   N_("    reduce the size of a word list via affix compression"),
2833   N_("  conv <from> <to> [<norm-form>]"),
2834   N_("    converts from one encoding to another"),
2835   N_("  norm (<norm-map> | <from> <norm-map> <to>) [<norm-form>]"),
2836   N_("    perform Unicode normalization"),
2837   usage_text[8],
2838   usage_text[9],
2839   N_("  dump|create|merge master|personal|repl [<name>]"),
2840   N_("    dumps, creates or merges a master, personal, or replacement dictionary."),
2841   "",
2842   /* TRANSLATORS: "none", "internal" and "strict" are literal values
2843      and should not be translated. */
2844   N_("  <norm-form>      normalization form to use, either none, internal, or strict"),
2845   "",
2846   usage_text[10],
2847   ""
2848 };
2849 static const unsigned help_text_size = sizeof(help_text)/sizeof(const char *);
2850
2851 void print_help (bool verbose) {
2852   load_all_filters(options);
2853   if (verbose) {
2854     printf(_("\n"
2855              "Aspell %s.  Copyright 2000-2011 by Kevin Atkinson.\n"
2856              "\n"), VERSION);
2857     for (unsigned i = 0; i < help_text_size; ++i)
2858       puts(gt_(help_text[i]));
2859   } else {
2860     for (unsigned i = 0; i < usage_text_size; ++i)
2861       puts(gt_(usage_text[i]));
2862   }
2863   StackPtr<KeyInfoEnumeration> els(options->possible_elements(true,false));
2864   const KeyInfo * k;
2865   while (k = els->next(), k) {
2866     if (k->desc == 0 || k->flags & KEYINFO_HIDDEN) continue;
2867     if (!verbose && !(k->flags & KEYINFO_COMMON)) continue;
2868     const PossibleOption * o = find_option(k->name);
2869     const char * name = k->name;
2870     print_help_line(o->abrv,
2871                     strncmp((o+1)->name, "dont-", 5) == 0 ? (o+1)->abrv : '\0',
2872                     name, k->type, k->desc);
2873     if (verbose && strcmp(name, "mode") == 0) {
2874       for (const ModeAbrv * j = mode_abrvs;
2875            j != mode_abrvs_end;
2876            ++j)
2877       {
2878         print_help_line(j->abrv, '\0', j->mode, KeyInfoBool, j->desc, true);
2879       }
2880     }
2881   }
2882
2883   if (verbose) {
2884     //
2885     putchar('\n');
2886     putchar('\n');
2887     puts(
2888       _("Available Dictionaries:\n"
2889         "    Dictionaries can be selected directly via the \"-d\" or \"master\"\n"
2890         "    option.  They can also be selected indirectly via the \"lang\",\n"
2891         "    \"variety\", and \"size\" options.\n"));
2892
2893     const DictInfoList * dlist = get_dict_info_list(options);
2894
2895     StackPtr<DictInfoEnumeration> dels(dlist->elements());
2896
2897     const DictInfo * entry;
2898
2899     while ( (entry = dels->next()) != 0)
2900     {
2901       printf("  %s\n", entry->name);
2902     }
2903
2904
2905     //
2906     putchar('\n');
2907     putchar('\n');
2908     fputs(
2909       _("Available Filters (and associated options):\n"
2910         "    Filters can be added or removed via the \"filter\" option.\n"),
2911       stdout);
2912     for (Vector<ConfigModule>::const_iterator m = options->filter_modules.begin();
2913          m != options->filter_modules.end();
2914          ++m)
2915     {
2916       printf(_("\n  %s filter: %s\n"), m->name, gt_(m->desc));
2917       for (k = m->begin; k != m->end; ++k) {
2918         const PossibleOption * o = find_option(k->name);
2919         const char * name = k->name;
2920         const KeyInfo * ok = options->keyinfo(name + 2);
2921         if (k == ok) name += 2;
2922         print_help_line(o->abrv,
2923                         strncmp((o+1)->name, "dont-", 5) == 0 ? (o+1)->abrv : '\0',
2924                         name, k->type, k->desc);
2925       }
2926     }
2927
2928     //
2929     putchar('\n');
2930     putchar('\n');
2931     puts(
2932       /* TRANSLATORS: This should be formated to fit in 80 column or less */
2933       _("Available Filter Modes:\n"
2934         "    Filter Modes are reconfigured combinations of filters optimized for\n"
2935         "    files of a specific type. A mode is selected via the \"mode\" option.\n"
2936         "    This will happen implicitly if Aspell is able to identify the file\n"
2937         "    type from the extension, and possibility the contents, of the file.\n"));
2938
2939     EXIT_ON_ERR_SET(available_filter_modes(options), StringPairEnumeration *, els);
2940     StringPair sp;
2941     while (!els->at_end()) {
2942       sp = els->next();
2943       printf("  %-14s %s\n", sp.first, gt_(sp.second));
2944     }
2945     delete els;
2946   }
2947 }
2948