prog/aspell.cpp

   1 // This file is part of The New Aspell Copyright (C)
   2 // 2002,2003,2004,2011,2019 by Kevin Atkinson under the GNU LGPL license
   3 // version 2.0 or 2.1.  You should have received a copy of the LGPL
   4 // license along with this library if you did not you can find it at
   5 // http://www.gnu.org/.
   6
   //
   // NOTE: This program currently uses a very ugly mix of the internal
   //       API and the external C interface.  The eventual goal is to
   //       use only the external C++ interface; however, the external
   //       C++ interface is currently incomplete.  The C interface is
   //       used in some places because without it the strings will not
   //       get converted properly when the encoding is not the same as
   //       the internal encoding used by Aspell.
   //
  16
  17 #include <ctype.h>
  18 #include "settings.h"
  19
  20 #ifdef USE_LOCALE
  21 # include <locale.h>
  22 #endif
  23
  24 #ifdef HAVE_LANGINFO_CODESET
  25 # include <langinfo.h>
  26 #endif
  27
  28 #include "aspell.h"
  29
  30 #include <sys/types.h>
  31 #include <sys/stat.h>
  32
  33 #ifdef USE_FILE_INO
  34 # include <unistd.h>
  35 # include <fcntl.h>
  36 #endif
  37
  38 #include "asc_ctype.hpp"
  39 #include "check_funs.hpp"
  40 #include "config.hpp"
  41 #include "convert.hpp"
  42 #include "document_checker.hpp"
  43 #include "enumeration.hpp"
  44 #include "errors.hpp"
  45 #include "file_util.hpp"
  46 #include "fstream.hpp"
  47 #include "info.hpp"
  48 #include "iostream.hpp"
  49 #include "posib_err.hpp"
  50 #include "speller.hpp"
  51 #include "stack_ptr.hpp"
  52 #include "string_enumeration.hpp"
  53 #include "string_map.hpp"
  54 #include "word_list.hpp"
  55
  56 #include "string_list.hpp"
  57 #include "speller_impl.hpp"
  58 #include "data.hpp"
  59
  60 #include "hash-t.hpp"
  61 #include "hash_fun.hpp"
  62
  63 #include "gettext.h"
  64
  65 using namespace acommon;
  66
  67 using aspeller::Conv;
  68
  69 // action functions declarations
  70
  // Actions that main() dispatches to directly by action name.
  void print_ver();
  void print_help(bool verbose = false);
  void check();
  void pipe();
  void list();
  void convt();
  void normlz();
  void filter();
  void clean();
  void soundslike();
  void munch();
  void expand();
  void combine();
  void munch_list();

  // Actions reachable both by name and as the target of
  // "dump" / "create" / "merge".
  void config();
  void dicts();
  void modes();
  void filters();

  // Actions only reachable as the target of "dump" / "create" / "merge".
  void master();
  void personal();
  void repl();
  void dump_affix();
  95
  96 void print_error(ParmString msg)
  97 {
  98   CERR.printf(_("Error: %s\n"), msg.str());
  99 }
 100
 101 void print_error(ParmString msg, ParmString str)
 102 {
 103   CERR.put(_("Error: "));
 104   CERR.printf(msg.str(), str.str());
 105   CERR.put('\n');
 106 }
 107
 // Evaluate COMMAND; if it reports an error, print the message and
 // terminate the program with exit status 1.
 #define EXIT_ON_ERR(command) \
   do { \
     PosibErrBase res_(command); \
     if (res_.has_err()) { \
       print_error(res_.get_err()->mesg); \
       exit(1); \
     } \
   } while (false)

 // Declare "TYPE VAR;" in the enclosing scope and assign it the data
 // produced by COMMAND.  On error, print the message and exit(1).
 #define EXIT_ON_ERR_SET(command, type, var) \
   type var; \
   do { \
     PosibErr< type > res_(command); \
     if (!res_.has_err()) { \
       var = res_.data; \
     } else { \
       print_error(res_.get_err()->mesg); \
       exit(1); \
     } \
   } while (false)

 // Evaluate COMMAND; if it reports an error, print the message.
 // NOTE: the "break" only leaves the do/while(false) that belongs to
 // the macro itself, so execution always resumes at the statement
 // following the macro invocation.
 #define BREAK_ON_ERR(command) \
   do { \
     PosibErrBase res_(command); \
     if (res_.has_err()) { \
       print_error(res_.get_err()->mesg); \
       break; \
     } \
   } while (false)

 // Declare "TYPE VAR;" in the enclosing scope and assign it the data
 // produced by COMMAND.  On error the message is printed and VAR is
 // left as declared.  As with BREAK_ON_ERR, the "break" only leaves
 // the macro's own do/while(false).
 #define BREAK_ON_ERR_SET(command, type, var) \
   type var; \
   do { \
     PosibErr< type > res_(command); \
     if (!res_.has_err()) { \
       var = res_.data; \
     } else { \
       print_error(res_.get_err()->mesg); \
       break; \
     } \
   } while (false)
 128
 129
 130 /////////////////////////////////////////////////////////
 131 //
 132 // Command line options functions and classes
 133 // (including main)
 134 //
 135
 136 typedef Vector<String> Args;
 137 typedef Config         Options;
 138 enum Action {do_create, do_merge, do_dump, do_test, do_other};
 139
 140 Args              args;
 141 StackPtr<Options> options;
 142 Action            action  = do_other;
 143
 // One entry of the command line option table.
 struct PossibleOption {
   const char * name;       // long name; "" for abbreviation-only entries
   char         abrv;       // one-letter abbreviation, '\0' if none
   int          num_arg;    // number of parameters; main() uses -1 for
                            // "optional" on options it does not know
   bool         is_command; // true if the entry names an action
 };

 // Aggregate initializers for PossibleOption; the field order is fixed
 // in one place by POSSIBLE_OPTION.
 #define POSSIBLE_OPTION(name,abrv,num,cmd) {name,abrv,num,cmd}
 #define OPTION(name,abrv,num)              POSSIBLE_OPTION(name,abrv,num,false)
 #define COMMAND(name,abrv,num)             POSSIBLE_OPTION(name,abrv,num,true)
 #define ISPELL_COMP(abrv,num)              POSSIBLE_OPTION("",abrv,num,false)
 154
 155 const PossibleOption possible_options[] = {
 156   OPTION("master",           'd', 1),
 157   OPTION("personal",         'p', 1),
 158   OPTION("ignore",           'W', 1),
 159   OPTION("lang",             'l', 1),
 160   OPTION("backup",           'b', 0),
 161   OPTION("dont-backup",      'x', 0),
 162   OPTION("run-together",     'C', 0),
 163   OPTION("dont-run-together",'B', 0),
 164   OPTION("guess",            'm', 0),
 165   OPTION("dont-guess",       'P', 0),
 166
 167   COMMAND("usage",     '?',  0),
 168   COMMAND("help",      '\0', 0),
 169   COMMAND("version",   'v',  0),
 170   COMMAND("config",    '\0', 0),
 171   COMMAND("dicts",     '\0', 0),
 172   COMMAND("check",     'c',  0),
 173   COMMAND("pipe",      'a',  0),
 174   COMMAND("list",      '\0', 0),
 175   COMMAND("conv",      '\0', 2),
 176   COMMAND("norm",      '\0', 1),
 177   COMMAND("filter",    '\0', 0),
 178   COMMAND("soundslike",'\0', 0),
 179   COMMAND("munch",     '\0', 0),
 180   COMMAND("expand",    '\0', 0),
 181   COMMAND("combine",   '\0', 0),
 182   COMMAND("munch-list",'\0', 0),
 183   COMMAND("clean",     '\0', 0),
 184   COMMAND("filters",   '\0', 0),
 185   COMMAND("modes",     '\0', 0),
 186
 187   COMMAND("dump",   '\0', 1),
 188   COMMAND("create", '\0', 1),
 189   COMMAND("merge",  '\0', 1),
 190
 191   ISPELL_COMP('S',0), ISPELL_COMP('w',1), ISPELL_COMP('T',1),
 192
 193   {"",'\0'}, {"",'\0'}
 194 };
 195
 196 const PossibleOption * possible_options_end = possible_options + sizeof(possible_options)/sizeof(PossibleOption) - 2;
 197
 198 struct ModeAbrv {
 199   char abrv;
 200   const char * mode;
 201   const char * desc;
 202 };
 203 static const ModeAbrv mode_abrvs[] = {
 204   {'e', "mode=email", N_("enter Email mode.")},
 205   {'H', "mode=html",  N_("enter HTML mode.")},
 206   {'t', "mode=tex",   N_("enter TeX mode.")},
 207   {'n', "mode=nroff", N_("enter Nroff mode.")}
 208 };
 209
 210 static const ModeAbrv *  mode_abrvs_end = mode_abrvs + 4;
 211
 212 const PossibleOption * find_option(char c) {
 213   const PossibleOption * i = possible_options;
 214   while (i != possible_options_end && i->abrv != c)
 215     ++i;
 216   return i;
 217 }
 218
 // Compare the character range [begin, end) with the NUL-terminated
 // string OTHER.  Returns true only if both have the same length and
 // the same characters.
 //
 // The scan stops as soon as OTHER's terminator is reached, so OTHER is
 // never read past its end even when the range itself contains a NUL
 // character.
 static inline bool str_equal(const char * begin, const char * end,
                              const char * other)
 {
   while (begin != end && *other != '\0' && *begin == *other)
     ++begin, ++other;
   return begin == end && *other == '\0';
 }
 226
 227 static const PossibleOption * find_option(const char * begin, const char * end) {
 228   const PossibleOption * i = possible_options;
 229   while (i != possible_options_end
 230          && !str_equal(begin, end, i->name))
 231     ++i;
 232   return i;
 233 }
 234
 235 static const PossibleOption * find_option(const char * str) {
 236   const PossibleOption * i = possible_options;
 237   while (i != possible_options_end
 238          && strcmp(str, i->name) != 0)
 239     ++i;
 240   return i;
 241 }
 242
 // Switch stdin and stdout to line buffering (not done on WIN32).
 //
 // The setvbuf() calls are made unconditionally and only their results
 // are checked with assert(): an expression inside assert() is not
 // evaluated at all when NDEBUG is defined, which would silently skip
 // the buffering setup in such builds.
 static void line_buffer() {
 #ifndef WIN32
   // set up stdin and stdout to be line buffered
   const int in_status  = setvbuf(stdin, 0, _IOLBF, 0);
   const int out_status = setvbuf(stdout, 0, _IOLBF, 0);
   assert(in_status == 0);
   assert(out_status == 0);
   // keep the variables "used" when the asserts compile to nothing
   (void)in_status;
   (void)out_status;
 #endif
 }
 250
 // Display converters, configured by setup_display_conv(): dconv is set
 // up with the document encoding first and the display encoding second,
 // uiconv with the same two encodings in the opposite order.
 Conv dconv;
 Conv uiconv;
 253
 254 int main (int argc, const char *argv[])
 255 {
 256   options = new_config(); // this needs to be here because of a bug
 257                           // with static initlizers on Darwin.
 258 #ifdef USE_LOCALE
 259   setlocale (LC_ALL, "");
 260 #endif
 261   aspell_gettext_init();
 262
 263   options->set_committed_state(false);
 264
 265   if (argc == 1) {print_help(); return 0;}
 266
 267   int i = 1;
 268   const PossibleOption * o;
 269   const char           * parm;
 270
 271   //
 272   // process command line options by setting the appropriate options
 273   // in "options" and/or pushing non-options onto "argv"
 274   //
 275   PossibleOption other_opt = OPTION("",'\0',0);
 276   String option_name;
 277   while (i != argc) {
 278     if (argv[i][0] == '-') {
 279       bool have_parm = false;
 280       if (argv[i][1] == '-') {
 281         // a long arg
 282         const char * c = argv[i] + 2;
 283         while(*c != '=' && *c != '\0') ++c;
 284         o = find_option(argv[i] + 2, c);
 285         if (o == possible_options_end) {
 286           option_name.assign(argv[i] + 2, c - argv[i] - 2);
 287           other_opt.name    = option_name.c_str();
 288           other_opt.num_arg = -1;
 289           o = &other_opt;
 290         }
 291         if (*c == '=') {have_parm = true; ++c;}
 292         parm = c;
 293       } else {
 294         // a short arg
 295         const ModeAbrv * j = mode_abrvs;
 296         while (j != mode_abrvs_end && j->abrv != argv[i][1]) ++j;
 297         if (j == mode_abrvs_end) {
 298           o = find_option(argv[i][1]);
 299           if (argv[i][1] == 'v' && argv[i][2] == 'v')
 300             // Hack for -vv
 301             parm = argv[i] + 3;
 302           else
 303             parm = argv[i] + 2;
 304         } else { // mode option
 305           other_opt.name = "mode";
 306           other_opt.num_arg = 1;
 307           o = &other_opt;
 308           parm = j->mode + 5;
 309         }
 310         if (*parm) have_parm = true;
 311       }
 312       if (o == possible_options_end) {
 313         print_error(_("Invalid Option: %s"), argv[i]);
 314         return 1;
 315       }
 316       int num_parms;
 317       if (o->num_arg == 0) {
 318         num_parms = 0;
 319         if (parm[0] != '\0') {
 320           print_error(_(" does not take any parameters."),
 321                       String(argv[i], parm - argv[i]));
 322           return 1;
 323         }
 324         i += 1;
 325       } else if (have_parm) {
 326         num_parms = 1;
 327         i += 1;
 328       } else if (i + 1 == argc || argv[i+1][0] == '-') {
 329         if (o->num_arg == -1) {
 330           num_parms = 0;
 331           i += 1;
 332         } else {
 333           print_error(_("You must specify a parameter for \"%s\"."), argv[i]);
 334           return 1;
 335         }
 336       } else {
 337         num_parms = o->num_arg;
 338         parm = argv[i + 1];
 339         i += 2;
 340       }
 341       if (o->is_command) {
 342         args.push_back(o->name);
 343         if (o->num_arg == 1)
 344           args.push_back(parm);
 345       } else if (o->name[0] != '\0') {
 346         Config::Entry * entry = new Config::Entry;
 347         entry->key = o->name;
 348         entry->value = parm;
 349         entry->need_conv = true;
 350         if (num_parms == -1) {
 351           entry->place_holder = args.size();
 352           args.push_back(parm);
 353         }
 354         options->set(entry);
 355       }
 356     } else {
 357       args.push_back(argv[i]);
 358       i += 1;
 359     }
 360   }
 361
 362   options->read_in_settings();
 363
 364   const char * codeset = 0;
 365 #ifdef HAVE_LANGINFO_CODESET
 366   codeset = nl_langinfo(CODESET);
 367   if (ascii_encoding(*options, codeset)) codeset = 0;
 368 #endif
 369
 370 // #ifdef USE_LOCALE
 371 //   if (!options->have("encoding") && codeset)
 372 //     EXIT_ON_ERR(options->replace("encoding", codeset));
 373 // #endif
 374
 375   Vector<int> to_remove;
 376   EXIT_ON_ERR(options->commit_all(&to_remove, codeset));
 377   for (int i = to_remove.size() - 1; i >= 0; --i) {
 378     args.erase(args.begin() + to_remove[i]);
 379   }
 380
 381   if (args.empty()) {
 382     print_error(_("You must specify an action"));
 383     return 1;
 384   }
 385
 386   String action_str = args.front();
 387   args.pop_front();
 388   const PossibleOption * action_opt = find_option(action_str.str());
 389   if (!action_opt->is_command) {
 390     print_error(_("Unknown Action: %s"),  action_str);
 391     return 1;
 392   } else if (action_opt->num_arg == 1 && args.empty()) {
 393     print_error(_("You must specify a parameter for \"%s\"."), action_str);
 394     return 1;
 395   } else if (action_opt->num_arg > (int)args.size()) {
 396     CERR.printf(_("Error: You must specify at least %d parameters for \"%s\".\n"),
 397                 action_opt->num_arg, action_str.str());
 398     return 1;
 399   }
 400
 401   //
 402   // perform the requested action
 403   //
 404   if (action_str == "usage")
 405     print_help();
 406   else if (action_str == "help")
 407     print_help(true);
 408   else if (action_str == "version")
 409     print_ver();
 410   else if (action_str == "config")
 411     config();
 412   else if (action_str == "dicts")
 413     dicts();
 414   else if (action_str == "check")
 415     check();
 416   else if (action_str == "pipe")
 417     pipe();
 418   else if (action_str == "list")
 419     list();
 420   else if (action_str == "conv")
 421     convt();
 422   else if (action_str == "norm")
 423     normlz();
 424   else if (action_str == "filter")
 425     filter();
 426   else if (action_str == "soundslike")
 427     soundslike();
 428   else if (action_str == "munch")
 429     munch();
 430   else if (action_str == "expand")
 431     expand();
 432   else if (action_str == "combine")
 433     combine();
 434   else if (action_str == "munch-list")
 435     munch_list();
 436   else if (action_str == "clean")
 437     clean();
 438   else if (action_str == "filters")
 439     filters();
 440   else if (action_str == "modes")
 441     modes();
 442   else if (action_str == "dump")
 443     action = do_dump;
 444   else if (action_str == "create")
 445     action = do_create;
 446   else if (action_str == "merge")
 447     action = do_merge;
 448   else
 449     abort(); // this should not happen
 450
 451   if (action != do_other) {
 452     if (args.empty()) {
 453       print_error(_("Unknown Action: %s"),  action_str);
 454       return 1;
 455     }
 456     String what_str = args.front();
 457     args.pop_front();
 458     if (what_str == "config")
 459       config();
 460     else if (what_str == "dicts")
 461       dicts();
 462     else if (what_str == "filters")
 463       filters();
 464     else if (what_str == "modes")
 465       modes();
 466     else if (what_str == "master")
 467       master();
 468     else if (what_str == "personal")
 469       personal();
 470     else if (what_str == "repl")
 471       repl();
 472     else if (what_str == "affix")
 473       dump_affix();
 474     else {
 475       print_error(_("Unknown Action: %s"),
 476                   String(action_str + " " + what_str));
 477       return 1;
 478     }
 479   }
 480
 481   return 0;
 482
 483 }
 484
 485
 486 /////////////////////////////////////////////////////////
 487 //
 488 // Action Functions
 489 //
 490 //
 491
 492
 493 static Convert * setup_conv(const aspeller::Language * lang,
 494                                       Config * config)
 495 {
 496   if (config->retrieve("encoding") != "none") {
 497     PosibErr<Convert *> pe = new_convert_if_needed(*config,
 498                                                    lang->charmap(),
 499                                                    config->retrieve("encoding"),
 500                                                    NormTo);
 501     if (pe.has_err()) {print_error(pe.get_err()->mesg); exit(1);}
 502     return pe.data;
 503   } else {
 504     return 0;
 505   }
 506 }
 507
 508 static Convert * setup_conv(Config * config,
 509                             const aspeller::Language * lang)
 510 {
 511   if (config->retrieve("encoding") != "none") {
 512     PosibErr<Convert *> pe = new_convert_if_needed(*config,
 513                                                    config->retrieve("encoding"),
 514                                                    lang->charmap(),
 515                                                    NormFrom);
 516     if (pe.has_err()) {print_error(pe.get_err()->mesg); exit(1);}
 517     return pe.data;
 518   } else {
 519     return 0;
 520   }
 521 }
 522
 523 void setup_display_conv()
 524 {
 525   const char * gettext_enc = 0;
 526   const char * env_enc = 0;
 527   String doc_enc = options->retrieve("encoding");
 528   String enc;
 529 #ifdef ENABLE_NLS
 530   gettext_enc = bind_textdomain_codeset("aspell", 0);
 531   if (ascii_encoding(*options,gettext_enc)) gettext_enc = 0;
 532 #endif
 533 #ifdef HAVE_LANGINFO_CODESET
 534   env_enc = nl_langinfo(CODESET);
 535   if (ascii_encoding(*options, env_enc)) env_enc = 0;
 536 #endif
 537   if (gettext_enc && env_enc && strcmp(gettext_enc,env_enc) != 0)
 538   {
 539     fputs(("Error: bind_textdomain_codeset != nl_langinfo(CODESET)\n"), stderr);
 540     exit(-1);
 541   }
 542   if (gettext_enc)
 543     enc = gettext_enc;
 544   else if (env_enc)
 545     enc = env_enc;
 546   else
 547     enc = doc_enc;
 548
 549   EXIT_ON_ERR(dconv.setup(*options, doc_enc, enc, NormNone));
 550   EXIT_ON_ERR(uiconv.setup(*options, enc, doc_enc, NormNone));
 551 }
 552
 553
 554 ///////////////////////////
 555 //
 556 // config
 557 //
 558
 559 void config ()
 560 {
 561   if (args.size() == 0) {
 562     load_all_filters(options);
 563     options->write_to_stream(COUT);
 564   } else {
 565     EXIT_ON_ERR_SET(options->retrieve_any(args[0]), String, value);
 566     COUT << value << "\n";
 567   }
 568 }
 569
 570 ///////////////////////////
 571 //
 572 // dicts
 573 //
 574
 575 void dicts()
 576 {
 577   const DictInfoList * dlist = get_dict_info_list(options);
 578
 579   StackPtr<DictInfoEnumeration> dels(dlist->elements());
 580
 581   const DictInfo * entry;
 582
 583   while ( (entry = dels->next()) != 0)
 584     puts(entry->name);
 585 }
 586
 587 ///////////////////////////
 588 //
 589 // list available (filters/filter modes)
 590 //
 591
 592 void list_available(PosibErr<StringPairEnumeration *> (*fun)(Config *))
 593 {
 594   EXIT_ON_ERR_SET(fun(options), StringPairEnumeration *, els);
 595   StringPair sp;
 596   while (!els->at_end()) {
 597     sp = els->next();
 598     printf("%-14s %s\n", sp.first, gt_(sp.second));
 599   }
 600   delete els;
 601 }
 602
 603 void filters()
 604 {
 605   load_all_filters(options);
 606   list_available(available_filters);
 607 }
 608
 609 void modes()
 610 {
 611   list_available(available_filter_modes);
 612 }
 613
 614 ///////////////////////////
 615 //
 616 // pipe
 617 //
 618
 619 // precond: strlen(str) > 0
 620 char * trim_wspace (char * str)
 621 {
 622   int last = strlen(str) - 1;
 623   while (asc_isspace(str[0])) {
 624     ++str;
 625     --last;
 626   }
 627   while (last > 0 && asc_isspace(str[last])) {
 628     --last;
 629   }
 630   str[last + 1] = '\0';
 631   return str;
 632 }
 633
 634 bool get_word_pair(char * line, char * & w1, char * & w2)
 635 {
 636   w2 = strchr(line, ',');
 637   if (!w2) {
 638     print_error(_("Invalid Input"));
 639     return false;
 640   }
 641   *w2 = '\0';
 642   ++w2;
 643   w1 = trim_wspace(line);
 644   w2 = trim_wspace(w2);
 645   return true;
 646 }
 647
 648 void print_elements(const AspellWordList * wl) {
 649   AspellStringEnumeration * els = aspell_word_list_elements(wl);
 650   int count = 0;
 651   const char * w;
 652   String line;
 653   while ( (w = aspell_string_enumeration_next(els)) != 0 ) {
 654     ++count;
 655     line += w;
 656     line += ", ";
 657   }
 658   line.resize(line.size() - 2);
 659   COUT.printf("%u: %s\n", count, line.c_str());
 660 }
 661
 662 struct StatusFunInf
 663 {
 664   aspeller::SpellerImpl * real_speller;
 665   Conv oconv;
 666   bool verbose;
 667   StatusFunInf(Convert * c) : oconv(c) {}
 668 };
 669
 670 void status_fun(void * d, Token, int correct)
 671 {
 672   StatusFunInf * p = static_cast<StatusFunInf *>(d);
 673   if (p->verbose && correct) {
 674     const CheckInfo * ci = p->real_speller->check_info();
 675     if (ci->compound)
 676       COUT.put("-\n");
 677     else if (ci->pre_flag || ci->suf_flag)
 678       COUT.printf("+ %s\n", p->oconv(ci->word.str()));
 679     else
 680       COUT.put("*\n");
 681   }
 682 }
 683
 684 DocumentChecker * new_checker(AspellSpeller * speller,
 685                               StatusFunInf & status_fun_inf)
 686 {
 687   EXIT_ON_ERR_SET(new_document_checker(reinterpret_cast<Speller *>(speller)),
 688                   StackPtr<DocumentChecker>, checker);
 689   checker->set_status_fun(status_fun, &status_fun_inf);
 690   return checker.release();
 691 }
 692
 // Print the pending error of the variable "speller" at the point of
 // use, if there is one.  The "break" only leaves the macro's own
 // do/while(false), so execution resumes at the following statement.
 #define BREAK_ON_SPELLER_ERR \
   do { \
     if (aspell_speller_error(speller)) { \
       print_error(aspell_speller_error_message(speller)); \
       break; \
     } \
   } while (false)
 697
 698 void pipe()
 699 {
 700   line_buffer();
 701
 702   bool terse_mode = true;
 703   bool do_time = options->retrieve_bool("time");
 704   bool suggest = options->retrieve_bool("suggest");
 705   bool include_guesses = options->retrieve_bool("guess");
 706   clock_t start,finish;
 707
 708   if (!options->have("mode") && !options->have("filter")) {
 709     PosibErrBase err(options->replace("mode", "nroff"));
 710     if (err.has_err())
 711       CERR.printf(_("WARNING: Unable to enter Nroff mode: %s\n"),
 712                   err.get_err()->mesg);
 713   }
 714
 715   start = clock();
 716
 717   AspellCanHaveError * ret
 718     = new_aspell_speller(reinterpret_cast<AspellConfig *>(options.get()));
 719   if (aspell_error(ret)) {
 720     print_error(aspell_error_message(ret));
 721     exit(1);
 722   }
 723   AspellSpeller * speller = to_aspell_speller(ret);
 724   aspeller::SpellerImpl * real_speller = reinterpret_cast<aspeller::SpellerImpl *>(speller);
 725   Config * config = real_speller->config();
 726   Conv iconv(setup_conv(config, &real_speller->lang()));
 727   Conv oconv(setup_conv(&real_speller->lang(), config));
 728   MBLen mb_len;
 729   if (!config->retrieve_bool("byte-offsets"))
 730     mb_len.setup(*config, config->retrieve("encoding"));
 731   if (do_time)
 732     COUT << _("Time to load word list: ")
 733          << (clock() - start)/(double)CLOCKS_PER_SEC << "\n";
 734   StatusFunInf status_fun_inf(setup_conv(&real_speller->lang(), config));
 735   status_fun_inf.real_speller = real_speller;
 736   bool & print_star = status_fun_inf.verbose;
 737   print_star = true;
 738   StackPtr<DocumentChecker> checker(new_checker(speller, status_fun_inf));
 739   int c;
 740   const char * w;
 741   CharVector buf;
 742   char * line;
 743   char * line0;
 744   char * word;
 745   char * word2;
 746   int    ignore;
 747   PosibErrBase err;
 748
 749   print_ver();
 750
 751   for (;;) {
 752     buf.clear();
 753     fflush(stdout);
 754     while (c = getchar(), c != '\n' && c != EOF)
 755       buf.push_back(static_cast<char>(c));
 756     buf.push_back('\n'); // always add new line so strlen > 0
 757     buf.push_back('\0');
 758     line = buf.data();
 759     ignore = 0;
 760     switch (line[0]) {
 761     case '\n':
 762       if (c != EOF) continue;
 763       else          break;
 764     case '*':
 765       word = trim_wspace(line + 1);
 766       aspell_speller_add_to_personal(speller, word, -1);
 767       BREAK_ON_SPELLER_ERR;
 768       break;
 769     case '&':
 770       word = trim_wspace(line + 1);
 771       aspell_speller_add_to_personal
 772         (speller,
 773          real_speller->to_lower(word), -1);
 774       BREAK_ON_SPELLER_ERR;
 775       break;
 776     case '@':
 777       word = trim_wspace(line + 1);
 778       aspell_speller_add_to_session(speller, word, -1);
 779       BREAK_ON_SPELLER_ERR;
 780       break;
 781     case '#':
 782       aspell_speller_save_all_word_lists(speller);
 783       BREAK_ON_SPELLER_ERR;
 784       break;
 785     case '+':
 786       word = trim_wspace(line + 1);
 787       err = config->replace("mode", word);
 788       if (err.get_err())
 789         config->replace("mode", "tex");
 790       reload_filters(real_speller);
 791       checker.del();
 792       checker = new_checker(speller, status_fun_inf);
 793       break;
 794     case '-':
 795       config->remove("filter");
 796       reload_filters(real_speller);
 797       checker.del();
 798       checker = new_checker(speller, status_fun_inf);
 799       break;
 800     case '~':
 801       break;
 802     case '!':
 803       terse_mode = true;
 804       print_star = false;
 805       break;
 806     case '%':
 807       terse_mode = false;
 808       print_star = true;
 809       break;
 810     case '$':
 811       if (line[1] == '$') {
 812         switch(line[2]) {
 813         case 'r':
 814           switch(line[3]) {
 815           case 'a':
 816             if (get_word_pair(line + 4, word, word2))
 817               aspell_speller_store_replacement(speller, word, -1, word2, -1);
 818             break;
 819           }
 820           break;
 821         case 'c':
 822           switch (line[3]) {
 823           case 's':
 824             if (get_word_pair(line + 4, word, word2))
 825               BREAK_ON_ERR(err = config->replace(word, word2));
 826             if (strcmp(word,"suggest") == 0)
 827               suggest = config->retrieve_bool("suggest");
 828             else if (strcmp(word,"time") == 0)
 829               do_time = config->retrieve_bool("time");
 830             else if (strcmp(word,"guess") == 0)
 831               include_guesses = config->retrieve_bool("guess");
 832             break;
 833           case 'r':
 834             word = trim_wspace(line + 4);
 835             BREAK_ON_ERR_SET(config->retrieve(word), String, ret);
 836             COUT.printl(ret);
 837             break;
 838           }
 839           break;
 840         case 'p':
 841           switch (line[3]) {
 842           case 'p':
 843             print_elements(aspell_speller_personal_word_list(speller));
 844             break;
 845           case 's':
 846             print_elements(aspell_speller_session_word_list(speller));
 847             break;
 848           }
 849           break;
 850         case 'l':
 851           COUT.printl(config->retrieve("lang"));
 852           break;
 853         }
 854         break;
 855       } else {
 856         // continue on (no break)
 857       }
 858     case '^':
 859       ignore = 1;
 860     default:
 861       line0 = line;
 862       line += ignore;
 863       checker->process(line, strlen(line));
 864       while (Token token = checker->next_misspelling()) {
 865         word = line + token.offset;
 866         word[token.len] = '\0';
 867         const char * cword = iconv(word);
 868         String guesses, guess;
 869         const CheckInfo * ci = real_speller->check_info();
 870         aspeller::CasePattern casep
 871           = real_speller->lang().case_pattern(cword);
 872         while (ci) {
 873           guess.clear();
 874           if (ci->pre_add && ci->pre_add[0])
 875             guess.append(ci->pre_add, ci->pre_add_len).append('+');
 876           guess.append(ci->word);
 877           if (ci->pre_strip_len > 0)
 878             guess.append('-').append(ci->word.str(), ci->pre_strip_len);
 879           if (ci->suf_strip_len > 0)
 880             guess.append('-').append(ci->word.str() + ci->word.size() - ci->suf_strip_len,
 881                                      ci->suf_strip_len);
 882           if (ci->suf_add && ci->suf_add[0])
 883             guess.append('+').append(ci->suf_add, ci->suf_add_len);
 884           real_speller->lang().fix_case(casep, guess.data(), guess.data());
 885           guesses << ", " << oconv(guess.str());
 886           ci = ci->next;
 887         }
 888         start = clock();
 889         const AspellWordList * suggestions = 0;
 890         if (suggest)
 891           suggestions = aspell_speller_suggest(speller, word, -1);
 892         finish = clock();
 893         unsigned offset = mb_len(line0, token.offset + ignore);
 894         if (suggestions && !aspell_word_list_empty(suggestions))
 895         {
 896           COUT.printf("& %s %u %u:", word,
 897                       aspell_word_list_size(suggestions), offset);
 898           AspellStringEnumeration * els
 899             = aspell_word_list_elements(suggestions);
 900           if (options->retrieve_bool("reverse")) {
 901             Vector<String> sugs;
 902             sugs.reserve(aspell_word_list_size(suggestions));
 903             while ( ( w = aspell_string_enumeration_next(els)) != 0)
 904               sugs.push_back(w);
 905             Vector<String>::reverse_iterator i = sugs.rbegin();
 906             while (true) {
 907               COUT.printf(" %s", i->c_str());
 908               ++i;
 909               if (i == sugs.rend()) break;
 910               COUT.put(',');
 911             }
 912           } else {
 913             while ( ( w = aspell_string_enumeration_next(els)) != 0) {
 914               COUT.printf(" %s%s", w,
 915                           aspell_string_enumeration_at_end(els) ? "" : ",");
 916             }
 917           }
 918           delete_aspell_string_enumeration(els);
 919           if (include_guesses)
 920             COUT.put(guesses);
 921           COUT.put('\n');
 922         } else {
 923           if (guesses.empty())
 924             COUT.printf("# %s %u\n", word, offset);
 925           else
 926             COUT.printf("? %s 0 %u: %s\n", word, offset,
 927                         guesses.c_str() + 2);
 928         }
 929         if (do_time)
 930           COUT.printf(_("Suggestion Time: %f\n"),
 931                       (finish-start)/(double)CLOCKS_PER_SEC);
 932       }
 933       COUT.put('\n');
 934     }
 935     if (c == EOF) break;
 936   }
 937
 938   delete_aspell_speller(speller);
 939 }
 940
 941 ///////////////////////////
 942 //
 943 // check
 944 //
 945
 // The choices a user can make in the check menu.  The enumerator
 // values are used as indexes into Mapping::primary.
 enum UserChoice {
   None,
   Ignore,
   IgnoreAll,
   Replace,
   ReplaceAll,
   Add,
   AddLower,
   Exit,
   Abort
 };
 948
 949 struct Mapping {
 950   char primary[9];
 951   UserChoice reverse[256];
 952   void to_aspell();
 953   void to_ispell();
 954   char & operator[] (UserChoice c) {return primary[c];}
 955   UserChoice & operator[] (char c)
 956     {return reverse[static_cast<unsigned char>(c)];}
 957 };
 958
 // Forward declaration; the definition is not in this part of the file.
 void abort_check();

 // Re-declaration of setup_display_conv(), which is defined earlier in
 // this file.
 void setup_display_conv();
 962
 963 void check()
 964 {
 965   String file_name;
 966   String new_name;
 967   FILE * in = 0;
 968   FILE * out = 0;
 969   Mapping mapping;
 970   bool changed = false;
 971
 972   if (args.size() == 0) {
 973     print_error(_("You must specify a file name."));
 974     exit(-1);
 975   } else if (args.size() > 1) {
 976     print_error(_("Only one file name may be specified."));
 977     exit(-1);
 978   }
 979
 980   file_name = args[0];
 981   new_name = file_name;
 982   new_name += ".new";
 983
 984   in = fopen(file_name.c_str(), "r");
 985   if (!in) {
 986     print_error(_("Could not open the file \"%s\" for reading"), file_name);
 987     exit(-1);
 988   }
 989
 990   if (!options->have("mode"))
 991     EXIT_ON_ERR(set_mode_from_extension(options, file_name));
 992
 993   String m = options->retrieve("keymapping");
 994   if (m == "aspell")
 995     mapping.to_aspell();
 996   else if (m == "ispell")
 997     mapping.to_ispell();
 998   else {
 999     print_error(_("Invalid keymapping: %s"), m);
1000     exit(-1);
1001   }
1002
1003   AspellCanHaveError * ret
1004     = new_aspell_speller(reinterpret_cast<AspellConfig *>(options.get()));
1005   if (aspell_error(ret)) {
1006     print_error(aspell_error_message(ret));
1007     exit(1);
1008   }
1009
1010   {
1011     struct stat st;
1012     fstat(fileno(in), &st);
1013     if (!S_ISREG(st.st_mode)) {
1014       print_error(_("\"%s\" is not a regular file"), file_name);
1015       exit(-1);
1016     }
1017 #ifdef USE_FILE_INO
1018     int fd = open(new_name.c_str(), O_WRONLY | O_CREAT | O_TRUNC, st.st_mode);
1019     if (fd >= 0) out = fdopen(fd, "w");
1020 #else
1021     out = fopen(new_name.c_str(), "w");
1022 #endif
1023   }
1024   if (!out) {
1025     print_error(_("Could not open the file \"%s\" for writing. File not saved."), file_name);
1026     exit(-1);
1027   }
1028
1029   setup_display_conv();
1030
1031   AspellSpeller * speller = to_aspell_speller(ret);
1032
1033   state = new CheckerString(speller,in,out,64);
1034
1035   word_choices = new Choices;
1036
1037   menu_choices = new Choices;
1038   menu_choices->push_back(Choice(mapping[Ignore],     _("Ignore")));
1039   menu_choices->push_back(Choice(mapping[IgnoreAll],  _("Ignore all")));
1040   menu_choices->push_back(Choice(mapping[Replace],    _("Replace")));
1041   menu_choices->push_back(Choice(mapping[ReplaceAll], _("Replace all")));
1042   menu_choices->push_back(Choice(mapping[Add],        _("Add")));
1043   menu_choices->push_back(Choice(mapping[AddLower],   _("Add Lower")));
1044   menu_choices->push_back(Choice(mapping[Abort],      _("Abort")));
1045   menu_choices->push_back(Choice(mapping[Exit],       _("Exit")));
1046
1047   String word0, new_word;
1048   Vector<String> sug_con;
1049   StackPtr<StringMap> replace_list(new_string_map());
1050   const char * w;
1051
1052   begin_check();
1053
1054   while (state->next_misspelling()) {
1055
1056     char * word = state->get_real_word(word0);
1057
1058     //
1059     // check if it is in the replace list
1060     //
1061
1062     if ((w = replace_list->lookup(word)) != 0) {
1063       state->replace(w);
1064       continue;
1065     }
1066
1067     //
1068     // print the line with the misspelled word highlighted;
1069     //
1070
1071     display_misspelled_word();
1072
1073     //
1074     // print the suggestions and menu choices
1075     //
1076
1077     const AspellWordList * suggestions = aspell_speller_suggest(speller, word, -1);
1078     AspellStringEnumeration * els = aspell_word_list_elements(suggestions);
1079     sug_con.resize(0);
1080     while (sug_con.size() != 10
1081            && (w = aspell_string_enumeration_next(els)) != 0)
1082       sug_con.push_back(w);
1083     delete_aspell_string_enumeration(els);
1084
1085     // disable suspend
1086     unsigned int suggestions_size = sug_con.size();
1087     unsigned int suggestions_mid = suggestions_size / 2;
1088     if (suggestions_size % 2) suggestions_mid++; // if odd
1089     word_choices->resize(0);
1090     for (unsigned int j = 0; j != suggestions_mid; ++j) {
1091       word_choices->push_back(Choice('0' + j+1, sug_con[j]));
1092       if (j + suggestions_mid != suggestions_size)
1093         word_choices
1094           ->push_back(Choice(j+suggestions_mid+1 == 10
1095                              ? '0'
1096                              : '0' + j+suggestions_mid+1,
1097                              sug_con[j+suggestions_mid]));
1098     }
1099     //enable suspend
1100     display_menu();
1101
1102   choice_prompt:
1103
1104     prompt("? ");
1105
1106   choice_loop:
1107
1108     //
1109     // Handle the users choice
1110     //
1111
1112     int choice;
1113     get_choice(choice);
1114
1115     if (choice == '0') choice = '9' + 1;
1116
1117     switch (mapping[choice]) {
1118     case Exit:
1119       goto exit_loop;
1120     case Abort: {
1121       prompt(_("Are you sure you want to abort (y/n)? "));
1122       get_choice(choice);
1123       /* TRANSLATORS: The user may input any of these characters to say "yes".
1124          MUST ONLY CONSIST OF ASCII CHARACTERS. */
1125       const char * yes_characters = _("Yy");
1126       if (strchr(yes_characters, choice) != 0)
1127         goto abort_loop;
1128       goto choice_prompt;
1129     }
1130     case Ignore:
1131       break;
1132     case IgnoreAll:
1133       aspell_speller_add_to_session(speller, word, -1);
1134       break;
1135     case Add:
1136       aspell_speller_add_to_personal(speller, word, -1);
1137       break;
1138     case AddLower:
1139     {
1140       // Emulate the c function add_to_personal, but add extra step to
1141       // convert word to lowercase.  Yeah its a bit of a hack.
1142       Speller * sp = reinterpret_cast<Speller *>(speller);
1143       sp->temp_str_0.clear();
1144       sp->to_internal_->convert(word, -1, sp->temp_str_0);
1145       char * lower = sp->to_lower(sp->temp_str_0.mstr());
1146       PosibErr<void> ret = sp->add_to_personal(MutableString(lower));
1147       sp->err_.reset(ret.release_err());
1148       break;
1149     }
1150     case Replace:
1151     case ReplaceAll:
1152       // the string new_word is in the encoding of the document
1153       prompt(_("With: "));
1154       get_line(new_word);
1155       if (new_word.size() == 0)
1156         goto choice_prompt;
1157       if (new_word[0] >= '1' && new_word[0] < (char)suggestions_size + '1')
1158         new_word = sug_con[new_word[0]-'1'];
1159       state->replace(new_word);
1160       changed = true;
1161       if (mapping[choice] == ReplaceAll && (strcmp(word,new_word.str()) != 0))
1162         replace_list->replace(word, new_word);
1163       break;
1164     default:
1165       // the replasments are in the encoding of the document
1166       if (choice >= '1' && choice < (char)suggestions_size + '1') {
1167         state->replace(sug_con[choice-'1']);
1168         changed = true;
1169       } else {
1170         error(_("Sorry that is an invalid choice!"));
1171         goto choice_loop;
1172       }
1173     }
1174   }
1175 exit_loop:
1176   {
1177     aspell_speller_save_all_word_lists(speller);
1178     state.del(); // to close the file handles
1179     delete_aspell_speller(speller);
1180
1181     if (changed) {
1182
1183       bool keep_backup = options->retrieve_bool("backup");
1184       if (keep_backup) {
1185         String backup_name = file_name;
1186         backup_name += ".bak";
1187         rename_file(file_name, backup_name);
1188       }
1189       rename_file(new_name, file_name);
1190
1191     } else {
1192
1193       remove_file(new_name);
1194
1195     }
1196
1197     //end_check();
1198
1199     return;
1200   }
1201 abort_loop:
1202   {
1203     state->abort(); // to close the file handles
1204     delete_aspell_speller(speller);
1205
1206     remove_file(new_name);
1207
1208     return;
1209   }
1210 }
1211
1212 #define U (unsigned char)
1213
1214 void Mapping::to_aspell()
1215 {
1216   memset(this, 0, sizeof(Mapping));
1217   primary[Ignore    ] = 'i';
1218   reverse[U'i'] = Ignore;
1219   reverse[U' '] = Ignore;
1220   reverse[U'\n'] = Ignore;
1221
1222   primary[IgnoreAll ] = 'I';
1223   reverse[U'I'] = IgnoreAll;
1224
1225   primary[Replace   ] = 'r';
1226   reverse[U'r'] = Replace;
1227
1228   primary[ReplaceAll] = 'R';
1229   reverse[U'R'] = ReplaceAll;
1230
1231   primary[Add       ] = 'a';
1232   reverse[U'A'] = Add;
1233   reverse[U'a'] = Add;
1234
1235   primary[AddLower  ] = 'l';
1236   reverse[U'L'] = AddLower;
1237   reverse[U'l'] = AddLower;
1238
1239   primary[Abort     ] = 'b';
1240   reverse[U'b'] = Abort;
1241   reverse[U'B'] = Abort;
1242   reverse[control('c')] = Abort;
1243
1244   primary[Exit      ] = 'x';
1245   reverse[U'x'] = Exit;
1246   reverse[U'X'] = Exit;
1247 }
1248
1249 void Mapping::to_ispell()
1250 {
1251   memset(this, 0, sizeof(Mapping));
1252   primary[Ignore    ] = ' ';
1253   reverse[U' '] = Ignore;
1254   reverse[U'\n'] = Ignore;
1255
1256   primary[IgnoreAll ] = 'A';
1257   reverse[U'A'] = IgnoreAll;
1258   reverse[U'a'] = IgnoreAll;
1259
1260   primary[Replace   ] = 'R';
1261   reverse[U'R'] = ReplaceAll;
1262   reverse[U'r'] = Replace;
1263
1264   primary[ReplaceAll] = 'E';
1265   reverse[U'E'] = ReplaceAll;
1266   reverse[U'e'] = Replace;
1267
1268   primary[Add       ] = 'I';
1269   reverse[U'I'] = Add;
1270   reverse[U'i'] = Add;
1271
1272   primary[AddLower  ] = 'U';
1273   reverse[U'U'] = AddLower;
1274   reverse[U'u'] = AddLower;
1275
1276   primary[Abort     ] = 'Q';
1277   reverse[U'Q'] = Abort;
1278   reverse[U'q'] = Abort;
1279   reverse[control('c')] = Abort;
1280
1281   primary[Exit      ] = 'X';
1282   reverse[U'X'] = Exit;
1283   reverse[U'x'] = Exit;
1284 }
1285 #undef U
1286
1287 ///////////////////////////
1288 //
1289 // list
1290 //
1291
1292 void list()
1293 {
1294   AspellCanHaveError * ret
1295     = new_aspell_speller(reinterpret_cast<AspellConfig *>(options.get()));
1296   if (aspell_error(ret)) {
1297     print_error(aspell_error_message(ret));
1298     exit(1);
1299   }
1300   AspellSpeller * speller = to_aspell_speller(ret);
1301
1302   state = new CheckerString(speller,stdin,0,64);
1303
1304   String word;
1305
1306   while (state->next_misspelling()) {
1307
1308     state->get_real_word(word);
1309     COUT.printl(word);
1310
1311   }
1312
1313   state.del(); // to close the file handles
1314   delete_aspell_speller(speller);
1315 }
1316
1317 ///////////////////////////
1318 //
1319 // convt
1320 //
1321
1322 void convt()
1323 {
1324   Conv conv;
1325   String buf1, buf2;
1326   const char * from = fix_encoding_str(args[0], buf1);
1327   const char * to   = fix_encoding_str(args[1], buf2);
1328   Normalize norm = NormNone;
1329   if (strcmp(from, "utf-8") == 0 && strcmp(to, "utf-8") != 0)
1330     norm = NormFrom;
1331   else if (strcmp(from, "utf-8") != 0 && strcmp(to, "utf-8") == 0)
1332     norm = NormTo;
1333   if (args.size() > 2) {
1334     for (String::iterator i = args[2].begin(); i != args[2].end(); ++i)
1335       *i = asc_tolower(*i);
1336     options->replace("normalize", "true");
1337     if (args[2] == "none")
1338       options->replace("normalize", "false");
1339     else if (args[2] == "internal")
1340       options->replace("norm-strict", "false");
1341     else if (args[2] == "strict")
1342       options->replace("norm-strict", "true");
1343     else
1344       EXIT_ON_ERR(options->replace("norm-form", args[2]));
1345   }
1346   EXIT_ON_ERR(conv.setup(*options, args[0], args[1], norm));
1347   String line;
1348   while (CIN.getline(line))
1349     COUT.printl(conv(line));
1350 }
1351
1352 void normlz()
1353 {
1354   options->replace("normalize", "true");
1355   const char * from = args.size() < 3 ? "utf-8" : args[0].str();
1356   const char * to   = args.size() < 3 ? "utf-8" : args[2].str();
1357   const char * intr = args.size() < 3 ? args[0].str() : args[1].str();
1358   String * form = (args.size() == 2   ? &args[1]
1359                    : args.size() == 4 ? &args[3]
1360                    : 0);
1361   Normalize decode_norm = NormTo;
1362   if (form) {
1363     for (String::iterator i = form->begin(); i != form->end(); ++i)
1364       *i = asc_tolower(*i);
1365     if (*form == "internal") {
1366       options->replace("norm-strict", "false");
1367       decode_norm = NormNone;
1368     } else if (*form == "strict") {
1369       options->replace("norm-strict", "true");
1370       decode_norm = NormNone;
1371     }
1372     if (decode_norm == NormTo) EXIT_ON_ERR(options->replace("norm-form", *form));
1373   }
1374   Conv encode,decode;
1375   EXIT_ON_ERR(encode.setup(*options, from, intr, NormFrom));
1376   EXIT_ON_ERR(decode.setup(*options, intr, to, decode_norm));
1377   String line;
1378   while (CIN.getline(line))
1379     COUT.printl(decode(encode(line)));
1380 }
1381
1382 ///////////////////////////
1383 //
1384 // filter
1385 //
1386
1387 void filter()
1388 {
1389   //assert(setvbuf(stdin, 0, _IOLBF, 0) == 0);
1390   //assert(setvbuf(stdout, 0, _IOLBF, 0) == 0);
1391   CERR << _("Sorry \"filter\" is currently unimplemented.\n");
1392   exit(3);
1393 }
1394
1395
1396 ///////////////////////////
1397 //
1398 // print_ver
1399 //
1400
1401 void print_ver () {
1402   printf("@(#) International Ispell Version 3.1.20 "
1403          "(but really Aspell %s)\n", aspell_version_string());
1404 }
1405
1406 ///////////////////////////////////////////////////////////////////////
1407 //
1408 // These functions use implementation details of the default speller
1409 // module
1410 //
1411
1412 class IstreamEnumeration : public StringEnumeration {
1413   FStream * in;
1414   String data;
1415 public:
1416   IstreamEnumeration(FStream & i) : in(&i) {}
1417   IstreamEnumeration * clone() const {
1418     return new IstreamEnumeration(*this);
1419   }
1420   void assign (const StringEnumeration * other) {
1421     *this = *static_cast<const IstreamEnumeration *>(other);
1422   }
1423   Value next() {
1424     if (!in->getline(data)) return 0;
1425     else return data.c_str();
1426   }
1427   bool at_end() const {return *in;}
1428 };
1429
1430 ///////////////////////////
1431 //
1432 // clean
1433 //
1434
1435 void clean()
1436 {
1437   using namespace aspeller;
1438
1439   bool strict = args.size() != 0 && args[0] == "strict";
1440
1441   Config * config = options;
1442
1443   CachePtr<Language> lang;
1444   find_language(*config);
1445   PosibErr<Language *> res = new_language(*config);
1446   if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
1447   lang.reset(res.data);
1448   IstreamEnumeration in(CIN);
1449   WordListIterator wl_itr(&in, lang, &CERR);
1450   config->replace("validate-words", "true");
1451   config->replace("validate-affixes", "true");
1452   if (!strict)
1453     config->replace("clean-words", "true");
1454   config->replace("clean-affixes", "true");
1455   config->replace("skip-invalid-words", "true");
1456   wl_itr.init(*config);
1457   Conv oconv, oconv2;
1458   if (config->have("encoding")) {
1459     EXIT_ON_ERR(oconv.setup(*config, lang->charmap(), config->retrieve("encoding"), NormTo));
1460     oconv2.setup(*config, lang->charmap(), config->retrieve("encoding"), NormTo);
1461   } else {
1462     EXIT_ON_ERR(oconv.setup(*config, lang->charmap(), lang->data_encoding(), NormTo));
1463     oconv2.setup(*config, lang->charmap(), lang->data_encoding(), NormTo);
1464   }
1465   while (wl_itr.adv()) {
1466     if (*wl_itr->aff.str)
1467       COUT.printf("%s/%s\n", oconv(wl_itr->word), oconv2(wl_itr->aff));
1468     else
1469       COUT.printl(oconv(wl_itr->word));
1470   }
1471 }
1472
1473 ///////////////////////////
1474 //
1475 // master
1476 //
1477
1478 void dump (aspeller::Dict * lws, Convert * conv)
1479 {
1480   using namespace aspeller;
1481
1482   switch (lws->basic_type) {
1483   case Dict::basic_dict:
1484     {
1485       Dictionary * ws = static_cast<Dictionary *>(lws);
1486       StackPtr<WordEntryEnumeration> els(ws->detailed_elements());
1487       WordEntry * wi;
1488       while (wi = els->next(), wi) {
1489         wi->write(COUT,*ws->lang(), conv);
1490         COUT << '\n';
1491       }
1492     }
1493     break;
1494   case Dict::multi_dict:
1495     {
1496       StackPtr<DictsEnumeration> els(lws->dictionaries());
1497       Dict * ws;
1498       while (ws = els->next(), ws)
1499         dump (ws, conv);
1500     }
1501     break;
1502   default:
1503     abort();
1504   }
1505 }
1506
1507 void master () {
1508   using namespace aspeller;
1509
1510   if (args.size() != 0) {
1511     options->replace("master", args[0].c_str());
1512   }
1513
1514   Config * config = options;
1515
1516   if (action == do_create) {
1517
1518     find_language(*config);
1519     EXIT_ON_ERR(create_default_readonly_dict
1520                 (new IstreamEnumeration(CIN),
1521                  *config));
1522
1523   } else if (action == do_merge) {
1524
1525     print_error(_("Can't merge a master word list yet. Sorry."));
1526     exit (1);
1527
1528   } else if (action == do_dump) {
1529
1530     EXIT_ON_ERR_SET(add_data_set(config->retrieve("master-path"), *config), Dict *, d);
1531     StackPtr<Convert> conv(setup_conv(d->lang(), config));
1532     dump(d, conv);
1533   }
1534 }
1535
1536 ///////////////////////////
1537 //
1538 // personal
1539 //
1540
1541 void personal () {
1542   using namespace aspeller;
1543
1544   if (args.size() != 0) {
1545     EXIT_ON_ERR(options->replace("personal", args[0]));
1546   }
1547   options->replace("module", "aspeller");
1548   if (action == do_create || action == do_merge) {
1549     CERR << _("Sorry \"create/merge personal\" is currently unimplemented.\n");
1550     exit(3);
1551
1552     // FIXME
1553 #if 0
1554     StackPtr<Speller> speller(new_speller(options));
1555
1556     if (action == do_create) {
1557       if (file_exists(speller->config()->retrieve("personal-path"))) {
1558         print_error(_("Sorry I won't overwrite \"%s\""),
1559                     speller->config()->retrieve("personal-path"));
1560         exit (1);
1561       }
1562       speller->personal_word_list().data->clear();
1563     }
1564
1565     String word;
1566     while (CIN >> word)
1567       speller->add_to_personal(word);
1568
1569     speller->save_all_word_lists();
1570 #endif
1571
1572   } else { // action == do_dump
1573
1574     // FIXME: This is currently broken
1575
1576     Config * config = options;
1577     Dictionary * per = new_default_writable_dict();
1578     EXIT_ON_ERR(per->load(config->retrieve("personal-path"), *config));
1579     StackPtr<WordEntryEnumeration> els(per->detailed_elements());
1580     StackPtr<Convert> conv(setup_conv(per->lang(), config));
1581
1582     WordEntry * wi;
1583     while (wi = els->next(), wi) {
1584       wi->write(COUT,*(per->lang()), conv);
1585       COUT.put('\n');
1586     }
1587     delete per;
1588   }
1589 }
1590
1591 ///////////////////////////
1592 //
1593 // repl
1594 //
1595
1596 void repl() {
1597   using namespace aspeller;
1598
1599   if (args.size() != 0) {
1600     options->replace("repl", args[0].c_str());
1601   }
1602
1603   if (action == do_create || action == do_merge) {
1604
1605     CERR << _("Sorry \"create/merge repl\" is currently unimplemented.\n");
1606     exit(3);
1607
1608     // FIXME
1609 #if 0
1610     SpellerImpl speller(options);
1611
1612     if (action == do_create) {
1613       if (file_exists(speller->config()->retrieve("repl-path"))) {
1614         print_error(_("Sorry I won't overwrite \"%s\""),
1615                     speller->config()->retrieve("repl-path"));
1616         exit (1);
1617       }
1618       speller->personal_repl().clear();
1619     }
1620
1621     try {
1622       String word,repl;
1623
1624       while (true) {
1625         get_word_pair(word,repl,':');
1626         EXIT_ON_ERR(speller->store_repl(word,repl,false));
1627       }
1628
1629     } catch (bad_cin) {}
1630
1631     EXIT_ON_ERR(speller->personal_repl().synchronize());
1632
1633 #endif
1634
1635   } else if (action == do_dump) {
1636
1637     // FIXME: This is currently broken
1638
1639     ReplacementDict * repl = new_default_replacement_dict();
1640     repl->load(options->retrieve("repl-path"), *options);
1641     StackPtr<WordEntryEnumeration> els(repl->detailed_elements());
1642
1643     WordEntry * rl = 0;
1644     WordEntry words;
1645     Conv conv(setup_conv(repl->lang(), options));
1646     while ((rl = els->next())) {
1647       repl->repl_lookup(*rl, words);
1648       do {
1649         COUT << conv(rl->word) << ": " << conv(words.word) << "\n";
1650       } while (words.adv());
1651     }
1652     delete repl;
1653   }
1654 }
1655
1656 //////////////////////////
1657 //
1658 // soundslike
1659 //
1660
1661 void soundslike() {
1662   using namespace aspeller;
1663   CachePtr<Language> lang;
1664   find_language(*options);
1665   PosibErr<Language *> res = new_language(*options);
1666   if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
1667   lang.reset(res.data);
1668   Conv iconv(setup_conv(options, lang));
1669   Conv oconv(setup_conv(lang, options));
1670   String word;
1671   String sl;
1672   line_buffer();
1673   while (CIN.getline(word)) {
1674     const char * w = iconv(word);
1675     lang->LangImpl::to_soundslike(sl, w);
1676     printf("%s\t%s\n", word.str(), oconv(sl));
1677   }
1678 }
1679
1680 //////////////////////////
1681 //
1682 // munch
1683 //
1684
1685 void munch()
1686 {
1687   using namespace aspeller;
1688   CachePtr<Language> lang;
1689   find_language(*options);
1690   PosibErr<Language *> res = new_language(*options);
1691   if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
1692   lang.reset(res.data);
1693   Conv iconv(setup_conv(options, lang));
1694   Conv oconv(setup_conv(lang, options));
1695   String word;
1696   GuessInfo gi;
1697   line_buffer();
1698   while (CIN.getline(word)) {
1699     lang->munch(iconv(word), &gi);
1700     COUT << word;
1701     for (const aspeller::CheckInfo * ci = gi.head; ci; ci = ci->next)
1702     {
1703       COUT << ' ' << oconv(ci->word) << '/';
1704       if (ci->pre_flag != 0) COUT << oconv(static_cast<char>(ci->pre_flag));
1705       if (ci->suf_flag != 0) COUT << oconv(static_cast<char>(ci->suf_flag));
1706     }
1707     COUT << '\n';
1708   }
1709 }
1710
1711 //////////////////////////
1712 //
1713 // expand
1714 //
1715
1716 void expand()
1717 {
1718   int level = 1;
1719   if (args.size() > 0)
1720     level = atoi(args[0].c_str()); //FIXME: More verbose
1721   int limit = INT_MAX;
1722   if (args.size() > 1)
1723     limit = atoi(args[1].c_str());
1724
1725   using namespace aspeller;
1726   CachePtr<Language> lang;
1727   find_language(*options);
1728   PosibErr<Language *> res = new_language(*options);
1729   if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
1730   lang.reset(res.data);
1731   Conv iconv(setup_conv(options, lang));
1732   Conv oconv(setup_conv(lang, options));
1733   String word, buf;
1734   ObjStack exp_buf;
1735   WordAff * exp_list;
1736   line_buffer();
1737   while (CIN.getline(word)) {
1738     buf = word;
1739     char * w = iconv(buf.mstr(), buf.size());
1740     char * af = strchr(w, '/');
1741     size_t s;
1742     if (af != 0) {
1743       s = af - w;
1744       *af++ = '\0';
1745     } else {
1746       s = strlen(w);
1747       af = w + s;
1748     }
1749     exp_buf.reset();
1750     exp_list = lang->expand(w, af, exp_buf, limit);
1751     if (level <= 2) {
1752       if (level == 2)
1753         COUT << word << ' ';
1754       WordAff * p = exp_list;
1755       while (p) {
1756         COUT << oconv(p->word);
1757         if (limit < INT_MAX && p->aff[0]) COUT << '/' << oconv((const char *)p->aff);
1758         p = p->next;
1759         if (p) COUT << ' ';
1760       }
1761       COUT << '\n';
1762     } else if (level >= 3) {
1763       double ratio = 0;
1764       if (level >= 4) {
1765         for (WordAff * p = exp_list; p; p = p->next)
1766           ratio += p->word.size;
1767         ratio /= exp_list->word.size; // it is assumed the first
1768                                       // expansion is just the root
1769       }
1770       for (WordAff * p = exp_list; p; p = p->next) {
1771         COUT << word << ' ' << oconv(p->word);
1772         if (limit < INT_MAX && p->aff[0]) COUT << '/' << oconv((const char *)p->aff);
1773         if (level >= 4) COUT.printf(" %f\n", ratio);
1774         else COUT << '\n';
1775       }
1776     }
1777   }
1778 }
1779
1780 //////////////////////////
1781 //
1782 // combine
1783 //
1784
1785 static void combine_aff(String & aff, const char * app)
1786 {
1787   for (; *app; ++app) {
1788     if (!memchr(aff.c_str(),*app,aff.size()))
1789       aff.push_back(*app);
1790   }
1791 }
1792
1793 static void print_wordaff(const String & base, const String & affs, Conv & oconv)
1794 {
1795   if (base.empty()) return;
1796   COUT << oconv(base);
1797   if (affs.empty())
1798     COUT << '\n';
1799   else
1800     COUT.printf("/%s\n", oconv(affs));
1801 }
1802
1803 static bool lower_equal(aspeller::Language * l, ParmString a, ParmString b)
1804 {
1805   if (a.size() != b.size()) return false;
1806   if (l->to_lower(a[0]) != l->to_lower(b[0])) return false;
1807   return memcmp(a + 1, b + 1, a.size() - 1) == 0;
1808 }
1809
1810 void combine()
1811 {
1812   using namespace aspeller;
1813   CachePtr<Language> lang;
1814   find_language(*options);
1815   PosibErr<Language *> res = new_language(*options);
1816   if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
1817   lang.reset(res.data);
1818   Conv iconv(setup_conv(options, lang));
1819   Conv oconv(setup_conv(lang, options));
1820   String word;
1821   String base;
1822   String affs;
1823   line_buffer();
1824   while (CIN.getline(word)) {
1825     word = iconv(word);
1826
1827     CharVector buf; buf.append(word.c_str(), word.size() + 1);
1828     char * w = buf.data();
1829     char * af = strchr(w, '/');
1830     size_t s;
1831     if (af != 0) {
1832       s = af - w;
1833       *af++ = '\0';
1834     } else {
1835       s = strlen(w);
1836       af = w + s;
1837     }
1838
1839     if (lower_equal(lang, base, w)) {
1840       if (lang->is_lower(base.str())) {
1841         combine_aff(affs, af);
1842       } else {
1843         base = w;
1844         combine_aff(affs, af);
1845       }
1846     } else {
1847       print_wordaff(base, affs, oconv);
1848       base = w;
1849       affs = af;
1850     }
1851
1852   }
1853   print_wordaff(base, affs, oconv);
1854 }
1855
1856 //////////////////////////
1857 //
1858 // munch list
1859 //
1860
1861 void munch_list_simple();
1862 void munch_list_complete(bool, bool);
1863
1864 void munch_list()
1865 {
1866   bool simple = false;
1867   bool multi = false;
1868   bool simplify = true;
1869
1870   for (unsigned i = 0; i < args.size(); ++i) {
1871     if (args[i] == "simple")      simple = true;
1872     else if (args[i] == "single") multi = false;
1873     else if (args[i] == "multi")  multi = true;
1874     else if (args[i] == "keep")   simplify = false;
1875     else
1876     {
1877       print_error(_("\"%s\" is not a valid flag for the \"munch-list\" command."),
1878                   args[i]);
1879       exit(1);
1880     }
1881   }
1882   if (simple)
1883     munch_list_simple();
1884   else
1885     munch_list_complete(multi, simplify);
1886 }
1887
1888 //
1889 // munch list (simple version)
1890 //
1891
// This version works the same way as the myspell "munch" program.
// However, because the results depend on the hash table used and the
// order of the word list, it won't produce identical results.
1895
// One word in the simple munch-list table.
struct SML_WordEntry {
  const char * word;  // the word itself
  char * aff;         // affix flags collected for it, or 0 if none
  bool keep;          // whether the entry is to be printed
  SML_WordEntry(const char * w = 0)
  {
    word = w;
    aff  = 0;
    keep = false;
  }
};
1902
1903 struct SML_Parms {
1904   typedef SML_WordEntry Value;
1905   typedef const char * Key;
1906   static const bool is_multi = false;
1907   acommon::hash<const char *> hash;
1908   bool equal(Key x, Key y) {return strcmp(x,y) == 0;}
1909   Key key(const Value & v) {return v.word;}
1910 };
1911
1912 typedef HashTable<SML_Parms> SML_Table;
1913
1914 static inline void add_affix(SML_Table::iterator b, char aff)
1915 {
1916   char * p = b->aff;
1917   if (p) {while (*p) {if (*p == aff) return; ++p;}}
1918   int s = p - b->aff;
1919   b->aff = (char *)realloc(b->aff, s + 2);
1920   b->aff[s + 0] = aff;
1921   b->aff[s + 1] = '\0';
1922 }
1923
1924 void munch_list_simple()
1925 {
1926   using namespace aspeller;
1927   CachePtr<Language> lang;
1928   find_language(*options);
1929   PosibErr<Language *> res = new_language(*options);
1930   if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
1931   lang.reset(res.data);
1932   Conv iconv(setup_conv(options, lang));
1933   Conv oconv(setup_conv(lang, options));
1934   String word, buf;
1935   ObjStack exp_buf;
1936   WordAff * exp_list;
1937   GuessInfo gi;
1938   SML_Table table;
1939   ObjStack table_buf;
1940
1941   // add words to dictionary
1942   while (CIN.getline(word)) {
1943     buf = word;
1944     char * w = iconv(buf.mstr(), buf.size());
1945     char * af = strchr(w, '/');
1946     size_t s;
1947     if (af != 0) {
1948       s = af - w;
1949       *af++ = '\0';
1950     } else {
1951       s = strlen(w);
1952       af = w + s;
1953     }
1954     exp_buf.reset();
1955     exp_list = lang->expand(w, af, exp_buf);
1956     for (WordAff * q = exp_list; q; q = q->next) {
1957       table.insert(SML_WordEntry(table_buf.dup(q->word)));
1958     }
1959   }
1960
1961   // now try to munch each word in the dictionary
1962   SML_Table::iterator p = table.begin();
1963   SML_Table::iterator end = table.end();
1964   String flags;
1965   for (; p != end; ++p)
1966   {
1967     const aspeller::CheckInfo * best = 0;
1968     unsigned min_base_size = INT_MAX;
1969     lang->munch(p->word, &gi);
1970     const aspeller::CheckInfo * ci = gi.head;
1971     while (ci)
1972     { {
1973       // check if the base word is in the dictionary
1974       SML_Table::iterator b = table.find(ci->word);
1975       if (b == table.end()) goto cont;
1976
1977       // check if all the words once expanded are in the dictionary
1978       // this included the exiting flags due to pre-suf cross products
1979       if (b->aff) flags = b->aff;
1980       else        flags.clear();
1981       if (ci->pre_flag != 0) flags += ci->pre_flag;
1982       if (ci->suf_flag != 0) flags += ci->suf_flag;
1983       exp_buf.reset();
1984       exp_list = lang->expand(ci->word, flags, exp_buf);
1985       for (WordAff * q = exp_list; q; q = q->next) {
1986         if (!table.have(q->word)) goto cont;
1987       }
1988
1989       // the base word and flags are valid, now keep the one with the
1990       // smallest base word
1991       if (ci->word.size() < min_base_size) {
1992         min_base_size = ci->word.size();
1993         best = ci;
1994       }
1995
1996     } cont:
1997       ci = ci->next;
1998     }
1999     // now add the base to the keep list if one exists
2000     // otherwise just keep the original word
2001     if (best) {
2002       SML_Table::iterator b = table.find(best->word);
2003       assert(b != table.end());
2004       if (best->pre_flag) add_affix(b, best->pre_flag);
2005       if (best->suf_flag) add_affix(b, best->suf_flag);
2006       b->keep = true;
2007     } else {
2008       p->keep = true;
2009     }
2010   }
2011
2012   // Print the entries in the table marked as "to keep"
2013   p = table.begin();
2014   for (; p != end; ++p)
2015   {
2016     if (p->keep) {
2017       COUT << oconv(p->word);
2018       if (p->aff) {
2019         COUT << '/' << oconv(p->aff);
2020       }
2021       COUT << '\n';
2022     }
2023   }
2024
2025   p = table.begin();
2026   for (; p != end; ++p)
2027   {
2028     if (p->aff) free(p->aff);
2029     p->aff = 0;
2030   }
2031 }
2032
2033 //
2034 // munch list (complete version)
2035 //
2036 //
2037 // This version will produce a smaller list than the simple version.
2038 // It is very close to the optimum result.
2039 //
2040
2041 //
2042 // Hash table to store the words
2043 //
2044
// One word in the complete munch-list table.  The parent and rank
// members are used by the disjoint-set routines link() and find_set().
struct CML_Entry {
  const char * word;
  char * aff;
  CML_Entry * parent;
  CML_Entry * next;
  int rank;
  CML_Entry(const char * w = 0)
  {
    word   = w;
    aff    = 0;
    parent = 0;
    next   = 0;
    rank   = 0;
  }
};
2053
2054 struct CML_Parms {
2055   typedef CML_Entry Value;
2056   typedef const char * Key;
2057   static const bool is_multi = true;
2058   acommon::hash<const char *> hash;
2059   bool equal(Key x, Key y) {return strcmp(x,y) == 0;}
2060   Key key(const Value & v) {return v.word;}
2061 };
2062
2063 typedef HashTable<CML_Parms> CML_Table;
2064
2065 //
2066 // add an affix to a word but keep the prefixes and suffixes separate
2067 //
2068
2069 static void add_affix(CML_Table::iterator b, char aff, bool prefix)
2070 {
2071   char * p = b->aff;
2072   int s = 3;
2073   if (p) {
2074     while (*p) {
2075       if (*p == aff) return;
2076       ++p;
2077     }
2078     s = (p - b->aff) + 2;
2079   }
2080   char * tmp = (char *)malloc(s);
2081   p = b->aff;
2082   char * q = tmp;
2083   if (p) {while (*p != '/') *q++ = *p++;}
2084   if (prefix) *q++ = aff;
2085   *q++ = '/';
2086   if (p) {p++; while (*p != '\0') *q++ = *p++;}
2087   if (!prefix) *q++ = aff;
2088   *q++ = '\0';
2089   assert(q - tmp == s);
2090   if (b->aff) free(b->aff);
2091   b->aff = tmp;
2092 }
2093
2094 //
2095 // Standard disjoint set algo with union by rank and path compression
2096 //
2097
2098 static void link(CML_Entry * x, CML_Entry * y)
2099 {
2100   if (x == y) return;
2101   if (x->rank > y->rank) {
2102     y->parent = x;
2103   } else {
2104     x->parent = y;
2105     if (x->rank == y->rank) y->rank++;
2106   }
2107 }
2108
2109 static CML_Entry * find_set (CML_Entry * x)
2110 {
2111   if (x->parent)
2112     return x->parent = find_set(x->parent);
2113   else
2114     return x;
2115 }
2116
2117 //
2118 // Stuff to manage prefix-suffix combinations
2119 //
2120
2121 struct PreSuf {
2122   String pre;
2123   String suf;
2124   String & get(int i) {return i == 0 ? pre : suf;}
2125   const String & get(int i) const {return i == 0 ? pre : suf;}
2126   PreSuf() : next(0) {}
2127   PreSuf * next;
2128 };
2129
2130 class PreSufList {
2131 public:
2132   PreSuf * head;
2133   PreSufList() : head(0) {}
2134   void add(PreSuf * to_add) {
2135     to_add->next = head;
2136     head = to_add;
2137   }
2138   void clear() {
2139     while (head) {
2140       PreSuf * tmp = head;
2141       head = head->next;
2142       delete tmp;
2143     }
2144   }
2145   void transfer(PreSufList & other) {
2146     clear();
2147     head = other.head;
2148     other.head = 0;
2149   }
2150   ~PreSufList() {
2151     clear();
2152   }
2153 };
2154
2155
2156 // Example of usage:
2157 //   combine(in, res, 0)
2158 //   Pre:  in =  [(ab, c) (ab, d) (c, de) (c, ef)]
2159 //   Post: res = [(ab, cd), (c, def)]
2160 static void combine(const PreSufList & in, PreSufList & res, int which)
2161 {
2162   const PreSuf * i = in.head;
2163   while (i) { {
2164     const String & s = i->get(which);
2165     for (const PreSuf * j = in.head; j != i; j = j->next) {
2166       if (j->get(which) == s) goto cont;
2167     }
2168     PreSuf * tmp = new PreSuf;
2169     tmp->pre = i->pre;
2170     tmp->suf = i->suf;
2171     String & b = tmp->get(!which);
2172     for (const PreSuf * j = i->next; j; j = j->next) {
2173       if (j->get(which) != s) continue;
2174       const String & a = j->get(!which);
2175       for (String::const_iterator x = a.begin(); x != a.end(); ++x) {
2176         if (memchr(b.data(), *x, b.size())) continue;
2177         b += *x;
2178       }
2179     }
2180     res.add(tmp);
2181   } cont:
2182     i = i->next;
2183   }
2184 }
2185
2186 //
2187 // Stuff used when pruning the list of base words
2188 //
2189
// A base word together with its affix flags and the set of words it
// expands to, stored as bit vectors.
struct Expansion {
  const char * word;
  // modifying this will modify the affix entry in the hash table
  char * aff;
  // words currently attributed to this entry
  std::vector<bool> exp;
  // copy of "exp" taken before any trimming
  std::vector<bool> orig_exp;
};
2196
2197 // static void dump(const Vector<Expansion *> & working,
2198 //                  const Vector<CML_Table::iterator> & entries)
2199 // {
2200 //   for (unsigned i = 0; i != working.size(); ++i) {
2201 //     if (!working[i]) continue;
2202 //     CERR.printf("%s/%s ", working[i]->word, working[i]->aff);
2203 //     for (unsigned j = 0; j != working[i]->exp.size(); ++j) {
2204 //       if (working[i]->exp[j])
2205 //         CERR.printf("%s ", entries[j]->word);
2206 //     }
2207 //     CERR.put('\n');
2208 //   }
2209 //   CERR.put('\n');
2210 // }
2211
2212 // standard set algorithms on a bit vector
2213
// Return true if every bit set in "smaller" is also set in "larger".
// Both vectors must have the same size.
static bool subset(const std::vector<bool> & smaller,
                   const std::vector<bool> & larger)
{
  assert(smaller.size() == larger.size());
  const unsigned n = larger.size();
  unsigned idx = 0;
  while (idx != n) {
    const bool missing = smaller[idx] && !larger[idx];
    if (missing) return false;
    ++idx;
  }
  return true;
}
2224
// Set union: turn on in "x" every bit that is set in "y".
// Both vectors must have the same size.
static void merge(std::vector<bool> & x, const std::vector<bool> & y)
{
  assert(x.size() == y.size());
  const unsigned n = x.size();
  unsigned idx = 0;
  while (idx != n) {
    x[idx] = x[idx] || y[idx];
    ++idx;
  }
}
2233
// Set difference: turn off in "x" every bit that is set in "y".
// Both vectors must have the same size.
static void purge(std::vector<bool> & x, const std::vector<bool> & y)
{
  assert(x.size() == y.size());
  const unsigned n = x.size();
  unsigned idx = 0;
  while (idx != n) {
    x[idx] = x[idx] && !y[idx];
    ++idx;
  }
}
2242
// Number of bits set in "x".
static inline unsigned count(const std::vector<bool> & x) {
  unsigned total = 0;
  for (std::vector<bool>::const_iterator bit = x.begin();
       bit != x.end();
       ++bit)
  {
    if (*bit) ++total;
  }
  return total;
}
2250
2251 //
2252
2253 struct WorkingLt {
2254   bool operator() (Expansion * x, Expansion * y) {
2255
2256     // LARGEST number of expansions
2257     unsigned x_s = count(x->exp);
2258     unsigned y_s = count(y->exp);
2259     if (x_s != y_s) return x_s > y_s;
2260
2261     // SMALLEST base word
2262     x_s = strlen(x->word);
2263     y_s = strlen(y->word);
2264     if (x_s != y_s) return x_s < y_s;
2265
2266     // LARGEST affix string
2267     x_s = strlen(x->aff);
2268     y_s = strlen(y->aff);
2269     if (x_s != y_s) return x_s > y_s;
2270
2271     //
2272     int cmp = strcmp(x->word, y->word);
2273     if (cmp != 0) return cmp < 0;
2274
2275     //
2276     cmp = strcmp(x->aff, y->aff);
2277     return cmp < 0;
2278   }
2279 };
2280
2281 //
2282 // Finally the function that does the real work
2283 //
2284
2285 void munch_list_complete(bool multi, bool simplify)
2286 {
2287   using namespace aspeller;
2288   CachePtr<Language> lang;
2289   find_language(*options);
2290   PosibErr<Language *> res = new_language(*options);
2291   if (res.has_err()) {print_error(res.get_err()->mesg); exit(1);}
2292   lang.reset(res.data);
2293   Conv iconv(setup_conv(options, lang));
2294   Conv oconv(setup_conv(lang, options));
2295   String word, buf;
2296   ObjStack exp_buf;
2297   WordAff * exp_list;
2298   GuessInfo gi;
2299   CML_Table table;
2300   ObjStack table_buf;
2301
2302   // add words to dictionary
2303   while (CIN.getline(word)) {
2304     buf = word;
2305     char * w = iconv(buf.mstr(), buf.size());
2306     char * af = strchr(w, '/');
2307     size_t s;
2308     if (af != 0) {
2309       s = af - w;
2310       *af++ = '\0';
2311     } else {
2312       s = strlen(w);
2313       af = w + s;
2314     }
2315     exp_buf.reset();
2316     exp_list = lang->expand(w, af, exp_buf);
2317     for (WordAff * q = exp_list; q; q = q->next) {
2318       if (!table.have(q->word)) // since it is a multi hash table
2319         table.insert(CML_Entry(table_buf.dup(q->word))).first;
2320     }
2321   }
2322
2323   // Now try to munch each word in the dictionary.  This will also
2324   // group the base words into disjoint sets based on there expansion.
2325   CML_Table::iterator p = table.begin();
2326   CML_Table::iterator end = table.end();
2327   String flags;
2328   for (; p != end; ++p)
2329   {
2330     lang->munch(p->word, &gi, false);
2331     const aspeller::CheckInfo * ci = gi.head;
2332     while (ci)
2333     { {
2334       // check if the base word is in the dictionary
2335       CML_Table::iterator b = table.find(ci->word);
2336       if (b == table.end()) goto cont;
2337
2338       // check if all the words once expanded are in the dictionary
2339       char flags[2];
2340       assert(!(ci->pre_flag && ci->suf_flag));
2341       if      (ci->pre_flag != 0) flags[0] = ci->pre_flag;
2342       else if (ci->suf_flag != 0) flags[0] = ci->suf_flag;
2343       flags[1] = '\0';
2344       exp_buf.reset();
2345       exp_list = lang->expand(ci->word, flags, exp_buf);
2346       for (WordAff * q = exp_list; q; q = q->next) {
2347         if (!table.have(q->word)) goto cont;
2348       }
2349
2350       // all the expansions are in the dictionary now add the affix to
2351       // the base word and figure out which disjoint set it belongs to
2352       add_affix(b, flags[0], ci->pre_flag != 0);
2353       CML_Entry * bs = find_set(&*b);
2354       for (WordAff * q = exp_list; q; q = q->next) {
2355         CML_Table::iterator w = table.find(q->word);
2356         assert(b != table.end());
2357         CML_Entry * ws = find_set(&*w);
2358         link(bs,ws);
2359       }
2360
2361     } cont:
2362       ci = ci->next;
2363     }
2364   }
2365
2366   // If a base word has both prefixes and suffixes try to combine them.
2367   // This can lead to multiple entries for the same base word.  If "multi"
2368   // is true than include all the entries.  Otherwise, only include the
2369   // one with the largest number of expansions.  This is a greedy choice
2370   // that may not be optimal, but is close to it.
2371   p = table.begin();
2372   String pre,suf;
2373   CML_Entry * extras = 0;
2374   for (; p != end; ++p)
2375   {
2376     pre.clear(); suf.clear();
2377     if (!p->aff) continue;
2378     char * s = p->aff;
2379     while (*s != '/') pre += *s++;
2380     ++s;
2381     while (*s != '\0') suf += *s++;
2382     if (pre.empty()) {
2383
2384       strcpy(p->aff, suf.str());
2385
2386     } else if (suf.empty()) {
2387
2388       strcpy(p->aff, pre.str());
2389
2390     } else {
2391
2392       // Try all possible combinations and keep the ones which expand
2393       // to legal words.
2394
2395       PreSufList cross,tmp1,tmp2;
2396       PreSuf * ps = 0;
2397
2398       for (String::iterator pi = pre.begin(); pi != pre.end(); ++pi) {
2399         String::iterator si = suf.begin();
2400         while (si != suf.end()) { {
2401           char flags[3] = {*pi, *si, '\0'};
2402           exp_buf.reset();
2403           exp_list = lang->expand(p->word, flags, exp_buf);
2404           for (WordAff * q = exp_list; q; q = q->next) {
2405             if (!table.have(q->word)) goto cont2;
2406           }
2407           ps = new PreSuf;
2408           ps->pre += *pi;
2409           ps->suf += *si;
2410           cross.add(ps);
2411         } cont2:
2412           ++si;
2413         }
2414       }
2415
2416       // Now combine the legal cross pairs with other ones when
2417       // possible.
2418
2419       // final res = [ (pre, []) ([],suf),
2420       //               (cross | combine first | combine second)
2421       //               (cross | combine second | combine first)
2422       //             | combine first
2423       //             | combine second
2424       //
2425       // combine first [(ab, c) (ab, d) (c, de) (c, ef)]
2426       //   =  [(ab, cd), (c, def)]
2427
2428       combine(cross, tmp1, 0);
2429       combine(tmp1,  tmp2, 1);
2430       tmp1.clear();
2431
2432       combine(cross, tmp1, 1);
2433       combine(tmp1,  tmp2, 0);
2434       tmp1.clear();
2435
2436       cross.clear();
2437
2438       ps = new PreSuf;
2439       ps->pre = pre;
2440       tmp2.add(ps);
2441       ps = new PreSuf;
2442       ps->suf = suf;
2443       tmp2.add(ps);
2444
2445       combine(tmp2, tmp1, 0);
2446       combine(tmp1, cross, 1);
2447
2448       if (multi) {
2449
2450         // It is OK to have multiple entries with the same base word
2451         // so use them all.
2452
2453         ps = cross.head;
2454         assert(ps);
2455         memcpy(p->aff, ps->pre.data(), ps->pre.size());
2456         memcpy(p->aff + ps->pre.size(), ps->suf.str(), ps->suf.size() + 1);
2457
2458         ps = ps->next;
2459         CML_Entry * bs = find_set(&*p);
2460         for (; ps; ps = ps->next) {
2461
2462           CML_Entry * tmp = new CML_Entry;
2463           tmp->word = p->word;
2464           tmp->aff = (char *)malloc(ps->pre.size() + ps->suf.size() + 1);
2465           memcpy(tmp->aff, ps->pre.data(), ps->pre.size());
2466           memcpy(tmp->aff + ps->pre.size(), ps->suf.str(), ps->suf.size() + 1);
2467
2468           tmp->parent = bs;
2469
2470           tmp->next = extras;
2471           extras = tmp;
2472         }
2473
2474       } else {
2475
2476         // chose the one which has the largest number of expansions
2477
2478         int max_exp = 0;
2479         PreSuf * best = 0;
2480         String flags;
2481
2482         for (ps = cross.head; ps; ps = ps->next) {
2483           flags  = ps->pre;
2484           flags += ps->suf;
2485           exp_buf.reset();
2486           exp_list = lang->expand(p->word, flags, exp_buf);
2487           int c = 0;
2488           for (WordAff * q = exp_list; q; q = q->next) ++c;
2489           if (c > max_exp) {max_exp = c; best = ps;}
2490         }
2491
2492         memcpy(p->aff, best->pre.data(), best->pre.size());
2493         memcpy(p->aff + best->pre.size(), best->suf.str(), best->suf.size() + 1);
2494       }
2495     }
2496   }
2497
2498   while (extras) {
2499     CML_Entry * tmp = extras;
2500     extras = extras->next;
2501     tmp->next = 0;
2502     table.insert(*tmp);
2503     delete tmp;
2504   }
2505
2506   // Create a linked list for each disjoint set
2507   p = table.begin();
2508   for (; p != end; ++p)
2509   {
2510     p->rank = -1;
2511     CML_Entry * bs = find_set(&*p);
2512     if (bs != &*p) {
2513       p->next = bs->next;
2514       bs->next = &*p;
2515     }
2516   }
2517
2518   // Now process each disjoint set independently
2519   p = table.begin();
2520   for (; p != end; ++p)
2521   {
2522     if (p->parent) continue;
2523
2524     Vector<CML_Table::iterator> entries;
2525     Vector<Expansion> expansions;
2526     Vector<Expansion *> to_keep;
2527     std::vector<bool> to_keep_exp;
2528     Vector<Expansion *> working;
2529     Vector<unsigned> to_remove;
2530
2531     // First assign numbers to each unique word.  The rank field is
2532     // no longer used so use it to store the number.
2533     for (CML_Entry * q = &*p; q; q = q->next) {
2534       CML_Table::iterator e = table.find(q->word);
2535       if (e->rank == -1) {
2536         e->rank = entries.size();
2537         q->rank = entries.size();
2538         entries.push_back(e);
2539       } else {
2540         q->rank = e->rank;
2541       }
2542       if (q->aff) {
2543         Expansion tmp;
2544         tmp.word = q->word;
2545         tmp.aff  = q->aff;
2546         expansions.push_back(tmp);
2547       }
2548     }
2549
2550     to_keep_exp.resize(entries.size());
2551     //for (int i = 0; i != to_keep_exp.size(); ++i) {
2552     //  printf(">>> %d %d\n", i, (int)to_keep_exp[i]);
2553     //}
2554
2555     // Store the expansion of each base word in a bit vector and
2556     // add it to the working set
2557     for (Vector<Expansion>::iterator q = expansions.begin();
2558          q != expansions.end();
2559          ++q)
2560     {
2561       q->exp.resize(entries.size());
2562       exp_buf.reset();
2563       exp_list = lang->expand(q->word, q->aff, exp_buf);
2564       for (WordAff * i = exp_list; i; i = i->next) {
2565         CML_Table::iterator e = table.find(i->word);
2566         assert(0 <= e->rank && e->rank < (int)entries.size());
2567         q->exp[e->rank] = true;
2568       }
2569       q->orig_exp = q->exp;
2570       working.push_back(&*q);
2571     }
2572
2573     unsigned prev_working_size = INT_MAX;
2574
2575     // This loop will repeat until the working set is empty.  This
2576     // will produce optimum results in most cases.  Non optimum
2577     // results may be possible if step (4) is necessary, but in
2578     // practice this step is rarly necessary.
2579     do {
2580       prev_working_size = working.size();
2581
2582       // Sort the list based on WorkingLt.  This is necessary every
2583       // time since the expansion list can change.
2584       std::sort(working.begin(), working.end(), WorkingLt());
2585
2586       // (1) Eliminate any elements which are a subset of others
2587       for (unsigned i = 0; i != working.size(); ++i) {
2588         if (!working[i]) continue;
2589         for (unsigned j = i + 1; j != working.size(); ++j) {
2590           if (!working[j]) continue;
2591           if (subset(working[j]->exp, working[i]->exp)) {
2592             working[j] = 0;
2593           }
2594         }
2595       }
2596
2597       // (2) Move any elements which expand to unique entree
2598       // into the to_keep list
2599       to_remove.clear();
2600       for (unsigned i = 0; i != entries.size(); ++i) {
2601         int n = -1;
2602         for (unsigned j = 0; j != working.size(); ++j) {
2603           if (working[j] && working[j]->exp[i]) {
2604             if (n == -1) n = j;
2605             else         n = -2;
2606           }
2607         }
2608         if (n >= 0) to_remove.push_back(n);
2609       }
2610       for (unsigned i = 0; i != to_remove.size(); ++i) {
2611         unsigned n = to_remove[i];
2612         if (!working[n]) continue;
2613         to_keep.push_back(working[n]);
2614         merge(to_keep_exp, working[n]->exp);
2615         working[n] = 0;
2616       }
2617
2618       // (3) Eliminate any elements which are a subset of all the
2619       // elements in the to_keep list
2620       for (unsigned i = 0; i != working.size(); ++i) {
2621         if (working[i] && subset(working[i]->exp, to_keep_exp)) {
2622           working[i] = 0;
2623         }
2624       }
2625
2626       // Compact the working list
2627       {
2628         int i = 0, j = 0;
2629         while (j != (int)working.size()) {
2630           if (working[j]) {
2631             working[i] = working[j];
2632             ++i;
2633           }
2634           ++j;
2635         }
2636         working.resize(i);
2637       }
2638
2639       // (4) If none of the entries in working have been removed via
2640       // the above methods then make a greedy choice and move the
2641       // first element into the to_keep list.
2642       if (working.size() > 0 && working.size() == prev_working_size)
2643       {
2644         to_keep.push_back(working[0]);
2645         //CERR.printf("Making greedy choice! Choosing %s/%s.\n",
2646         //            working[0]->word, working[0]->aff);
2647         merge(to_keep_exp, working[0]->exp);
2648         working.erase(working.begin(), working.begin() + 1);
2649       }
2650
2651       // (5) Trim the expansion list for any elements left in the
2652       // working set by removing the expansions that already exist in
2653       // the to_keep list
2654       for (unsigned i = 0; i != working.size(); ++i) {
2655         purge(working[i]->exp, to_keep_exp);
2656       }
2657
2658     } while (working.size() > 0);
2659
2660     if (simplify) {
2661
2662       // Remove unnecessary flags.  A flag is unnecessary if it does
2663       // does not expand to any new words, that is words that are not
2664       // already covered by an earlier entries in the list.
2665
2666       for (unsigned i = 0; i != to_keep.size(); ++i) {
2667         to_keep[i]->exp = to_keep[i]->orig_exp;
2668       }
2669
2670       std::sort(to_keep.begin(), to_keep.end(), WorkingLt());
2671
2672       std::vector<bool> tally(entries.size());
2673       std::vector<bool> backup(entries.size());
2674       std::vector<bool> working(entries.size());
2675       String flags;
2676
2677       for (unsigned i = 0; i != to_keep.size(); ++i) {
2678
2679         backup = tally;
2680
2681         merge(tally, to_keep[i]->exp);
2682
2683         String flags_to_keep = to_keep[i]->aff;
2684         bool something_changed;
2685         do {
2686           something_changed = false;
2687           for (unsigned j = 0; j != flags_to_keep.size(); ++j) {
2688             flags.assign(flags_to_keep.data(), j);
2689             flags.append(flags_to_keep.data(j+1),
2690                          flags_to_keep.size() - (j+1));
2691             working = backup;
2692             exp_buf.reset();
2693             exp_list = lang->expand(to_keep[i]->word, flags, exp_buf);
2694             for (WordAff * q = exp_list; q; q = q->next) {
2695               CML_Table::iterator e = table.find(q->word);
2696               working[e->rank] = true;
2697             }
2698             if (working == tally) {
2699               flags_to_keep = flags;
2700               something_changed = true;
2701               break;
2702             }
2703           }
2704         } while (something_changed);
2705
2706         if (flags_to_keep != to_keep[i]->aff) {
2707           memcpy(to_keep[i]->aff, flags_to_keep.str(), flags_to_keep.size() + 1);
2708         }
2709       }
2710
2711     }
2712
2713     // Finally print the resulting list
2714
2715     //printf("XXX %d %d\n", to_keep.size(), to_keep_exp.size());
2716     //for (int i = 0; i != to_keep_exp.size(); ++i) {
2717     //  printf(">>> %d %d\n", i, (int)to_keep_exp[i]);
2718     //}
2719
2720     for (unsigned i = 0; i != to_keep.size(); ++i) {
2721       COUT << oconv(to_keep[i]->word);
2722       if (to_keep[i]->aff[0]) {
2723         COUT << '/';
2724         COUT << oconv(to_keep[i]->aff);
2725       }
2726       COUT << '\n';
2727     }
2728     for (unsigned i = 0; i != to_keep_exp.size(); ++i) {
2729       if (!to_keep_exp[i]) {
2730         assert(!entries[i]->aff);
2731         COUT.printf("%s\n", oconv(entries[i]->word));
2732       }
2733     }
2734   }
2735
2736   p = table.begin();
2737   for (; p != end; ++p)
2738   {
2739     if (p->aff) free(p->aff);
2740     p->aff = 0;
2741   }
2742 }
2743
2744
2745 //////////////////////////
2746 //
2747 // dump affix
2748 //
2749
2750 void dump_affix()
2751 {
2752   FStream in;
2753   EXIT_ON_ERR(aspeller::open_affix_file(*options, in));
2754
2755   String line;
2756   while (in.getline(line))
2757     COUT << line << '\n';
2758 }
2759
2760
2761
2762 ///////////////////////////////////////////////////////////////////////
2763
2764
2765 ///////////////////////////
2766 //
2767 // print_help
2768 //
2769
2770 void print_help_line(char abrv, char dont_abrv, const char * name,
2771                      KeyInfoType type, const char * desc, bool no_dont = false)
2772 {
2773   String command;
2774   if (abrv != '\0') {
2775     command += '-';
2776     command += abrv;
2777     if (dont_abrv != '\0') {
2778       command += '|';
2779       command += '-';
2780       command += dont_abrv;
2781     }
2782     command += ',';
2783   }
2784   command += "--";
2785   if (type == KeyInfoBool && !no_dont) command += "[dont-]";
2786   if (type == KeyInfoList) command += "add|rem-";
2787   command += name;
2788   if (type == KeyInfoString || type == KeyInfoList)
2789     command += "=<str>";
2790   if (type == KeyInfoInt)
2791     command += "=<int>";
2792   const char * tdesc = _(desc);
2793   printf("  %-27s %s\n", command.c_str(), tdesc); // FIXME: consider word wrapping
2794 }
2795
2796 namespace acommon {
2797   PosibErr<ConfigModule *> get_dynamic_filter(Config * config, ParmStr value);
2798 }
2799
2800 static const char * usage_text[] =
2801 {
2802   /* TRANSLATORS: These should all be formated to fit in 80 column or
2803      less */
2804   N_("Usage: aspell [options] <command>"),
2805   N_("<command> is one of:"),
2806   N_("  -?|usage         display a brief usage message"),
2807   N_("  help             display a detailed help message"),
2808   N_("  -c|check <file>  to check a file"),
2809   N_("  -a|pipe          \"ispell -a\" compatibility mode"),
2810   N_("  [dump] config    dumps the current configuration to stdout"),
2811   N_("  config <key>     prints the current value of an option"),
2812   N_("  [dump] dicts | filters | modes"),
2813   N_("    lists available dictionaries / filters / filter modes"),
2814   N_("[options] is any of the following:")
2815 };
2816 static const unsigned usage_text_size = sizeof(usage_text)/sizeof(const char *);
2817
2818 static const char * help_text[] =
2819 {
2820   usage_text[0],
2821   "",
2822   usage_text[1],
2823   usage_text[2],
2824   usage_text[3],
2825   usage_text[4],
2826   usage_text[5],
2827   N_("  list             produce a list of misspelled words from standard input"),
2828   usage_text[6],
2829   usage_text[7],
2830   N_("  soundslike       returns the sounds like equivalent for each word entered"),
2831   N_("  munch            generate possible root words and affixes"),
2832   N_("  expand [1-4]     expands affix flags"),
2833   N_("  clean [strict]   cleans a word list so that every line is a valid word"),
2834   //N_("  filter           passes standard input through filters"),
2835   N_("  -v|version       prints a version line"),
2836   N_("  munch-list [simple] [single|multi] [keep]"),
2837   N_("    reduce the size of a word list via affix compression"),
2838   N_("  conv <from> <to> [<norm-form>]"),
2839   N_("    converts from one encoding to another"),
2840   N_("  norm (<norm-map> | <from> <norm-map> <to>) [<norm-form>]"),
2841   N_("    perform Unicode normalization"),
2842   usage_text[8],
2843   usage_text[9],
2844   N_("  dump|create|merge master|personal|repl [<name>]"),
2845   N_("    dumps, creates or merges a master, personal, or replacement dictionary."),
2846   "",
2847   /* TRANSLATORS: "none", "internal" and "strict" are literal values
2848      and should not be translated. */
2849   N_("  <norm-form>      normalization form to use, either none, internal, or strict"),
2850   "",
2851   usage_text[10],
2852   ""
2853 };
2854 static const unsigned help_text_size = sizeof(help_text)/sizeof(const char *);
2855
2856 void print_help (bool verbose) {
2857   load_all_filters(options);
2858   if (verbose) {
2859     printf(_("\n"
2860              "Aspell %s.  Copyright 2000-2011 by Kevin Atkinson.\n"
2861              "\n"), aspell_version_string());
2862     for (unsigned i = 0; i < help_text_size; ++i)
2863       puts(gt_(help_text[i]));
2864   } else {
2865     for (unsigned i = 0; i < usage_text_size; ++i)
2866       puts(gt_(usage_text[i]));
2867   }
2868   StackPtr<KeyInfoEnumeration> els(options->possible_elements(true,false));
2869   const KeyInfo * k;
2870   while (k = els->next(), k) {
2871     if (k->desc == 0 || k->flags & KEYINFO_HIDDEN) continue;
2872     if (!verbose && !(k->flags & KEYINFO_COMMON)) continue;
2873     const PossibleOption * o = find_option(k->name);
2874     const char * name = k->name;
2875     print_help_line(o->abrv,
2876                     strncmp((o+1)->name, "dont-", 5) == 0 ? (o+1)->abrv : '\0',
2877                     name, k->type, k->desc);
2878     if (verbose && strcmp(name, "mode") == 0) {
2879       for (const ModeAbrv * j = mode_abrvs;
2880            j != mode_abrvs_end;
2881            ++j)
2882       {
2883         print_help_line(j->abrv, '\0', j->mode, KeyInfoBool, j->desc, true);
2884       }
2885     }
2886   }
2887
2888   if (verbose) {
2889     //
2890     putchar('\n');
2891     putchar('\n');
2892     puts(
2893       _("Available Dictionaries:\n"
2894         "    Dictionaries can be selected directly via the \"-d\" or \"master\"\n"
2895         "    option.  They can also be selected indirectly via the \"lang\",\n"
2896         "    \"variety\", and \"size\" options.\n"));
2897
2898     const DictInfoList * dlist = get_dict_info_list(options);
2899
2900     StackPtr<DictInfoEnumeration> dels(dlist->elements());
2901
2902     const DictInfo * entry;
2903
2904     while ( (entry = dels->next()) != 0)
2905     {
2906       printf("  %s\n", entry->name);
2907     }
2908
2909
2910     //
2911     putchar('\n');
2912     putchar('\n');
2913     fputs(
2914       _("Available Filters (and associated options):\n"
2915         "    Filters can be added or removed via the \"filter\" option.\n"),
2916       stdout);
2917     for (Vector<ConfigModule>::const_iterator m = options->filter_modules.begin();
2918          m != options->filter_modules.end();
2919          ++m)
2920     {
2921       printf(_("\n  %s filter: %s\n"), m->name, gt_(m->desc));
2922       for (k = m->begin; k != m->end; ++k) {
2923         const PossibleOption * o = find_option(k->name);
2924         const char * name = k->name;
2925         const KeyInfo * ok = options->keyinfo(name + 2);
2926         if (k == ok) name += 2;
2927         print_help_line(o->abrv,
2928                         strncmp((o+1)->name, "dont-", 5) == 0 ? (o+1)->abrv : '\0',
2929                         name, k->type, k->desc);
2930       }
2931     }
2932
2933     //
2934     putchar('\n');
2935     putchar('\n');
2936     puts(
2937       /* TRANSLATORS: This should be formated to fit in 80 column or less */
2938       _("Available Filter Modes:\n"
2939         "    Filter Modes are reconfigured combinations of filters optimized for\n"
2940         "    files of a specific type. A mode is selected via the \"mode\" option.\n"
2941         "    This will happen implicitly if Aspell is able to identify the file\n"
2942         "    type from the extension, and possibility the contents, of the file.\n"));
2943
2944     EXIT_ON_ERR_SET(available_filter_modes(options), StringPairEnumeration *, els);
2945     StringPair sp;
2946     while (!els->at_end()) {
2947       sp = els->next();
2948       printf("  %-14s %s\n", sp.first, gt_(sp.second));
2949     }
2950     delete els;
2951   }
2952 }
2953