src/ispell/correct.cpp

   1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
   2 /*
   3  * correct.c - Routines to manage the higher-level aspects of spell-checking
   4  *
   5  * This code originally resided in ispell.c, but was moved here to keep
   6  * file sizes smaller.
   7  *
   8  * Copyright (c), 1983, by Pace Willisson
   9  *
  10  * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
  11  * All rights reserved.
  12  *
  13  * Redistribution and use in source and binary forms, with or without
  14  * modification, are permitted provided that the following conditions
  15  * are met:
  16  *
  17  * 1. Redistributions of source code must retain the above copyright
  18  *    notice, this list of conditions and the following disclaimer.
  19  * 2. Redistributions in binary form must reproduce the above copyright
  20  *    notice, this list of conditions and the following disclaimer in the
  21  *    documentation and/or other materials provided with the distribution.
  22  * 3. All modifications to the source code must be clearly marked as
  23  *    such.  Binary redistributions based on modified source code
  24  *    must be clearly marked as modified versions in the documentation
  25  *    and/or other materials provided with the distribution.
  26  * 4. All advertising materials mentioning features or use of this software
  27  *    must display the following acknowledgment:
  28  *      This product includes software developed by Geoff Kuenning and
  29  *      other unpaid contributors.
  30  * 5. The name of Geoff Kuenning may not be used to endorse or promote
  31  *    products derived from this software without specific prior
  32  *    written permission.
  33  *
  34  * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
  35  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  36  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  37  * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
  38  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  39  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  40  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  41  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  42  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  43  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  44  * SUCH DAMAGE.
  45  */
  46
  47 /*
  48  * $Log$
  49  * Revision 1.4  2003/08/14 17:51:26  dom
  50  * update license - exception clause should be Lesser GPL
  51  *
  52  * Revision 1.3  2003/07/28 20:40:25  dom
  53  * fix up the license clause, further win32-registry proof some directory getting functions
  54  *
  55  * Revision 1.2  2003/07/16 22:52:35  dom
  56  * LGPL + exception license
  57  *
  58  * Revision 1.1  2003/07/15 01:15:04  dom
  59  * ispell enchant backend
  60  *
  61  * Revision 1.2  2003/01/29 05:50:11  hippietrail
  62  *
  63  * Fixed my mess in EncodingManager.
  64  * Changed many C casts to C++ casts.
  65  *
  66  * Revision 1.1  2003/01/24 05:52:31  hippietrail
  67  *
  68  * Refactored ispell code. Old ispell global variables had been put into
  69  * an allocated structure, a pointer to which was passed to many functions.
  70  * I have now made all such functions and variables private members of the
  71  * ISpellChecker class. It was C OO, now it's C++ OO.
  72  *
  73  * I've fixed the makefiles and tested compilation but am unable to test
  74  * operation. Please back out my changes if they cause problems which
  75  * are not obvious or easy to fix.
  76  *
  77  * Revision 1.7  2002/09/19 05:31:15  hippietrail
  78  *
  79  * More Ispell cleanup.  Conditional globals and DEREF macros are removed.
  80  * K&R function declarations removed, converted to Doxygen style comments
  81  * where possible.  No code has been changed (I hope).  Compiles for me but
  82  * unable to test.
  83  *
  84  * Revision 1.6  2002/09/17 03:03:28  hippietrail
  85  *
  86  * After seeking permission on the developer list I've reformatted all the
  87  * spelling source which seemed to have parts which used 2, 3, 4, and 8
  88  * spaces for tabs.  It should all look good with our standard 4-space
  89  * tabs now.
  90  * I've concentrated just on indentation in the actual code.  More prettying
  91  * could be done.
  92  * * NO code changes were made *
  93  *
  94  * Revision 1.5  2002/09/13 17:20:12  mpritchett
  95  * Fix more warnings for Linux build
  96  *
  97  * Revision 1.4  2002/03/06 08:27:16  fjfranklin
  98  * o Only activate compound handling when the hash file says so (Per Larsson)
  99  *
 100  * Revision 1.3  2001/05/14 09:52:50  hub
 101  * Removed newMain.c from GNUmakefile.am
 102  *
 103  * C++ comments are not C comment. Changed to C comments
 104  *
 105  * Revision 1.2  2001/05/12 16:05:42  thomasf
 106  * Big pseudo changes to ispell to make it pass around a structure rather
 107  * than rely on all sorts of gloabals willy nilly here and there.  Also
 108  * fixed our spelling class to work with accepting suggestions once more.
 109  * This code is dirty, gross and ugly (not to mention still not supporting
 110  * multiple hash sized just yet) but it works on my machine and will no
 111  * doubt break other machines.
 112  *
 113  * Revision 1.1  2001/04/15 16:01:24  tomas_f
 114  * moving to spell/xp
 115  *
 116  * Revision 1.2  1999/10/05 16:17:28  paul
 117  * Fixed build, and other tidyness.
 118  * Spell dialog enabled by default, with keyboard binding of F7.
 119  *
 120  * Revision 1.1  1999/09/29 23:33:32  justin
 121  * Updates to the underlying ispell-based code to support suggested corrections.
 122  *
 123  * Revision 1.59  1995/08/05  23:19:43  geoff
 124  * Fix a bug that caused offsets for long lines to be confused if the
 125  * line started with a quoting uparrow.
 126  *
 127  * Revision 1.58  1994/11/02  06:56:00  geoff
 128  * Remove the anyword feature, which I've decided is a bad idea.
 129  *
 130  * Revision 1.57  1994/10/26  05:12:39  geoff
 131  * Try boundary characters when inserting or substituting letters, except
 132  * (naturally) at word boundaries.
 133  *
 134  * Revision 1.56  1994/10/25  05:46:30  geoff
 135  * Fix an assignment inside a conditional that could generate spurious
 136  * warnings (as well as being bad style).  Add support for the FF_ANYWORD
 137  * option.
 138  *
 139  * Revision 1.55  1994/09/16  04:48:24  geoff
 140  * Don't pass newlines from the input to various other routines, and
 141  * don't assume that those routines leave the input unchanged.
 142  *
 143  * Revision 1.54  1994/09/01  06:06:41  geoff
 144  * Change erasechar/killchar to uerasechar/ukillchar to avoid
 145  * shared-library problems on HP systems.
 146  *
 147  * Revision 1.53  1994/08/31  05:58:38  geoff
 148  * Add code to handle extremely long lines in -a mode without splitting
 149  * words or reporting incorrect offsets.
 150  *
 151  * Revision 1.52  1994/05/25  04:29:24  geoff
 152  * Fix a bug that caused line widths to be calculated incorrectly when
 153  * displaying lines containing tabs.  Fix a couple of places where
 154  * characters were sign-extended incorrectly, which could cause 8-bit
 155  * characters to be displayed wrong.
 156  *
 157  * Revision 1.51  1994/05/17  06:44:05  geoff
 158  * Add support for controlled compound formation and the COMPOUNDONLY
 159  * option to affix flags.
 160  *
 161  * Revision 1.50  1994/04/27  05:20:14  geoff
 162  * Allow compound words to be formed from more than two components
 163  *
 164  * Revision 1.49  1994/04/27  01:50:31  geoff
 165  * Add support to correctly capitalize words generated as a result of a
 166  * missing-space suggestion.
 167  *
 168  * Revision 1.48  1994/04/03  23:23:02  geoff
 169  * Clean up the code in missingspace() to be a bit simpler and more
 170  * efficient.
 171  *
 172  * Revision 1.47  1994/03/15  06:24:23  geoff
 173  * Fix the +/-/~ commands to be independent.  Allow the + command to
 174  * receive a suffix which is a deformatter type (currently hardwired to
 175  * be either tex or nroff/troff).
 176  *
 177  * Revision 1.46  1994/02/21  00:20:03  geoff
 178  * Fix some bugs that could cause bad displays in the interaction between
 179  * TeX parsing and string characters.  Show_char now will not overrun
 180  * the inverse-video display area by accident.
 181  *
 182  * Revision 1.45  1994/02/14  00:34:51  geoff
 183  * Fix correct to accept length parameters for ctok and itok, so that it
 184  * can pass them to the to/from ichar routines.
 185  *
 186  * Revision 1.44  1994/01/25  07:11:22  geoff
 187  * Get rid of all old RCS log lines in preparation for the 3.1 release.
 188  *
 189  */
 190
 191 #include <stdlib.h>
 192 #include <string.h>
 193 #include <ctype.h>
 194 #include "ispell_checker.h"
 195 #include "msgs.h"
 196
 197 /*
 198 extern void upcase P ((ichar_t * string));
 199 extern void lowcase P ((ichar_t * string));
 200 extern ichar_t * strtosichar P ((char * in, int canonical));
 201
 202 int compoundflag = COMPOUND_CONTROLLED;
 203 */
 204
 205 /*
 206  * \param a
 207  * \param b
 208  * \param canonical NZ for canonical string chars
 209  *
 210  * \return
 211  */
 212 int
 213 ISpellChecker::casecmp (char *a, char *b, int canonical)
 214 {
 215     register ichar_t *  ap;
 216     register ichar_t *  bp;
 217     ichar_t             inta[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
 218     ichar_t             intb[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
 219
 220     strtoichar (inta, a, sizeof inta, canonical);
 221     strtoichar (intb, b, sizeof intb, canonical);
 222     for (ap = inta, bp = intb;  *ap != 0;  ap++, bp++)
 223         {
 224                 if (*ap != *bp)
 225             {
 226                         if (*bp == '\0')
 227                                 return m_hashheader.sortorder[*ap];
 228                         else if (mylower (*ap))
 229                         {
 230                                 if (mylower (*bp)  ||  mytoupper (*ap) != *bp)
 231                                         return static_cast<int>(m_hashheader.sortorder[*ap])
 232                                           - static_cast<int>(m_hashheader.sortorder[*bp]);
 233                         }
 234                         else
 235                         {
 236                                 if (myupper (*bp)  ||  mytolower (*ap) != *bp)
 237                                         return static_cast<int>(m_hashheader.sortorder[*ap])
 238                                           - static_cast<int>(m_hashheader.sortorder[*bp]);
 239                         }
 240             }
 241         }
 242     if (*bp != '\0')
 243                 return -static_cast<int>(m_hashheader.sortorder[*bp]);
 244     for (ap = inta, bp = intb;  *ap;  ap++, bp++)
 245         {
 246                 if (*ap != *bp)
 247             {
 248                         return static_cast<int>(m_hashheader.sortorder[*ap])
 249                           - static_cast<int>(m_hashheader.sortorder[*bp]);
 250             }
 251         }
 252     return 0;
 253 }
 254
 255 /*
 256  * \param word
 257  */
 258 void
 259 ISpellChecker::makepossibilities (ichar_t *word)
 260 {
 261     register int        i;
 262
 263     for (i = 0; i < MAXPOSSIBLE; i++)
 264         m_possibilities[i][0] = 0;
 265     m_pcount = 0;
 266     m_maxposslen = 0;
 267     m_easypossibilities = 0;
 268
 269 #ifndef NO_CAPITALIZATION_SUPPORT
 270     wrongcapital (word);
 271 #endif
 272
 273 /*
 274  * according to Pollock and Zamora, CACM April 1984 (V. 27, No. 4),
 275  * page 363, the correct order for this is:
 276  * OMISSION = TRANSPOSITION > INSERTION > SUBSTITUTION
 277  * thus, it was exactly backwards in the old version. -- PWP
 278  */
 279
 280     if (m_pcount < MAXPOSSIBLE)
 281                 missingletter (word);           /* omission */
 282     if (m_pcount < MAXPOSSIBLE)
 283                 transposedletter (word);        /* transposition */
 284     if (m_pcount < MAXPOSSIBLE)
 285                 extraletter (word);             /* insertion */
 286     if (m_pcount < MAXPOSSIBLE)
 287                 wrongletter (word);             /* substitution */
 288
 289     if ((m_hashheader.compoundflag != COMPOUND_ANYTIME)  &&
 290                   m_pcount < MAXPOSSIBLE)
 291                 missingspace (word);    /* two words */
 292
 293 }
 294
 295 /*
 296  * \param word
 297  *
 298  * \return
 299  */
 300 int
 301 ISpellChecker::insert (ichar_t *word)
 302 {
 303     register int        i;
 304     register char *     realword;
 305
 306     realword = ichartosstr (word, 0);
 307     for (i = 0; i < m_pcount; i++)
 308         {
 309                 if (strcmp (m_possibilities[i], realword) == 0)
 310                         return (0);
 311         }
 312
 313     strcpy (m_possibilities[m_pcount++], realword);
 314     i = strlen (realword);
 315     if (i > m_maxposslen)
 316                 m_maxposslen = i;
 317     if (m_pcount >= MAXPOSSIBLE)
 318                 return (-1);
 319     else
 320                 return (0);
 321 }
 322
 323 #ifndef NO_CAPITALIZATION_SUPPORT
 324 /*
 325  * \param word
 326  */
 327 void
 328 ISpellChecker::wrongcapital (ichar_t *word)
 329 {
 330     ichar_t             newword[INPUTWORDLEN + MAXAFFIXLEN];
 331
 332     /*
 333     ** When the third parameter to "good" is nonzero, it ignores
 334     ** case.  If the word matches this way, "ins_cap" will recapitalize
 335     ** it correctly.
 336     */
 337     if (good (word, 0, 1, 0, 0))
 338         {
 339                 icharcpy (newword, word);
 340                 upcase (newword);
 341                 ins_cap (newword, word);
 342         }
 343 }
 344 #endif
 345
 346 /*
 347  * \param word
 348  */
 349 void
 350 ISpellChecker::wrongletter (ichar_t *word)
 351 {
 352     register int        i;
 353     register int        j;
 354     register int        n;
 355     ichar_t             savechar;
 356     ichar_t             newword[INPUTWORDLEN + MAXAFFIXLEN];
 357
 358     n = icharlen (word);
 359     icharcpy (newword, word);
 360 #ifndef NO_CAPITALIZATION_SUPPORT
 361     upcase (newword);
 362 #endif
 363
 364     for (i = 0; i < n; i++)
 365         {
 366                 savechar = newword[i];
 367                 for (j=0; j < m_Trynum; ++j)
 368                 {
 369                         if (m_Try[j] == savechar)
 370                                 continue;
 371                         else if (isboundarych (m_Try[j])  &&  (i == 0  ||  i == n - 1))
 372                                 continue;
 373                         newword[i] = m_Try[j];
 374                         if (good (newword, 0, 1, 0, 0))
 375                         {
 376                                 if (ins_cap (newword, word) < 0)
 377                                         return;
 378                         }
 379                 }
 380                 newword[i] = savechar;
 381         }
 382 }
 383
 384 /*
 385  * \param word
 386  */
 387 void
 388 ISpellChecker::extraletter (ichar_t *word)
 389 {
 390     ichar_t             newword[INPUTWORDLEN + MAXAFFIXLEN];
 391     register ichar_t *  p;
 392     register ichar_t *  r;
 393
 394     if (icharlen (word) < 2)
 395                 return;
 396
 397     icharcpy (newword, word + 1);
 398     for (p = word, r = newword;  *p != 0;  )
 399         {
 400                 if (good (newword, 0, 1, 0, 0))
 401                 {
 402                         if (ins_cap (newword, word) < 0)
 403                                 return;
 404                 }
 405                 *r++ = *p++;
 406         }
 407 }
 408
 409 /*
 410  * \param word
 411  */
 412 void
 413 ISpellChecker::missingletter (ichar_t *word)
 414 {
 415     ichar_t             newword[INPUTWORDLEN + MAXAFFIXLEN + 1];
 416     register ichar_t *  p;
 417     register ichar_t *  r;
 418     register int        i;
 419
 420     icharcpy (newword + 1, word);
 421     for (p = word, r = newword;  *p != 0;  )
 422         {
 423                 for (i = 0;  i < m_Trynum;  i++)
 424             {
 425                         if (isboundarych (m_Try[i])  &&  r == newword)
 426                                 continue;
 427                         *r = m_Try[i];
 428                         if (good (newword, 0, 1, 0, 0))
 429                         {
 430                                 if (ins_cap (newword, word) < 0)
 431                                         return;
 432                         }
 433             }
 434                 *r++ = *p++;
 435         }
 436     for (i = 0;  i < m_Trynum;  i++)
 437         {
 438                 if (isboundarych (m_Try[i]))
 439                         continue;
 440                 *r = m_Try[i];
 441                 if (good (newword, 0, 1, 0, 0))
 442                 {
 443                         if (ins_cap (newword, word) < 0)
 444                                 return;
 445                 }
 446         }
 447 }
 448
 449 /*
 450  * \param word
 451  */
 452 void ISpellChecker::missingspace (ichar_t *word)
 453 {
 454     ichar_t             firsthalf[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
 455     int                 firstno;        /* Index into first */
 456     ichar_t *           firstp;         /* Ptr into current firsthalf word */
 457     ichar_t             newword[INPUTWORDLEN + MAXAFFIXLEN + 1];
 458     int                 nfirsthalf;     /* No. words saved in 1st half */
 459     int                 nsecondhalf;    /* No. words saved in 2nd half */
 460     register ichar_t *  p;
 461     ichar_t             secondhalf[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
 462     int                 secondno;       /* Index into second */
 463
 464     /*
 465     ** We don't do words of length less than 3;  this keeps us from
 466     ** splitting all two-letter words into two single letters.  We
 467     ** also don't do maximum-length words, since adding the space
 468     ** would exceed the size of the "possibilities" array.
 469     */
 470     nfirsthalf = icharlen (word);
 471     if (nfirsthalf < 3  ||  nfirsthalf >= INPUTWORDLEN + MAXAFFIXLEN - 1)
 472                 return;
 473     icharcpy (newword + 1, word);
 474     for (p = newword + 1;  p[1] != '\0';  p++)
 475         {
 476                 p[-1] = *p;
 477                 *p = '\0';
 478                 if (good (newword, 0, 1, 0, 0))
 479                 {
 480                         /*
 481                          * Save_cap must be called before good() is called on the
 482                          * second half, because it uses state left around by
 483                          * good().  This is unfortunate because it wastes a bit of
 484                          * time, but I don't think it's a significant performance
 485                          * problem.
 486                          */
 487                         nfirsthalf = save_cap (newword, word, firsthalf);
 488                         if (good (p + 1, 0, 1, 0, 0))
 489                         {
 490                                 nsecondhalf = save_cap (p + 1, p + 1, secondhalf);
 491                                 for (firstno = 0;  firstno < nfirsthalf;  firstno++)
 492                                 {
 493                                         firstp = &firsthalf[firstno][p - newword];
 494                                         for (secondno = 0;  secondno < nsecondhalf;  secondno++)
 495                                         {
 496                                                 *firstp = ' ';
 497                                                 icharcpy (firstp + 1, secondhalf[secondno]);
 498                                                 if (insert (firsthalf[firstno]) < 0)
 499                                                         return;
 500                                                 *firstp = '-';
 501                                                 if (insert (firsthalf[firstno]) < 0)
 502                                                         return;
 503                                         }
 504                                 }
 505                         }
 506                 }
 507         }
 508 }
 509
 510 /*
 511  * \param word
 512  * \param pfxopts Options to apply to prefixes
 513  */
 514 int
 515 ISpellChecker::compoundgood (ichar_t *word, int pfxopts)
 516 {
 517     ichar_t             newword[INPUTWORDLEN + MAXAFFIXLEN];
 518     register ichar_t *  p;
 519     register ichar_t    savech;
 520     long                secondcap;      /* Capitalization of 2nd half */
 521
 522     /*
 523     ** If compoundflag is COMPOUND_NEVER, compound words are never ok.
 524     */
 525     if (m_hashheader.compoundflag == COMPOUND_NEVER)
 526                 return 0;
 527     /*
 528     ** Test for a possible compound word (for languages like German that
 529     ** form lots of compounds).
 530     **
 531     ** This is similar to missingspace, except we quit on the first hit,
 532     ** and we won't allow either member of the compound to be a single
 533     ** letter.
 534     **
 535     ** We don't do words of length less than 2 * compoundmin, since
 536     ** both halves must at least compoundmin letters.
 537     */
 538     if (icharlen (word) < 2 * m_hashheader.compoundmin)
 539                 return 0;
 540     icharcpy (newword, word);
 541     p = newword + m_hashheader.compoundmin;
 542     for (  ;  p[m_hashheader.compoundmin - 1] != 0;  p++)
 543         {
 544                 savech = *p;
 545                 *p = 0;
 546                 if (good (newword, 0, 0, pfxopts, FF_COMPOUNDONLY))
 547             {
 548                         *p = savech;
 549                         if (good (p, 0, 1, FF_COMPOUNDONLY, 0)
 550                           ||  compoundgood (p, FF_COMPOUNDONLY))
 551                         {
 552                                 secondcap = whatcap (p);
 553                                 switch (whatcap (newword))
 554                                 {
 555                                 case ANYCASE:
 556                                 case CAPITALIZED:
 557                                 case FOLLOWCASE:        /* Followcase can have l.c. suffix */
 558                                         return secondcap == ANYCASE;
 559                                 case ALLCAPS:
 560                                         return secondcap == ALLCAPS;
 561                                 }
 562                         }
 563             }
 564                 else
 565                         *p = savech;
 566         }
 567     return 0;
 568 }
 569
 570 /*
 571  * \param word
 572  */
 573 void
 574 ISpellChecker::transposedletter (ichar_t *word)
 575 {
 576     ichar_t             newword[INPUTWORDLEN + MAXAFFIXLEN];
 577     register ichar_t *  p;
 578     register ichar_t    temp;
 579
 580     icharcpy (newword, word);
 581     for (p = newword;  p[1] != 0;  p++)
 582         {
 583                 temp = *p;
 584                 *p = p[1];
 585                 p[1] = temp;
 586                 if (good (newword, 0, 1, 0, 0))
 587             {
 588                         if (ins_cap (newword, word) < 0)
 589                                 return;
 590             }
 591                 temp = *p;
 592                 *p = p[1];
 593                 p[1] = temp;
 594         }
 595 }
 596
 597 /*!
 598  * Insert one or more correctly capitalized versions of word
 599  *
 600  * \param word
 601  * \param pattern
 602  *
 603  * \return
 604  */
 605 int
 606 ISpellChecker::ins_cap (ichar_t *word, ichar_t *pattern)
 607 {
 608     int                 i;              /* Index into savearea */
 609     int                 nsaved;         /* No. of words saved */
 610     ichar_t             savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
 611
 612     nsaved = save_cap (word, pattern, savearea);
 613     for (i = 0;  i < nsaved;  i++)
 614         {
 615                 if (insert (savearea[i]) < 0)
 616                         return -1;
 617         }
 618     return 0;
 619 }
 620
 621 /*!
 622  * Save one or more correctly capitalized versions of word
 623  *
 624  * \param word Word to save
 625  * \param pattern Prototype capitalization pattern
 626  * \param savearea Room to save words
 627  *
 628  * \return
 629  */
 630 int
 631 ISpellChecker::save_cap (ichar_t *word, ichar_t *pattern,
 632                                         ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN])
 633 {
 634     int                 hitno;          /* Index into hits array */
 635     int                 nsaved;         /* Number of words saved */
 636     int                 preadd;         /* No. chars added to front of root */
 637     int                 prestrip;       /* No. chars stripped from front */
 638     int                 sufadd;         /* No. chars added to back of root */
 639     int                 sufstrip;       /* No. chars stripped from back */
 640
 641     if (*word == 0)
 642                 return 0;
 643
 644     for (hitno = m_numhits, nsaved = 0;  --hitno >= 0  &&  nsaved < MAX_CAPS;  )
 645         {
 646                 if (m_hits[hitno].prefix)
 647             {
 648                         prestrip = m_hits[hitno].prefix->stripl;
 649                         preadd = m_hits[hitno].prefix->affl;
 650             }
 651                 else
 652                         prestrip = preadd = 0;
 653                 if (m_hits[hitno].suffix)
 654             {
 655                         sufstrip = m_hits[hitno].suffix->stripl;
 656                         sufadd = m_hits[hitno].suffix->affl;
 657             }
 658                 else
 659                         sufadd = sufstrip = 0;
 660                 save_root_cap (word, pattern, prestrip, preadd,
 661                         sufstrip, sufadd,
 662                         m_hits[hitno].dictent, m_hits[hitno].prefix, m_hits[hitno].suffix,
 663                         savearea, &nsaved);
 664         }
 665     return nsaved;
 666 }
 667
 668 /*
 669  * \param word
 670  * \param pattern
 671  * \param prestrip
 672  * \param preadd
 673  * \param sufstrip
 674  * \param sufadd
 675  * \param firstdent
 676  * \param pfxent
 677  * \param sufent
 678  *
 679  * \return
 680  */
 681 int
 682 ISpellChecker::ins_root_cap (ichar_t *word, ichar_t *pattern,
 683                                  int prestrip, int preadd, int sufstrip, int sufadd,
 684                                  struct dent *firstdent, struct flagent *pfxent, struct flagent *sufent)
 685 {
 686     int                 i;              /* Index into savearea */
 687     ichar_t             savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
 688     int                 nsaved;         /* Number of words saved */
 689
 690     nsaved = 0;
 691     save_root_cap (word, pattern, prestrip, preadd, sufstrip, sufadd,
 692       firstdent, pfxent, sufent, savearea, &nsaved);
 693     for (i = 0;  i < nsaved;  i++)
 694         {
 695                 if (insert (savearea[i]) < 0)
 696                         return -1;
 697         }
 698     return 0;
 699 }
 700
 701 /* ARGSUSED */
 702 /*!
 703  * \param word Word to be saved
 704  * \param pattern Capitalization pattern
 705  * \param prestrip No. chars stripped from front
 706  * \param preadd No. chars added to front of root
 707  * \param sufstrip No. chars stripped from back
 708  * \param sufadd No. chars added to back of root
 709  * \param firstdent First dent for root
 710  * \param pfxent Pfx-flag entry for word
 711  * \param sufent Sfx-flag entry for word
 712  * \param savearea Room to save words
 713  * \param nsaved Number saved so far (updated)
 714  */
 715 void
 716 ISpellChecker::save_root_cap (ichar_t *word, ichar_t *pattern,
 717                                                   int prestrip, int preadd, int sufstrip, int sufadd,
 718                                                   struct dent *firstdent, struct flagent *pfxent, struct flagent *sufent,
 719                                                   ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN],
 720                                               int * nsaved)
 721 {
 722 #ifndef NO_CAPITALIZATION_SUPPORT
 723     register struct dent * dent;
 724 #endif /* NO_CAPITALIZATION_SUPPORT */
 725     int                 firstisupper;
 726     ichar_t             newword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
 727 #ifndef NO_CAPITALIZATION_SUPPORT
 728     register ichar_t *  p;
 729     int                 len;
 730     int                 i;
 731     int                 limit;
 732 #endif /* NO_CAPITALIZATION_SUPPORT */
 733
 734     if (*nsaved >= MAX_CAPS)
 735                 return;
 736     icharcpy (newword, word);
 737     firstisupper = myupper (pattern[0]);
 738 #ifdef NO_CAPITALIZATION_SUPPORT
 739     /*
 740     ** Apply the old, simple-minded capitalization rules.
 741     */
 742     if (firstisupper)
 743         {
 744                 if (myupper (pattern[1]))
 745                         upcase (newword);
 746                 else
 747             {
 748                         lowcase (newword);
 749                         newword[0] = mytoupper (newword[0]);
 750             }
 751         }
 752     else
 753                 lowcase (newword);
 754     icharcpy (savearea[*nsaved], newword);
 755     (*nsaved)++;
 756     return;
 757 #else /* NO_CAPITALIZATION_SUPPORT */
 758 #define flagsareok(dent)    \
 759     ((pfxent == NULL \
 760         ||  TSTMASKBIT (dent->mask, pfxent->flagbit)) \
 761       &&  (sufent == NULL \
 762         ||  TSTMASKBIT (dent->mask, sufent->flagbit)))
 763
 764     dent = firstdent;
 765     if ((dent->flagfield & (CAPTYPEMASK | MOREVARIANTS)) == ALLCAPS)
 766         {
 767                 upcase (newword);       /* Uppercase required */
 768                 icharcpy (savearea[*nsaved], newword);
 769                 (*nsaved)++;
 770                 return;
 771         }
 772     for (p = pattern;  *p;  p++)
 773         {
 774                 if (mylower (*p))
 775                         break;
 776         }
 777     if (*p == 0)
 778         {
 779                 upcase (newword);       /* Pattern was all caps */
 780                 icharcpy (savearea[*nsaved], newword);
 781                 (*nsaved)++;
 782                 return;
 783         }
 784     for (p = pattern + 1;  *p;  p++)
 785         {
 786                 if (myupper (*p))
 787                         break;
 788         }
 789     if (*p == 0)
 790         {
 791                 /*
 792                 ** The pattern was all-lower or capitalized.  If that's
 793                 ** legal, insert only that version.
 794                 */
 795                 if (firstisupper)
 796                 {
 797                         if (captype (dent->flagfield) == CAPITALIZED
 798                           ||  captype (dent->flagfield) == ANYCASE)
 799                         {
 800                                 lowcase (newword);
 801                                 newword[0] = mytoupper (newword[0]);
 802                                 icharcpy (savearea[*nsaved], newword);
 803                                 (*nsaved)++;
 804                                 return;
 805                         }
 806                 }
 807                 else
 808                 {
 809                         if (captype (dent->flagfield) == ANYCASE)
 810                         {
 811                                 lowcase (newword);
 812                                 icharcpy (savearea[*nsaved], newword);
 813                                 (*nsaved)++;
 814                                 return;
 815                         }
 816                 }
 817                 while (dent->flagfield & MOREVARIANTS)
 818                 {
 819                         dent = dent->next;
 820                         if (captype (dent->flagfield) == FOLLOWCASE
 821                           ||  !flagsareok (dent))
 822                                 continue;
 823                         if (firstisupper)
 824                         {
 825                                 if (captype (dent->flagfield) == CAPITALIZED)
 826                                 {
 827                                         lowcase (newword);
 828                                         newword[0] = mytoupper (newword[0]);
 829                                         icharcpy (savearea[*nsaved], newword);
 830                                         (*nsaved)++;
 831                                         return;
 832                                 }
 833                         }
 834                         else
 835                         {
 836                                 if (captype (dent->flagfield) == ANYCASE)
 837                                 {
 838                                         lowcase (newword);
 839                                         icharcpy (savearea[*nsaved], newword);
 840                                         (*nsaved)++;
 841                                         return;
 842                                 }
 843                         }
 844             }
 845         }
 846     /*
 847     ** Either the sample had complex capitalization, or the simple
 848     ** capitalizations (all-lower or capitalized) are illegal.
 849     ** Insert all legal capitalizations, including those that are
 850     ** all-lower or capitalized.  If the prototype is capitalized,
 851     ** capitalize all-lower samples.  Watch out for affixes.
 852     */
 853     dent = firstdent;
 854     p = strtosichar (dent->word, 1);
 855     len = icharlen (p);
 856     if (dent->flagfield & MOREVARIANTS)
 857                 dent = dent->next;      /* Skip place-holder entry */
 858     for (  ;  ;  )
 859         {
 860                 if (flagsareok (dent))
 861             {
 862                         if (captype (dent->flagfield) != FOLLOWCASE)
 863                         {
 864                                 lowcase (newword);
 865                                 if (firstisupper  ||  captype (dent->flagfield) == CAPITALIZED)
 866                                         newword[0] = mytoupper (newword[0]);
 867                                 icharcpy (savearea[*nsaved], newword);
 868                                 (*nsaved)++;
 869                                 if (*nsaved >= MAX_CAPS)
 870                                         return;
 871                         }
 872                         else
 873                         {
 874                                 /* Followcase is the tough one. */
 875                                 p = strtosichar (dent->word, 1);
 876                                 memmove (
 877                                   reinterpret_cast<char *>(newword + preadd),
 878                                   reinterpret_cast<char *>(p + prestrip),
 879                                   (len - prestrip - sufstrip) * sizeof (ichar_t));
 880                                 if (myupper (p[prestrip]))
 881                                 {
 882                                         for (i = 0;  i < preadd;  i++)
 883                                                 newword[i] = mytoupper (newword[i]);
 884                                 }
 885                                 else
 886                                 {
 887                                         for (i = 0;  i < preadd;  i++)
 888                                                 newword[i] = mytolower (newword[i]);
 889                                 }
 890                                 limit = len + preadd + sufadd - prestrip - sufstrip;
 891                                 i = len + preadd - prestrip - sufstrip;
 892                                 p += len - sufstrip - 1;
 893                                 if (myupper (*p))
 894                                 {
 895                                         for (p = newword + i;  i < limit;  i++, p++)
 896                                                 *p = mytoupper (*p);
 897                                 }
 898                                 else
 899                                 {
 900                                         for (p = newword + i;  i < limit;  i++, p++)
 901                                                 *p = mytolower (*p);
 902                                 }
 903                                 icharcpy (savearea[*nsaved], newword);
 904                                 (*nsaved)++;
 905                                 if (*nsaved >= MAX_CAPS)
 906                                         return;
 907                         }
 908             }
 909                 if ((dent->flagfield & MOREVARIANTS) == 0)
 910                         break;          /* End of the line */
 911                 dent = dent->next;
 912         }
 913     return;
 914 #endif /* NO_CAPITALIZATION_SUPPORT */
 915 }
 916
 917