src/ispell/good.cpp

   1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
   2 /*
   3  * good.c - see if a word or its root word
   4  * is in the dictionary.
   5  *
   6  * Pace Willisson, 1983
   7  *
   8  * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
   9  * All rights reserved.
  10  *
  11  * Redistribution and use in source and binary forms, with or without
  12  * modification, are permitted provided that the following conditions
  13  * are met:
  14  *
  15  * 1. Redistributions of source code must retain the above copyright
  16  *    notice, this list of conditions and the following disclaimer.
  17  * 2. Redistributions in binary form must reproduce the above copyright
  18  *    notice, this list of conditions and the following disclaimer in the
  19  *    documentation and/or other materials provided with the distribution.
  20  * 3. All modifications to the source code must be clearly marked as
  21  *    such.  Binary redistributions based on modified source code
  22  *    must be clearly marked as modified versions in the documentation
  23  *    and/or other materials provided with the distribution.
  24  * 4. All advertising materials mentioning features or use of this software
  25  *    must display the following acknowledgment:
  26  *      This product includes software developed by Geoff Kuenning and
  27  *      other unpaid contributors.
  28  * 5. The name of Geoff Kuenning may not be used to endorse or promote
  29  *    products derived from this software without specific prior
  30  *    written permission.
  31  *
  32  * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
  33  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  34  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  35  * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
  36  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  37  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  38  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  39  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  40  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  41  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  42  * SUCH DAMAGE.
  43  */
  44
  45 /*
  46  * $Log$
  47  * Revision 1.4  2003/08/14 17:51:26  dom
  48  * update license - exception clause should be Lesser GPL
  49  *
  50  * Revision 1.3  2003/07/28 20:40:25  dom
  51  * fix up the license clause, further win32-registry proof some directory getting functions
  52  *
  53  * Revision 1.2  2003/07/16 22:52:37  dom
  54  * LGPL + exception license
  55  *
  56  * Revision 1.1  2003/07/15 01:15:04  dom
  57  * ispell enchant backend
  58  *
  59  * Revision 1.2  2003/01/29 05:50:11  hippietrail
  60  *
  61  * Fixed my mess in EncodingManager.
  62  * Changed many C casts to C++ casts.
  63  *
  64  * Revision 1.1  2003/01/24 05:52:32  hippietrail
  65  *
  66  * Refactored ispell code. Old ispell global variables had been put into
  67  * an allocated structure, a pointer to which was passed to many functions.
  68  * I have now made all such functions and variables private members of the
  69  * ISpellChecker class. It was C OO, now it's C++ OO.
  70  *
  71  * I've fixed the makefiles and tested compilation but am unable to test
  72  * operation. Please back out my changes if they cause problems which
  73  * are not obvious or easy to fix.
  74  *
  75  * Revision 1.6  2003/01/06 18:48:38  dom
  76  * ispell cleanup, start of using new 'add' save features
  77  *
  78  * Revision 1.5  2002/09/19 05:31:15  hippietrail
  79  *
  80  * More Ispell cleanup.  Conditional globals and DEREF macros are removed.
  81  * K&R function declarations removed, converted to Doxygen style comments
  82  * where possible.  No code has been changed (I hope).  Compiles for me but
  83  * unable to test.
  84  *
  85  * Revision 1.4  2002/09/17 03:03:29  hippietrail
  86  *
  87  * After seeking permission on the developer list I've reformatted all the
  88  * spelling source which seemed to have parts which used 2, 3, 4, and 8
  89  * spaces for tabs.  It should all look good with our standard 4-space
  90  * tabs now.
  91  * I've concentrated just on indentation in the actual code.  More prettying
  92  * could be done.
  93  * * NO code changes were made *
  94  *
  95  * Revision 1.3  2002/09/13 17:20:12  mpritchett
  96  * Fix more warnings for Linux build
  97  *
  98  * Revision 1.2  2001/05/12 16:05:42  thomasf
  99  * Big pseudo changes to ispell to make it pass around a structure rather
  100  * than rely on all sorts of globals willy nilly here and there.  Also
 101  * fixed our spelling class to work with accepting suggestions once more.
 102  * This code is dirty, gross and ugly (not to mention still not supporting
 103  * multiple hash sized just yet) but it works on my machine and will no
 104  * doubt break other machines.
 105  *
 106  * Revision 1.1  2001/04/15 16:01:24  tomas_f
 107  * moving to spell/xp
 108  *
 109  * Revision 1.5  2000/02/09 22:35:25  sterwill
 110  * Clean up some warnings
 111  *
 112  * Revision 1.4  1998/12/29 14:55:32  eric
 113  *
 114  * I've doctored the ispell code pretty extensively here.  It is now
 115  * warning-free on Win32.  It also *works* on Win32 now, since I
 116  * replaced all the I/O calls with ANSI standard ones.
 117  *
 118  * Revision 1.3  1998/12/28 23:11:30  eric
 119  *
 120  * modified spell code and integration to build on Windows.
 121  * This is still a hack.
 122  *
 123  * Actually, it doesn't yet WORK on Windows.  It just builds.
 124  * SpellCheckInit is failing for some reason.
 125  *
 126  * Revision 1.2  1998/12/28 22:16:22  eric
 127  *
 128  * These changes begin to incorporate the spell checker into AbiWord.  Most
 129  * of this is a hack.
 130  *
 131  * 1.  added other/spell to the -I list in config/abi_defs
 132  * 2.  replaced other/spell/Makefile with one which is more like
 133  *      our build system.
 134  * 3.  added other/spell to other/Makefile so that the build will now
 135  *      dive down and build the spell check library.
 136  * 4.  added the AbiSpell library to the Makefiles in wp/main
 137  * 5.  added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp.
 138  *      This call is a HACK and should be replaced with something
 139  *      proper later.
 140  * 6.  added code to fv_View.cpp as follows:
 141  *      whenever you double-click on a word, the spell checker
 142  *      verifies that word and prints its status to stdout.
 143  *
 144  * Caveats:
 145  * 1.  This will break the Windows build.  I'm going to work on fixing it
 146  *      now.
 147  * 2.  This only works if your dictionary is in /usr/lib/ispell/american.hash.
 148  *      The dictionary location is currently hard-coded.  This will be
 149  *      fixed as well.
 150  *
 151  * Anyway, such as it is, it works.
 152  *
 153  * Revision 1.1  1998/12/28 18:04:43  davet
 154  * Spell checker code stripped from ispell.  At this point, there are
 155  * two external routines...  the Init routine, and a check-a-word routine
 156  * which returns a boolean value, and takes a 16 bit char string.
 157  * The code resembles the ispell code as much as possible still.
 158  *
 159  * Revision 1.43  1994/11/02  06:56:05  geoff
 160  * Remove the anyword feature, which I've decided is a bad idea.
 161  *
 162  * Revision 1.42  1994/10/25  05:45:59  geoff
 163  * Add support for an affix that will work with any word, even if there's
 164  * no explicit flag.
 165  *
 166  * Revision 1.41  1994/05/24  06:23:06  geoff
 167  * Let tgood decide capitalization questions, rather than doing it ourselves.
 168  *
 169  * Revision 1.40  1994/05/17  06:44:10  geoff
 170  * Add support for controlled compound formation and the COMPOUNDONLY
 171  * option to affix flags.
 172  *
 173  * Revision 1.39  1994/01/25  07:11:31  geoff
 174  * Get rid of all old RCS log lines in preparation for the 3.1 release.
 175  *
 176  */
 177
 178 #include <ctype.h>
 179 #include <stdio.h>
 180 #include <stdlib.h>
 181 #include <string.h>
 182
 183 #include "ispell_checker.h"
 184
 185
  /*
   * Forward declaration of good() with the same five parameters as the
   * definition further down in this file.
   *
   * NOTE(review): P is presumably the prototype-wrapping macro supplied by
   * the project headers -- confirm.  The definition in this file is the
   * member function ISpellChecker::good, so this free-function declaration
   * looks like a leftover; verify nothing depends on it before removing it.
   */
  186 int             good P ((ichar_t * word, int ignoreflagbits, int allhits,
  187                          int pfxopts, int sfxopts));
 188
 189 #ifndef NO_CAPITALIZATION_SUPPORT
 190
 191 /*!
 192 ** See if this particular capitalization (dent) is legal with these
 193 ** particular affixes.
 194 **
 195 ** \param dent
 196 ** \param hit
 197 **
 198 ** \return
 199 */
 200 static int entryhasaffixes (struct dent *dent, struct success *hit)
 201 {
 202     if (hit->prefix  &&  !TSTMASKBIT (dent->mask, hit->prefix->flagbit))
 203                 return 0;
 204     if (hit->suffix  &&  !TSTMASKBIT (dent->mask, hit->suffix->flagbit))
 205                 return 0;
 206     return 1;                   /* Yes, these affixes are legal */
 207 }
 208
 209 /*
 210  * \param word
 211  * \param hit
 212  * \param len
 213  *
 214  * \return
 215  */
 216 int ISpellChecker::cap_ok (ichar_t *word, struct success *hit, int len)
 217 {
 218     register ichar_t *          dword;
 219     register ichar_t *          w;
 220     register struct dent *      dent;
 221     ichar_t                     dentword[INPUTWORDLEN + MAXAFFIXLEN];
 222     int                         preadd;
 223     int                         prestrip;
 224     int                         sufadd;
 225     ichar_t *           limit;
 226     long                        thiscap;
 227     long                        dentcap;
 228
 229     thiscap = whatcap (word);
 230     /*
 231     ** All caps is always legal, regardless of affixes.
 232     */
 233     preadd = prestrip = sufadd = 0;
 234     if (thiscap == ALLCAPS)
 235                 return 1;
 236     else if (thiscap == FOLLOWCASE)
 237         {
 238                 /* Set up some constants for the while(1) loop below */
 239                 if (hit->prefix)
 240                 {
 241                         preadd = hit->prefix->affl;
 242                         prestrip = hit->prefix->stripl;
 243                 }
 244                 else
 245                         preadd = prestrip = 0;
 246                 sufadd = hit->suffix ? hit->suffix->affl : 0;
 247         }
 248     /*
 249     ** Search the variants for one that matches what we have.  Note
 250     ** that thiscap can't be ALLCAPS, since we already returned
 251     ** for that case.
 252     */
 253     dent = hit->dictent;
 254     for (  ;  ;  )
 255         {
 256                 dentcap = captype (dent->flagfield);
 257                 if (dentcap != thiscap)
 258                 {
 259                         if (dentcap == ANYCASE  &&  thiscap == CAPITALIZED
 260                          &&  entryhasaffixes (dent, hit))
 261                                 return 1;
 262                 }
 263                 else                            /* captypes match */
 264                 {
 265                         if (thiscap != FOLLOWCASE)
 266                         {
 267                                 if (entryhasaffixes (dent, hit))
 268                                         return 1;
 269                         }
 270                         else
 271                         {
 272                                 /*
 273                                 ** Make sure followcase matches exactly.
 274                                 ** Life is made more difficult by the
 275                                 ** possibility of affixes.  Start with
 276                                 ** the prefix.
 277                                 */
 278                                 strtoichar (dentword, dent->word, INPUTWORDLEN, 1);
 279                                 dword = dentword;
 280                                 limit = word + preadd;
 281                                 if (myupper (dword[prestrip]))
 282                                 {
 283                                         for (w = word;  w < limit;  w++)
 284                                         {
 285                                                 if (mylower (*w))
 286                                                         goto doublecontinue;
 287                                         }
 288                                 }
 289                                 else
 290                                 {
 291                                         for (w = word;  w < limit;  w++)
 292                                         {
 293                                                 if (myupper (*w))
 294                                                         goto doublecontinue;
 295                                         }
 296                                 }
 297                                 dword += prestrip;
 298                                 /* Do root part of word */
 299                                 limit = dword + len - preadd - sufadd;
 300                                 while (dword < limit)
 301                                 {
 302                                         if (*dword++ != *w++)
 303                                                 goto doublecontinue;
 304                                 }
 305                                 /* Do suffix */
 306                                 dword = limit - 1;
 307                                 if (myupper (*dword))
 308                                 {
 309                                         for (  ;  *w;  w++)
 310                                         {
 311                                                 if (mylower (*w))
 312                                                         goto doublecontinue;
 313                                         }
 314                                 }
 315                                 else
 316                                 {
 317                                         for (  ;  *w;  w++)
 318                                         {
 319                                                 if (myupper (*w))
 320                                                         goto doublecontinue;
 321                                         }
 322                                 }
 323                                 /*
 324                                 ** All failure paths go to "doublecontinue,"
 325                                 ** so if we get here it must match.
 326                                 */
 327                                 if (entryhasaffixes (dent, hit))
 328                                         return 1;
 329                                 doublecontinue: ;
 330                         }
 331                 }
 332                 if ((dent->flagfield & MOREVARIANTS) == 0)
 333                         break;
 334                 dent = dent->next;
 335         }
 336
 337     /* No matches found */
 338     return 0;
 339 }
 340 #endif
 341
 342 #ifndef NO_CAPITALIZATION_SUPPORT
 343 /*!
 344  * \param w Word to look up
 345  * \param ignoreflagbits NZ to ignore affix flags in dict
 346  * \param allhits NZ to ignore case, get every hit
 347  * \param pfxopts Options to apply to prefixes
 348  * \param sfxopts Options to apply to suffixes
 349  *
 350  * \return
 351  */
 352 int ISpellChecker::good (ichar_t *w, int ignoreflagbits, int allhits, int pfxopts, int sfxopts)
 353 #else
 354 /* ARGSUSED */
 355 int ISpellChecker::good (ichar_t *w, int ignoreflagbits, int dummy, int pfxopts, int sfxopts)
 356 #endif
 357 {
 358     ichar_t             nword[INPUTWORDLEN + MAXAFFIXLEN];
 359     register ichar_t *  p;
 360     register ichar_t *  q;
 361     register int        n;
 362     register struct dent * dp;
 363
 364     /*
 365     ** Make an uppercase copy of the word we are checking.
 366     */
 367     for (p = w, q = nword;  *p;  )
 368                 *q++ = mytoupper (*p++);
 369     *q = 0;
 370     n = q - nword;
 371
 372     m_numhits = 0;
 373
 374     if ((dp = ispell_lookup (nword, 1)) != NULL)
 375         {
 376                 m_hits[0].dictent = dp;
 377                 m_hits[0].prefix = NULL;
 378                 m_hits[0].suffix = NULL;
 379 #ifndef NO_CAPITALIZATION_SUPPORT
 380                 if (allhits  ||  cap_ok (w, &m_hits[0], n))
 381                         m_numhits = 1;
 382 #else
 383                 m_numhits = 1;
 384 #endif
 385         }
 386
 387     if (m_numhits  &&  !allhits)
 388                 return 1;
 389
 390     /* try stripping off affixes */
 391
 392     chk_aff (w, nword, n, ignoreflagbits, allhits, pfxopts, sfxopts);
 393
 394     return m_numhits;
 395 }
 396
 397
 398
 399