1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
3 * good.c - see if a word or its root word
4 * is in the dictionary.
8 * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All modifications to the source code must be clearly marked as
21 * such. Binary redistributions based on modified source code
22 * must be clearly marked as modified versions in the documentation
23 * and/or other materials provided with the distribution.
24 * 4. All advertising materials mentioning features or use of this software
25 * must display the following acknowledgment:
26 * This product includes software developed by Geoff Kuenning and
27 * other unpaid contributors.
28 * 5. The name of Geoff Kuenning may not be used to endorse or promote
29 * products derived from this software without specific prior
32 * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
33 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
36 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
38 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
39 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
40 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
41 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * Revision 1.4 2003/08/14 17:51:26 dom
48 * update license - exception clause should be Lesser GPL
50 * Revision 1.3 2003/07/28 20:40:25 dom
51 * fix up the license clause, further win32-registry proof some directory getting functions
53 * Revision 1.2 2003/07/16 22:52:37 dom
54 * LGPL + exception license
56 * Revision 1.1 2003/07/15 01:15:04 dom
57 * ispell enchant backend
59 * Revision 1.2 2003/01/29 05:50:11 hippietrail
61 * Fixed my mess in EncodingManager.
62 * Changed many C casts to C++ casts.
64 * Revision 1.1 2003/01/24 05:52:32 hippietrail
66 * Refactored ispell code. Old ispell global variables had been put into
67 * an allocated structure, a pointer to which was passed to many functions.
68 * I have now made all such functions and variables private members of the
69 * ISpellChecker class. It was C OO, now it's C++ OO.
71 * I've fixed the makefiles and tested compilation but am unable to test
72 * operation. Please back out my changes if they cause problems which
73 * are not obvious or easy to fix.
75 * Revision 1.6 2003/01/06 18:48:38 dom
76 * ispell cleanup, start of using new 'add' save features
78 * Revision 1.5 2002/09/19 05:31:15 hippietrail
80 * More Ispell cleanup. Conditional globals and DEREF macros are removed.
81 * K&R function declarations removed, converted to Doxygen style comments
82 * where possible. No code has been changed (I hope). Compiles for me but
85 * Revision 1.4 2002/09/17 03:03:29 hippietrail
87 * After seeking permission on the developer list I've reformatted all the
88 * spelling source which seemed to have parts which used 2, 3, 4, and 8
89 * spaces for tabs. It should all look good with our standard 4-space
91 * I've concentrated just on indentation in the actual code. More prettying
93 * * NO code changes were made *
95 * Revision 1.3 2002/09/13 17:20:12 mpritchett
96 * Fix more warnings for Linux build
98 * Revision 1.2 2001/05/12 16:05:42 thomasf
99 * Big pseudo changes to ispell to make it pass around a structure rather
100 * than rely on all sorts of globals willy nilly here and there. Also
101 * fixed our spelling class to work with accepting suggestions once more.
102 * This code is dirty, gross and ugly (not to mention still not supporting
103 * multiple hash sizes just yet) but it works on my machine and will no
104 * doubt break other machines.
106 * Revision 1.1 2001/04/15 16:01:24 tomas_f
109 * Revision 1.5 2000/02/09 22:35:25 sterwill
110 * Clean up some warnings
112 * Revision 1.4 1998/12/29 14:55:32 eric
114 * I've doctored the ispell code pretty extensively here. It is now
115 * warning-free on Win32. It also *works* on Win32 now, since I
116 * replaced all the I/O calls with ANSI standard ones.
118 * Revision 1.3 1998/12/28 23:11:30 eric
120 * modified spell code and integration to build on Windows.
121 * This is still a hack.
123 * Actually, it doesn't yet WORK on Windows. It just builds.
124 * SpellCheckInit is failing for some reason.
126 * Revision 1.2 1998/12/28 22:16:22 eric
128 * These changes begin to incorporate the spell checker into AbiWord. Most
131 * 1. added other/spell to the -I list in config/abi_defs
132 * 2. replaced other/spell/Makefile with one which is more like
134 * 3. added other/spell to other/Makefile so that the build will now
135 * dive down and build the spell check library.
136 * 4. added the AbiSpell library to the Makefiles in wp/main
137 * 5. added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp.
138 * This call is a HACK and should be replaced with something
140 * 6. added code to fv_View.cpp as follows:
141 * whenever you double-click on a word, the spell checker
142 * verifies that word and prints its status to stdout.
145 * 1. This will break the Windows build. I'm going to work on fixing it
147 * 2. This only works if your dictionary is in /usr/lib/ispell/american.hash.
148 * The dictionary location is currently hard-coded. This will be
151 * Anyway, such as it is, it works.
153 * Revision 1.1 1998/12/28 18:04:43 davet
154 * Spell checker code stripped from ispell. At this point, there are
155 * two external routines... the Init routine, and a check-a-word routine
156 * which returns a boolean value, and takes a 16 bit char string.
157 * The code resembles the ispell code as much as possible still.
159 * Revision 1.43 1994/11/02 06:56:05 geoff
160 * Remove the anyword feature, which I've decided is a bad idea.
162 * Revision 1.42 1994/10/25 05:45:59 geoff
163 * Add support for an affix that will work with any word, even if there's
166 * Revision 1.41 1994/05/24 06:23:06 geoff
167 * Let tgood decide capitalization questions, rather than doing it ourselves.
169 * Revision 1.40 1994/05/17 06:44:10 geoff
170 * Add support for controlled compound formation and the COMPOUNDONLY
171 * option to affix flags.
173 * Revision 1.39 1994/01/25 07:11:31 geoff
174 * Get rid of all old RCS log lines in preparation for the 3.1 release.
183 #include "ispell_checker.h"
/*
** Forward declaration of good().  The parameter list is wrapped in
** P (( ... )); P is not defined in this excerpt -- presumably a
** prototype-compatibility macro supplied by the included headers
** (TODO confirm).
**
** NOTE(review): the definition visible later in this file is the member
** function ISpellChecker::good, so this free-function declaration may be
** a leftover -- confirm before relying on it.
*/
186 int good P ((ichar_t * word, int ignoreflagbits, int allhits,
187 int pfxopts, int sfxopts));
189 #ifndef NO_CAPITALIZATION_SUPPORT
192 ** See if this particular capitalization (dent) is legal with these
193 ** particular affixes.
/*
** entryhasaffixes: check one dictionary entry against the affixes of a hit.
**
** dent - dictionary entry whose flag mask (dent->mask) is tested
** hit  - candidate match; hit->prefix and hit->suffix are each tested for
**        non-null before use, so a missing affix is simply not checked
**
** Returns 1 when neither check below fails.  The statement executed when a
** check fails is not visible in this excerpt (presumably a return of 0 --
** TODO confirm against the full source).
*/
200 static int entryhasaffixes (struct dent *dent, struct success *hit)
/* Prefix present, and TSTMASKBIT (presumably a bit test) fails for its flag bit in the entry mask. */
202 if (hit->prefix && !TSTMASKBIT (dent->mask, hit->prefix->flagbit))
/* Same check for the suffix. */
204 if (hit->suffix && !TSTMASKBIT (dent->mask, hit->suffix->flagbit))
206 return 1; /* Yes, these affixes are legal */
/*
** cap_ok: decide whether the capitalization of word is acceptable for a hit.
**
** word - the word being checked; its capitalization class comes from whatcap
** hit  - candidate match; hit->prefix and hit->suffix supply affix lengths
**        and are passed on to entryhasaffixes
** len  - combined with preadd and sufadd to bound the root comparison loop
**        (presumably the length of word -- TODO confirm with callers)
**
** NOTE(review): many statements of this function (braces, returns, some
** declarations, loop headers and labels) are not visible in this excerpt,
** so exact return values cannot be read off here.  The surviving comments
** state that an all-caps word is always legal, that the dictionary variants
** are then searched for one whose capitalization matches, that FOLLOWCASE
** entries must match exactly (prefix part, root part, then the rest), and
** that the fall-through case means no match was found.
*/
216 int ISpellChecker::cap_ok (ichar_t *word, struct success *hit, int len)
218 register ichar_t * dword;
219 register ichar_t * w;
220 register struct dent * dent;
/* Scratch buffer; filled from dent->word by strtoichar in the FOLLOWCASE comparison further down. */
221 ichar_t dentword[INPUTWORDLEN + MAXAFFIXLEN];
/* Capitalization class of the word; compared below against ALLCAPS, FOLLOWCASE and CAPITALIZED. */
229 thiscap = whatcap (word);
231 ** All caps is always legal, regardless of affixes.
/* Affix lengths default to zero; they are only filled in for FOLLOWCASE words. */
233 preadd = prestrip = sufadd = 0;
234 if (thiscap == ALLCAPS)
236 else if (thiscap == FOLLOWCASE)
238 /* Set up some constants for the while(1) loop below */
/* Taken from the prefix: affl and stripl (presumably the lengths added and stripped by the affix -- TODO confirm). */
241 preadd = hit->prefix->affl;
242 prestrip = hit->prefix->stripl;
/* Alternative branch (its condition is not visible here): no prefix contribution. */
245 preadd = prestrip = 0;
/* Suffix contribution is zero when the hit has no suffix. */
246 sufadd = hit->suffix ? hit->suffix->affl : 0;
249 ** Search the variants for one that matches what we have. Note
250 ** that thiscap can't be ALLCAPS, since we already returned
256 dentcap = captype (dent->flagfield);
257 if (dentcap != thiscap)
259 if (dentcap == ANYCASE && thiscap == CAPITALIZED
260 && entryhasaffixes (dent, hit))
263 else /* captypes match */
265 if (thiscap != FOLLOWCASE)
267 if (entryhasaffixes (dent, hit))
273 ** Make sure followcase matches exactly.
274 ** Life is made more difficult by the
275 ** possibility of affixes. Start with
278 strtoichar (dentword, dent->word, INPUTWORDLEN, 1);
280 limit = word + preadd;
281 if (myupper (dword[prestrip]))
283 for (w = word; w < limit; w++)
291 for (w = word; w < limit; w++)
298 /* Do root part of word */
299 limit = dword + len - preadd - sufadd;
300 while (dword < limit)
302 if (*dword++ != *w++)
307 if (myupper (*dword))
324 ** All failure paths go to "doublecontinue,"
325 ** so if we get here it must match.
327 if (entryhasaffixes (dent, hit))
332 if ((dent->flagfield & MOREVARIANTS) == 0)
337 /* No matches found */
342 #ifndef NO_CAPITALIZATION_SUPPORT
344 * \param w Word to look up
345 * \param ignoreflagbits NZ to ignore affix flags in dict
346 * \param allhits NZ to ignore case, get every hit
347 * \param pfxopts Options to apply to prefixes
348 * \param sfxopts Options to apply to suffixes
/*
** good: look a word up in the dictionary, first directly and then by
** trying affixes.
**
** Two alternative signatures appear below.  An #ifndef
** NO_CAPITALIZATION_SUPPORT precedes them, but the preprocessor lines that
** separate and close the alternatives are not visible in this excerpt.
** The two signatures differ only in the name of the third parameter
** (allhits vs. dummy).
**
** Visible behavior:
**  - copies w into nword, converting each character with mytoupper
**  - calls ispell_lookup (nword, 1); a non-null result is recorded in
**    m_hits[0] with prefix and suffix both set to NULL
**  - under capitalization support the direct hit is further qualified by
**    allhits || cap_ok (w, &m_hits[0], n)
**  - tests m_numhits && !allhits before the affix stage (the guarded
**    statement is not visible here)
**  - calls chk_aff to try stripping affixes
**
** NOTE(review): n is used below but its declaration and assignment are not
** visible in this excerpt, and neither are the return statements.
** NOTE(review): the copy loop writes into nword with no visible bounds
** check -- confirm that callers limit the length of w so it fits in
** INPUTWORDLEN + MAXAFFIXLEN elements.
*/
352 int ISpellChecker::good (ichar_t *w, int ignoreflagbits, int allhits, int pfxopts, int sfxopts)
355 int ISpellChecker::good (ichar_t *w, int ignoreflagbits, int dummy, int pfxopts, int sfxopts)
/* Holds the converted copy of w that is handed to ispell_lookup and chk_aff. */
358 ichar_t nword[INPUTWORDLEN + MAXAFFIXLEN];
359 register ichar_t * p;
360 register ichar_t * q;
362 register struct dent * dp;
365 ** Make an uppercase copy of the word we are checking.
/* The loop stops at the terminating zero of w; the terminator itself is not copied by these two lines. */
367 for (p = w, q = nword; *p; )
368 *q++ = mytoupper (*p++);
/* Direct lookup of the converted word, before any affix handling. */
374 if ((dp = ispell_lookup (nword, 1)) != NULL)
/* Record the direct hit: dictionary entry only, no affixes involved. */
376 m_hits[0].dictent = dp;
377 m_hits[0].prefix = NULL;
378 m_hits[0].suffix = NULL;
379 #ifndef NO_CAPITALIZATION_SUPPORT
/* allhits bypasses the capitalization check performed by cap_ok. */
380 if (allhits || cap_ok (w, &m_hits[0], n))
/* A hit already exists and the caller did not ask for every hit. */
387 if (m_numhits && !allhits)
390 /* try stripping off affixes */
392 chk_aff (w, nword, n, ignoreflagbits, allhits, pfxopts, sfxopts);